482 lines
14 KiB
Smalltalk
482 lines
14 KiB
Smalltalk
"
|
|
I represent the profile of an account on Twitter: http://twitter.com
|
|
|
|
I store information about twitter profiles.
|
|
|
|
|
|
"
|
|
Class {
	#name : #TwitterProfile,
	#superclass : #Object,
	#instVars : [
		'url',
		'avatar',
		'following',
		'followers',
		'totalTweets',
		'favs',
		'bio',
		'name',
		'screenName',
		'joinDate',
		'location',
		'lastTweets',
		'mostTweets',
		'tweets'
	],
	#category : #'Dataviz-Twitter'
}
|
|
|
|
{ #category : #'data cleaning' }
TwitterProfile >> asDateAndTime: aString [
	"Convert aString, a date as shown on the Spanish-language Twitter profile page,
	into a DateAndTime. The text is split on '-' and 'de' into four tokens: time of day,
	day, abbreviated month and year (presumably something like
	'8:14 - 17 de abr. de 2012' - confirm against real page data).
	Only day, month and year are used; the time-of-day token is ignored.
	Signals KeyNotFound when the month is not one of the Spanish three-letter
	abbreviations listed below."

	| tokens day monthName monthNumber year |
	tokens := aString findBetweenSubStrs: #('-' 'de').
	"Tokens keep their surrounding blanks, so trim before converting to numbers."
	day := (tokens at: 2) trimBoth asNumber.
	"Drop blanks and the trailing period of the abbreviation; compare case-insensitively."
	monthName := (((tokens at: 3) copyReplaceAll: ' ' with: '') copyUpTo: $.) asLowercase.
	year := (tokens at: 4) trimBoth asNumber.
	monthNumber := #('ene' 'feb' 'mar' 'abr' 'may' 'jun' 'jul' 'ago' 'sep' 'oct' 'nov' 'dic')
		indexOf: monthName
		ifAbsent: [ KeyNotFound signalFor: monthName ].
	"Build the date from numeric parts instead of re-parsing a concatenated string."
	^ DateAndTime year: year month: monthNumber day: day
]
|
|
|
|
{ #category : #'data cleaning' }
TwitterProfile >> asNumber: aString [
	"Convert aString, a counter as displayed on the Twitter profile page, into a number.
	Three notations are handled, checked in this order:
	- a string containing '.': the dots are removed and the rest is converted as is;
	- a string containing 'K': ',' is read as the decimal mark and the value is multiplied by 1000;
	- a string containing 'M': same, multiplied by 1000000.
	Any other string is converted directly with #asNumber."

	| scale |
	"Strip the suffix, switch the decimal comma to a dot and scale. The product is
	rounded rather than truncated so floating point error cannot drop a unit."
	scale := [ :suffix :factor |
		(((aString copyWithoutAll: suffix) copyReplaceAll: ',' with: '.') asNumber * factor) rounded ].
	(aString includes: $.)
		ifTrue: [ ^ (aString copyWithoutAll: '.') asNumber ].
	(aString includes: $K)
		ifTrue: [ ^ scale value: 'K' value: 1000 ].
	(aString includes: $M)
		ifTrue: [ ^ scale value: 'M' value: 1000000 ].
	^ aString asNumber
]
|
|
|
|
{ #category : #accessing }
TwitterProfile >> avatar [
	"Answer the avatar stored in this profile, or nil when none has been set."

	^ avatar
]
|
|
|
|
{ #category : #accessing }
TwitterProfile >> avatar: anObject [
	"Store anObject as the avatar of this profile."

	avatar := anObject
]
|
|
|
|
{ #category : #accessing }
TwitterProfile >> bio [
	"Answer the bio stored in this profile, or nil when none has been set."

	^ bio
]
|
|
|
|
{ #category : #accessing }
TwitterProfile >> bio: anObject [
	"Store anObject as the bio of this profile."

	bio := anObject
]
|
|
|
|
{ #category : #'data scrapping' }
TwitterProfile >> detectJPEGAvatarFrom: aHtmlString [
	"Answer true when the 'src' attribute of the first tag with class
	'ProfileAvatar-image' ends with '.jpeg' or '.jpg' (case-insensitive), false otherwise.
	Despite its name, aHtmlString must understand #findAllTagsByClass:
	(sibling methods pass a document parsed with Soup).
	Fails when no tag with that class exists, because the first match is fetched with at: 1."

	| avatarUrl |
	avatarUrl := (((aHtmlString findAllTagsByClass: 'ProfileAvatar-image') at: 1) attributeAt: 'src') asLowercase.
	^ (avatarUrl endsWith: '.jpeg') or: [ avatarUrl endsWith: '.jpg' ]
]
|
|
|
|
{ #category : #accessing }
TwitterProfile >> favs [
	"Answer the favs value stored in this profile, or nil when none has been set."

	^ favs
]
|
|
|
|
{ #category : #accessing }
TwitterProfile >> favs: anObject [
	"Store anObject as the favs value of this profile."

	favs := anObject
]
|
|
|
|
{ #category : #accessing }
TwitterProfile >> followers [
	"Answer the followers value stored in this profile, or nil when none has been set."

	^ followers
]
|
|
|
|
{ #category : #accessing }
TwitterProfile >> followers: anObject [
	"Store anObject as the followers value of this profile."

	followers := anObject
]
|
|
|
|
{ #category : #accessing }
TwitterProfile >> following [
	"Answer the following value stored in this profile, or nil when none has been set."

	^ following
]
|
|
|
|
{ #category : #accessing }
TwitterProfile >> following: anObject [
	"Store anObject as the following value of this profile."

	following := anObject
]
|
|
|
|
{ #category : #initialization }
TwitterProfile >> initialize [
	"Set up a freshly created TwitterProfile. Only the inherited initialization runs;
	no instance variable is assigned here."

	super initialize
]
|
|
|
|
{ #category : #accessing }
TwitterProfile >> joinDate [
	"Answer the join date stored in this profile, or nil when none has been set."

	^ joinDate
]
|
|
|
|
{ #category : #accessing }
TwitterProfile >> joinDate: anObject [
	"Store anObject as the join date of this profile."

	joinDate := anObject
]
|
|
|
|
{ #category : #accessing }
TwitterProfile >> lastTweets [
	"Answer the lastTweets collection stored in this profile, or nil when none has been set."

	^ lastTweets
]
|
|
|
|
{ #category : #accessing }
TwitterProfile >> lastTweets: anObject [
	"Store anObject as the lastTweets value of this profile."

	lastTweets := anObject
]
|
|
|
|
{ #category : #'data storage / persistence' }
TwitterProfile >> loadDataFor: aProfileName fromDatabase: aDataBaseFile [
	"Fill this profile from the first row of the 'profiles' table in the SQLite file
	aDataBaseFile whose screenName equals aProfileName.
	Copies screenName, name, avatar, bio, favs, followers and following.
	Fails when no row matches, because the first row is fetched with at: 1.
	NOTE(review): the 'location' column written by populateDataBase: is not read back here."

	| db row |
	db := UDBCSQLite3Connection on: aDataBaseFile.
	db open.
	"The profile name is bound as a parameter instead of being concatenated into the
	SQL text, so quotes in the name cannot break or alter the query.
	The connection is closed even when the query or the row access fails."
	[ row := (db execute: 'SELECT * FROM profiles WHERE screenName = ?;' with: { aProfileName }) rows at: 1 ]
		ensure: [ db close ].
	self
		screenName: (row at: 'screenName');
		name: (row at: 'name');
		avatar: (row at: 'avatar');
		bio: (row at: 'bio');
		favs: (row at: 'favs');
		followers: (row at: 'followers');
		following: (row at: 'following')
]
|
|
|
|
{ #category : #'data storage / persistence' }
TwitterProfile >> loadDataFromFile: aFileReference [
	"Read the STON text answered by 'aFileReference contents', materialize the object it
	describes and copy its values into my own instance variables through its accessors."

	| stored |
	stored := STON fromString: aFileReference contents.
	"Identity"
	screenName := stored screenName.
	name := stored name.
	url := stored url.
	"Descriptive data"
	avatar := stored avatar.
	bio := stored bio.
	location := stored location.
	joinDate := stored joinDate.
	"Counters"
	totalTweets := stored totalTweets.
	following := stored following.
	followers := stored followers.
	favs := stored favs.
	"Tweet collections"
	lastTweets := stored lastTweets.
	mostTweets := stored mostTweets
]
|
|
|
|
{ #category : #'data storage / persistence' }
TwitterProfile >> loadDataFromUrl: anUrl [
	"Download the resource at anUrl with ZnClient and, when the request succeeds,
	load this profile from the downloaded text through loadDataFromFile:, which
	expects STON. When the request does not succeed the user is informed, as the
	scraping methods of this class do, instead of failing silently."

	| client |
	client := ZnClient new.
	client get: anUrl.
	client isSuccess
		ifTrue: [
			"loadDataFromFile: only sends #contents to its argument, so a read stream
			over the downloaded text can stand in for a file reference."
			self loadDataFromFile: (ReadStream on: client contents asString) ]
		ifFalse: [ self inform: 'Algo salió mal. Verifique su conexión a Internet y que el contenido buscado estén disponibles' ]
]
|
|
|
|
{ #category : #accessing }
TwitterProfile >> location [
	"Answer the location stored in this profile, or nil when none has been set."

	^ location
]
|
|
|
|
{ #category : #accessing }
TwitterProfile >> location: anObject [
	"Store anObject as the location of this profile."

	location := anObject
]
|
|
|
|
{ #category : #accessing }
TwitterProfile >> mostTweets [
	"Answer the mostTweets collection stored in this profile, or nil when none has been set."

	^ mostTweets
]
|
|
|
|
{ #category : #accessing }
TwitterProfile >> mostTweets: anObject [
	"Store anObject as the mostTweets value of this profile."

	mostTweets := anObject
]
|
|
|
|
{ #category : #accessing }
TwitterProfile >> name [
	"Answer the name stored in this profile, or nil when none has been set."

	^ name
]
|
|
|
|
{ #category : #accessing }
TwitterProfile >> name: anObject [
	"Store anObject as the name of this profile."

	name := anObject
]
|
|
|
|
{ #category : #'data storage / persistence' }
TwitterProfile >> populateDataBase: aDataBaseFile [
	"Store my data as one row of the 'profiles' table in the SQLite file aDataBaseFile.
	The table is created when it does not exist yet. screenName is the primary key and
	a plain INSERT is used, so storing a screenName that is already present makes the
	insert fail.
	The connection is closed even when one of the statements fails."

	| db |
	db := UDBCSQLite3Connection on: aDataBaseFile.
	db open.
	[
		"Create the profiles schema on first use."
		db execute:
			'create table if not exists profiles (
				screenName text primary key,
				name text,
				avatar blob,
				bio text,
				favs integer,
				followers integer,
				following integer,
				location text
			);'.
		"Insert my values, bound positionally in the column order of the schema."
		db execute: 'INSERT INTO profiles values (?, ?, ?, ?, ?, ?, ?, ?);'
			with: {
				self screenName.
				self name.
				self avatar.
				self bio.
				self favs.
				self followers.
				self following.
				self location} ]
		ensure: [ db close ]
]
|
|
|
|
{ #category : #'data storage / persistence' }
TwitterProfile >> saveToFile: aFileReference [
	"Write myself to aFileReference as pretty-printed STON.
	writeStreamDo: closes the stream when the block ends, so the text is flushed to
	disk and the file handle is released.
	NOTE(review): if the file already exists and is longer than the new text, stale
	bytes may remain after it - confirm whether callers need the file truncated first."

	aFileReference writeStreamDo: [ :stream |
		stream nextPutAll: (STON toStringPretty: self) ]
]
|
|
|
|
{ #category : #dataweek }
TwitterProfile >> sayBye [
	"Open the Transcript and show a farewell message on it.
	A dummy example of how to create new messages."

	Transcript
		open;
		show: 'Adios, perfil de Twitter, despidiéndose. Pásala bueno ;-)'
]
|
|
|
|
{ #category : #dataweek }
TwitterProfile >> sayHello [
	"Open the Transcript and show a greeting message on it.
	A dummy example of how to create new messages."

	Transcript
		open;
		show: 'Hola! soy un perfil de Twitter :-)'
]
|
|
|
|
{ #category : #'data scrapping' }
TwitterProfile >> scrapAvatarForProfile: aProfileName [
	"Download the page 'https://twitter.com/' followed by aProfileName and, when the
	request succeeds, store the avatar found in it (see scrapAvatarFrom:).
	When the request does not succeed the user is informed and the avatar is left as it was.
	Answer the avatar in both cases."

	| http |
	http := ZnClient new.
	http get: 'https://twitter.com/', aProfileName.
	http isSuccess
		ifFalse: [ self inform: 'Algo salió mal. Verifique su conexión a Internet y que el contenido buscado estén disponibles' ]
		ifTrue: [ avatar := self scrapAvatarFrom: (Soup fromString: http contents asString) ].
	^ self avatar
]
|
|
|
|
{ #category : #'data scrapping' }
TwitterProfile >> scrapAvatarFrom: aHtmlString [
	"Answer the avatar image of a Twitter profile page.
	The 'src' attribute of the first tag with class 'ProfileAvatar-image' is taken,
	'400x400' in it is replaced by '200x200', the resulting address is downloaded
	and the bytes are decoded with ImageReadWriter.
	Despite its name, aHtmlString must understand #findAllTagsByClass:
	(callers in this class pass a document parsed with Soup)."

	| imageTag scaledUrl |
	imageTag := (aHtmlString findAllTagsByClass: 'ProfileAvatar-image') at: 1.
	scaledUrl := (imageTag attributeAt: 'src') copyReplaceAll: '400x400' with: '200x200'.
	^ ImageReadWriter formFromStream: (ZnEasy get: scaledUrl) contents readStream
]
|
|
|
|
{ #category : #'data scrapping' }
TwitterProfile >> scrapDataForProfile: aProfileName [
	"Download the page 'https://twitter.com/' followed by aProfileName and fill myself
	with the data found in it: counters, bio, avatar, name, screen name, url, location
	and the tweets shown on the page. favs and joinDate are currently not scraped.
	When the request does not succeed the user is informed and nothing is changed.
	Answer myself in both cases."

	| http page counters firstTagOfClass |
	http := ZnClient new.
	http get: 'https://twitter.com/', aProfileName.
	http isSuccess ifFalse: [
		self inform: 'Algo salió mal. Verifique su conexión a Internet y que el contenido buscado estén disponibles'.
		^ self ].
	page := Soup fromString: http contents asString.
	"Answers the first tag carrying the given CSS class; fails when there is none."
	firstTagOfClass := [ :cssClass | (page findAllTagsByClass: cssClass) at: 1 ].
	"The navigation counters are read positionally: tweets, following, followers."
	counters := (page findAllTagsByClass: 'ProfileNav-value') collect: [ :tag | tag text ].
	totalTweets := self asNumber: (counters at: 1).
	following := self asNumber: (counters at: 2).
	followers := self asNumber: (counters at: 3).
	"Disabled: favs := self asNumber: (numericalData at: 4)."
	bio := (firstTagOfClass value: 'ProfileHeaderCard-bio') next contents.
	avatar := self scrapAvatarFrom: page.
	name := (firstTagOfClass value: 'ProfileHeaderCard-nameLink') next contents.
	screenName := aProfileName.
	"Disabled: joinDate := self asDateAndTime: (((source findAllTagsByClass: 'ProfileHeaderCard-joinDateText') at: 1) attributeAt: 'title')."
	url := (firstTagOfClass value: 'ProfileHeaderCard-urlText') next attributeAt: 'title'.
	location := (firstTagOfClass value: 'ProfileHeaderCard-locationText') next contents.
	lastTweets := (page findAllTagsByClass: 'ProfileTweet-text') collect: [ :tag | tag text ].
	^ self
]
|
|
|
|
{ #category : #'data scrapping' }
TwitterProfile >> scrapFollowersForProfile: aProfileName [
	"Download the page 'https://twitter.com/' followed by aProfileName and, when the
	request succeeds, store the followers counter found in it (the third
	'ProfileNav-value' tag). When the request does not succeed the user is informed.
	Answer the followers value in both cases."

	| http counters |
	http := ZnClient new.
	http get: 'https://twitter.com/', aProfileName.
	http isSuccess
		ifFalse: [ self inform: 'Algo salió mal. Verifique su conexión a Internet y que el contenido buscado estén disponibles' ]
		ifTrue: [
			counters := ((Soup fromString: http contents asString) findAllTagsByClass: 'ProfileNav-value')
				collect: [ :tag | tag text ].
			followers := self asNumber: (counters at: 3) ].
	^ self followers
]
|
|
|
|
{ #category : #'data scrapping' }
TwitterProfile >> scrapTweetsFromFile: aFile [
	"Parse the contents of aFile with Soup and store the text of every tag with class
	'Tweet-text' in mostTweets. When aFile is nil the user is informed instead.
	Answer myself in both cases."

	aFile
		ifNil: [ self inform: 'Algo salió mal. Verifique que el contenido del archivo sea el esperado' ]
		ifNotNil: [
			mostTweets := ((Soup fromString: aFile contents asString) findAllTagsByClass: 'Tweet-text')
				collect: [ :tag | tag text ] ].
	^ self
]
|
|
|
|
{ #category : #accessing }
TwitterProfile >> screenName [
	"Answer the screen name stored in this profile, or nil when none has been set."

	^ screenName
]
|
|
|
|
{ #category : #accessing }
TwitterProfile >> screenName: anObject [
	"Store anObject as the screen name of this profile."

	screenName := anObject
]
|
|
|
|
{ #category : #'data visualization' }
TwitterProfile >> showWordCloud [
	"Answer a Roassal view holding a word cloud built from my tweets plus my avatar.
	mostTweets is used when present, lastTweets otherwise. The profile is supposed
	not to be empty: with no tweets at all, or no avatar, this method fails.
	Words shorter than two characters and the stop words listed below are left out."

	| stopWords tweetsSource rawText words cloudText tagView |
	stopWords := #( 'ahí' 'al' 'amp' 'ante' 'aquí' 'así' 'bit' 'cc' 'co' 'com' 'como' 'cómo' 'con' 'cual' 'cuando' 'cuándo' 'da' 'de' 'del' 'desde' 'días' 'do' 'el' 'en' 'entre' 'era' 'es' 'esa' 'ese' 'eso' 'esta' 'está' 'estamos' 'están' 'estarán' 'este' 'esto' 'estos' 'estoy' 'fb' 'fbid' 'girará' 'gl' 'gt' 'goo' 'ha' 'han' 'hay' 'he' 'hoy' 'http' 'https' 'hasta' 'la' 'las' 'le' 'les' 'lo' 'los' 'ly' 'mas' 'más' 'mi' 'muy' 'nbsp' 'ni' 'no' 'nos' 'org' 'otros' 'para' 'pero' 'pic' 'por' 'pues' 're' 'se' 'sea' 'ser' 'será' 'si' 'sin' 'sobre' 'solo' 'sólo' 'son' 'soy' 'su' 'sus' 'te' 'this' 'tiene' 'torno' 'tt' 'tu' 'twitter' 'type' 'php' 'pm' 'que' 'qué' 'quot' 'a' 'ud' 'un' 'una' 'uno' 'usted' 'utm' 'va' 'van' 'wp' 'www' 'ya' 'yo' 'youtu').

	tweetsSource := mostTweets ifNil: [ lastTweets ].
	"Join all tweets into one text through a stream, avoiding repeated string copies."
	rawText := String streamContents: [ :out |
		tweetsSource do: [ :each | out nextPutAll: each asString; space ] ].
	"Every run of letters is a word; anything else is a separator."
	words := (rawText splitOn: [ :char | char isLetter not ])
		reject: [ :word | word size < 2 or: [ stopWords includes: word asLowercase ] ].
	cloudText := String streamContents: [ :out | words asStringOn: out delimiter: String space ].

	tagView := RTNameCloud new addString: cloudText.
	tagView build.
	tagView view add: (RTBitmap new form: self avatar) element.
	"The avatar was added after the cloud was built, so the layout is applied by hand."
	RTRectanglePackLayout new on: tagView view elements.
	^ tagView view
]
|
|
|
|
{ #category : #accessing }
TwitterProfile >> totalTweets [
	"Answer the totalTweets value stored in this profile, or nil when none has been set."

	^ totalTweets
]
|
|
|
|
{ #category : #accessing }
TwitterProfile >> totalTweets: anObject [
	"Store anObject as the totalTweets value of this profile."

	totalTweets := anObject
]
|
|
|
|
{ #category : #accessing }
TwitterProfile >> tweets [
	"Answer the value held in 'tweets'.
	NOTE(review): this name must be declared as an instance variable of TwitterProfile
	for the method to compile as intended - confirm against the class definition."

	^ tweets
]
|
|
|
|
{ #category : #accessing }
TwitterProfile >> tweets: anObject [
	"Store anObject in 'tweets'.
	NOTE(review): this name must be declared as an instance variable of TwitterProfile
	for the method to compile as intended - confirm against the class definition."

	tweets := anObject
]
|
|
|
|
{ #category : #accessing }
TwitterProfile >> url [
	"Answer the url stored in this profile, or nil when none has been set."

	^ url
]
|
|
|
|
{ #category : #accessing }
TwitterProfile >> url: anObject [
	"Store anObject as the url of this profile."

	url := anObject
]
|