Grafoscopio/src/Dataviz/TwitterProfile.class.st

482 lines
14 KiB
Smalltalk

"
I represent the profile of an account on Twitter: http://twitter.com
I store the information published on that profile (name, bio, avatar, counters and tweets).
"
Class {
	#name : #TwitterProfile,
	#superclass : #Object,
	#instVars : [
		'url',
		'avatar',
		'following',
		'followers',
		'totalTweets',
		'favs',
		'bio',
		'name',
		'screenName',
		'joinDate',
		'location',
		'lastTweets',
		'mostTweets',
		'tweets'
	],
	#category : #'Dataviz-Twitter'
}
{ #category : #'data cleaning' }
TwitterProfile >> asDateAndTime: aString [
	"Answer a DateAndTime for aString, which is expected to use the Spanish format of the Twitter profile main page: an hour, a dash, and then day, abbreviated month and year separated by the word de (for example: 12:34 - 5 de mar. de 2012).
	Only the date part is used; the hour is ignored.
	An unknown month abbreviation makes the dictionary lookup fail."
	| pieces day monthKey year monthNumbers |
	"Splitting on the dash and on de leaves: 1 = hour, 2 = day, 3 = month, 4 = year"
	pieces := aString findBetweenSubStrs: #('-' 'de').
	"Strip the blanks left around each piece and zero-pad the day, so that the
	string handed to the parser is a clean yyyy-mm-dd date"
	day := (pieces at: 2) trimBoth padLeftTo: 2 with: $0.
	monthKey := (((pieces at: 3) copyReplaceAll: ' ' with: '') copyUpTo: $.) asLowercase.
	year := (pieces at: 4) trimBoth.
	monthNumbers := Dictionary new.
	#('ene' 'feb' 'mar' 'abr' 'may' 'jun' 'jul' 'ago' 'sep' 'oct' 'nov' 'dic')
		doWithIndex: [ :abbreviation :index | 
			monthNumbers at: abbreviation put: (index printString padLeftTo: 2 with: $0) ].
	^ (year , '-' , (monthNumbers at: monthKey) , '-' , day) asDateAndTime
]
{ #category : #'data cleaning' }
TwitterProfile >> asNumber: aString [
	"Answer the number written in aString, following the styles the Twitter main profile page uses for its counters:
	- a dot is a thousands separator: '1.234' -> 1234
	- a K suffix means thousands, with a comma as decimal mark: '12,3K' -> 12300
	- an M suffix means millions, with a comma as decimal mark: '1,2M' -> 1200000
	- anything else is parsed as it is: '567' -> 567"
	| scaled |
	(aString includes: $.)
		ifTrue: [ ^ (aString copyWithoutAll: '.') asNumber ].
	"Removes the suffix, turns the decimal comma into a dot and applies the factor.
	The product is rounded (not truncated) so that floating point error cannot drop a unit."
	scaled := [ :suffix :factor | 
		(((aString copyWithoutAll: suffix) copyReplaceAll: ',' with: '.') asNumber * factor) rounded ].
	(aString includes: $K)
		ifTrue: [ ^ scaled value: 'K' value: 1000 ].
	(aString includes: $M)
		ifTrue: [ ^ scaled value: 'M' value: 1000000 ].
	^ aString asNumber
]
{ #category : #accessing }
TwitterProfile >> avatar [
"Answer my avatar. The scraping methods store here the image answered by scrapAvatarFrom:, and showWordCloud hands it to RTBitmap form:. It is nil until it is set."
^ avatar
]
{ #category : #accessing }
TwitterProfile >> avatar: anObject [
"Set my avatar to anObject."
avatar := anObject
]
{ #category : #accessing }
TwitterProfile >> bio [
"Answer my bio. scrapDataForProfile: reads it from the ProfileHeaderCard-bio element of the profile page."
^ bio
]
{ #category : #accessing }
TwitterProfile >> bio: anObject [
"Set my bio to anObject."
bio := anObject
]
{ #category : #'data scrapping' }
TwitterProfile >> detectJPEGAvatarFrom: aHtmlString [
	"Answer true when the avatar image referenced in a Twitter profile main page is a JPEG, that is, when its address ends with .jpeg or .jpg (ignoring case). Answer false otherwise.
	Despite its name, aHtmlString must be the parsed page: it has to understand findAllTagsByClass: (the scraping methods of this class build such a document with Soup fromString:).
	Fails if the page has no element of class ProfileAvatar-image."
	| imageTag address |
	imageTag := (aHtmlString findAllTagsByClass: 'ProfileAvatar-image') at: 1.
	address := ((imageTag attributeAt: 'src') copyReplaceAll: '400x400' with: '200x200') asLowercase.
	^ #('.jpeg' '.jpg') anySatisfy: [ :extension | address endsWith: extension ]
]
{ #category : #accessing }
TwitterProfile >> favs [
"Answer my favs counter. NOTE: scrapDataForProfile: does not fill it (its scraping line is commented out); it is set by loadDataFor:fromDatabase:, loadDataFromFile: or favs:."
^ favs
]
{ #category : #accessing }
TwitterProfile >> favs: anObject [
"Set my favs counter to anObject."
favs := anObject
]
{ #category : #accessing }
TwitterProfile >> followers [
"Answer my followers counter. The scraping methods take it from the third ProfileNav-value element of the profile page, converted with asNumber:."
^ followers
]
{ #category : #accessing }
TwitterProfile >> followers: anObject [
"Set my followers counter to anObject."
followers := anObject
]
{ #category : #accessing }
TwitterProfile >> following [
"Answer my following counter. scrapDataForProfile: takes it from the second ProfileNav-value element of the profile page, converted with asNumber:."
^ following
]
{ #category : #accessing }
TwitterProfile >> following: anObject [
"Set my following counter to anObject."
following := anObject
]
{ #category : #initialization }
TwitterProfile >> initialize [
"Initialize a newly created TwitterProfile. Nothing is done beyond the superclass initialization, so no instance variable is given a value here."
super initialize.
]
{ #category : #accessing }
TwitterProfile >> joinDate [
"Answer my join date. NOTE: scrapDataForProfile: does not fill it (its scraping line is commented out); it is set by loadDataFromFile: or joinDate:."
^ joinDate
]
{ #category : #accessing }
TwitterProfile >> joinDate: anObject [
"Set my join date to anObject."
joinDate := anObject
]
{ #category : #accessing }
TwitterProfile >> lastTweets [
"Answer my last tweets. scrapDataForProfile: stores here the texts of the ProfileTweet-text elements of the profile page. showWordCloud uses them when mostTweets is nil."
^ lastTweets
]
{ #category : #accessing }
TwitterProfile >> lastTweets: anObject [
"Set my last tweets to anObject."
lastTweets := anObject
]
{ #category : #'data storage / persistence' }
TwitterProfile >> loadDataFor: aProfileName fromDatabase: aDataBaseFile [
	"Fill my screenName, name, avatar, bio, favs, followers and following with the first row of the profiles table, in the SQLite file aDataBaseFile, whose screenName is aProfileName.
	Taking the first row fails when the query answers no rows.
	NOTE(review): populateDataBase: also stores location, but it is not read back here; confirm whether that is intended."
	| db row |
	db := UDBCSQLite3Connection on: aDataBaseFile.
	db open.
	"The profile name is bound as a parameter instead of being concatenated into the
	SQL text: a name containing quotes can no longer alter the statement, and SQLite
	cannot mistake a double-quoted value for a column name.
	ensure: closes the connection even when the query fails."
	row := [ (db execute: 'SELECT * FROM profiles WHERE screenName = ?;' with: {aProfileName}) rows at: 1 ]
		ensure: [ db close ].
	self
		screenName: (row at: 'screenName');
		name: (row at: 'name');
		avatar: (row at: 'avatar');
		bio: (row at: 'bio');
		favs: (row at: 'favs');
		followers: (row at: 'followers');
		following: (row at: 'following')
]
{ #category : #'data storage / persistence' }
TwitterProfile >> loadDataFromFile: aFileReference [
	"Copy into myself the data of the twitter profile stored, in STON format, in aFileReference.
	Only contents is sent to aFileReference, so any object answering the STON text to that message can be used (loadDataFromUrl: passes a ReadStream)."
	| stored |
	stored := STON fromString: aFileReference contents.
	self
		avatar: stored avatar;
		bio: stored bio;
		favs: stored favs;
		followers: stored followers;
		following: stored following;
		joinDate: stored joinDate;
		lastTweets: stored lastTweets;
		location: stored location;
		mostTweets: stored mostTweets;
		name: stored name;
		screenName: stored screenName;
		totalTweets: stored totalTweets;
		url: stored url
]
{ #category : #'data storage / persistence' }
TwitterProfile >> loadDataFromUrl: anUrl [
	"Download the twitter profile published, in STON format, at anUrl and copy its data into myself by means of loadDataFromFile:.
	When the request is not successful nothing is loaded and nothing is reported."
	| request |
	request := ZnClient new.
	request get: anUrl.
	request isSuccess
		ifFalse: [ ^ self ].
	"loadDataFromFile: only asks its argument for contents, so a stream over the downloaded text is enough"
	self loadDataFromFile: request contents asString readStream
]
{ #category : #accessing }
TwitterProfile >> location [
"Answer my location. scrapDataForProfile: reads it from the ProfileHeaderCard-locationText element of the profile page."
^ location
]
{ #category : #accessing }
TwitterProfile >> location: anObject [
"Set my location to anObject."
location := anObject
]
{ #category : #accessing }
TwitterProfile >> mostTweets [
"Answer my collection of tweets scraped from a file. scrapTweetsFromFile: stores here the texts of the Tweet-text elements it finds. showWordCloud prefers it over lastTweets when it is not nil."
^ mostTweets
]
{ #category : #accessing }
TwitterProfile >> mostTweets: anObject [
"Set my collection of tweets scraped from a file to anObject."
mostTweets := anObject
]
{ #category : #accessing }
TwitterProfile >> name [
"Answer my name. scrapDataForProfile: reads it from the ProfileHeaderCard-nameLink element of the profile page."
^ name
]
{ #category : #accessing }
TwitterProfile >> name: anObject [
"Set my name to anObject."
name := anObject
]
{ #category : #'data storage / persistence' }
TwitterProfile >> populateDataBase: aDataBaseFile [
	"Store my data as a new row of the profiles table of the SQLite file aDataBaseFile, creating the table first when it does not exist.
	screenName is the primary key of the table, so inserting a profile whose screenName is already stored is rejected by the database."
	| db |
	db := UDBCSQLite3Connection on: aDataBaseFile.
	db open.
	"ensure: closes the connection even when one of the statements fails"
	[ 
	db
		execute:
			'create table if not exists profiles (
			screenName text primary key,
			name text,
			avatar blob,
			bio text,
			favs integer,
			followers integer,
			following integer,
			location text
			);'.
	"The columns are named explicitly so that the values do not depend on the
	order in which the columns of an existing table were declared"
	db
		execute:
			'INSERT INTO profiles (screenName, name, avatar, bio, favs, followers, following, location) values (?, ?, ?, ?, ?, ?, ?, ?);'
		with:
			{self screenName.
			self name.
			self avatar.
			self bio.
			self favs.
			self followers.
			self following.
			self location} ]
		ensure: [ db close ]
]
{ #category : #'data storage / persistence' }
TwitterProfile >> saveToFile: aFileReference [
	"Save myself to aFileReference in the (pretty printed) STON format.
	writeStreamDo: closes the stream when the block ends, even if writing fails, so the text is flushed to disk and the file handle is released.
	NOTE(review): if aFileReference already exists and is longer than the new text, confirm that the write stream truncates it; otherwise delete the file before saving."
	aFileReference
		writeStreamDo: [ :stream | stream nextPutAll: (STON toStringPretty: self) ]
]
{ #category : #dataweek }
TwitterProfile >> sayBye [
	"Open the Transcript and show a farewell message on it. A dummy example of how to create new messages"
	Transcript
		open;
		show: 'Adios, perfil de Twitter, despidiéndose. Pásala bueno ;-)'
]
{ #category : #dataweek }
TwitterProfile >> sayHello [
	"Open the Transcript and show a greeting on it. A dummy example of how to create new messages"
	Transcript
		open;
		show: 'Hola! soy un perfil de Twitter :-)'
]
{ #category : #'data scrapping' }
TwitterProfile >> scrapAvatarForProfile: aProfileName [
	"Download the main page of the twitter profile aProfileName, store its avatar image in myself and answer my avatar.
	aProfileName is the last part of the profile address (i.e: 'https://twitter.com/aProfileName').
	When the page cannot be downloaded the user is informed and my avatar is left as it was."
	| request |
	request := ZnClient new.
	request get: 'https://twitter.com/' , aProfileName.
	request isSuccess
		ifFalse: [ self inform: 'Algo salió mal. Verifique su conexión a Internet y que el contenido buscado estén disponibles' ]
		ifTrue: [ avatar := self scrapAvatarFrom: (Soup fromString: request contents asString) ].
	^ self avatar
]
{ #category : #'data scrapping' }
TwitterProfile >> scrapAvatarFrom: aHtmlString [
	"Answer the avatar image of a twitter profile main page.
	Despite its name, aHtmlString must be the parsed page: it has to understand findAllTagsByClass: (the callers in this class pass a document built with Soup fromString:).
	The address of the first ProfileAvatar-image element is rewritten to ask for the 200x200 version instead of the 400x400 one, the image is downloaded and ImageReadWriter decodes it."
	| imageTag smallImageAddress response |
	imageTag := (aHtmlString findAllTagsByClass: 'ProfileAvatar-image') at: 1.
	smallImageAddress := (imageTag attributeAt: 'src') copyReplaceAll: '400x400' with: '200x200'.
	response := ZnEasy get: smallImageAddress.
	^ ImageReadWriter formFromStream: response contents readStream
]
{ #category : #'data scrapping' }
TwitterProfile >> scrapDataForProfile: aProfileName [
	"Download the main page of the twitter profile aProfileName and fill myself with the data found there. Answer myself.
	aProfileName is the last part of the profile address (i.e: 'https://twitter.com/aProfileName').
	favs and joinDate are not scraped at the moment.
	When the page cannot be downloaded the user is informed and nothing is changed."
	| request page counters firstTagOfClass |
	request := ZnClient new.
	request get: 'https://twitter.com/' , aProfileName.
	request isSuccess
		ifFalse: [ 
			self inform: 'Algo salió mal. Verifique su conexión a Internet y que el contenido buscado estén disponibles'.
			^ self ].
	page := Soup fromString: request contents asString.
	"Answers the first element of the page having the given CSS class; fails when there is none"
	firstTagOfClass := [ :cssClass | (page findAllTagsByClass: cssClass) at: 1 ].
	"The counters of the navigation bar, in page order: tweets, following, followers"
	counters := (page findAllTagsByClass: 'ProfileNav-value') collect: [ :each | each text ].
	totalTweets := self asNumber: (counters at: 1).
	following := self asNumber: (counters at: 2).
	followers := self asNumber: (counters at: 3).
	bio := (firstTagOfClass value: 'ProfileHeaderCard-bio') next contents.
	avatar := self scrapAvatarFrom: page.
	name := (firstTagOfClass value: 'ProfileHeaderCard-nameLink') next contents.
	screenName := aProfileName.
	url := (firstTagOfClass value: 'ProfileHeaderCard-urlText') next attributeAt: 'title'.
	location := (firstTagOfClass value: 'ProfileHeaderCard-locationText') next contents.
	lastTweets := (page findAllTagsByClass: 'ProfileTweet-text') collect: [ :each | each text ].
	^ self
]
{ #category : #'data scrapping' }
TwitterProfile >> scrapFollowersForProfile: aProfileName [
	"Download the main page of the twitter profile aProfileName, store its followers counter in myself and answer my followers.
	The counter is the third ProfileNav-value element of the page, converted with asNumber:.
	When the page cannot be downloaded the user is informed and my followers are left as they were."
	| request counters |
	request := ZnClient new.
	request get: 'https://twitter.com/' , aProfileName.
	request isSuccess
		ifFalse: [ self inform: 'Algo salió mal. Verifique su conexión a Internet y que el contenido buscado estén disponibles' ]
		ifTrue: [ 
			counters := ((Soup fromString: request contents asString) findAllTagsByClass: 'ProfileNav-value')
				collect: [ :each | each text ].
			followers := self asNumber: (counters at: 3) ].
	^ self followers
]
{ #category : #'data scrapping' }
TwitterProfile >> scrapTweetsFromFile: aFile [
	"Store in mostTweets the texts of the Tweet-text elements found in the contents of aFile, which holds tweets scraped from a public profile. Answer myself.
	When aFile is nil the user is informed and mostTweets is left as it was."
	| page |
	aFile isNil
		ifTrue: [ 
			self inform: 'Algo salió mal. Verifique que el contenido del archivo sea el esperado'.
			^ self ].
	page := Soup fromString: aFile contents asString.
	mostTweets := (page findAllTagsByClass: 'Tweet-text') collect: [ :each | each text ].
	^ self
]
{ #category : #accessing }
TwitterProfile >> screenName [
"Answer my screen name. scrapDataForProfile: stores here the profile name it was given, and it is the primary key of the profiles table used by populateDataBase:."
^ screenName
]
{ #category : #accessing }
TwitterProfile >> screenName: anObject [
"Set my screen name to anObject."
screenName := anObject
]
{ #category : #'data visualization' }
TwitterProfile >> showWordCloud [
	"Answer a view with a word cloud made from my tweets (mostTweets when present, lastTweets otherwise), with my avatar added to it when I have one.
	Words shorter than two letters and the words listed in stopWords are left out of the cloud.
	I am supposed to be run on a profile which already has tweets."
	| stopWords tweetsSource rawText words cloudText tagView |
	stopWords := #('ahí' 'al' 'amp' 'ante' 'aquí' 'así' 'bit' 'cc' 'co' 'com' 'como' 'cómo' 'con' 'cual' 'cuando' 'cuándo' 'da' 'de' 'del' 'desde' 'días' 'do' 'el' 'en' 'entre' 'era' 'es' 'esa' 'ese' 'eso' 'esta' 'está' 'estamos' 'están' 'estarán' 'este' 'esto' 'estos' 'estoy' 'fb' 'fbid' 'girará' 'gl' 'gt' 'goo' 'ha' 'han' 'hay' 'he' 'hoy' 'http' 'https' 'hasta' 'la' 'las' 'le' 'les' 'lo' 'los' 'ly' 'mas' 'más' 'mi' 'muy' 'nbsp' 'ni' 'no' 'nos' 'org' 'otros' 'para' 'pero' 'pic' 'por' 'pues' 're' 'se' 'sea' 'ser' 'será' 'si' 'sin' 'sobre' 'solo' 'sólo' 'son' 'soy' 'su' 'sus' 'te' 'this' 'tiene' 'torno' 'tt' 'tu' 'twitter' 'type' 'php' 'pm' 'que' 'qué' 'quot' 'a' 'ud' 'un' 'una' 'uno' 'usted' 'utm' 'va' 'van' 'wp' 'www' 'ya' 'yo' 'youtu').
	tweetsSource := mostTweets ifNil: [ lastTweets ].
	"All the tweets in a single text, each one followed by a space"
	rawText := String
		streamContents: [ :out | 
			tweetsSource
				do: [ :each | 
					out
						nextPutAll: each asString;
						space ] ].
	"Every character which is not a letter separates words"
	words := (rawText splitOn: [ :character | character isLetter not ])
		reject: [ :word | word size < 2 or: [ stopWords includes: word asLowercase ] ].
	cloudText := String streamContents: [ :out | words asStringOn: out delimiter: String space ].
	tagView := RTNameCloud new addString: cloudText.
	tagView build.
	"The avatar is only added when there is one; a new element was added to the
	view, so the layout has to be applied again by hand"
	self avatar
		ifNotNil: [ :image | 
			tagView view add: (RTBitmap new form: image) element.
			RTRectanglePackLayout new on: tagView view elements ].
	^ tagView view
]
{ #category : #accessing }
TwitterProfile >> totalTweets [
"Answer my total tweets counter. scrapDataForProfile: takes it from the first ProfileNav-value element of the profile page, converted with asNumber:."
^ totalTweets
]
{ #category : #accessing }
TwitterProfile >> totalTweets: anObject [
"Set my total tweets counter to anObject."
totalTweets := anObject
]
{ #category : #accessing }
TwitterProfile >> tweets [
"Answer the value of tweets.
NOTE(review): this method needs tweets to be declared as an instance variable of TwitterProfile; verify the class definition declares it. No other method of this class reads or writes it (the scraping methods use lastTweets and mostTweets)."
^ tweets
]
{ #category : #accessing }
TwitterProfile >> tweets: anObject [
"Set tweets to anObject.
NOTE(review): this method needs tweets to be declared as an instance variable of TwitterProfile; verify the class definition declares it."
tweets := anObject
]
{ #category : #accessing }
TwitterProfile >> url [
"Answer my url. scrapDataForProfile: reads it from the title attribute of the element following the first ProfileHeaderCard-urlText element of the profile page."
^ url
]
{ #category : #accessing }
TwitterProfile >> url: anObject [
"Set my url to anObject."
url := anObject
]