478 lines
16 KiB
Smalltalk
478 lines
16 KiB
Smalltalk
"
|
|
Please comment me using the following template inspired by Class Responsibility Collaborator (CRC) design:
|
|
|
|
I'm TwitterMessages, a helper class to work with collections of tweet objects.
|
|
|
|
For the Responsibility part: Three sentences about my main responsibility, what I'm doing, what services do I offer.
|
|
|
|
For the Collaborators Part: State my main collaborators and one line about how I interact with them.
|
|
|
|
Public API and Key Messages
|
|
|
|
- message one
|
|
- message two
|
|
- what is the way to create instances is a plus.
|
|
|
|
One simple example is simply gorgeous.
|
|
|
|
Internal Representation and Key Implementation Points.
|
|
|
|
Instance Variables
|
|
messages: <OrderedCollection> the Tweet objects I hold; created empty on first access through #messages
|
|
|
|
|
|
Implementation Points
|
|
"
|
|
Class {
	#name : #TwitterMessages,
	#superclass : #Object,
	#instVars : [
		'messages'
	],
	#category : #'Dataviz-Twitter'
}
|
|
|
|
{ #category : #'data visualization' }
|
|
TwitterMessages >> activityHistogramFor: aProfileName in: aDataBaseFile [
	"I answer a Roassal RTGrapher holding a grouped bar chart of the monthly activity
	(tweets, retweets and replies) of aProfileName, using the data stored in aDataBaseFile.
	The database stores the individual tweets for this profile, with their type (tweet, retweet
	or reply), unique url and date. A proper schema of the data base still needs to be published;
	it is the one used in all references to aDataBaseFile.
	When there is no data for the profile I inform the user and answer myself."

	| activityData firstTweetMonth chartRows histogramData plot |
	activityData := self monthlyActivityDataFor: aProfileName in: aDataBaseFile.
	activityData isEmpty ifTrue: [
		self inform: 'There is no data for ', aProfileName, ' in the database'.
		^ self ].

	"Months before the first original tweet only hold retweets of old material (outliers),
	so they are dropped. When the profile has no original tweet at all, every month is kept
	instead of raising a NotFound error."
	firstTweetMonth := activityData
		detect: [ :each | (each value at: 1) > 0 ]
		ifNone: [ activityData first ].
	chartRows := OrderedCollection new.
	activityData do: [ :each |
		each key >= firstTweetMonth key ifTrue: [
			chartRows add: {
				each key asString.
				(each value at: 1).
				(each value at: 2).
				(each value at: 3) } ] ].

	"This part was adapted from the Roassal examples"
	plot := RTGrapher new.
	histogramData := RTMultipleData new.
	histogramData barShape color: Color green.
	histogramData
		points: chartRows;
		addMetric: #second;
		addMetric: #third;
		addMetric: #fourth.

	"Rotated month labels, integer Y axis"
	histogramData barChartUsing: (RTBarLabelFactory new label: #first; fontSize: 7).
	plot add: histogramData.
	plot axisY noDecimal.
	^ plot
]
|
|
|
|
{ #category : #'as yet unclassified' }
|
|
TwitterMessages >> ifEmpty: aBlockClosure [
	"When I hold no messages, evaluate aBlockClosure and answer its value;
	otherwise answer myself. The original evaluated the block but threw its result away."

	^ self messages isEmpty
		ifTrue: [ aBlockClosure value ]
		ifFalse: [ self ]
]
|
|
|
|
{ #category : #'data scrapping' }
|
|
TwitterMessages >> importFromJSONLocalFile: aFilePath [
	"I read the exported tweets stored in aFilePath (anything answering #readStream with the
	file contents), build a Tweet for each JSON entry and add it to my messages."

	| stream fileContents truncated jsonData |
	stream := aFilePath readStream.
	"Always release the stream, even if reading fails"
	fileContents := [ stream contents ] ensure: [ stream close ].

	"The first line of the export is the name of the exported array, so it is dropped
	to make NeoJSONReader accept the rest. Lines are joined without separators."
	truncated := WriteStream on: String new.
	fileContents lines allButFirstDo: [ :each | truncated nextPutAll: each ].
	jsonData := NeoJSONReader fromString: truncated contents asString.

	jsonData do: [ :each |
		| currentTweet |
		currentTweet := Tweet new.
		currentTweet
			message: (each at: 'text');
			profile: ((each at: 'user') at: 'screen_name');
			date: ((each at: 'created_at') copyFrom: 1 to: 19) asDateAndTime.
		currentTweet detectMessageTypeFrom: each.
		currentTweet detectRetweetedProfileFrom: each.
		currentTweet detectRepliedProfilesFrom: each.
		"Hashtag detection (entities / hashtags) is not implemented yet"
		self messages add: currentTweet ]
]
|
|
|
|
{ #category : #'data scrapping' }
|
|
TwitterMessages >> importFromJSONRemoteFile: aFileUrl [
	"I download the JSON export found at aFileUrl into the temporary directory, replacing any
	previous download with the same file name, and then import it as a local file."

	| tempDirectory tweetsFile |
	tempDirectory := FileLocator temp asFileReference.
	tweetsFile := tempDirectory / (aFileUrl splitOn: '/') last.
	tweetsFile exists ifTrue: [ tweetsFile delete ].

	"#downloadTo: performs the GET itself, so only the url is set here. Sending #get: first
	would fetch the whole resource once into memory and then download it a second time."
	ZnClient new
		url: aFileUrl;
		downloadTo: tempDirectory.
	self importFromJSONLocalFile: tweetsFile
]
|
|
|
|
{ #category : #'as yet unclassified' }
|
|
TwitterMessages >> isEmpty [
	"Answer true when I hold no messages. The original lacked the return caret and
	therefore answered myself instead of a Boolean."

	^ self messages isEmpty
]
|
|
|
|
{ #category : #'data queries' }
|
|
TwitterMessages >> loadTweetsFor: aProfileName from: aDataBaseFile [
	"I read every row of the tweets table of aDataBaseFile whose profile is aProfileName,
	build a Tweet from each row and add it to my messages."

	| db queryResults |
	db := UDBCSQLite3Connection on: aDataBaseFile.
	db open.
	"The profile name is bound as a parameter instead of being pasted into the SQL text,
	and the connection is closed even when the query fails."
	queryResults := [
		(db execute: 'select * from tweets where profile = ?;' with: { aProfileName }) rows ]
			ensure: [ db close ].

	queryResults do: [ :each |
		"NOTE(review): scrapTweetsFromHtmlFile: uses TimeStamp fromUnixTime: for the same
		conversion - confirm TimeStampMethodConverter is the intended class."
		self messages add: (Tweet new
			url: (each at: 'url');
			date: (TimeStampMethodConverter fromUnixTime: (each at: 'date')) asUTC;
			type: (each at: 'type');
			message: (each at: 'message');
			profile: (each at: 'profile');
			yourself) ]
]
|
|
|
|
{ #category : #'data queries' }
|
|
TwitterMessages >> mentionedProfilesByFrequencyUpTo: aPercentage [
	"I answer the leading part of the replied profiles ranked by frequency, keeping
	aPercentage percent (rounded down) of the ranked entries."

	| ranked keepCount |
	ranked := self repliedProfilesByFrequency.
	keepCount := (ranked size * aPercentage / 100) floor.
	^ ranked copyFrom: 1 to: keepCount
]
|
|
|
|
{ #category : #accessing }
|
|
TwitterMessages >> messages [
	"Answer the collection I hold, creating an empty OrderedCollection on first access."

	messages isNil ifTrue: [ messages := OrderedCollection new ].
	^ messages
]
|
|
|
|
{ #category : #accessing }
|
|
TwitterMessages >> messages: anOrderedCollection [
	"Replace the collection I hold with anOrderedCollection."

	messages := anOrderedCollection
]
|
|
|
|
{ #category : #'data queries' }
|
|
TwitterMessages >> monthlyActivityDataFor: aProfileName in: aDataBaseFile [
	"I answer the monthly activity of aProfileName stored in the SQLite file aDataBaseFile as a
	sorted collection of associations month -> { tweets . retweets . replies }, with one entry
	for every month between the first and the last dated row (months without activity hold
	zeros). When there is no data I inform the user and answer an empty collection.
	For the moment I suppose that the data base schema is correct."

	| db queryResults datedResults firstMonth lastMonth currentMonth activityCalendar slotByType |
	db := UDBCSQLite3Connection on: aDataBaseFile.
	db open.
	"The profile is bound as a parameter (no SQL built from user text, no temporary table)
	and the connection is closed even when the query fails. Rows are ordered by month so
	that the first and last dated rows delimit the calendar."
	queryResults := [
		(db
			execute: 'SELECT strftime("%Y-%m",datetime(date, "unixepoch","localtime")) as month, type, count(*) as amount
			FROM tweets WHERE profile = ?
			GROUP BY strftime("%Y-%m",datetime(date, "unixepoch","localtime")), type
			ORDER BY month;'
			with: { aProfileName }) rows ] ensure: [ db close ].

	activityCalendar := Dictionary new.
	queryResults isEmpty ifTrue: [
		self inform: 'There is no data for that profile in the database'.
		^ activityCalendar associations sorted ].

	"Rows without a date cannot be placed in the calendar, whatever their type"
	datedResults := queryResults select: [ :each | (each at: 'month') notNil ].
	datedResults isEmpty ifTrue: [ ^ activityCalendar associations sorted ].

	firstMonth := ((datedResults first at: 'month'), '-01') asDate asMonth.
	lastMonth := ((datedResults last at: 'month'), '-01') asDate asMonth.
	currentMonth := firstMonth.
	[ currentMonth <= lastMonth ] whileTrue: [
		activityCalendar at: currentMonth put: { 0 . 0 . 0 }.
		currentMonth := currentMonth next ].

	"Position of each message type inside the per-month counters"
	slotByType := Dictionary new
		at: 'tweet' put: 1;
		at: 'retweet' put: 2;
		at: 'reply' put: 3;
		yourself.
	datedResults do: [ :each |
		| slot counters |
		slot := slotByType at: (each at: 'type') ifAbsent: [ nil ].
		slot ifNotNil: [
			counters := activityCalendar at: ((each at: 'month'), '-01') asDate asMonth.
			counters at: slot put: (counters at: slot) + (each at: 'amount') ] ].
	^ activityCalendar associations sorted
]
|
|
|
|
{ #category : #'data storage / persistance' }
|
|
TwitterMessages >> populateDataBase: aDataBaseFile [
	"I create the tweets table in the SQLite file aDataBaseFile when it is missing and insert
	one row per message I hold (url, profile, date as unix time, type, message)."

	| db |
	db := UDBCSQLite3Connection on: aDataBaseFile.
	db open.
	"The connection is closed even when creating the table or an insert fails"
	[
		db execute:
			'create table if not exists tweets (
				url text primary key,
				profile text,
				date integer,
				type text,
				message text
			);'.
		"My collection is reached through #messages; there is no #tweets accessor in this class"
		self messages do: [ :each |
			db
				execute: 'INSERT INTO tweets values (?, ?, ?, ?, ?);'
				with: {
					each url.
					each profile.
					(each date ifNotNil: [ :date | date asUnixTime asString ]).
					each type.
					each message } ] ] ensure: [ db close ]
]
|
|
|
|
{ #category : #'as yet unclassified' }
|
|
TwitterMessages >> repliedProfilesByFrequency [
	"I join the replied users of all my replies into one string and answer the sorted
	associations that a RTNameCloud computes for it, using a new instance of my class
	as the dictionary of the cloud."

	| replies joinedNames cloud |
	replies := self splitByType at: 'replies'.
	replies ifNil: [ ^ self ].
	joinedNames := replies
		inject: ''
		into: [ :accumulated :message | accumulated , ' ' , message repliedUsers ].
	cloud := RTNameCloud new.
	cloud
		dictionary: self class new;
		addString: joinedNames.
	^ cloud sortedAssociations
]
|
|
|
|
{ #category : #'data queries' }
|
|
TwitterMessages >> repliesSize [
	"Answer how many of my messages are replies."

	| sizes |
	sizes := self sizesByType.
	^ sizes at: 'repliesSize'
]
|
|
|
|
{ #category : #'data queries' }
|
|
TwitterMessages >> retweetedProfilesByFrequency [
	"I join the retweeted user of all my retweets into one string and answer the sorted
	associations that a RTNameCloud computes for it, using a new instance of my class
	as the dictionary of the cloud."

	| retweets joinedNames cloud |
	retweets := self splitByType at: 'retweets'.
	retweets ifNil: [ ^ self ].
	joinedNames := retweets
		inject: ''
		into: [ :accumulated :message | accumulated , ' ' , message retweetedUser ].
	cloud := RTNameCloud new.
	cloud
		dictionary: self class new;
		addString: joinedNames.
	^ cloud sortedAssociations
]
|
|
|
|
{ #category : #'data queries' }
|
|
TwitterMessages >> retweetedProfilesByFrequencyUpTo: aPercentage [
	"I answer the leading part of the retweeted profiles ranked by frequency, keeping
	aPercentage percent (rounded down) of the ranked entries."

	| ranked keepCount |
	ranked := self retweetedProfilesByFrequency.
	keepCount := (ranked size * aPercentage / 100) floor.
	^ ranked copyFrom: 1 to: keepCount
]
|
|
|
|
{ #category : #'data queries' }
|
|
TwitterMessages >> retweetsSize [
	"Answer how many of my messages are retweets."

	| sizes |
	sizes := self sizesByType.
	^ sizes at: 'retweetsSize'
]
|
|
|
|
{ #category : #'data visualization' }
|
|
TwitterMessages >> ringOverview [
	"I answer a built RTPieBuilder showing my messages as a ring with three slices:
	plain tweets, retweets and replies."

	| plainTweets replies retweets ring |
	"Messages are read through the lazy accessor so an untouched instance does not fail on nil.
	The type stored for retweets is 'retweet' (singular), as in splitByType."
	replies := self messages count: [ :each | each type = 'reply' ].
	retweets := self messages count: [ :each | each type = 'retweet' ].
	plainTweets := self messages size - replies - retweets.

	ring := RTPieBuilder new.
	ring interaction popup.
	ring shape current
		innerRadius: 80;
		externalRadius: 100.
	ring objects: { plainTweets . retweets . replies }.
	"Make the whole ring draggable as one group"
	(ring slice: #value) ifNotNil: [ :group |
		group do: [ :each | each @ (RTDraggable groupToDrag: group) ] ].
	ring normalizer distinctColor.
	ring build.
	^ ring
]
|
|
|
|
{ #category : #'data visualization' }
|
|
TwitterMessages >> ringOverviewFor: aProfileName in: aDataBaseFile [
	"I answer a built RTPieBuilder showing, as a ring, how the messages of aProfileName stored in
	the SQLite file aDataBaseFile split into plain tweets, retweets and replies. When the profile
	has no plain tweets I inform the user and answer myself.
	For the moment I suppose that the data base schema is correct."

	| db countsByType total replies retweets plainTweets ring |
	"Same connection class as the other data base methods of this class"
	db := UDBCSQLite3Connection on: aDataBaseFile.
	db open.
	"One grouped query with a bound parameter replaces three concatenated select-all queries;
	the connection is closed even when the query fails."
	countsByType := [
		(db
			execute: 'select type, count(*) as amount from tweets where profile = ? group by type;'
			with: { aProfileName }) rows ] ensure: [ db close ].

	total := 0.
	retweets := 0.
	replies := 0.
	countsByType do: [ :row |
		| amount |
		amount := row at: 'amount'.
		total := total + amount.
		(row at: 'type') = 'retweet' ifTrue: [ retweets := retweets + amount ].
		(row at: 'type') = 'reply' ifTrue: [ replies := replies + amount ] ].
	"Everything that is neither a retweet nor a reply counts as a plain tweet"
	plainTweets := total - retweets - replies.

	plainTweets > 0 ifFalse: [
		self inform: 'There are no tweets in the database for that profile'.
		^ self ].

	ring := RTPieBuilder new.
	ring interaction popup.
	ring shape current
		innerRadius: 80;
		externalRadius: 100.
	ring objects: { plainTweets . retweets . replies }.
	"Make the whole ring draggable as one group"
	(ring slice: #value) ifNotNil: [ :group |
		group do: [ :each | each @ (RTDraggable groupToDrag: group) ] ].
	ring normalizer distinctColor.
	ring build.
	^ ring
]
|
|
|
|
{ #category : #'data scrapping' }
|
|
TwitterMessages >> scrapTweetsFromHtmlFile: aHtmlFile [
	"I scrap the tweets found in a downloaded html file and replace my messages with them.
	On how to download such a file for any given public twitter profile look at:
	http://blog.databigbang.com/scraping-web-sites-which-dynamically-load-data/"

	| tweetsDump htmlTree profile tweetsHtml scrapped |
	tweetsDump := aHtmlFile readStream.
	"Always release the stream, even if parsing fails"
	htmlTree := [ Soup fromString: tweetsDump contents ] ensure: [ tweetsDump close ].

	"The profile name is the href of the screen name link without its slashes"
	profile := (((htmlTree findAllTagsByClass: 'ProfileHeaderCard-screennameLink') at: 1)
		attributeAt: 'href') copyReplaceAll: '/' with: ''.
	tweetsHtml := htmlTree findAllTagsByClass: 'tweet'.
	scrapped := OrderedCollection new.

	"The last element tagged as tweet is skipped, as in the original implementation"
	tweetsHtml allButLastDo: [ :each |
		| tweet timestamps texts |
		tweet := Tweet new.
		timestamps := each findAllTagsByClass: '_timestamp'.
		timestamps size > 0 ifTrue: [
			| unixTime |
			unixTime := ((timestamps at: 1) attributeAt: 'data-time') asInteger.
			tweet date: (TimeStamp fromUnixTime: unixTime) asUTC ].

		"Exactly one retweet marker means retweet; otherwise a reply-to attribute means reply"
		(each findAllTagsByClass: 'js-retweet-text') size = 1
			ifTrue: [ tweet type: 'retweet' ]
			ifFalse: [
				(each attributeAt: 'data-is-reply-to') isString
					ifTrue: [ tweet type: 'reply' ]
					ifFalse: [ tweet type: 'tweet' ] ].

		tweet url: (each attributeAt: 'data-permalink-path').
		texts := each findAllTagsByClass: 'TweetTextSize'.
		texts size > 0 ifTrue: [ tweet message: (texts at: 1) text ].
		tweet profile: profile.
		scrapped add: tweet ].

	"My collection is set through #messages:; there is no #tweets: accessor in this class"
	self messages: scrapped
]
|
|
|
|
{ #category : #'data queries' }
|
|
TwitterMessages >> sizesByType [
	"I answer a Dictionary with the number of plain tweets, retweets and replies I hold,
	under the keys 'tweetsSize', 'retweetsSize' and 'repliesSize'."

	| groups sizes |
	groups := self splitByType.
	sizes := Dictionary new.
	sizes at: 'tweetsSize' put: (groups at: 'tweets') size.
	sizes at: 'retweetsSize' put: (groups at: 'retweets') size.
	sizes at: 'repliesSize' put: (groups at: 'replies') size.
	^ sizes
]
|
|
|
|
{ #category : #'data queries' }
|
|
TwitterMessages >> splitByType [
	"I answer a Dictionary that splits my messages by type under the keys 'tweets',
	'retweets' and 'replies'. Every message that is neither a retweet nor a reply
	counts as a tweet."

	| retweets replies tweets |
	retweets := self messages select: [ :message | message type = 'retweet' ].
	replies := self messages select: [ :message | message type = 'reply' ].
	"Plain tweets are selected by type in one pass, instead of removing the other two groups
	element by element with an equality search."
	tweets := self messages reject: [ :message |
		message type = 'retweet' or: [ message type = 'reply' ] ].
	^ Dictionary new
		at: 'tweets' put: tweets;
		at: 'retweets' put: retweets;
		at: 'replies' put: replies;
		yourself
]
|
|
|
|
{ #category : #utility }
|
|
TwitterMessages >> sumSplittedSizes [
	"Answer the sum of the sizes of the groups answered by splitByType."

	^ self splitByType
		inject: 0
		into: [ :total :group | total + group size ]
]
|
|
|
|
{ #category : #'data queries' }
|
|
TwitterMessages >> tweetsSize [
	"Answer how many of my messages are plain tweets (neither retweets nor replies)."

	| sizes |
	sizes := self sizesByType.
	^ sizes at: 'tweetsSize'
]
|
|
|
|
{ #category : #utility }
|
|
TwitterMessages >> unnecessaryWords [
	"Answer my own list of words to ignore followed by the unnecessary words of a
	new RTEnglishDictionary."

	| ownWords |
	ownWords := #('a' 'amp' 'ante' 'así' 'cc' 'con' 'como' 'cuando' 'de' 'del' 'dentro' 'desde' 'el' 'en' 'En' 'es' 'está' 'ha' 'han' 'hay' 'la' 'La' 'las' 'Las' 'lo' 'los' 'más' 'mi' 'ni' 'No' 'nos' 'o' 'On' 'p' 'para' 'Para' 'por' 'q' 'que' 'quienes' 'quieres' 'RT' 'se' 'sea' 'Si' 'sin' 'son' 'su' 'sus' 'tan' 'tu' 'un' 'una' 'vía' 'we' 'y' 'yo' 'z').
	^ ownWords , RTEnglishDictionary new unnecessaryWords
]
|
|
|
|
{ #category : #'data queries' }
|
|
TwitterMessages >> wordsByFrequencyInTweets [
	"I join the text of all my messages into one string and answer the sorted associations
	that a RTNameCloud computes for it, using a new instance of my class as the dictionary
	of the cloud."

	| words builder |
	"Messages without text are skipped: scrapTweetsFromHtmlFile: only sets the message when
	the text tag is found, and concatenating nil would fail. The text is built with a stream
	instead of repeated string concatenation."
	words := String streamContents: [ :stream |
		self messages do: [ :tweet |
			tweet message ifNotNil: [ :text |
				stream
					space;
					nextPutAll: text ] ] ].
	builder := RTNameCloud new.
	builder dictionary: self class new.
	builder addString: words.
	^ builder sortedAssociations
]
|
|
|
|
{ #category : #'data queries' }
|
|
TwitterMessages >> wordsByFrequencyInTweetsUpTo: aPercentage [
	"I answer the leading part of the words of my tweets ranked by frequency, keeping
	aPercentage percent (rounded down) of the ranked entries."

	| ranked keepCount |
	ranked := self wordsByFrequencyInTweets.
	keepCount := (ranked size * aPercentage / 100) floor.
	^ ranked copyFrom: 1 to: keepCount
]
|