Add downloading of tweets from the oldest one up to a given page, modify the histogram exporters, and run the external word cloud without a mask.

This commit is contained in:
ruidajo 2022-04-27 23:51:53 -05:00
parent ac963c753f
commit 06b0d5748d
11 changed files with 76 additions and 26 deletions

View File

@ -0,0 +1,33 @@
accessing
collectRawTweetsFromOldestUpToPage: anInteger
	"Answer a TweetsCollection built from the pages answered by
	 #getPagesContentsFromOldestUpto: for anInteger.
	 The collection's query records the page keys and the current date.
	 Every element matching the timeline-item XPath in each page is turned into a Tweet
	 whose metadata stores, under the current timestamp string, the key of the page it came from.
	 Only non-nil messages are kept. Finally, every message except the one at the last index
	 stores in its timelines, under my userName, the id of the message at the next index."
	| pages collection queryInfo |
	pages := self getPagesContentsFromOldestUpto: anInteger.
	collection := TweetsCollection new.
	queryInfo := Dictionary new.
	queryInfo at: 'parameters' put: pages keys.
	queryInfo at: 'date' put: DateAndTime now.
	collection query: queryInfo.
	pages keysAndValuesDo: [ :pageLink :pageDocument |
		| timelineItems |
		timelineItems := (pageDocument xpath: '//div[@class="timeline-item "]') asOrderedCollection
			collect: [ :item | item postCopy ].
		timelineItems do: [ :item |
			| parsedTweet |
			parsedTweet := Tweet new fromNitterHtmlItem: item.
			"Remember which page this tweet was read from, keyed by the time it was read."
			parsedTweet metadata at: DateAndTime now asString put: pageLink.
			collection add: parsedTweet ] ].
	collection messages: (collection messages select: [ :each | each isNotNil ]).
	"Link each message to the one that follows it in the collection."
	collection messages doWithIndex: [ :each :index |
		| current following |
		current := collection messages at: index.
		index < collection lastIndex ifTrue: [
			following := collection messages at: index + 1.
			current timelines at: self userName put: following id ] ].
	^ collection

View File

@ -5,6 +5,7 @@ exportQuotesHistogram
quotesOccurrences := self quotesSortedByOccurrences.
labels := quotesOccurrences keys.
labels := labels collect: [ :profiles | ('@', profiles) ].
quotesHistogram := RSChart new.
quotesHistogram extent: 800@200.
diagram := RSBarPlot new
@ -15,11 +16,11 @@ exportQuotesHistogram
quotesHistogram addDecoration: (RSHorizontalTick new
fromNames: labels;
labelRotation: 0;
fontSize: 80 /quotesOccurrences size;
fontSize: 72 /quotesOccurrences size;
yourself).
quotesHistogram addDecoration: (RSVerticalTick new
integer;
fontSize: 80 /quotesOccurrences size).
fontSize: 72 /quotesOccurrences size).
quotesHistogram build.
quotesHistogram canvas pdfExporter

View File

@ -10,6 +10,7 @@ exportQuotesHistogramWithBars: aNumberOfBars
quotes removeKeys: keysToRemove.
labels := quotes keys.
labels := labels collect: [ :profiles | ('@', profiles) ].
quotesHistogram := RSChart new.
quotesHistogram extent: 800@200.
diagram := RSBarPlot new
@ -20,11 +21,11 @@ exportQuotesHistogramWithBars: aNumberOfBars
quotesHistogram addDecoration: (RSHorizontalTick new
fromNames: labels;
labelRotation: 0;
fontSize: 80 /quotes size;
fontSize: 72 /quotes size;
yourself).
quotesHistogram addDecoration: (RSVerticalTick new
integer;
fontSize: 80 /quotes size).
fontSize: 72 /quotes size).
quotesHistogram build.
quotesHistogram canvas pdfExporter

View File

@ -15,11 +15,11 @@ exportRepliesHistogramWithBars: aNumberOfBars
tweetsHistogram addDecoration: (RSHorizontalTick new
fromNames: tweetsByTimeSpan keys;
labelRotation: 0;
fontSize: 80 /aNumberOfBars;
fontSize: 72 /aNumberOfBars;
yourself).
tweetsHistogram addDecoration: (RSVerticalTick new
integer;
fontSize: 80 /aNumberOfBars).
fontSize: 72 /aNumberOfBars).
tweetsHistogram build.
tweetsHistogram canvas pngExporter
zoomToShapes;

View File

@ -6,6 +6,7 @@ exportRetweetsHistogram
retweetsOccurrences := self retweetsSortedByOccurrences.
retweetColor := (Color r:(217/255) g:(56/255) b: (124/255)).
labels := retweetsOccurrences keys.
labels := labels collect: [ :profiles | ('@', profiles) ].
retweetsHistogram := RSChart new.
retweetsHistogram extent: 800@200.
diagram := RSBarPlot new
@ -16,11 +17,11 @@ exportRetweetsHistogram
retweetsHistogram addDecoration: (RSHorizontalTick new
fromNames: labels;
labelRotation: 0;
fontSize: 80 /retweetsOccurrences size;
fontSize: 72 /retweetsOccurrences size;
yourself).
retweetsHistogram addDecoration: (RSVerticalTick new
integer;
fontSize: 80 /retweetsOccurrences size).
fontSize: 72 /retweetsOccurrences size).
retweetsHistogram build.
retweetsHistogram canvas pdfExporter

View File

@ -10,6 +10,7 @@ exportRetweetsHistogramWithBars: aNumberOfBars
retweets removeKeys: keysToRemove.
labels := retweets keys.
labels := labels collect: [ :profiles | ('@', profiles) ].
retweetsHistogram := RSChart new.
retweetsHistogram extent: 800@200.
diagram := RSBarPlot new
@ -20,11 +21,11 @@ exportRetweetsHistogramWithBars: aNumberOfBars
retweetsHistogram addDecoration: (RSHorizontalTick new
fromNames: labels;
labelRotation: 0;
fontSize: 80 /retweets size;
fontSize: 72 /retweets size;
yourself).
retweetsHistogram addDecoration: (RSVerticalTick new
integer;
fontSize: 80 /retweets size).
fontSize: 72 /retweets size).
retweetsHistogram build.
retweetsHistogram canvas pdfExporter

View File

@ -15,11 +15,11 @@ exportTweetsHistogramWithBars: aNumberOfBars
tweetsHistogram addDecoration: (RSHorizontalTick new
fromNames: tweetsByTimeSpan keys;
labelRotation: 0;
fontSize: 80 /aNumberOfBars;
fontSize: 72 /aNumberOfBars;
yourself).
tweetsHistogram addDecoration: (RSVerticalTick new
integer;
fontSize: 80 /aNumberOfBars).
fontSize: 72 /aNumberOfBars).
tweetsHistogram build.
tweetsHistogram canvas pngExporter
zoomToShapes;

View File

@ -17,7 +17,8 @@ externalWordCloud
'--background' . 'white' .
'--mode' . 'RGBA' .
'--stopwords' . '../../../commons/stopwords-es.txt'.
'--mask' . '../../../commons/nube-mascara.jpg'};
"'--mask' . '../../../commons/nube-mascara.jpg'"
};
workingDirectory: self folder fullName;
redirectStdout;
redirectStderr;

View File

@ -0,0 +1,19 @@
accessing
getPagesContentsFrom: anURL Upto: anInteger
	"Starting at anURL, I walk the page cursors and answer an OrderedDictionary that maps
	 each visited page link to its parsed document, in visiting order.
	 The first entry is anURL itself; anInteger - 1 further pages are then fetched.
	 TO DO: should this be split back into two methods, one getting the page URLs and
	 another getting their contents? Or will we always need the cursor URLs together
	 with their contents?
	 [ ] Benchmark alternative approaches."
	| pages currentLink |
	pages := OrderedDictionary new.
	pages at: anURL put: (self documentTreeFor: anURL).
	currentLink := anURL.
	anInteger - 1 timesRepeat: [
		| cursor |
		"The cursor of the page just visited gives the suffix of the next page link."
		cursor := self pageCursorFor: currentLink.
		currentLink := self userNameLink, '/with_replies', cursor keys first.
		pages at: currentLink put: (XMLHTMLParser parse: currentLink asUrl retrieveContents) ].
	^ pages

View File

@ -0,0 +1,4 @@
accessing
getPagesContentsFromOldestUpto: anInteger
	"Answer the pages contents (see #getPagesContentsFrom:Upto:) starting from the first
	 'https://' entry found in the metadata of my oldest message, up to anInteger pages.
	 NOTE(review): assumes every metadata entry tested by the filter understands #beginsWith:,
	 and that at least one of them starts with 'https://' — confirm against callers."
	| oldestMetadata pageLinks |
	oldestMetadata := self messages oldest metadata.
	pageLinks := oldestMetadata select: [ :each | each beginsWith: 'https://' ].
	^ self getPagesContentsFrom: pageLinks values first Upto: anInteger

View File

@ -5,15 +5,4 @@ getPagesContentsUpto: anInteger
TO DO: should this be splitted back to two methods, one getting the page urls and other its content?
or do we always be getting the cursor urls and its contents all the time.
[ ] Benchmark alternative approaches."
| response nextPageLink previousPageLink |
response := OrderedDictionary new.
response at: (self userNameLink, '/with_replies') put: self documentTree.
previousPageLink := (self userNameLink, '/with_replies').
anInteger - 1 timesRepeat: [ | pageCursor |
pageCursor := self pageCursorFor:previousPageLink.
nextPageLink := self userNameLink, '/with_replies', pageCursor keys first.
response at: nextPageLink put: (XMLHTMLParser parse:nextPageLink asUrl retrieveContents).
previousPageLink := nextPageLink
].
^ response
^ self getPagesContentsFrom: (self userNameLink, '/with_replies') Upto: anInteger