Adding download of tweets from the oldest one up to a given page, modifying the histogram exporters, and running the external word cloud without a mask.

This commit is contained in:
ruidajo 2022-04-27 23:51:53 -05:00
parent ac963c753f
commit 06b0d5748d
11 changed files with 76 additions and 26 deletions

View File

@ -0,0 +1,33 @@
accessing
collectRawTweetsFromOldestUpToPage: anInteger
	"I fetch the pages answered by getPagesContentsFromOldestUpto: anInteger, turn every
	timeline item found in them into a Tweet and answer a TweetsCollection holding them.
	The collection query records the page links used and the moment of the request.
	Each tweet metadata gets the link of the page it came from, stored under the current
	timestamp. Every message except the last one also stores, in its timelines and under my
	userName, the id of the message that follows it in the collection."
	| pages collection queryInfo |
	pages := self getPagesContentsFromOldestUpto: anInteger.
	collection := TweetsCollection new.
	queryInfo := Dictionary new.
	queryInfo at: 'parameters' put: pages keys.
	queryInfo at: 'date' put: DateAndTime now.
	collection query: queryInfo.
	pages keysAndValuesDo: [ :pageLink :pageDocument | | timelineItems |
		"Work on copies of the matched nodes instead of the nodes owned by the page document."
		timelineItems := (pageDocument xpath: '//div[@class="timeline-item "]') asOrderedCollection
			collect: [ :node | node postCopy ].
		timelineItems do: [ :item | | parsedTweet |
			parsedTweet := Tweet new fromNitterHtmlItem: item.
			parsedTweet metadata at: DateAndTime now asString put: pageLink.
			collection add: parsedTweet ] ].
	collection messages: (collection messages select: [ :each | each isNotNil ]).
	collection messages doWithIndex: [ :message :index | | following |
		index < collection lastIndex ifTrue: [
			following := collection messages at: index + 1.
			message timelines at: self userName put: following id ] ].
	^ collection

View File

@ -5,6 +5,7 @@ exportQuotesHistogram
quotesOccurrences := self quotesSortedByOccurrences. quotesOccurrences := self quotesSortedByOccurrences.
labels := quotesOccurrences keys. labels := quotesOccurrences keys.
labels := labels collect: [ :profiles | ('@', profiles) ].
quotesHistogram := RSChart new. quotesHistogram := RSChart new.
quotesHistogram extent: 800@200. quotesHistogram extent: 800@200.
diagram := RSBarPlot new diagram := RSBarPlot new
@ -15,11 +16,11 @@ exportQuotesHistogram
quotesHistogram addDecoration: (RSHorizontalTick new quotesHistogram addDecoration: (RSHorizontalTick new
fromNames: labels; fromNames: labels;
labelRotation: 0; labelRotation: 0;
fontSize: 80 /quotesOccurrences size; fontSize: 72 /quotesOccurrences size;
yourself). yourself).
quotesHistogram addDecoration: (RSVerticalTick new quotesHistogram addDecoration: (RSVerticalTick new
integer; integer;
fontSize: 80 /quotesOccurrences size). fontSize: 72 /quotesOccurrences size).
quotesHistogram build. quotesHistogram build.
quotesHistogram canvas pdfExporter quotesHistogram canvas pdfExporter

View File

@ -10,6 +10,7 @@ exportQuotesHistogramWithBars: aNumberOfBars
quotes removeKeys: keysToRemove. quotes removeKeys: keysToRemove.
labels := quotes keys. labels := quotes keys.
labels := labels collect: [ :profiles | ('@', profiles) ].
quotesHistogram := RSChart new. quotesHistogram := RSChart new.
quotesHistogram extent: 800@200. quotesHistogram extent: 800@200.
diagram := RSBarPlot new diagram := RSBarPlot new
@ -20,11 +21,11 @@ exportQuotesHistogramWithBars: aNumberOfBars
quotesHistogram addDecoration: (RSHorizontalTick new quotesHistogram addDecoration: (RSHorizontalTick new
fromNames: labels; fromNames: labels;
labelRotation: 0; labelRotation: 0;
fontSize: 80 /quotes size; fontSize: 72 /quotes size;
yourself). yourself).
quotesHistogram addDecoration: (RSVerticalTick new quotesHistogram addDecoration: (RSVerticalTick new
integer; integer;
fontSize: 80 /quotes size). fontSize: 72 /quotes size).
quotesHistogram build. quotesHistogram build.
quotesHistogram canvas pdfExporter quotesHistogram canvas pdfExporter

View File

@ -15,11 +15,11 @@ exportRepliesHistogramWithBars: aNumberOfBars
tweetsHistogram addDecoration: (RSHorizontalTick new tweetsHistogram addDecoration: (RSHorizontalTick new
fromNames: tweetsByTimeSpan keys; fromNames: tweetsByTimeSpan keys;
labelRotation: 0; labelRotation: 0;
fontSize: 80 /aNumberOfBars; fontSize: 72 /aNumberOfBars;
yourself). yourself).
tweetsHistogram addDecoration: (RSVerticalTick new tweetsHistogram addDecoration: (RSVerticalTick new
integer; integer;
fontSize: 80 /aNumberOfBars). fontSize: 72 /aNumberOfBars).
tweetsHistogram build. tweetsHistogram build.
tweetsHistogram canvas pngExporter tweetsHistogram canvas pngExporter
zoomToShapes; zoomToShapes;

View File

@ -6,6 +6,7 @@ exportRetweetsHistogram
retweetsOccurrences := self retweetsSortedByOccurrences. retweetsOccurrences := self retweetsSortedByOccurrences.
retweetColor := (Color r:(217/255) g:(56/255) b: (124/255)). retweetColor := (Color r:(217/255) g:(56/255) b: (124/255)).
labels := retweetsOccurrences keys. labels := retweetsOccurrences keys.
labels := labels collect: [ :profiles | ('@', profiles) ].
retweetsHistogram := RSChart new. retweetsHistogram := RSChart new.
retweetsHistogram extent: 800@200. retweetsHistogram extent: 800@200.
diagram := RSBarPlot new diagram := RSBarPlot new
@ -16,11 +17,11 @@ exportRetweetsHistogram
retweetsHistogram addDecoration: (RSHorizontalTick new retweetsHistogram addDecoration: (RSHorizontalTick new
fromNames: labels; fromNames: labels;
labelRotation: 0; labelRotation: 0;
fontSize: 80 /retweetsOccurrences size; fontSize: 72 /retweetsOccurrences size;
yourself). yourself).
retweetsHistogram addDecoration: (RSVerticalTick new retweetsHistogram addDecoration: (RSVerticalTick new
integer; integer;
fontSize: 80 /retweetsOccurrences size). fontSize: 72 /retweetsOccurrences size).
retweetsHistogram build. retweetsHistogram build.
retweetsHistogram canvas pdfExporter retweetsHistogram canvas pdfExporter

View File

@ -10,6 +10,7 @@ exportRetweetsHistogramWithBars: aNumberOfBars
retweets removeKeys: keysToRemove. retweets removeKeys: keysToRemove.
labels := retweets keys. labels := retweets keys.
labels := labels collect: [ :profiles | ('@', profiles) ].
retweetsHistogram := RSChart new. retweetsHistogram := RSChart new.
retweetsHistogram extent: 800@200. retweetsHistogram extent: 800@200.
diagram := RSBarPlot new diagram := RSBarPlot new
@ -20,11 +21,11 @@ exportRetweetsHistogramWithBars: aNumberOfBars
retweetsHistogram addDecoration: (RSHorizontalTick new retweetsHistogram addDecoration: (RSHorizontalTick new
fromNames: labels; fromNames: labels;
labelRotation: 0; labelRotation: 0;
fontSize: 80 /retweets size; fontSize: 72 /retweets size;
yourself). yourself).
retweetsHistogram addDecoration: (RSVerticalTick new retweetsHistogram addDecoration: (RSVerticalTick new
integer; integer;
fontSize: 80 /retweets size). fontSize: 72 /retweets size).
retweetsHistogram build. retweetsHistogram build.
retweetsHistogram canvas pdfExporter retweetsHistogram canvas pdfExporter

View File

@ -15,11 +15,11 @@ exportTweetsHistogramWithBars: aNumberOfBars
tweetsHistogram addDecoration: (RSHorizontalTick new tweetsHistogram addDecoration: (RSHorizontalTick new
fromNames: tweetsByTimeSpan keys; fromNames: tweetsByTimeSpan keys;
labelRotation: 0; labelRotation: 0;
fontSize: 80 /aNumberOfBars; fontSize: 72 /aNumberOfBars;
yourself). yourself).
tweetsHistogram addDecoration: (RSVerticalTick new tweetsHistogram addDecoration: (RSVerticalTick new
integer; integer;
fontSize: 80 /aNumberOfBars). fontSize: 72 /aNumberOfBars).
tweetsHistogram build. tweetsHistogram build.
tweetsHistogram canvas pngExporter tweetsHistogram canvas pngExporter
zoomToShapes; zoomToShapes;

View File

@ -16,8 +16,9 @@ externalWordCloud
'--height' . '357' . '--height' . '357' .
'--background' . 'white' . '--background' . 'white' .
'--mode' . 'RGBA' . '--mode' . 'RGBA' .
'--stopwords' . '../../../commons/stopwords-es.txt' . '--stopwords' . '../../../commons/stopwords-es.txt'.
'--mask' . '../../../commons/nube-mascara.jpg'}; "'--mask' . '../../../commons/nube-mascara.jpg'"
};
workingDirectory: self folder fullName; workingDirectory: self folder fullName;
redirectStdout; redirectStdout;
redirectStderr; redirectStderr;

View File

@ -0,0 +1,19 @@
accessing
getPagesContentsFrom: anURL Upto: anInteger
	"I answer an OrderedDictionary that maps page links to their parsed documents. The first
	entry is anURL itself; I then follow the page cursor of each visited page, up to anInteger
	pages in total.
	TO DO: should this be split back into two methods, one getting the page URLs and another
	getting their contents? Or will we always need the cursor URLs together with their contents?
	[ ] Benchmark alternative approaches.
	NOTE(review): if pageCursorFor: answers an empty collection, `keys first` will signal an
	error; confirm what happens when fewer than anInteger pages exist."
	| pages currentLink |
	pages := OrderedDictionary new.
	pages at: anURL put: (self documentTreeFor: anURL).
	currentLink := anURL.
	(anInteger - 1) timesRepeat: [ | cursor followingLink followingDocument |
		cursor := self pageCursorFor: currentLink.
		followingLink := (self userNameLink , '/with_replies') , cursor keys first.
		followingDocument := XMLHTMLParser parse: followingLink asUrl retrieveContents.
		pages at: followingLink put: followingDocument.
		currentLink := followingLink ].
	^ pages

View File

@ -0,0 +1,4 @@
accessing
getPagesContentsFromOldestUpto: anInteger
	"I answer the pages contents, up to anInteger pages, starting from the first link
	(a metadata value beginning with 'https://') found in the metadata of my oldest message."
	| oldestMetadata pageLinks |
	oldestMetadata := self messages oldest metadata.
	pageLinks := oldestMetadata select: [ :each | each beginsWith: 'https://' ].
	^ self getPagesContentsFrom: pageLinks values first Upto: anInteger

View File

@ -5,15 +5,4 @@ getPagesContentsUpto: anInteger
TO DO: should this be splitted back to two methods, one getting the page urls and other its content? TO DO: should this be splitted back to two methods, one getting the page urls and other its content?
or do we always be getting the cursor urls and its contents all the time. or do we always be getting the cursor urls and its contents all the time.
[ ] Benchmark alternative approaches." [ ] Benchmark alternative approaches."
| response nextPageLink previousPageLink | ^ self getPagesContentsFrom: (self userNameLink, '/with_replies') Upto: anInteger
response := OrderedDictionary new.
response at: (self userNameLink, '/with_replies') put: self documentTree.
previousPageLink := (self userNameLink, '/with_replies').
anInteger - 1 timesRepeat: [ | pageCursor |
pageCursor := self pageCursorFor:previousPageLink.
nextPageLink := self userNameLink, '/with_replies', pageCursor keys first.
response at: nextPageLink put: (XMLHTMLParser parse:nextPageLink asUrl retrieveContents).
previousPageLink := nextPageLink
].
^ response