From a515a24a81432b9facdeea5de9cddd59e669a928 Mon Sep 17 00:00:00 2001 From: Offray Luna Date: Sun, 15 May 2022 08:50:22 -0500 Subject: [PATCH] Refactoring tweets scraping: getting oldest ones. --- .../collectRawTweetsFromOldestUpToPage..st | 30 +------------------ .../instance/oldestTweetPageCursor.st | 3 ++ 2 files changed, 4 insertions(+), 29 deletions(-) create mode 100644 Socialmetrica.package/NitterUser.class/instance/oldestTweetPageCursor.st diff --git a/Socialmetrica.package/NitterUser.class/instance/collectRawTweetsFromOldestUpToPage..st b/Socialmetrica.package/NitterUser.class/instance/collectRawTweetsFromOldestUpToPage..st index d91e317..7a36cdc 100644 --- a/Socialmetrica.package/NitterUser.class/instance/collectRawTweetsFromOldestUpToPage..st +++ b/Socialmetrica.package/NitterUser.class/instance/collectRawTweetsFromOldestUpToPage..st @@ -1,33 +1,5 @@ accessing collectRawTweetsFromOldestUpToPage: anInteger - | pagesDict response customQuery | - pagesDict := self getPagesContentsFromOldestUpto: anInteger. - response := TweetsCollection new. - customQuery := Dictionary new - at: 'parameters' put: pagesDict keys; - at: 'date' put: DateAndTime now; - yourself. - response query: customQuery. - pagesDict keysAndValuesDo: [ :key :rawTweets | | temp | - temp := (rawTweets xpath: '//div[@class="timeline-item "]') asOrderedCollection - collect: [ :xmlElement | xmlElement postCopy ]. - temp do: [ :tweet | | tempTweet | - tempTweet := Tweet new fromNitterHtmlItem: tweet. - tempTweet metadata - at: DateAndTime now asString put: key; - yourself. - response add: tempTweet. - ] - ]. - response messages: (response messages select: [ :tweet | tweet isNotNil ]). - response messages doWithIndex: [ :tweet :i | - | current previous | - current := response messages at: i. - i < response lastIndex ifTrue: [ - previous := response messages at: i + 1. - current timelines - at: self userName put: previous id; - yourself ]]. - ^ response. 
+ ^ self collectRawTweetsFrom: self oldestTweetPageCursor upToPage: anInteger \ No newline at end of file diff --git a/Socialmetrica.package/NitterUser.class/instance/oldestTweetPageCursor.st b/Socialmetrica.package/NitterUser.class/instance/oldestTweetPageCursor.st new file mode 100644 index 0000000..07b07ff --- /dev/null +++ b/Socialmetrica.package/NitterUser.class/instance/oldestTweetPageCursor.st @@ -0,0 +1,3 @@ +accessing +oldestTweetPageCursor + ^ (self oldestTweet metadata select: [ :item | item isString and: [ item beginsWith: 'https://' ]]) values first value \ No newline at end of file