Refactoring tweet scraping: getting the oldest ones.

This commit is contained in:
Offray Vladimir Luna Cárdenas 2022-05-15 08:50:22 -05:00
parent a61de2ecb7
commit a515a24a81
2 changed files with 4 additions and 29 deletions

View File

@ -1,33 +1,5 @@
accessing accessing
collectRawTweetsFromOldestUpToPage: anInteger collectRawTweetsFromOldestUpToPage: anInteger
"NOTE(review): this hunk is a side-by-side diff, so the first three lines show the removed and the added text on one line. The added body is the single statement answering self collectRawTweetsFrom: self oldestTweetPageCursor upToPage: anInteger; everything from the pagesDict assignment onward is the removed implementation."
| pagesDict response customQuery | ^ self collectRawTweetsFrom: self oldestTweetPageCursor upToPage: anInteger
"Removed implementation: get the page contents, then gather their tweets in a new TweetsCollection."
pagesDict := self getPagesContentsFromOldestUpto: anInteger.
response := TweetsCollection new.
"The query stored on the response records the page keys and the time of collection."
customQuery := Dictionary new
at: 'parameters' put: pagesDict keys;
at: 'date' put: DateAndTime now;
yourself.
response query: customQuery.
"Every timeline-item div found in a page is copied and turned into a Tweet; the page key is stored in the tweet metadata under the current timestamp string."
pagesDict keysAndValuesDo: [ :key :rawTweets | | temp |
temp := (rawTweets xpath: '//div[@class="timeline-item "]') asOrderedCollection
collect: [ :xmlElement | xmlElement postCopy ].
temp do: [ :tweet | | tempTweet |
tempTweet := Tweet new fromNitterHtmlItem: tweet.
tempTweet metadata
at: DateAndTime now asString put: key;
yourself.
response add: tempTweet.
]
].
"Keep only the non-nil messages."
response messages: (response messages select: [ :tweet | tweet isNotNil ]).
"For every index below response lastIndex, store the id of the message at the next index in the current message timelines, keyed by this user name."
response messages doWithIndex: [ :tweet :i |
| current previous |
current := response messages at: i.
i < response lastIndex ifTrue: [
previous := response messages at: i + 1.
current timelines
at: self userName put: previous id;
yourself ]].
^ response.

View File

@ -0,0 +1,3 @@
accessing
oldestTweetPageCursor
	"Answer the first metadata value of the oldest tweet that is a string starting with 'https://'.
	Signals an error when no metadata value matches, because first is sent to an empty collection.
	NOTE(review): presumably the matching value is the page address recorded when the tweet was collected; confirm against the code that fills metadata."
	| urlEntries |
	urlEntries := self oldestTweet metadata select: [ :candidate |
		candidate isString and: [ candidate beginsWith: 'https://' ] ].
	^ urlEntries values first value