Refactoring tweet scraping: getting the oldest ones.
This commit is contained in:
parent
a61de2ecb7
commit
a515a24a81
@ -1,33 +1,5 @@
|
||||
accessing
|
||||
collectRawTweetsFromOldestUpToPage: anInteger
"Collect tweets starting from the oldest known page, up to anInteger pages. NOTE(review): this text is a rendered diff (hunk -1,33 +1,5) whose +/- markers were lost; the inline body below appears to be the removed implementation and the last line, delegating to collectRawTweetsFrom:upToPage: with the oldestTweetPageCursor, appears to be its replacement - confirm against the repository."
|
||||
|
||||
| pagesDict response customQuery |
|
||||
pagesDict := self getPagesContentsFromOldestUpto: anInteger.
|
||||
response := TweetsCollection new.
|
||||
customQuery := Dictionary new
|
||||
at: 'parameters' put: pagesDict keys;
|
||||
at: 'date' put: DateAndTime now;
|
||||
yourself.
|
||||
response query: customQuery.
|
||||
"For each page, copy every timeline-item div, build a Tweet from it, and record the page key in the tweet metadata under the current timestamp string."
pagesDict keysAndValuesDo: [ :key :rawTweets | | temp |
|
||||
temp := (rawTweets xpath: '//div[@class="timeline-item "]') asOrderedCollection
|
||||
collect: [ :xmlElement | xmlElement postCopy ].
|
||||
temp do: [ :tweet | | tempTweet |
|
||||
tempTweet := Tweet new fromNitterHtmlItem: tweet.
|
||||
tempTweet metadata
|
||||
at: DateAndTime now asString put: key;
|
||||
yourself.
|
||||
response add: tempTweet.
|
||||
]
|
||||
].
|
||||
response messages: (response messages select: [ :tweet | tweet isNotNil ]).
|
||||
"Store, in each message's timelines under this user's name, the id of the message that follows it in the collection; the last message is left without an entry."
response messages doWithIndex: [ :tweet :i |
|
||||
| current previous |
|
||||
current := response messages at: i.
|
||||
i < response lastIndex ifTrue: [
|
||||
previous := response messages at: i + 1.
|
||||
current timelines
|
||||
at: self userName put: previous id;
|
||||
yourself ]].
|
||||
^ response.
|
||||
"NOTE(review): presumably the single-line body that replaces everything above in this commit - confirm."
^ self collectRawTweetsFrom: self oldestTweetPageCursor upToPage: anInteger
|
||||
|
@ -0,0 +1,3 @@
|
||||
accessing
|
||||
oldestTweetPageCursor
"Answer the first entry of the oldest tweet's metadata that is a String beginning with https://. NOTE(review): presumably this is the page URL stored when the tweet was scraped; the trailing value send looks redundant on a String, and there is no guard for the case where no entry matches - confirm both."
|
||||
^ (self oldestTweet metadata select: [ :item | item isString and: [ item beginsWith: 'https://' ]]) values first value
|
Loading…
Reference in New Issue
Block a user