Refactoring tweets scraping: getting oldest ones.
This commit is contained in:
parent
a61de2ecb7
commit
a515a24a81
@ -1,33 +1,5 @@
|
|||||||
accessing
|
accessing
|
||||||
collectRawTweetsFromOldestUpToPage: anInteger
|
collectRawTweetsFromOldestUpToPage: anInteger
"Diff view of this method: the hunk header reports 33 old lines against 5 new ones, and both versions are interleaved below.
The single return statement that sends collectRawTweetsFrom:upToPage: (with self oldestTweetPageCursor and anInteger) is presumably the new body; every other statement presumably belongs to the removed implementation - confirm against the repository."
|
||||||
|
|
||||||
| pagesDict response customQuery |
|
^ self collectRawTweetsFrom: self oldestTweetPageCursor upToPage: anInteger
|
||||||
"Removed implementation starts here: pagesDict is whatever getPagesContentsFromOldestUpto: answers; it is used below as a keyed collection (keys, keysAndValuesDo:)."
pagesDict := self getPagesContentsFromOldestUpto: anInteger.
|
|
||||||
response := TweetsCollection new.
|
|
||||||
"The query attached to the response records the page keys under 'parameters' and the current time under 'date'."
customQuery := Dictionary new
|
|
||||||
at: 'parameters' put: pagesDict keys;
|
|
||||||
at: 'date' put: DateAndTime now;
|
|
||||||
yourself.
|
|
||||||
response query: customQuery.
|
|
||||||
"For each page: select the div elements whose class attribute is exactly 'timeline-item ' (note the trailing space), copy each selected element with postCopy, and build one Tweet per element."
pagesDict keysAndValuesDo: [ :key :rawTweets | | temp |
|
|
||||||
temp := (rawTweets xpath: '//div[@class="timeline-item "]') asOrderedCollection
|
|
||||||
collect: [ :xmlElement | xmlElement postCopy ].
|
|
||||||
temp do: [ :tweet | | tempTweet |
|
|
||||||
tempTweet := Tweet new fromNitterHtmlItem: tweet.
|
|
||||||
"Stores the page key in the tweet metadata, keyed by the current timestamp rendered as a string."
tempTweet metadata
|
|
||||||
at: DateAndTime now asString put: key;
|
|
||||||
yourself.
|
|
||||||
response add: tempTweet.
|
|
||||||
]
|
|
||||||
].
|
|
||||||
"Keep only the non-nil messages."
response messages: (response messages select: [ :tweet | tweet isNotNil ]).
|
|
||||||
"For every index below response lastIndex, store the id of the message at index i + 1 in the current message's timelines under self userName.
NOTE(review): the variable is called previous although it is the element at the next index - presumably messages are ordered newest first; confirm. The block argument tweet is not used; current is fetched again by index."
response messages doWithIndex: [ :tweet :i |
|
|
||||||
| current previous |
|
|
||||||
current := response messages at: i.
|
|
||||||
i < response lastIndex ifTrue: [
|
|
||||||
previous := response messages at: i + 1.
|
|
||||||
current timelines
|
|
||||||
at: self userName put: previous id;
|
|
||||||
yourself ]].
|
|
||||||
^ response.
|
|
||||||
|
|
@ -0,0 +1,3 @@
|
|||||||
|
accessing
|
||||||
|
oldestTweetPageCursor
"Answers the result of sending value to the first of the values of those metadata entries of self oldestTweet that are strings beginning with 'https://'.
Presumably this is the address of the page from which the oldest tweet was collected - confirm against the code that fills the metadata.
NOTE(review): if no metadata entry matches, the selection is empty and first will presumably signal an error - confirm whether callers guarantee at least one match."
|
||||||
|
^ (self oldestTweet metadata select: [ :item | item isString and: [ item beginsWith: 'https://' ]]) values first value
|
Loading…
Reference in New Issue
Block a user