Improving Nitter user retrieval and tweet parsing.

This commit is contained in:
ruidajo 2022-04-11 14:18:52 -05:00
parent bc168c4f0a
commit db1a8e0502
9 changed files with 65 additions and 41 deletions

View File

@ -0,0 +1,16 @@
accessing
collectRawTweetsForPages: anInteger
	"Answer a TweetsCollection built from the timeline items of the pages answered by
	 #pageURLs: for anInteger. The page addresses become the collection's query and each
	 timeline item is turned into a Tweet through #fromNitterHtmlItem:."
	| pages timelineItems |
	pages := self pageURLs: anInteger.
	timelineItems := OrderedCollection new.
	pages values do: [ :docTree |
		| pageItems |
		pageItems := (docTree xpath: '//div[@class="timeline-item "]') asOrderedCollection.
		timelineItems addAll: (pageItems collect: [ :item | item postCopy ]) ].
	^ TweetsCollection new
		query: pages keys;
		tweets: (timelineItems collect: [ :item | Tweet new fromNitterHtmlItem: item ]);
		yourself

View File

@ -1,3 +1,3 @@
operation
documentTree
	"Answer the parsed HTML document tree of the page at self userNameLink.
	 Delegates to #documentTreeFor: so fetching and parsing live in one place; the former
	 inline XMLHTMLParser expression did the same work and left this return unreachable."
	^ self documentTreeFor: self userNameLink

View File

@ -0,0 +1,3 @@
accessing
documentTreeFor: anUrl
	"Answer the document tree produced by XMLHTMLParser for the contents retrieved
	 from anUrl. anUrl is converted with #asUrl before its contents are retrieved."
	| html |
	html := anUrl asUrl retrieveContents.
	^ XMLHTMLParser parse: html

View File

@ -1,37 +0,0 @@
accessing
numberOfURLsForLoadingTweets: number
	"Answer an OrderedDictionary describing the first `number` timeline pages:
	   'tweets'          -> self lastTweetsFromHtml
	   'tweets-<cursor>' -> copies of the timeline-item elements of each following page
	   'urls'            -> the visited page addresses, converted with #asUrl.
	 When number = 1 nothing is fetched and the receiver itself is answered (kept as is
	 for existing callers).
	 Each page is downloaded once: the tree parsed for a page is reused to find the
	 'Load more' link of the next iteration instead of retrieving the same address again."
	| collectionURLs urlAndTweets currentDoc |
	number = 1 ifTrue: [ ^ self ].
	urlAndTweets := OrderedDictionary new.
	collectionURLs := OrderedCollection with: self userNameLink.
	urlAndTweets at: 'tweets' put: self lastTweetsFromHtml.
	"The first page is only needed when at least one further page will be requested."
	number > 1 ifTrue: [
		currentDoc := XMLHTMLParser parse: self userNameLink asUrl retrieveContents ].
	number - 1 timesRepeat: [
		| urlString |
		"The href of the 'Load more' anchor is appended to the user link to address the next page."
		urlString := self userNameLink
			, ((currentDoc xpath: '//a[.="Load more"]') @ 'href') stringValue.
		currentDoc := XMLHTMLParser parse: urlString asUrl retrieveContents.
		collectionURLs add: urlString.
		urlAndTweets
			at: 'tweets-' , (urlString splitOn: 'cursor=') second
			put: ((currentDoc xpath: '//div[@class="timeline-item "]') asOrderedCollection
					collect: [ :xmlElement | xmlElement postCopy ]) ].
	urlAndTweets at: 'urls' put: (collectionURLs collect: [ :string | string asUrl ]).
	^ urlAndTweets

View File

@ -0,0 +1,10 @@
accessing
pageCursorFor: anUrl
	"Answer a Dictionary with a single entry for the page at anUrl:
	 the key is the string value of the href of its 'Load more' anchor and
	 the value is the document tree of that same page."
	| docTree cursor |
	docTree := self documentTreeFor: anUrl.
	cursor := ((docTree xpath: '//a[.="Load more"]') @ 'href') stringValue.
	^ Dictionary new
		at: cursor put: docTree;
		yourself

View File

@ -0,0 +1,14 @@
accessing
pageDocTrees: anInteger
	"Answer an OrderedDictionary mapping the address of each of the first anInteger
	 timeline pages to its parsed document tree, in page order.
	 The previous body sent #add: with a bare String to the dictionary and concatenated
	 a String with a Dictionary, so it could not run to completion.
	 NOTE(review): the intended result is assumed to be address -> document tree; if only
	 the addresses are wanted, send #keys to the answer. Confirm against callers.
	 Stops early when a page has no 'Load more' link."
	| response currentLink currentTree cursor |
	response := OrderedDictionary new.
	currentLink := self userNameLink.
	currentTree := self documentTreeFor: currentLink.
	response at: currentLink put: currentTree.
	anInteger - 1 timesRepeat: [
		cursor := ((currentTree xpath: '//a[.="Load more"]') @ 'href') stringValue.
		cursor isEmptyOrNil ifTrue: [ ^ response ].
		currentLink := self userNameLink , cursor.
		currentTree := self documentTreeFor: currentLink.
		response at: currentLink put: currentTree ].
	^ response

View File

@ -0,0 +1,14 @@
accessing
pageURLs: anInteger
	"Answer an OrderedDictionary mapping the address of each of the first anInteger
	 timeline pages to the document tree of that page, in page order.
	 Each address is stored with the tree fetched from that same address. Previously the
	 tree of the preceding page was stored under the next page's address, so the first
	 page appeared twice and the last requested page was never kept.
	 Every page is downloaded once; the loop stops early when a page has no 'Load more' link."
	| response currentTree nextPageLink cursor |
	response := OrderedDictionary new.
	currentTree := self documentTree.
	response at: self userNameLink put: currentTree.
	anInteger - 1 timesRepeat: [
		"The href of the 'Load more' anchor of the current page addresses the next one."
		cursor := ((currentTree xpath: '//a[.="Load more"]') @ 'href') stringValue.
		cursor isEmptyOrNil ifTrue: [ ^ response ].
		nextPageLink := self userNameLink , cursor.
		currentTree := self documentTreeFor: nextPageLink.
		response at: nextPageLink put: currentTree ].
	^ response

View File

@ -24,7 +24,7 @@ metricsFromNitterHtml: xmlItem
item asString includesSubstring: 'heart' ]) stringValue trimmed
copyReplaceAll: ','
with: '').
metadata
self metadata
at: 'pinned'
put: (xmlItem xpath: '//div[@class="pinned"]') stringValue trimmed = 'Pinned Tweet'

View File

@ -1,4 +1,8 @@
accessing
store
	"Store every tweet of the receiver, each one inside its own ReStore transaction.
	 The tweets are iterated once; a second, older iteration that also sent #store to
	 every tweet has been dropped so each tweet is stored a single time per call."
	ReStore isConnected ifFalse: [ self class storeDB ]. "Starting the ReStore singleton."
	self tweets do: [ :each |
		ReStore evaluateAsTransaction: [
			each store
			"each user id isInDB? ifFalse: [ each user store ]" ] ]