Improving nitter user getting and parsing tweets.
This commit is contained in:
parent
bc168c4f0a
commit
db1a8e0502
@ -0,0 +1,16 @@
|
||||
accessing
|
||||
collectRawTweetsForPages: anInteger
	"Answer a TweetsCollection built from the pages answered by #pageURLs: for anInteger.
	The collection query is set to the keys of that dictionary and its tweets are
	Tweet instances created with #fromNitterHtmlItem: from every div whose class
	attribute is exactly 'timeline-item ' found in the dictionary values."

	| docTreesByUrl timelineItems |
	docTreesByUrl := self pageURLs: anInteger.
	timelineItems := OrderedCollection new.
	docTreesByUrl values do: [ :docTree |
		| pageItems |
		pageItems := (docTree xpath: '//div[@class="timeline-item "]') asOrderedCollection.
		"#postCopy is sent to each matched element itself, exactly as before."
		timelineItems addAll: (pageItems collect: [ :item | item postCopy ]) ].
	^ TweetsCollection new
		query: docTreesByUrl keys;
		tweets: (timelineItems collect: [ :item | Tweet new fromNitterHtmlItem: item ]);
		yourself
|
||||
|
@ -1,3 +1,3 @@
|
||||
operation
|
||||
documentTree
|
||||
^ XMLHTMLParser parse: self userNameLink asUrl retrieveContents
|
||||
^ self documentTreeFor: self userNameLink
|
@ -0,0 +1,3 @@
|
||||
accessing
|
||||
documentTreeFor: anUrl
	"Answer the result of parsing, with XMLHTMLParser, the contents retrieved from anUrl.
	anUrl can be any object that understands #asUrl."

	| pageContents |
	pageContents := anUrl asUrl retrieveContents.
	^ XMLHTMLParser parse: pageContents
|
@ -1,37 +0,0 @@
|
||||
accessing
|
||||
numberOfURLsForLoadingTweets: number
	"Answer an OrderedDictionary describing the first number timeline pages:
	- 'tweets' holds the result of #lastTweetsFromHtml,
	- 'tweets-<cursor>' holds the timeline-item elements of each following page,
	- 'urls' holds every visited page link converted with #asUrl.
	NOTE(review): when number = 1 the receiver itself is answered instead of a
	dictionary; kept as is because callers may rely on it -- confirm."

	| collectionURLs urlAndTweets |
	number = 1 ifTrue: [ ^ self ].

	urlAndTweets := OrderedDictionary new.
	collectionURLs := OrderedCollection with: self userNameLink.
	urlAndTweets at: 'tweets' put: self lastTweetsFromHtml.

	number - 1 timesRepeat: [
		| previousDoc docTree urlString |
		"The most recently added link is always the page whose Load more anchor
		points to the next page."
		previousDoc := XMLHTMLParser parse: collectionURLs last asUrl retrieveContents.
		"Uses #xpath:, the selector used by every other query in this class."
		urlString := self userNameLink
			, ((previousDoc xpath: '//a[.="Load more"]') @ 'href') stringValue.
		docTree := XMLHTMLParser parse: urlString asUrl retrieveContents.
		collectionURLs add: urlString.
		urlAndTweets
			at: 'tweets-' , (urlString splitOn: 'cursor=') second
			put: ((docTree xpath: '//div[@class="timeline-item "]') asOrderedCollection
					collect: [ :xmlElement | xmlElement postCopy ]) ].

	urlAndTweets at: 'urls' put: (collectionURLs collect: [ :string | string asUrl ]).
	^ urlAndTweets
|
@ -0,0 +1,10 @@
|
||||
accessing
|
||||
pageCursorFor: anUrl
	"Answer a Dictionary with a single entry. Its value is the document tree parsed
	from anUrl; its key is the string value of the href attribute of the anchor in
	that tree whose text is Load more."

	| docTree loadMoreHref |
	docTree := self documentTreeFor: anUrl.
	loadMoreHref := ((docTree xpath: '//a[.="Load more"]') @ 'href') stringValue.
	^ Dictionary new
		at: loadMoreHref put: docTree;
		yourself
|
@ -0,0 +1,14 @@
|
||||
accessing
|
||||
pageDocTrees: anInteger
	"Answer an OrderedDictionary mapping each page link (a String) to the document
	tree parsed from it, starting at #userNameLink and following the Load more
	cursor. The first page is always included; at most anInteger pages are read and
	each page is fetched once.
	The previous version sent #add: with a String to the dictionary and #value to
	the Dictionary answered by #pageCursorFor:, so it could not build the links."

	| response pageLink |
	response := OrderedDictionary new.
	pageLink := self userNameLink.
	(anInteger max: 1) timesRepeat: [
		| cursorAndTree cursor |
		"#pageCursorFor: answers a one-entry dictionary: next cursor -> tree of pageLink."
		cursorAndTree := self pageCursorFor: pageLink.
		response at: pageLink put: cursorAndTree values first.
		cursor := cursorAndTree keys first.
		"Without a cursor there is no further page to follow."
		cursor isEmptyOrNil ifTrue: [ ^ response ].
		pageLink := self userNameLink , cursor ].
	^ response
|
14
Socialmetrica.package/NitterUser.class/instance/pageURLs..st
Normal file
14
Socialmetrica.package/NitterUser.class/instance/pageURLs..st
Normal file
@ -0,0 +1,14 @@
|
||||
accessing
|
||||
pageURLs: anInteger
	"Answer an OrderedDictionary mapping each page link (a String) to the document
	tree parsed from that same link. The first entry is #userNameLink with
	#documentTree; up to anInteger - 1 further pages are reached by appending the
	href of the Load more anchor of the previous page to #userNameLink.
	The previous version stored the tree of the previous page under the link of the
	next page (that is what #pageCursorFor: answers) and fetched every page twice."

	| response previousDocTree |
	response := OrderedDictionary new.
	previousDocTree := self documentTree.
	response at: self userNameLink put: previousDocTree.
	anInteger - 1 timesRepeat: [
		| cursor nextPageLink |
		"Read the cursor from the tree already in hand instead of fetching it again."
		cursor := ((previousDocTree xpath: '//a[.="Load more"]') @ 'href') stringValue.
		"Without a cursor there is no further page to follow."
		cursor isEmptyOrNil ifTrue: [ ^ response ].
		nextPageLink := self userNameLink , cursor.
		previousDocTree := self documentTreeFor: nextPageLink.
		response at: nextPageLink put: previousDocTree ].
	^ response
|
@ -25,6 +25,6 @@ metricsFromNitterHtml: xmlItem
|
||||
copyReplaceAll: ','
|
||||
with: '').
|
||||
|
||||
metadata
|
||||
self metadata
|
||||
at: 'pinned'
|
||||
put: (xmlItem xpath: '//div[@class="pinned"]') stringValue trimmed = 'Pinned Tweet'
|
@ -1,4 +1,8 @@
|
||||
accessing
|
||||
store
|
||||
ReStore isConnected ifFalse: [ self class storeDB]. "Starting the ReStore singleton."
|
||||
self tweets do: [:each | ReStore evaluateAsTransaction: [ each store ] ].
|
||||
self tweets do: [:each | ReStore evaluateAsTransaction: [
|
||||
each store.
|
||||
"each user id isInDB? ifFalse: [ each user store ]"
|
||||
]
|
||||
].
|
Loading…
Reference in New Issue
Block a user