Improving Nitter user retrieval and tweet parsing.
This commit is contained in:
parent
bc168c4f0a
commit
db1a8e0502
@ -0,0 +1,16 @@
|
|||||||
|
accessing
|
||||||
|
collectRawTweetsForPages: anInteger
	"Answer a TweetsCollection built from the pages answered by #pageURLs: for anInteger.
	The keys of that dictionary become the query of the collection; every element matching
	the timeline-item XPath in each page document becomes a Tweet via #fromNitterHtmlItem:."

	| pages timelineItems |
	pages := self pageURLs: anInteger.
	timelineItems := OrderedCollection new.
	pages valuesDo: [ :docTree |
		| matches |
		matches := (docTree xpath: '//div[@class="timeline-item "]') asOrderedCollection.
		"postCopy is sent to each matched element and its answer is what gets collected."
		timelineItems addAll: (matches collect: [ :item | item postCopy ]) ].
	^ TweetsCollection new
		query: pages keys;
		tweets: (timelineItems collect: [ :item | Tweet new fromNitterHtmlItem: item ]);
		yourself
|
||||||
|
|
@ -1,3 +1,3 @@
|
|||||||
operation
|
operation
|
||||||
documentTree
|
documentTree
|
||||||
"Answer the document tree parsed from the contents retrieved at self userNameLink. The first return statement below is the previous version, which parsed inline with XMLHTMLParser; the second is the new version, which delegates to documentTreeFor:."
^ XMLHTMLParser parse: self userNameLink asUrl retrieveContents
|
^ self documentTreeFor: self userNameLink
|
@ -0,0 +1,3 @@
|
|||||||
|
accessing
|
||||||
|
documentTreeFor: anUrl
	"Answer the document tree that XMLHTMLParser produces for the contents retrieved
	from anUrl. anUrl is converted with #asUrl before its contents are retrieved."

	| html |
	html := anUrl asUrl retrieveContents.
	^ XMLHTMLParser parse: html
|
@ -1,37 +0,0 @@
|
|||||||
accessing
|
|
||||||
numberOfURLsForLoadingTweets: number
"Removed by this commit. Builds an OrderedDictionary that holds: under the key tweets the answer of lastTweetsFromHtml; under one key per further page (prefix tweets- followed by the text after cursor= in the page URL) the timeline-item elements of that page; and under the key urls the visited links converted with asUrl."
|
|
||||||
|
|
||||||
| collectionURLs count asURLs urlAndTweets |
|
|
||||||
"NOTE(review): when number is 1 this answers the receiver itself, not a dictionary like the final return does - confirm callers expected that."
number = 1 ifTrue: [ ^ self ].
|
|
||||||
|
|
||||||
urlAndTweets := OrderedDictionary new.
|
|
||||||
collectionURLs := { self userNameLink } asOrderedCollection.
|
|
||||||
|
|
||||||
urlAndTweets at: 'tweets' put: self lastTweetsFromHtml.
|
|
||||||
|
|
||||||
count := 1.
|
|
||||||
"Runs number - count times; each pass parses the most recently collected URL to find the link of the following page."
number - count timesRepeat: [
|
|
||||||
| tempDoc docTree urlString |
|
|
||||||
tempDoc := XMLHTMLParser parse:
|
|
||||||
(collectionURLs at: count) asUrl retrieveContents.
|
|
||||||
|
|
||||||
urlString := self userNameLink
|
|
||||||
,
|
|
||||||
"NOTE(review): this line sends xPath: while the line further below sends xpath: - presumably only one spelling is understood; verify."
((tempDoc xPath: '//a[.="Load more"]') @ 'href')
|
|
||||||
stringValue.
|
|
||||||
docTree := XMLHTMLParser parse: urlString asUrl retrieveContents.
|
|
||||||
collectionURLs add: urlString.
|
|
||||||
|
|
||||||
urlAndTweets
|
|
||||||
at: 'tweets-' , (urlString splitOn: 'cursor=') second
|
|
||||||
put:
|
|
||||||
((docTree xpath: '//div[@class="timeline-item "]')
|
|
||||||
asOrderedCollection collect: [ :xmlElement |
|
|
||||||
xmlElement postCopy ]).
|
|
||||||
count := count + 1 ].
|
|
||||||
|
|
||||||
asURLs := collectionURLs collect: [ :string | string asUrl ].
|
|
||||||
|
|
||||||
urlAndTweets at: 'urls' put: asURLs.
|
|
||||||
|
|
||||||
^ urlAndTweets
|
|
@ -0,0 +1,10 @@
|
|||||||
|
accessing
|
||||||
|
pageCursorFor: anUrl
	"Answer a one-entry Dictionary for the page at anUrl. The key is the string value of
	the href attribute of the link whose text is Load more; the value is the document
	tree of anUrl itself, as answered by #documentTreeFor:."

	| docTree loadMoreHref |
	docTree := self documentTreeFor: anUrl.
	loadMoreHref := ((docTree xpath: '//a[.="Load more"]') @ 'href') stringValue.
	^ Dictionary new
		at: loadMoreHref put: docTree;
		yourself
|
@ -0,0 +1,14 @@
|
|||||||
|
accessing
|
||||||
|
pageDocTrees: anInteger
	"Answer an OrderedDictionary mapping page URL -> parsed document tree for up to
	anInteger consecutive pages, starting at self userNameLink.

	Fixes over the previous version:
	- entries are stored with at:put: (the old code sent add: with a bare String, which is
	  not an Association);
	- the cursor of the next page is read from the dictionary key answered by
	  #pageCursorFor: (the old code sent #value to the dictionary, which answers the
	  dictionary itself and cannot be concatenated into a URL);
	- paging stops early when a page has no cursor for a following page."

	| response previousPageLink |
	response := OrderedDictionary new.
	previousPageLink := self userNameLink.
	response at: previousPageLink put: (self documentTreeFor: previousPageLink).
	anInteger - 1 timesRepeat: [
		| cursor nextPageLink |
		"pageCursorFor: answers a one-entry dictionary whose key is the cursor href."
		cursor := (self pageCursorFor: previousPageLink) keys first.
		cursor isEmptyOrNil ifTrue: [ ^ response ].
		nextPageLink := self userNameLink , cursor.
		response at: nextPageLink put: (self documentTreeFor: nextPageLink).
		previousPageLink := nextPageLink ].
	^ response
|
14
Socialmetrica.package/NitterUser.class/instance/pageURLs..st
Normal file
14
Socialmetrica.package/NitterUser.class/instance/pageURLs..st
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
accessing
|
||||||
|
pageURLs: anInteger
	"Answer an OrderedDictionary mapping page URL -> parsed document tree for up to
	anInteger consecutive pages, starting at self userNameLink.

	Fixes over the previous version:
	- each URL is now paired with its OWN document tree. Before, the tree answered by
	  #pageCursorFor: (which is the tree of the previous page) was stored under the next
	  page URL, so the first page appeared twice and the last page was never fetched;
	- the cursor is read from the tree already stored for the previous page, so no page
	  is downloaded more than once;
	- paging stops early when a page has no cursor for a following page;
	- the unused local dicTemp is gone."

	| response previousPageLink |
	response := OrderedDictionary new.
	previousPageLink := self userNameLink.
	response at: previousPageLink put: self documentTree.
	anInteger - 1 timesRepeat: [
		| cursor nextPageLink |
		"Same XPath as #pageCursorFor:, applied to the tree we already hold."
		cursor := (((response at: previousPageLink) xpath: '//a[.="Load more"]') @ 'href') stringValue.
		cursor isEmptyOrNil ifTrue: [ ^ response ].
		nextPageLink := self userNameLink , cursor.
		response at: nextPageLink put: (self documentTreeFor: nextPageLink).
		previousPageLink := nextPageLink ].
	^ response
|
@ -25,6 +25,6 @@ metricsFromNitterHtml: xmlItem
|
|||||||
copyReplaceAll: ','
|
copyReplaceAll: ','
|
||||||
with: '').
|
with: '').
|
||||||
|
|
||||||
metadata
|
self metadata
|
||||||
at: 'pinned'
|
at: 'pinned'
|
||||||
put: (xmlItem xpath: '//div[@class="pinned"]') stringValue trimmed = 'Pinned Tweet'
|
put: (xmlItem xpath: '//div[@class="pinned"]') stringValue trimmed = 'Pinned Tweet'
|
@ -1,4 +1,8 @@
|
|||||||
accessing
|
accessing
|
||||||
store
|
store
|
||||||
"Sends storeDB to the class when ReStore isConnected answers false, then sends store to every element of self tweets, each inside its own ReStore evaluateAsTransaction: block. Old and new versions of each statement appear one after the other below."
ReStore isConnected ifFalse: [ self class storeDB]. "Starting the ReStore singleton."
|
ReStore isConnected ifFalse: [ self class storeDB]. "Starting the ReStore singleton."
|
||||||
self tweets do: [:each | ReStore evaluateAsTransaction: [ each store ] ].
|
self tweets do: [:each | ReStore evaluateAsTransaction: [
|
||||||
|
each store.
|
||||||
|
"Disabled draft, kept as a comment: presumably meant to store the user of the tweet when it is not yet in the database - TODO confirm."
"each user id isInDB? ifFalse: [ each user store ]"
|
||||||
|
]
|
||||||
|
].
|
Loading…
Reference in New Issue
Block a user