Fixing tweet from nitter html and collecting tweets for nitteruser.

This commit is contained in:
ruidajo 2022-04-12 18:13:31 -05:00
parent bcf67809a8
commit a445100e58
4 changed files with 25 additions and 10 deletions

View File

@ -1,18 +1,32 @@
accessing accessing
collectRawTweetsForPages: anInteger collectRawTweetsForPages: anInteger
| pagesDict response | | pagesDict response customQuery |
pagesDict := self getPagesContentsUpto: anInteger. pagesDict := self getPagesContentsUpto: anInteger.
response := TweetsCollection new. response := TweetsCollection new.
response query: pagesDict keys. customQuery := Dictionary new
at: 'parameters' put: pagesDict keys;
at: 'date' put: DateAndTime now;
yourself.
response query: customQuery.
pagesDict keysAndValuesDo: [ :key :rawTweets | | temp | pagesDict keysAndValuesDo: [ :key :rawTweets | | temp |
temp := (rawTweets xpath: '//div[@class="timeline-item "]') asOrderedCollection temp := (rawTweets xpath: '//div[@class="timeline-item "]') asOrderedCollection
collect: [ :xmlElement | xmlElement postCopy ]. collect: [ :xmlElement | xmlElement postCopy ].
temp do: [ :tweet | | tempTweet | temp do: [ :tweet | | tempTweet |
tempTweet := Tweet new fromNitterHtmlItem: tweet. tempTweet := Tweet new fromNitterHtmlItem: tweet.
tempTweet queries add: key. tempTweet metadata
at: DateAndTime now put:key;
yourself.
response tweets add: tempTweet. response tweets add: tempTweet.
] ]
]. ].
response tweets doWithIndex: [ :tweet :i |
| current previous |
current := response tweets at: i.
i < response tweets size ifTrue: [
previous := response tweets at: i + 1.
current timelines
at: self userName put: previous id;
yourself ]].
^ response. ^ response.

View File

@ -1,10 +1,11 @@
accessing accessing
reStoreDefinition reStoreDefinition
^ super reStoreDefinition ^ super reStoreDefinition
defineAsID: #id; define: #id as: String;
define: #text as: String; define: #text as: String;
define: #created as: String; define: #created as: String;
define: #authorId as: String; define: #authorId as: String;
define: #timelines as: (Dictionary of: String -> String); define: #timelines as: (Dictionary of: String -> String);
define: #metrics as: (Dictionary of: String -> String); define: #metrics as: (Dictionary of: String -> String);
define: #metadata as: (Dictionary of: String -> String);
yourself yourself

View File

@ -13,8 +13,8 @@ fromNitterHtmlItem: xmlItem
copyReplaceAll: 'UTC' copyReplaceAll: 'UTC'
with: '+00:00') asDateAndTime. with: '+00:00') asDateAndTime.
created := timeTemp. created := timeTemp.
text := (xmlItem xpath: '//div[@class="tweet-content media-body"]') text := ((xmlItem xpath: '//div[@class="tweet-content media-body"]') asString allButLast)
stringValue. copyReplaceAll: 'a XPathNodeSet(' with: '' .
id := authorAndId last copyReplaceAll: '#m"' with: ''. id := authorAndId last copyReplaceAll: '#m"' with: ''.
authorId := self user id. authorId := self user id.
self metricsFromNitterHtml: xmlItem self metricsFromNitterHtml: xmlItem

View File

@ -4,19 +4,19 @@ metricsFromNitterHtml: xmlItem
Most times they store numbers, but sometimes, retweets store a userName" Most times they store numbers, but sometimes, retweets store a userName"
self metrics self metrics
at: 'comment' at: 'comment'
put: (((xmlItem xpath: '//div[@class="icon-container"]') put: (((xmlItem xpath: '//div[@class="tweet-stats"]//div[@class="icon-container"]')
select: [ :item | item asString includesSubstring: 'comment' ]) select: [ :item | item asString includesSubstring: 'comment' ])
stringValue trimmed copyReplaceAll: ',' with: ''); stringValue trimmed copyReplaceAll: ',' with: '');
at: 'retweet' at: 'retweet'
put: (((xmlItem xpath: '//div[@class="icon-container"]') put: (((xmlItem xpath: '//div[@class="tweet-stats"]//div[@class="icon-container"]')
select: [ :item | item asString includesSubstring: 'retweet' ]) select: [ :item | item asString includesSubstring: 'retweet' ])
stringValue trimmed copyReplaceAll: ',' with: ''); stringValue trimmed copyReplaceAll: ',' with: '');
at: 'quote' at: 'quote'
put: (((xmlItem xpath: '//div[@class="icon-container"]') put: (((xmlItem xpath: '//div[@class="tweet-stats"]//div[@class="icon-container"]')
select: [ :item | item asString includesSubstring: 'quote' ]) select: [ :item | item asString includesSubstring: 'quote' ])
stringValue trimmed copyReplaceAll: ',' with: ''); stringValue trimmed copyReplaceAll: ',' with: '');
at: 'heart' at: 'heart'
put: (((xmlItem xpath: '//div[@class="icon-container"]') put: (((xmlItem xpath: '//div[@class="tweet-stats"]//div[@class="icon-container"]')
select: [ :item | item asString includesSubstring: 'heart' ]) select: [ :item | item asString includesSubstring: 'heart' ])
stringValue trimmed copyReplaceAll: ',' with: ''). stringValue trimmed copyReplaceAll: ',' with: '').
self metadata self metadata