Creating metadata parsing from nitter html.

This commit is contained in:
ruidajo 2022-04-18 18:44:52 -05:00
parent 906b29a7ef
commit a687e1b688
2 changed files with 34 additions and 24 deletions

View File

@ -0,0 +1,14 @@
accessing
metadataFromNitterHtml: xmlItem
	"Fill the receiver's metadata dictionary from the Nitter HTML held in xmlItem.
	Keys written:
	- 'pinned': true when the text of the div with class pinned is exactly 'Pinned Tweet', false otherwise.
	- 'replie to': trimmed text of the replying-to div inside the tweet body. The key spelling is kept as is, because other code may already read it.
	- 'quote': path of the quoted tweet, taken from the href of the quote-link anchor without its leading slash, or nil when the tweet quotes nothing.
	NOTE(review): every XPath here starts with //, which in XPath 1.0 selects from the document root rather than from the context node; confirm that xmlItem holds a single tweet."

	| hrefMarker quoteLinkDump quotedPath |
	hrefMarker := 'href="/'.
	"The quote path is cut out of the printed form of the XPath result, so the marker and suffix below depend on how that result prints itself."
	quoteLinkDump := (xmlItem xpath:
		'//div[@class="tweet-body"]//div[@class="quote quote-big"]//a[@class="quote-link"]')
		postCopy asString.
	"Without the marker, splitOn: answers a single piece and sending second to it would signal an error; store nil instead."
	quotedPath := (quoteLinkDump includesSubstring: hrefMarker)
		ifTrue: [ (quoteLinkDump splitOn: hrefMarker) second removeSuffix: '"/>)' ]
		ifFalse: [ nil ].
	self metadata
		at: 'pinned'
		put: ((xmlItem xpath: '//div[@class="pinned"]') stringValue trimmed
			= 'Pinned Tweet');
		at: 'replie to'
		put: (xmlItem xpath:
			'//div[@class="tweet-body"]//div[@class="replying-to"]')
			stringValue trimmed;
		at: 'quote' put: quotedPath

View File

@ -1,28 +1,24 @@
accessing accessing
metricsFromNitterHtml: xmlItem metricsFromNitterHtml: xmlItem
"TO DO: Metrics scraping is not consistent. "TO DO: Metrics scraping is not consistent.
Most times they store numbers, but sometimes, retweets store a userName" Most times they store numbers, but sometimes, retweets store a userName"
self metrics
at: 'comment' self metrics
put: (((xmlItem xpath: '//div[@class="tweet-stats"]//div[@class="icon-container"]') at: 'comment' put: (((xmlItem xpath:
select: [ :item | item asString includesSubstring: 'comment' ]) '//div[@class="tweet-stats"]//div[@class="icon-container"]')
stringValue trimmed copyReplaceAll: ',' with: ''); select: [ :item | item asString includesSubstring: 'comment' ])
at: 'retweet' stringValue trimmed copyReplaceAll: ',' with: '');
put: (((xmlItem xpath: '//div[@class="tweet-stats"]//div[@class="icon-container"]') at: 'retweet' put: (((xmlItem xpath:
select: [ :item | item asString includesSubstring: 'retweet' ]) '//div[@class="tweet-stats"]//div[@class="icon-container"]')
stringValue trimmed copyReplaceAll: ',' with: ''); select: [ :item | item asString includesSubstring: 'retweet' ])
at: 'quote' stringValue trimmed copyReplaceAll: ',' with: '');
put: (((xmlItem xpath: '//div[@class="tweet-stats"]//div[@class="icon-container"]') at: 'quote' put: (((xmlItem xpath:
select: [ :item | item asString includesSubstring: 'quote' ]) '//div[@class="tweet-stats"]//div[@class="icon-container"]')
stringValue trimmed copyReplaceAll: ',' with: ''); select: [ :item | item asString includesSubstring: 'quote' ])
at: 'heart' stringValue trimmed copyReplaceAll: ',' with: '');
put: (((xmlItem xpath: '//div[@class="tweet-stats"]//div[@class="icon-container"]') at: 'heart' put: (((xmlItem xpath:
select: [ :item | item asString includesSubstring: 'heart' ]) '//div[@class="tweet-stats"]//div[@class="icon-container"]')
stringValue trimmed copyReplaceAll: ',' with: ''). select: [ :item | item asString includesSubstring: 'heart' ])
self metadata stringValue trimmed copyReplaceAll: ',' with: '').
at: 'pinned' self metadataFromNitterHtml: xmlItem
put: (xmlItem xpath: '//div[@class="pinned"]') stringValue trimmed = 'Pinned Tweet';
at: 'replie to'
put: (xmlItem xpath: '//div[@class="tweet-body"]//div[@class="replying-to"]') stringValue trimmed;
at: 'quote'
put: ((((((xmlItem xpath: '//div[@class="tweet-body"]//div[@class="quote quote-big"]//a[@class="quote-link"]') postCopy) asString) splitOn: 'href="/') second) removeSuffix: '"/>)')