Creating metadata parsing from nitter html.

This commit is contained in:
ruidajo 2022-04-18 18:44:52 -05:00
parent 906b29a7ef
commit a687e1b688
2 changed files with 34 additions and 24 deletions

View File

@ -0,0 +1,14 @@
accessing
metadataFromNitterHtml: xmlItem
	"Fill the receiver's metadata with three entries queried from xmlItem via XPath:
	   'pinned'    -> true when the trimmed text of the div with class pinned equals 'Pinned Tweet', false otherwise.
	   'replie to' -> trimmed text of the replying-to div inside the tweet body.
	   'quote'     -> path of the quoted tweet taken from the href of the quote link (without its leading slash), or nil when no quote link is found.
	 The key 'replie to' is kept exactly as spelled because it is read back at runtime.
	 xmlItem must understand xpath: (presumably an XML node parsed from Nitter HTML; verify against callers).
	 NOTE(review): every expression starts with //, which in XPath 1.0 searches from the document root and not from xmlItem itself; confirm that xmlItem is a standalone document per tweet."

	| quoteLinks quoteParts |
	quoteLinks := xmlItem xpath:
		'//div[@class="tweet-body"]//div[@class="quote quote-big"]//a[@class="quote-link"]'.
	"The link target is cut out of the printed node set: everything after the href marker,
	 minus the closing characters of the printed tag and collection.
	 NOTE(review): this relies on the print format of the node set; verify when upgrading the XML libraries."
	quoteParts := quoteLinks postCopy asString splitOn: 'href="/'.
	self metadata
		at: 'pinned'
		put: (xmlItem xpath: '//div[@class="pinned"]') stringValue trimmed = 'Pinned Tweet';
		at: 'replie to'
		put: (xmlItem xpath: '//div[@class="tweet-body"]//div[@class="replying-to"]') stringValue trimmed;
		at: 'quote'
		put: (quoteParts size > 1
			"Without a quote link the split yields a single piece and sending second would signal an error."
			ifTrue: [ quoteParts second removeSuffix: '"/>)' ]
			ifFalse: [ nil ])

View File

@ -1,28 +1,24 @@
accessing accessing
metricsFromNitterHtml: xmlItem metricsFromNitterHtml: xmlItem
"TO DO: Metrics scrapping are not consistent. "TO DO: Metrics scrapping are not consistent.
Most times they store numbers, but sometimes, retweets store a userName" Most times they store numbers, but sometimes, retweets store a userName"
self metrics self metrics
at: 'comment' at: 'comment' put: (((xmlItem xpath:
put: (((xmlItem xpath: '//div[@class="tweet-stats"]//div[@class="icon-container"]') '//div[@class="tweet-stats"]//div[@class="icon-container"]')
select: [ :item | item asString includesSubstring: 'comment' ]) select: [ :item | item asString includesSubstring: 'comment' ])
stringValue trimmed copyReplaceAll: ',' with: ''); stringValue trimmed copyReplaceAll: ',' with: '');
at: 'retweet' at: 'retweet' put: (((xmlItem xpath:
put: (((xmlItem xpath: '//div[@class="tweet-stats"]//div[@class="icon-container"]') '//div[@class="tweet-stats"]//div[@class="icon-container"]')
select: [ :item | item asString includesSubstring: 'retweet' ]) select: [ :item | item asString includesSubstring: 'retweet' ])
stringValue trimmed copyReplaceAll: ',' with: ''); stringValue trimmed copyReplaceAll: ',' with: '');
at: 'quote' at: 'quote' put: (((xmlItem xpath:
put: (((xmlItem xpath: '//div[@class="tweet-stats"]//div[@class="icon-container"]') '//div[@class="tweet-stats"]//div[@class="icon-container"]')
select: [ :item | item asString includesSubstring: 'quote' ]) select: [ :item | item asString includesSubstring: 'quote' ])
stringValue trimmed copyReplaceAll: ',' with: ''); stringValue trimmed copyReplaceAll: ',' with: '');
at: 'heart' at: 'heart' put: (((xmlItem xpath:
put: (((xmlItem xpath: '//div[@class="tweet-stats"]//div[@class="icon-container"]') '//div[@class="tweet-stats"]//div[@class="icon-container"]')
select: [ :item | item asString includesSubstring: 'heart' ]) select: [ :item | item asString includesSubstring: 'heart' ])
stringValue trimmed copyReplaceAll: ',' with: ''). stringValue trimmed copyReplaceAll: ',' with: '').
self metadata self metadataFromNitterHtml: xmlItem
at: 'pinned'
put: (xmlItem xpath: '//div[@class="pinned"]') stringValue trimmed = 'Pinned Tweet';
at: 'replie to'
put: (xmlItem xpath: '//div[@class="tweet-body"]//div[@class="replying-to"]') stringValue trimmed;
at: 'quote'
put: ((((((xmlItem xpath: '//div[@class="tweet-body"]//div[@class="quote quote-big"]//a[@class="quote-link"]') postCopy) asString) splitOn: 'href="/') second) removeSuffix: '"/>)')