From d9af46ec37fabd8cf524deeca52363b4c1d52b34 Mon Sep 17 00:00:00 2001 From: ruidajo Date: Fri, 8 Apr 2022 22:02:27 -0500 Subject: [PATCH] Improving tweet parsing from nitter timeline item. --- .../instance/fromNitterTimelineItem..st | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/Socialmetrica.package/Tweet.class/instance/fromNitterTimelineItem..st b/Socialmetrica.package/Tweet.class/instance/fromNitterTimelineItem..st index 84df717..ecd75d4 100644 --- a/Socialmetrica.package/Tweet.class/instance/fromNitterTimelineItem..st +++ b/Socialmetrica.package/Tweet.class/instance/fromNitterTimelineItem..st @@ -1,12 +1,28 @@ accessing fromNitterTimelineItem: xmlItem - | author authorAndId | + | author authorAndId metricsTemp timeTemp | authorAndId := (((xmlItem xpath: '//a[@class="tweet-link"]') asString splitOn: 'href="' ) second splitOn: '/') reject: [ :i | i isEmpty or: [i = '>)']]. author := authorAndId first. user := NitterUser new userName: author . - "created := (xmlItem xpath: 'pubDate') stringValue. - text := (xmlItem xpath: 'description') stringValue." + + timeTemp := (((xmlItem xpath: '//span[@class="tweet-date"]')asString splitOn: 'title="') second splitOn: '">')first. + timeTemp := ((timeTemp copyReplaceAll: ' · ' with: ' ') copyReplaceAll: 'UTC' with: '+00:00') asDateAndTime. + created := timeTemp. + + text := (xmlItem xpath: '//div[@class="tweet-content media-body"]')stringValue. id := authorAndId last copyReplaceAll: '#m"' with: ''. - authorId := self user id. \ No newline at end of file + authorId := self user id. + + metricsTemp := Dictionary new . 
+ metricsTemp + at: 'comment' + put: (xmlItem xpath: '//div[@class="icon-container"]') second stringValue trimmed; + at: 'retweet' + put: (xmlItem xpath: '//div[@class="icon-container"]') third stringValue trimmed; + at: 'quote' + put: (xmlItem xpath: '//div[@class="icon-container"]') fourth stringValue trimmed; + at: 'heart' + put: (xmlItem xpath: '//div[@class="icon-container"]') fifth stringValue trimmed. + metrics := metricsTemp. \ No newline at end of file