Finishing Basic Nitter profile data extraction.

This commit is contained in:
Offray Vladimir Luna Cárdenas 2021-12-31 22:30:18 -05:00
parent 679e33318f
commit 15db771f80
2 changed files with 8 additions and 3 deletions

View File

@ -1,3 +1,3 @@
accessing accessing
createdAtShorted createdAtShorted
^ self createdAt copyFrom: 1 to: 7 ^ self createdAt asString copyFrom: 1 to: 7

View File

@ -1,9 +1,14 @@
accessing accessing
fromNitterProfile: userNameString fromNitterProfile: userNameString
| rssFeed title | | nitterProfileLink rssFeed title nitterDocTree joinDateString |
rssFeed := RSSTools createRSSFeedFor: 'https://nitter.net/', userNameString, '/rss'. nitterProfileLink := 'https://nitter.net/', userNameString.
rssFeed := RSSTools createRSSFeedFor: nitterProfileLink, '/rss'.
title := rssFeed requiredItems title. title := rssFeed requiredItems title.
name := (title splitOn: '/') first trimmed. name := (title splitOn: '/') first trimmed.
"Technically we could just do 'userName' = 'userNameString'. But we want to capture also how it is expressed in the RSS." "Technically we could just do 'userName' = 'userNameString'. But we want to capture also how it is expressed in the RSS."
userName := ((title splitOn: '/') second trimmed) allButFirst. "Taking out the '@' sign." userName := ((title splitOn: '/') second trimmed) allButFirst. "Taking out the '@' sign."
profileImageUrl := (rssFeed xmlDocument xpath: '//image/url') stringValue copyReplaceAll: '%2F' with: '/'. profileImageUrl := (rssFeed xmlDocument xpath: '//image/url') stringValue copyReplaceAll: '%2F' with: '/'.
nitterDocTree := (XMLDOMParser on: nitterProfileLink asUrl retrieveContents) parseDocument.
description := (nitterDocTree xpath: '//div[@class="profile-bio"]') stringValue.
joinDateString := ((nitterDocTree xpath: '//div[@class="profile-joindate"]/span/@title') stringValue).
createdAt := (ZTimestampFormat fromString:'4:05 PM - 03 Feb 2001') parse: joinDateString.