Creating word cloud data by language, and a bug in installCommons.

This commit is contained in:
ruidajo 2022-06-09 13:43:48 -05:00
parent 07516e4dc6
commit 722b2d2066
3 changed files with 30 additions and 25 deletions

View File

@ -2,9 +2,10 @@ accessing
installCommons installCommons
| commonFiles folder | | commonFiles folder |
commonFiles := #( commonFiles := #(
'https://mutabit.com/repos.fossil/mutabit/uv/wiki/commons/nube-mascara.jpg'
'https://mutabit.com/repos.fossil/mutabit/raw?name=wiki/commons/stopwords-es.txt&ci=tip' 'https://mutabit.com/repos.fossil/mutabit/raw?name=wiki/commons/stopwords-es.txt&ci=tip'
'https://mutabit.com/repos.fossil/mutabit/raw?name=wiki/commons/stopwords-en.txt&ci=tip'
'https://mutabit.com/repos.fossil/mutabit/uv/wiki/commons/nube-mascara.jpg'
'https://mutabit.com/repos.fossil/mutabit/uv/wiki/commons/logo-mutabit-negro.png'). 'https://mutabit.com/repos.fossil/mutabit/uv/wiki/commons/logo-mutabit-negro.png').
folder := TweetsCollection dataStore / 'commons'. folder := TweetsCollection dataStore / 'commons'.
folder exists folder exists
@ -13,8 +14,8 @@ installCommons
commonFiles do: [ :fileUrl | commonFiles do: [ :fileUrl |
ZnClient new ZnClient new
url: fileUrl; url: fileUrl;
downloadTo: folder]. downloadTo: folder.
(folder children detect: [ :file | file basename includesSubstring: 'raw' ]) (folder children select: [ :file | file basename includesSubstring: 'raw' ])
renameTo: (((commonFiles second splitOn: 'raw?') second splitOn: '/') last removeSuffix: '&ci=tip'). renameTo: (((fileUrl splitOn: 'raw?') second splitOn: '/') last removeSuffix: '&ci=tip')
^ folder ].
^ folder

View File

@ -1,21 +1,4 @@
accessing accessing
wordcloudData wordcloudData
| stopwords stopwordsCapitalized occurrencesWords wordAndValue | ^ self wordcloudDataLanguage: 'en'
stopwords := (TweetsCollection dataStore / 'commons' / 'stopwords-es.txt') contents splitOn: Character lf.
stopwordsCapitalized := stopwords collect: [:each | each first asString asUppercase, each allButFirst asLowercase ].
occurrencesWords := ((((self writeWordsFile contents) splitOn: ' ') asBag asDictionary)
associations asSortedCollection: [:x :y | x value > y value]) asOrderedDictionary.
occurrencesWords removeKeys: stopwords.
occurrencesWords removeKeys: stopwordsCapitalized.
occurrencesWords removeKey: ''.
occurrencesWords size > 50 ifTrue: [
occurrencesWords := (occurrencesWords associations copyFrom: 1 to: 50) asOrderedDictionary ].
wordAndValue := OrderedCollection new.
occurrencesWords keysAndValuesDo: [ :k :v |
wordAndValue add: ('{name:', ($' asString), k, ($' asString), ',value:', v asString, '}')
].
^ {'[', ((',' join: wordAndValue) copyWithout: Character lf), ']'.
occurrencesWords}

View File

@ -0,0 +1,21 @@
accessing
wordcloudDataLanguage: language
	"Answer a two-element Array describing the most frequent words of the receiver.

	language is the suffix used to locate the stop-word list, i.e. the file
	'commons/stopwords-<language>.txt' inside TweetsCollection dataStore
	(one stop word per line).

	The first element is a String of the form
		[{name:'word',value:N},{name:'other',value:M},...]
	and the second element is the ordered dictionary (word -> occurrences) the
	string was built from. Words come from splitting the contents of
	'self writeWordsFile' on spaces; stop words (as written and with only the
	first letter uppercased) and the empty token are discarded, and at most
	the 50 most frequent words are kept."

	| stopwords stopwordsCapitalized occurrencesWords wordAndValue |
	"Trim each line and drop blank ones: a trailing newline or CRLF line endings
	would otherwise yield empty or CR-terminated entries, and 'each first' below
	fails on an empty string."
	stopwords := (((TweetsCollection dataStore / 'commons' / ('stopwords-', language, '.txt')) contents
		splitOn: Character lf)
			collect: [ :each | each trimBoth ])
				reject: [ :each | each isEmpty ].
	stopwordsCapitalized := stopwords collect: [ :each |
		each first asString asUppercase, each allButFirst asLowercase ].
	"Count every token and order the counts from most to least frequent."
	occurrencesWords := ((((self writeWordsFile contents) splitOn: ' ') asBag asDictionary)
		associations asSortedCollection: [ :x :y | x value > y value ]) asOrderedDictionary.
	occurrencesWords removeKeys: stopwords.
	occurrencesWords removeKeys: stopwordsCapitalized.
	"The empty token only exists when the text has consecutive or trailing spaces,
	so its absence must not raise an error."
	occurrencesWords removeKey: '' ifAbsent: [ nil ].
	occurrencesWords size > 50 ifTrue: [
		occurrencesWords := (occurrencesWords associations copyFrom: 1 to: 50) asOrderedDictionary ].
	wordAndValue := OrderedCollection new.
	occurrencesWords keysAndValuesDo: [ :k :v |
		wordAndValue add: ('{name:', ($' asString), k, ($' asString), ',value:', v asString, '}') ].
	"Line feeds embedded in words are stripped so the answer stays on a single line."
	^ { '[', ((',' join: wordAndValue) copyWithout: Character lf), ']'.
		occurrencesWords }