Creating word cloud data by language, and a BUG in installCommons.
This commit is contained in:
parent
07516e4dc6
commit
722b2d2066
@ -2,9 +2,10 @@ accessing
|
||||
installCommons
|
||||
|
||||
| commonFiles folder |
|
||||
commonFiles := #(
|
||||
'https://mutabit.com/repos.fossil/mutabit/uv/wiki/commons/nube-mascara.jpg'
|
||||
commonFiles := #(
|
||||
'https://mutabit.com/repos.fossil/mutabit/raw?name=wiki/commons/stopwords-es.txt&ci=tip'
|
||||
'https://mutabit.com/repos.fossil/mutabit/raw?name=wiki/commons/stopwords-en.txt&ci=tip'
|
||||
'https://mutabit.com/repos.fossil/mutabit/uv/wiki/commons/nube-mascara.jpg'
|
||||
'https://mutabit.com/repos.fossil/mutabit/uv/wiki/commons/logo-mutabit-negro.png').
|
||||
folder := TweetsCollection dataStore / 'commons'.
|
||||
folder exists
|
||||
@ -13,8 +14,8 @@ installCommons
|
||||
commonFiles do: [ :fileUrl |
|
||||
ZnClient new
|
||||
url: fileUrl;
|
||||
downloadTo: folder].
|
||||
(folder children detect: [ :file | file basename includesSubstring: 'raw' ])
|
||||
renameTo: (((commonFiles second splitOn: 'raw?') second splitOn: '/') last removeSuffix: '&ci=tip').
|
||||
^ folder
|
||||
|
||||
downloadTo: folder.
|
||||
(folder children select: [ :file | file basename includesSubstring: 'raw' ])
|
||||
renameTo: (((fileUrl splitOn: 'raw?') second splitOn: '/') last removeSuffix: '&ci=tip')
|
||||
].
|
||||
^ folder
|
@ -1,21 +1,4 @@
|
||||
accessing
|
||||
wordcloudData
|
||||
|
||||
| stopwords stopwordsCapitalized occurrencesWords wordAndValue |
|
||||
stopwords := (TweetsCollection dataStore / 'commons' / 'stopwords-es.txt') contents splitOn: Character lf.
|
||||
stopwordsCapitalized := stopwords collect: [:each | each first asString asUppercase, each allButFirst asLowercase ].
|
||||
occurrencesWords := ((((self writeWordsFile contents) splitOn: ' ') asBag asDictionary)
|
||||
associations asSortedCollection: [:x :y | x value > y value]) asOrderedDictionary.
|
||||
occurrencesWords removeKeys: stopwords.
|
||||
occurrencesWords removeKeys: stopwordsCapitalized.
|
||||
occurrencesWords removeKey: ''.
|
||||
|
||||
occurrencesWords size > 50 ifTrue: [
|
||||
occurrencesWords := (occurrencesWords associations copyFrom: 1 to: 50) asOrderedDictionary ].
|
||||
|
||||
wordAndValue := OrderedCollection new.
|
||||
occurrencesWords keysAndValuesDo: [ :k :v |
|
||||
wordAndValue add: ('{name:', ($' asString), k, ($' asString), ',value:', v asString, '}')
|
||||
].
|
||||
^ {'[', ((',' join: wordAndValue) copyWithout: Character lf), ']'.
|
||||
occurrencesWords}
|
||||
^ self wordcloudDataLanguage: 'en'
|
@ -0,0 +1,21 @@
|
||||
accessing
|
||||
wordcloudDataLanguage: language
	"Answer a two-element array describing the word cloud of the receiver:
	   1. a String with a bracketed, comma-separated list of {name:'word',value:count} entries;
	   2. the OrderedDictionary (word -> occurrences) those entries were built from.
	language is the suffix used to locate the stopwords file, which is read from
	TweetsCollection dataStore / 'commons' / 'stopwords-<language>.txt'.
	Words come from splitting the contents of 'self writeWordsFile' on spaces. Stopwords
	(as listed, and with only their first letter capitalized) and the empty token are
	dropped; at most the 50 most frequent remaining words are kept."

	| stopwords stopwordsCapitalized occurrencesWords wordAndValue |
	"Trim every line and skip blank ones: a trailing newline in the file would otherwise
	produce an empty entry (on which 'first' fails), and CRLF files would leave a stray CR
	on every stopword so that none of them matches."
	stopwords := (((TweetsCollection dataStore / 'commons' / ('stopwords-', language, '.txt')) contents
		splitOn: Character lf)
			collect: [ :each | each trimBoth ])
			reject: [ :each | each isEmpty ].
	stopwordsCapitalized := stopwords collect: [ :each |
		each first asString asUppercase, each allButFirst asLowercase ].

	"Count occurrences and order them from most to least frequent."
	occurrencesWords := ((((self writeWordsFile contents) splitOn: ' ') asBag asDictionary)
		associations asSortedCollection: [ :x :y | x value > y value ]) asOrderedDictionary.

	"Most stopwords will not occur in the text, and the empty token only shows up when the
	text has consecutive spaces, so every removal must tolerate an absent key."
	stopwords do: [ :each | occurrencesWords removeKey: each ifAbsent: [ ] ].
	stopwordsCapitalized do: [ :each | occurrencesWords removeKey: each ifAbsent: [ ] ].
	occurrencesWords removeKey: '' ifAbsent: [ ].

	"Keep only the 50 most frequent words."
	occurrencesWords size > 50 ifTrue: [
		occurrencesWords := (occurrencesWords associations copyFrom: 1 to: 50) asOrderedDictionary ].

	"NOTE(review): words are emitted between single quotes without escaping, so a word that
	itself contains a quote (e.g. an English contraction) yields a malformed entry - confirm
	what the consumer of this string expects before changing the format."
	wordAndValue := OrderedCollection new.
	occurrencesWords keysAndValuesDo: [ :k :v |
		wordAndValue add: ('{name:', ($' asString), k, ($' asString), ',value:', v asString, '}') ].

	"Line feeds inside words are stripped from the serialized list only; the dictionary keeps them."
	^ { '[', ((',' join: wordAndValue) copyWithout: Character lf), ']'.
		occurrencesWords }
|
Loading…
Reference in New Issue
Block a user