Socialmetrica/Socialmetrica.package/NitterUser.class/instance/wordcloudDataLanguage..st

21 lines
1.0 KiB
Smalltalk
Raw Normal View History

accessing
wordcloudDataLanguage: language
	"Answer a two-element array built from the words found in the file answered by #writeWordsFile.
	 First element: a string of the form [{name:'word',value:count},...] with at most the 50 most
	 frequent words, most frequent first. Second element: the ordered dictionary (word to count)
	 that string was built from.
	 language is concatenated into the stopwords file name: 'stopwords-', language, '.txt', looked up
	 in the 'commons' folder of TweetsCollection dataStore. Each stopword is discarded both as written
	 in that file and with its first letter uppercased and the remaining letters lowercased."

	| stopwords stopwordsCapitalized occurrencesWords wordAndValue |
	"Drop blank lines (for instance the one produced by a trailing line feed): sending #first to an
	 empty string when building stopwordsCapitalized would signal an error."
	stopwords := ((TweetsCollection dataStore / 'commons' / ('stopwords-', language, '.txt')) contents splitOn: Character lf)
		reject: [ :each | each isEmpty ].
	stopwordsCapitalized := stopwords collect: [ :each | each first asString asUppercase, each allButFirst asLowercase ].
	"Count every space-separated token, then order the counts from highest to lowest."
	occurrencesWords := ((((self writeWordsFile contents) splitOn: ' ') asBag asDictionary)
		associations asSortedCollection: [ :x :y | x value > y value ]) asOrderedDictionary.
	occurrencesWords removeKeys: stopwords.
	occurrencesWords removeKeys: stopwordsCapitalized.
	"Consecutive spaces produce empty tokens; there may be none, so do not fail when the key is absent."
	occurrencesWords removeKey: '' ifAbsent: [ nil ].
	occurrencesWords size > 50 ifTrue: [
		occurrencesWords := (occurrencesWords associations copyFrom: 1 to: 50) asOrderedDictionary ].
	wordAndValue := OrderedCollection new.
	occurrencesWords keysAndValuesDo: [ :k :v |
		wordAndValue add: '{name:''', k, ''',value:', v asString, '}' ].
	"Line feeds inside words are stripped from the serialized string only; the dictionary keys are left as they are."
	^ { '[', ((',' join: wordAndValue) copyWithout: Character lf), ']'.
		occurrencesWords }