46 lines
1.3 KiB
Smalltalk
46 lines
1.3 KiB
Smalltalk
Extension { #name : #Array }
|
|
|
|
{ #category : #'*MiniDocs' }
|
|
Array >> bagOfWordsFor: sentenceArray [
	"A small utility for machine-learning training: answer the bag-of-words encoding of sentenceArray, using the receiver as the vocabulary.
	Inspired by https://youtu.be/8qwowmiXANQ?t=1144.
	This should probably be moved to [Polyglot](https://github.com/pharo-ai/Polyglot),
	but that repository is pretty inactive (commits are 2 or more years old and there is no response to issues).
	Meanwhile, it lives in MiniDocs.

	sentenceArray is any collection of tokens that understands #do:.
	The answer is a new Array with the same size as the receiver, holding 1 at the position of every vocabulary word found in sentenceArray and 0 everywhere else.
	Tokens that are not part of the vocabulary are ignored.
	Words are located with #indexOf:, so when a word is repeated in the vocabulary only its first position is marked (see the second 'you' in the example below).

	Given the sentence := #('hello' 'how' 'are' 'you')
	and the testVocabulary := #('hi' 'hello' 'I' 'you' 'bye' 'thank' 'you')
	then

	testVocabulary bagOfWordsFor: sentence.

	Should give: #(0 1 0 1 0 0 0)
	"

	| bagOfWords |
	"Start with an all-zero vector, one slot per vocabulary word."
	bagOfWords := Array new: self size withAll: 0.
	sentenceArray do: [ :token | | index |
		"indexOf: answers 0 when the token is not in the vocabulary."
		index := self indexOf: token.
		index > 0 ifTrue: [ bagOfWords at: index put: 1 ] ].
	^ bagOfWords
]
|
|
|
|
{ #category : #'*MiniDocs' }
|
|
Array >> replaceWithUniqueNilsAndBooleans [
	"Answer the receiver itself when it holds no true, false or nil.
	Otherwise answer a new OrderedCollection with the same items in the same order, where every boolean or nil has been replaced by a string built from its asString, a dash and the first three characters of a freshly generated NanoID (for instance 'nil-' followed by three characters).
	Note that the answer is an Array in the first case and an OrderedCollection in the second."

	| replaced |
	"Nothing to replace: hand back the receiver untouched."
	(self includesAny: #(true false nil)) ifFalse: [ ^ self ].
	replaced := OrderedCollection new.
	self do: [ :each |
		replaced add: ((each isBoolean or: [ each isNil ])
			ifTrue: [ each asString, '-', (NanoID generate copyFrom: 1 to: 3) ]
			ifFalse: [ each ]) ].
	^ replaced
]
|