MiniDocs/src/MiniDocs/Array.extension.st

46 lines
1.3 KiB
Smalltalk

Extension { #name : #Array }
{ #category : #'*MiniDocs' }
Array >> bagOfWordsFor: sentenceArray [
	"A small utility for machine-learning style text processing.
	Inspired by https://youtu.be/8qwowmiXANQ?t=1144.
	This probably belongs in [Polyglot](https://github.com/pharo-ai/Polyglot),
	but that repository is pretty inactive (commits 2 or more years old and no response to issues),
	so meanwhile it lives in MiniDocs.

	The receiver is the vocabulary and sentenceArray holds the tokens of one sentence.
	Answer a new Array of the same size as the receiver, holding 1 at every position
	whose vocabulary word was found through a sentence token and 0 everywhere else.

	Given sentence := #('hello' 'how' 'are' 'you')
	and testVocabulary := #('hi' 'hello' 'I' 'you' 'bye' 'thank' 'you')
	then
		testVocabulary bagOfWordsFor: sentence.
	should give: #(0 1 0 1 0 0 0)

	As the example shows, when a word is repeated in the vocabulary only its
	first position is flagged, because the lookup relies on indexOf:."
	| presence |
	"Start with every vocabulary slot marked as absent."
	presence := Array new: self size withAll: 0.
	sentenceArray do: [ :word | | position |
		position := self indexOf: word.
		"indexOf: answers 0 when the word is not in the vocabulary."
		position = 0 ifFalse: [ presence at: position put: 1 ] ].
	^ presence
]
{ #category : #'*MiniDocs' }
Array >> replaceWithUniqueNilsAndBooleans [
	"Answer a collection where every nil, true or false element of the receiver
	has been replaced by a string made of its printed name, a dash and the first
	three characters of a freshly generated NanoID (for example something shaped
	like 'nil-a1B'); all other elements are kept as they are, in the same order.

	When the receiver holds no nil, true or false, the receiver itself is answered
	(no copy is made). Otherwise the answer is a new OrderedCollection, so callers
	should not rely on getting an Array back.

	NOTE(review): the suffix is only three characters long, so uniqueness of the
	replacements presumably depends on NanoID randomness and is not checked here."
	| replaced |
	"Nothing to replace: hand back the receiver untouched."
	(self includesAny: #(true false nil)) ifFalse: [ ^ self ].
	replaced := OrderedCollection new.
	self do: [ :element |
		replaced add: ((element isBoolean or: [ element isNil ])
			ifTrue: [ element asString, '-', (NanoID generate copyFrom: 1 to: 3) ]
			ifFalse: [ element ]) ].
	^ replaced
]