diff --git a/src/MiniDocs/Array.extension.st b/src/MiniDocs/Array.extension.st
new file mode 100644
index 0000000..ee27e93
--- /dev/null
+++ b/src/MiniDocs/Array.extension.st
@@ -0,0 +1,31 @@
+Extension { #name : #Array }
+
+{ #category : #'*MiniDocs' }
+Array >> bagOfWordsFor: sentenceArray [
+	"Answer a bag-of-words vector for sentenceArray, using the receiver as the vocabulary.
+	The answer is a new Array with the same size as the receiver, holding 1 at each position
+	whose vocabulary word occurs in sentenceArray and 0 everywhere else.
+	Tokens missing from the vocabulary are ignored. When a word is repeated in the
+	vocabulary, only its first position is flagged (see the trailing 'you' below).
+
+	A small machine learning helper, inspired by https://youtu.be/8qwowmiXANQ?t=1144.
+	It probably belongs in [Polyglot](https://github.com/pharo-ai/Polyglot), but that
+	repository is rather inactive (commits two or more years old, no response to issues),
+	so it lives in MiniDocs for now.
+
+	Example:
+
+		sentence := #('hello' 'how' 'are' 'you').
+		testVocabulary := #('hi' 'hello' 'I' 'you' 'bye' 'thank' 'you').
+		testVocabulary bagOfWordsFor: sentence.
+
+	answers #(0 1 0 1 0 0 0)."
+
+	| flags |
+	flags := Array new: self size withAll: 0.
+	sentenceArray do: [ :word |
+		| position |
+		position := self indexOf: word.
+		position = 0 ifFalse: [ flags at: position put: 1 ] ].
+	^ flags
+]