Html image extraction.

This commit is contained in:
GloriaMeneses 2017-12-02 16:18:43 +00:00 committed by SantiagoBragagnolo
parent 924a1deeff
commit 3f2cbed4dc
2 changed files with 38 additions and 0 deletions

View File

@ -418,6 +418,28 @@ GrafoscopioNode >> exportPreambleTo: aStream [
aStream nextPutAll: 'abstract: ', '|'; lf; nextPutAll: (configDict at: 'abstract'); lf]
]
{ #category : #'as yet unclassified' }
GrafoscopioNode >> extractHtmlImages [
	"Answer a Set with the URLs of the images referenced by the receiver's body.
	The body is parsed as HTML with Soup and the 'src' attribute of every <img> tag
	is converted with #asUrl. The result is a Set, so an image referenced more than
	once is answered only once. Answer an empty Set when the body is nil.
	Nothing is downloaded here: the method only collects the URLs."

	| imgSoup imgList |
	imgList := Set new.
	"Guard against an unset body -- presumably a fresh node can have a nil body; confirm against the accessor."
	self body ifNil: [ ^ imgList ].
	imgSoup := Soup fromString: self body.
	(imgSoup findAllTags: 'img') do: [ :each |
		"Skip <img> tags lacking a 'src' instead of sending #asUrl to nil
		(assumes #attributeAt: answers nil for a missing attribute -- TODO confirm)."
		(each attributeAt: 'src') ifNotNil: [ :src | imgList add: src asUrl ] ].
	^ imgList
]
{ #category : #utility }
GrafoscopioNode >> find: aString andReplaceWith: anotherString [
anotherString ifNil: [ ^ self ].

View File

@ -31,6 +31,22 @@ GrafoscopioNodeTest >> testDemoteNode [
self assert: child2 level equals: child1 level + 1
]
{ #category : #tests }
GrafoscopioNodeTest >> testExtractHtmlImages [
	"Check #extractHtmlImages on a body with three <img> tags, two of which share
	the same 'src': the answered collection is expected to hold two elements."

	| html node images |
	html := '<html> <body> <img src="http://pharo.org/files/pharo.png">
<img src="http://pharo.org/files/pharo.png">
<img src="http://pharo.org/files/pharo2.png">
</body></html>'.
	node := GrafoscopioNode new body: html.
	images := node extractHtmlImages.
	self assert: images size equals: 2
]
{ #category : #tests }