Initial support for extracting images

This commit is contained in:
Eduardo Riesco 2017-12-02 17:50:26 +00:00 committed by Offray Luna
parent e408053c9b
commit a304e4847a
3 changed files with 45 additions and 2 deletions

View File

@ -35,6 +35,32 @@ GrafoscopioCodeModel >> content: aGrafoscopioNodeContent [
startOn: (GTPlayPage new saveContent: aGrafoscopioNodeContent)
]
{ #category : #'as yet unclassified' }
GrafoscopioCodeModel >> extractHtmlImages [
	"Answer a Set with the URL of every img tag found when the body of the receiver is parsed as HTML.
	A src that parses without a host is resolved against the last link of the receiver: that link is
	converted to a URL, its last path segment is removed, and the path segments of the src are appended."

	| imgSoup imgList |
	imgList := Set new.
	imgSoup := Soup fromString: self body.
	(imgSoup findAllTags: 'img') do: [ :each |
		"Skip tags whose src lookup answers nil instead of failing on nil asUrl.
		NOTE(review): assumes attributeAt: answers nil for a missing attribute - confirm against Soup."
		(each attributeAt: 'src') ifNotNil: [ :rawSrc | | src |
			src := rawSrc asUrl.
			"The previous code sent host: with a whole URL object as its argument. Resolve the same way
			GrafoscopioNode >> extractHtmlImages does, but derive the base URL anew for every image so
			that addPathSegments: cannot pile the path of one image onto the next.
			NOTE(review): assumes links holds strings, so asUrl answers a new URL each time - confirm."
			src host ifNil: [
				src := self links last asUrl removeLastPathSegment
					addPathSegments: src pathSegments;
					yourself ].
			imgList add: src ] ].
	^ imgList
]
{ #category : #initialization }
GrafoscopioCodeModel >> initializeWidgets [

View File

@ -329,6 +329,17 @@ GrafoscopioNode >> demote [
]
{ #category : #'as yet unclassified' }
GrafoscopioNode >> downloadImagesInto: folder [
	"Download every image URL answered by #extractHtmlImages into a subdirectory of folder.
	The subdirectory is folder / (directory part of the image URL) and is passed through
	ensureCreateDirectory before the download starts.
	NOTE(review): folder is presumably a FileReference, since it is sent #/ - confirm with callers."

	self extractHtmlImages do: [ :each | | localFolder client |
		localFolder := (folder / each directory) ensureCreateDirectory.
		client := ZnClient new.
		"Close the client even when the download signals an error, so its connection is not left open."
		[ client
			url: each;
			downloadTo: localFolder ] ensure: [ client close ] ]
]
{ #category : #'custom markup' }
GrafoscopioNode >> embedAll [
"This is just a previous part of the messy markDownContent. The %embed-all keyword should be revaluated.
@ -421,12 +432,17 @@ GrafoscopioNode >> exportPreambleTo: aStream [
{ #category : #'as yet unclassified' }
GrafoscopioNode >> extractHtmlImages [
"comment stating purpose of message"
|imgSoup imgHost imgList|
|imgSoup imgHost imgList src folders|
imgList := Set new.
imgSoup := Soup fromString: self body.
imgHost:= self links last asUrl removeLastPathSegment.
folders:= Set new.
(imgSoup findAllTags: 'img') do: [ :each|
imgList add: (each attributeAt: 'src') asUrl.
src := (each attributeAt: 'src') asUrl.
(src host) ifNil: [ src := imgHost addPathSegments: src pathSegments ].
imgList add: src.
"folders add: src directory."
"OSProcess waitForCommand: 'wget ', (each attributeAt: 'src')."
"imgHost := self links last removeLastPathSegment."

View File

@ -44,6 +44,7 @@ GrafoscopioNodeTest >> testExtractHtmlImages [
</body></html>'.
txtNode := GrafoscopioNode new body: txt.
txtNode links: 'http://pharo.org/files/'.
self assert: (txtNode extractHtmlImages) size equals: 2.