From a304e4847a381d40d5f40ec32425bda59fde7d27 Mon Sep 17 00:00:00 2001 From: Eduardo Riesco Date: Sat, 2 Dec 2017 17:50:26 +0000 Subject: [PATCH] Initial support for extracting images --- .../Grafoscopio/GrafoscopioCodeModel.class.st | 26 +++++++++++++++++++ .../Grafoscopio/GrafoscopioNode.class.st | 20 ++++++++++++-- .../Grafoscopio/GrafoscopioNodeTest.class.st | 1 + 3 files changed, 45 insertions(+), 2 deletions(-) diff --git a/repository/Grafoscopio/GrafoscopioCodeModel.class.st b/repository/Grafoscopio/GrafoscopioCodeModel.class.st index 2898446..154a21a 100644 --- a/repository/Grafoscopio/GrafoscopioCodeModel.class.st +++ b/repository/Grafoscopio/GrafoscopioCodeModel.class.st @@ -35,6 +35,32 @@ GrafoscopioCodeModel >> content: aGrafoscopioNodeContent [ startOn: (GTPlayPage new saveContent: aGrafoscopioNodeContent) ] +{ #category : #'as yet unclassified' } +GrafoscopioCodeModel >> extractHtmlImages [ + "Answer a Set holding the src attribute, converted with asUrl, of every img tag that Soup finds in the receiver's body. When a src has no host, host: is sent to it with the receiver's last link after asUrl removeLastPathSegment. NOTE(review): host: therefore receives whatever removeLastPathSegment answers (presumably a url object, not a host name), whereas GrafoscopioNode>>extractHtmlImages appends the path segments to that base url instead - confirm which is intended. imgHost is declared but only referenced in commented-out code." + |imgSoup imgHost imgList src| + + imgList := Set new. + imgSoup := Soup fromString: self body. + (imgSoup findAllTags: 'img') do: [ :each| + src := (each attributeAt: 'src') asUrl. + (src host) ifNil: [src host: self links last asUrl removeLastPathSegment]. + imgList add: src. + "imgList add: (each attributeAt: 'src') asUrl." + + "OSProcess waitForCommand: 'wget ', (each attributeAt: 'src')." + "imgHost := self links last removeLastPathSegment." + + "imgPath:= ((each attributeAt: 'src') asUrl). " + "ZnEasy getJpeg: (imgHost , imgPath) asUrl." + + "OSProcess waitForCommand: ('mkdir ', imgPath)." + + "Transcript show: ' wget ', imgPath , '/',(each attributeAt: 'src'). " + ]. + ^imgList . 
+] + { #category : #initialization } GrafoscopioCodeModel >> initializeWidgets [ diff --git a/repository/Grafoscopio/GrafoscopioNode.class.st b/repository/Grafoscopio/GrafoscopioNode.class.st index f78091f..0bee194 100644 --- a/repository/Grafoscopio/GrafoscopioNode.class.st +++ b/repository/Grafoscopio/GrafoscopioNode.class.st @@ -329,6 +329,17 @@ GrafoscopioNode >> demote [ ] +{ #category : #'as yet unclassified' } +GrafoscopioNode >> downloadImagesInto: folder [ + "For every url answered by extractHtmlImages, ensure that the directory obtained from folder / (the url's directory) exists, then download the url into that directory with a new ZnClient. NOTE(review): folder is combined with / and sent ensureCreateDirectory, so it is presumably a FileReference - confirm against callers." + + self extractHtmlImages do: [ :each | |localFolder| + localFolder := (folder / each directory) ensureCreateDirectory. + ZnClient new + url: each ; + downloadTo: localFolder . ] +] + { #category : #'custom markup' } GrafoscopioNode >> embedAll [ "This is just a previous part of the messy markDownContent. The %embed-all keyword should be revaluated. @@ -421,12 +432,17 @@ GrafoscopioNode >> exportPreambleTo: aStream [ { #category : #'as yet unclassified' } GrafoscopioNode >> extractHtmlImages [ "comment stating purpose of message" - |imgSoup imgHost imgList| + |imgSoup imgHost imgList src folders| imgList := Set new. imgSoup := Soup fromString: self body. + imgHost:= self links last asUrl removeLastPathSegment. + folders:= Set new. (imgSoup findAllTags: 'img') do: [ :each| - imgList add: (each attributeAt: 'src') asUrl. + src := (each attributeAt: 'src') asUrl. + (src host) ifNil: [ src := imgHost addPathSegments: src pathSegments ]. + imgList add: src. + "folders add: src directory." "OSProcess waitForCommand: 'wget ', (each attributeAt: 'src')." "imgHost := self links last removeLastPathSegment." diff --git a/repository/Grafoscopio/GrafoscopioNodeTest.class.st b/repository/Grafoscopio/GrafoscopioNodeTest.class.st index fba9e02..daec371 100644 --- a/repository/Grafoscopio/GrafoscopioNodeTest.class.st +++ b/repository/Grafoscopio/GrafoscopioNodeTest.class.st @@ -44,6 +44,7 @@ GrafoscopioNodeTest >> testExtractHtmlImages [ '. txtNode := GrafoscopioNode new body: txt. 
+ txtNode links: 'http://pharo.org/files/'. self assert: (txtNode extractHtmlImages) size equals: 2.