Debugging Pandoc HTML to Markdown importation, now using OSSubprocess.

This commit is contained in:
Offray Vladimir Luna Cárdenas 2018-12-04 23:53:05 +00:00
parent 919d56177e
commit fe3d9ff84c
3 changed files with 42 additions and 13 deletions

View File

@ -522,27 +522,18 @@ GrafoscopioNode >> headers [
{ #category : #operation } { #category : #operation }
GrafoscopioNode >> htmlToMarkdown [ GrafoscopioNode >> htmlToMarkdown [
"I convert the node body from HTML format to Pandoc's Markdown." "I convert the node body from HTML format to Pandoc's Markdown."
| htmlFile markdownFile | | htmlFile |
(self isTaggedAs: 'código' ) ifTrue: [ ^self ]. (self isTaggedAs: 'código' ) ifTrue: [ ^self ].
((self headerStartsWith: '%invisible') "or:[self hasAncestorHeaderWith: '%invisible']") ((self headerStartsWith: '%invisible') "or:[self hasAncestorHeaderWith: '%invisible']")
ifTrue: [ ^self ]. ifTrue: [ ^self ].
htmlFile := FileLocator temp asFileReference / 'body.html'. htmlFile := FileLocator temp asFileReference / 'body.html'.
htmlFile ensureDelete. htmlFile ensureCreateFile.
markdownFile := FileLocator temp asFileReference / 'body.markdown'.
markdownFile ensureDelete.
htmlFile writeStreamDo: [:out | out nextPutAll: self body ]. htmlFile writeStreamDo: [:out | out nextPutAll: self body ].
Smalltalk platformName = 'unix' Smalltalk platformName = 'unix'
ifTrue: [ OSProcess ifTrue: [ self body: (Pandoc htmlToMarkdown: htmlFile) ].
waitForCommand: 'pandoc -f html -t markdown --atx-headers ',
htmlFile fullName, ' -o ', markdownFile fullName ].
Smalltalk platformName = 'Win32' Smalltalk platformName = 'Win32'
ifTrue: [ WinProcess ifTrue: [ self shouldBeImplemented ].
createAndWaitForProcess: 'pandoc -f html -t markdown --atx-headers ',
htmlFile fullName, ' -o ', markdownFile fullName ].
self body: markdownFile contents.
htmlFile ensureDelete. htmlFile ensureDelete.
markdownFile ensureDelete.
] ]
{ #category : #operation } { #category : #operation }

View File

@ -170,6 +170,24 @@ GrafoscopioNotebook >> demoteNode [
self notebookContent: notebook. self notebookContent: notebook.
] ]
{ #category : #'as yet unclassified' }
GrafoscopioNotebook >> downloadImages [
	"I prepare the local folders where the images of a notebook should be stored, respecting
	their relative paths. So if an image refers to http://mysite.com/uploads/chap1/myimage.png,
	the folder part of that link ('uploads/chap1') is ensured inside the same folder where the
	notebook is stored, so the image can live at 'uploads/chap1/myimage.png'.
	This is helpful for notebook conversions that expect to have local images in particular
	locations.
	NOTE(review): at the moment I only ensure the folders exist; fetching the image files
	themselves is not done here yet.
	I answer whatever #do: answers for 'self imagesList'."
	| parentFolder |
	parentFolder := self workingFile parent.
	^ self imagesList do: [ :each | | relativePathString link |
		link := each contents asUrl.
		"Presumably the path of the link without its last segment -- confirm against the URL class in use."
		relativePathString := link directory.
		"Links without a folder part need no extra folder besides the notebook one."
		relativePathString ifNotEmpty: [
			GrafoscopioUtils ensureCreateDirectory: relativePathString into: parentFolder ] ]
]
{ #category : #persistence } { #category : #persistence }
GrafoscopioNotebook >> exportAllSubtreesAsMarkdow [ GrafoscopioNotebook >> exportAllSubtreesAsMarkdow [
| toBeExported | | toBeExported |

View File

@ -61,6 +61,26 @@ Pandoc class >> extractImagesInUnixFor: aFileReference withFilter: aLuaFilter [
] ]
] ]
{ #category : #converters }
Pandoc class >> htmlToMarkdown: inputFile [
	"I convert the HTML contents of inputFile (a file reference) to Pandoc's Markdown by running
	the external 'pandoc' command, and I answer the converted text.
	If the pandoc process does not finish successfully, I answer the unconverted contents of
	inputFile instead, so callers always get some text back.
	I use the temporary file 'body.md' to collect the output of pandoc and I remove it before
	answering, whether the conversion worked or not."
	| outputFile |
	outputFile := FileLocator temp / 'body.md'.
	"Start from a fresh, empty output file, so no content of a previous run can leak in."
	outputFile ensureDelete.
	outputFile ensureCreateFile.
	^ [ | converted |
		OSSUnixSubprocess new
			command: 'pandoc';
			arguments: {'-f'. 'html'. '-t'. 'markdown'. '--atx-headers'. inputFile fullName.
				'--output'. outputFile fullName };
			redirectStdout;
			redirectStderr;
			runAndWaitOnExitDo: [ :process :outString :errString |
				"The result is read before the temporary file is deleted by the ensure: block below."
				converted := process isSuccess
					ifTrue: [ outputFile contents ]
					ifFalse: [ inputFile contents ] ].
		converted ]
		ensure: [ outputFile ensureDelete ]
]
{ #category : #'as yet unclassified' } { #category : #'as yet unclassified' }
Pandoc class >> listImagesFrom: aFileReference [ Pandoc class >> listImagesFrom: aFileReference [
"I provide a list of all images contained in aFile." "I provide a list of all images contained in aFile."