From 0647a860f7f11215f444f057eee739d3e9a3bf10 Mon Sep 17 00:00:00 2001
From: Offray Luna
Date: Wed, 17 Apr 2019 13:18:05 +0000
Subject: [PATCH] Moving core functionality to GrafoscopioUtils.

---
 repository/Grafoscopio-Utils/Pandoc.class.st  | 133 ++++++++++++++++++
 .../Grafoscopio-Utils/PandocWork.class.st     | 125 ++++++++++++++++
 2 files changed, 258 insertions(+)
 create mode 100644 repository/Grafoscopio-Utils/Pandoc.class.st
 create mode 100644 repository/Grafoscopio-Utils/PandocWork.class.st

diff --git a/repository/Grafoscopio-Utils/Pandoc.class.st b/repository/Grafoscopio-Utils/Pandoc.class.st
new file mode 100644
index 0000000..0aaa8fd
--- /dev/null
+++ b/repository/Grafoscopio-Utils/Pandoc.class.st
@@ -0,0 +1,133 @@
+"
+I model the interaction between Pandoc and Grafoscopio.
+
+All my behaviour lives on the class side: I locate the pandoc executable, download the
+Lua filters I need and run pandoc as an external process to convert documents or to
+extract information from them.
+"
+Class {
+	#name : #Pandoc,
+	#superclass : #Object,
+	#classInstVars : [
+		'executable'
+	],
+	#category : #'Grafoscopio-Utils'
+}
+
+{ #category : #'as yet unclassified' }
+Pandoc class >> downloadLuaFilters [
+	"I download into the temp folder every Lua filter listed by #luaFilters that is not
+	there yet. A filter counts as present when a file named like the last segment of its
+	URL exists in the temp folder."
+	self luaFilters do: [ :filter | | filterUrl |
+		filterUrl := filter asUrl.
+		(FileLocator temp asFileReference / (filterUrl segments last)) exists
+			ifFalse: [
+				ZnClient new
+					url: filterUrl;
+					downloadTo: FileLocator temp ] ]
+]
+
+{ #category : #accessing }
+Pandoc class >> executable [
+	"I answer the pandoc executable set through #executable: or, when none has been set,
+	whatever #executableLocation answers."
+	^ executable ifNil: [ self executableLocation ]
+]
+
+{ #category : #accessing }
+Pandoc class >> executable: aFileReference [
+	"I remember aFileReference as the pandoc executable answered by #executable."
+	executable := aFileReference
+]
+
+{ #category : #accessing }
+Pandoc class >> executableLocation [
+	"I answer the default pandoc path as a String when a file exists there.
+	NOTE(review): otherwise I send #definePandocExecutable, which is not defined in this
+	file, and then answer myself instead of a location. Confirm where that method lives
+	and what callers expect in that case."
+	| location |
+	location := '/usr/bin/pandoc'.
+	location asFileReference exists
+		ifTrue: [ ^ location ]
+		ifFalse: [ self definePandocExecutable ]
+]
+
+{ #category : #utility }
+Pandoc class >> extractImagesInUnixFor: aFileReference withFilter: aLuaFilter [
+	"I use Pandoc Lua scripting capabilities to extract all image links in aFileReference.
+	I run pandoc over aFileReference with aLuaFilter and, when it succeeds, answer the
+	collection built by sending #next to every td tag found in its standard output.
+	When pandoc fails I log the exit status and the standard error to the Transcript
+	and answer myself."
+
+	OSSUnixSubprocess new
+		command: 'pandoc';
+		arguments: {aFileReference fullName . '--lua-filter=',aLuaFilter fullName };
+		redirectStdout;
+		redirectStderr;
+		runAndWaitOnExitDo: [ :process :outString :errString |
+			process isSuccess
+				ifTrue: [
+					^ ((Soup fromString: outString) findAllTags: 'td') collect: [ :each | each next ] ]
+				ifFalse: [
+					"OSSUnixProcessExitStatus has a nice #printOn: "
+					Transcript show: 'Command exit with error status: ', process exitStatusInterpreter printString; cr.
+					Transcript show: 'Stderr contents: ', errString.
+				]
+		]
+]
+
+{ #category : #converters }
+Pandoc class >> htmlToMarkdown: inputFile [
+	"I convert the HTML in inputFile to Markdown with pandoc and answer the result.
+	The output is written to a fresh body.md file in the temp folder. When pandoc fails
+	I answer the unconverted contents of inputFile instead."
+
+	| outputFile |
+	outputFile := FileLocator temp / 'body.md'.
+	outputFile ensureDelete.
+	outputFile ensureCreateFile.
+	OSSUnixSubprocess new
+		command: 'pandoc';
+		arguments: {'-f'. 'html'. '-t'. 'markdown'. '--atx-headers'. inputFile fullName.
+			'--output'. outputFile fullName };
+		redirectStdout;
+		redirectStderr;
+		runAndWaitOnExitDo: [ :process :outString :errString |
+			process isSuccess
+				ifTrue: [ ^ outputFile contents ]
+				ifFalse: [ ^inputFile contents ]
+		]
+]
+
+{ #category : #'as yet unclassified' }
+Pandoc class >> listImagesFrom: aFileReference [
+	"I provide a list of all images contained in aFileReference.
+	I rely on the image-links.lua filter in the temp folder and download the Lua filters
+	first when it is missing. Only the unix platform is handled: on any other platform I
+	do nothing else and answer myself."
+	| filter |
+	filter := FileLocator temp asFileReference / 'image-links.lua'.
+	filter exists ifFalse: [ self downloadLuaFilters ].
+	Smalltalk platformName = 'unix'
+		ifTrue: [ ^ self extractImagesInUnixFor: aFileReference withFilter: filter ]
+]
+
+{ #category : #utility }
+Pandoc class >> luaFilters [
+	"I answer the URLs of the Lua scripts that change the default behaviour of Pandoc
+	and/or the processing of supported markup languages.
+
+	For more information about Lua filters see:
+
+	https://pandoc.org/lua-filters.html
+	"
+
+	| filters |
+	filters := OrderedCollection new.
+	filters
+		add: 'http://mutabit.com/repos.fossil/dataweek/doc/tip/Artefactos/Scripts/image-links.lua'.
+	^ filters
+]
diff --git a/repository/Grafoscopio-Utils/PandocWork.class.st b/repository/Grafoscopio-Utils/PandocWork.class.st
new file mode 100644
index 0000000..5ec9021
--- /dev/null
+++ b/repository/Grafoscopio-Utils/PandocWork.class.st
@@ -0,0 +1,125 @@
+"
+I model a work (book, booklet, web page, etc) in Pandoc, its table of contents, its metadata file to
+control exportation and other elements.
+
+I can be used to improve reproducibility of published works that use Pandoc.
+
+By default it is supposed that a root folder contains the set of folders, organized by
+language (following the ISO 639-1 two letters convention) where the contents of the work
+and their translations are located.
+Chapters, subchapters, sections and subsections are contained there as Markdown files
+and their order is stated as an ordered dictionary for each language.
+A YAML metadata block is used in each file to map translations between files and languages
+and other sources, synchronizations and metadata.
+"
+Class {
+	#name : #PandocWork,
+	#superclass : #Object,
+	#instVars : [
+		'language',
+		'contents',
+		'metadataFiles',
+		'rootFolder',
+		'manifest'
+	],
+	#category : #'Grafoscopio-Utils'
+}
+
+{ #category : #utilities }
+PandocWork >> buildManifest [
+	"I fill the manifest, that lists all the files which are needed to create a
+	derived file (PDF, EPUB, etc) with their checksums and folder locations.
+
+	I can be used to associate derived files with their sources.
+
+	For every folder in my contents I store in the manifest, under the folder name, an
+	ordered dictionary that maps each file name (with the default extension) to the
+	checksum answered by GrafoscopioUtils for that file."
+	self contents keysAndValuesDo: [ :folder :fileNames | | checksums |
+		"A fresh dictionary per folder, so that a folder lists only its own files."
+		checksums := OrderedDictionary new.
+		fileNames do: [ :fileName | | keyName contentFile |
+			keyName := fileName, self defaultFileExtension.
+			contentFile := self rootFolder / self language / folder / keyName.
+			checksums at: keyName put: (GrafoscopioUtils checksumFor: contentFile) ].
+		self manifest at: folder put: checksums ]
+]
+
+{ #category : #accessing }
+PandocWork >> contents [
+	"I answer the table of contents of the work, as set through #contents:."
+	^ contents
+]
+
+{ #category : #accessing }
+PandocWork >> contents: anOrderedDictionary [
+	"I model the table of contents of the work.
+	The key of the dictionary is the folder, inside the language folder (see the language variable)
+	where the files are stored, and the value is an ordered collection of the files on such folder
+	which are part of the exported result, without the file extension (by default is supposed to be '.md')"
+	contents := anOrderedDictionary
+]
+
+{ #category : #utilities }
+PandocWork >> defaultFileExtension [
+	"I answer the extension appended to the file names of my contents."
+	^ '.md'
+]
+
+{ #category : #accessing }
+PandocWork >> language [
+	"I answer the language of the work, as set through #language:."
+	^ language
+]
+
+{ #category : #accessing }
+PandocWork >> language: aISOLangString [
+	"I model the language of a work as an ISO 639-1 two letters string.
+	I am used to establish the folder where the content is stored, following the convention
+	of one folder by language."
+	language := aISOLangString
+]
+
+{ #category : #accessing }
+PandocWork >> manifest [
+	"I answer the manifest, that lists all the files which are needed to create a
+	derived file (PDF, EPUB, etc) with their checksums and folder locations.
+	It starts as an empty ordered dictionary and is filled by #buildManifest.
+
+	I can be used to associate derived files with their sources."
+	^ manifest ifNil: [ manifest := OrderedDictionary new ]
+]
+
+{ #category : #accessing }
+PandocWork >> manifest: anOrderedDictionary [
+	"I replace the manifest with anOrderedDictionary."
+	manifest := anOrderedDictionary
+]
+
+{ #category : #accessing }
+PandocWork >> metadataFiles [
+	"I answer the metadata files of the work, as set through #metadataFiles:."
+	^ metadataFiles
+]
+
+{ #category : #accessing }
+PandocWork >> metadataFiles: aCollection [
+	"I model the YAML metadata files that are used to control the output of the exportation.
+	I can have several files, controlling several outputs, one for PDF, one for HTML, one for EPUB
+	and so on.
+	This should be stated in the name of the metadata file and by default will be controlling PDF
+	output."
+	metadataFiles := aCollection
+]
+
+{ #category : #accessing }
+PandocWork >> rootFolder [
+	"I answer the folder where the Markdown files are located, as set through #rootFolder:."
+	^ rootFolder
+]
+
+{ #category : #accessing }
+PandocWork >> rootFolder: aFileReference [
+	"I model the folder where the Markdown files are located."
+	rootFolder := aFileReference
+]