Data cleaning methods.

This commit is contained in:
Offray Vladimir Luna Cárdenas 2023-05-09 14:45:23 -05:00
parent 91e92c3e6a
commit fe0d65cf5a
3 changed files with 55 additions and 30 deletions

View File

@ -393,6 +393,43 @@ Markdeep >> pubPubRawLinks [
^ (parser parse: self body)
]
{ #category : #accessing }
Markdeep >> removeAlternativeImagesArray [
	"Parse the body with PubPubGrammar2 and, for each parse result, delete every occurrence
	of its third element from the body. Afterwards delete every leftover '{srcset=}' marker.
	The receiver is left untouched when the body is nil or when the parser answers an
	empty collection."

	| grammar matches |
	self body ifNil: [ ^ self ].
	grammar := PubPubGrammar2 new.
	matches := grammar parse: body.
	matches ifEmpty: [ ^ self ].
	matches do: [ :match |
		| stripped |
		stripped := self body copyReplaceAll: match third with: ''.
		self body: stripped ].
	self body: (self body copyReplaceAll: '{srcset=}' with: '')
]
{ #category : #accessing }
Markdeep >> removeAutoGeneratedFileNotice [
	"Delete every occurrence of the auto-generated download notice (the notice paragraph
	followed by its horizontal rule) from the body.
	Does nothing when the body is nil, mirroring the guard used by
	removeAlternativeImagesArray; without it the replacement would be sent to nil."

	| autoGeneratedNotice |
	self body ifNil: [ ^ self ].
	"The literal spans several lines on purpose: it must match the notice text verbatim,
	line breaks included."
	autoGeneratedNotice := '**Notice:** This file is an auto-generated download and, as such, might
include minor display or rendering errors. For the version of record,
please visit the HTML version or download the PDF.
------------------------------------------------------------------------'.
	self body: (self body copyReplaceAll: autoGeneratedNotice with: '')
]
{ #category : #accessing }
Markdeep >> removeCCByLicenseDiv [
	"Delete every occurrence of the CC-BY 4.0 license <div> block from the body.
	Does nothing when the body is nil, mirroring the guard used by
	removeAlternativeImagesArray; without it the replacement would be sent to nil."

	| licenseDiv |
	self body ifNil: [ ^ self ].
	"The literal spans several lines on purpose: it must match the div verbatim,
	line breaks included."
	licenseDiv := '<div>
**License:** [Creative Commons Attribution 4.0 International License
(CC-BY 4.0)](https://creativecommons.org/licenses/by/4.0/)
</div>'.
	self body: (self body copyReplaceAll: licenseDiv with: '')
]
{ #category : #accessing }
Markdeep >> replaceBackslashBreaklines [
self bodyReplaceAll: '\

View File

@ -25,6 +25,24 @@ Markdown class >> yamlMetadataDelimiter [
]
{ #category : #accessing }
Markdown >> asMarkdeep [
	"Build a new Markdeep carrying the receiver's body and file, then answer whatever
	that Markdeep answers to commentYAMLMetadata."

	| markdeep |
	markdeep := Markdeep new.
	markdeep body: self body.
	markdeep markdownFile: self file.
	^ markdeep commentYAMLMetadata
]
{ #category : #accessing }
Markdown >> body [
"Answer the current value of the body instance variable (no lazy initialization here, so it may be nil)."
^ body
]
{ #category : #accessing }
Markdown >> body: aString [
"Store aString in the body instance variable. The argument name suggests a String is expected; nothing is checked here."
body := aString
]
{ #category : #operation }
Markdown >> commentYAMLMetadata [
| newContents |

View File

@ -1,30 +0,0 @@
"
I am a Markdown subclass in the MiniDocs-Core category that adds a single instance variable, url.
My methods locate PPCMLink nodes inside my document tree and report them together with their positions.
"
Class {
#name : #PubPub,
#superclass : #Markdown,
#instVars : [
'url'
],
#category : #'MiniDocs-Core'
}
{ #category : #accessing }
PubPub >> detectImageLinkPositions [
	"Answer an OrderedCollection holding one single-element Array { index -> node } for
	every PPCMLink node of the document tree whose immediately preceding node has the
	text '!'. The index is the node's position in `self documentTree allChildren`."

	| docNodes response |
	docNodes := self documentTree allChildren.
	response := OrderedCollection new.
	docNodes doWithIndex: [ :each :i |
		"Indexing is 1-based: a link at position 1 has no predecessor, so skip the
		look-behind there instead of failing on `docNodes at: 0`."
		(each className = 'PPCMLink'
			and: [ i > 1 and: [ (docNodes at: i - 1) text = '!' ] ])
			ifTrue: [ response add: { i -> each } ] ].
	^ response
]
{ #category : #accessing }
PubPub >> linksWithTreePosition [
	"Answer an OrderedCollection holding one single-element Array { index -> node } for
	every PPCMLink node found in `self documentTree allChildren`, in traversal order."

	| nodes found |
	nodes := self documentTree allChildren.
	found := OrderedCollection new.
	nodes doWithIndex: [ :node :position |
		node className = 'PPCMLink'
			ifTrue: [ found add: { position -> node } ] ].
	^ found
]