PetitCommonMark/software/PetitMarkdown/PPCommonMarkInlinesParser.c...

867 lines
20 KiB
Smalltalk

Class {
#name : 'PPCommonMarkInlinesParser',
#superclass : 'PPCompositeParser',
#instVars : [
'starDelimiterRun',
'underscoreDelimiterRun',
'space',
'newline',
'leftDelimiterRun',
'rightDelimiterRun',
'emphasize',
'link',
'inlineCode',
'punctuation',
'emphasisR1',
'emphasisR2',
'strongR1',
'strongR2',
'delimiterRunSpace',
'delimiterRun',
'preceedingDelimiterRunSpace',
'followingDelimiterRunSpace',
'escaped',
'hexadecimalEntity',
'decimalEntity',
'namedEntity',
'lt',
'gt',
'quot',
'copyEntity',
'nbsp',
'aelig',
'ouml',
'dcaron',
'amp',
'linkRef',
'hardBreak',
'entity',
'linkLabel',
'linkDestination',
'linkTitle',
'lineEnd',
'inlines',
'linkInLabel',
'linkLabelContent',
'lineStart',
'emptyLine',
'inlineCodeStart',
'inlineCodeEnd',
'documentEnd',
'line',
'linkRefDefs',
'softBreak',
'autolink',
'escapedCharacter',
'emailAutolink',
'normalAutolink',
'rawHtml',
'rawHtmlAttribute',
'autolinkScheme',
'openTag',
'closeTag',
'rawHtmlComment',
'processingInstruction',
'htmlDeclaration',
'cdata'
],
#category : 'PetitMarkdown-Parser'
}
{ #category : 'as yet unclassified' }
PPCommonMarkInlinesParser class >> ignoredNames [
^ super ignoredNames , #('linkRefDefs')
]
{ #category : 'entities' }
PPCommonMarkInlinesParser >> aelig [
^ 'Æ' asParser
map: [ :r |
PPCMText new
text: 'Æ'
yourself
]
]
{ #category : 'entities' }
PPCommonMarkInlinesParser >> amp [
^ '&' asParser / '&' asParser / '\&' asParser
map: [ :r |
PPCMText new
text: '&'
yourself
]
]
{ #category : 'autolinks' }
PPCommonMarkInlinesParser >> autolink [
^ normalAutolink / emailAutolink
]
{ #category : 'autolinks' }
PPCommonMarkInlinesParser >> autolinkScheme [
^ 'http' asParser /
'irc' asParser /
'mailto' asParser /
'MAILTO' asParser
]
{ #category : 'raw html' }
PPCommonMarkInlinesParser >> cdata [
^ ('<![CDATA' asParser, (']]>' asParser negate star), ']]>' asParser) flatten
map: [ :_html |
PPCMHtml new
text: _html
yourself
]
]
{ #category : 'raw html' }
PPCommonMarkInlinesParser >> closeTag [
^ ($< asParser,
$/ asParser,
#letter asParser,
(($> asParser) whileFalse: (#word asParser)),
(space optional), $> asParser) flatten
map: [ :_html |
PPCMHtml new
text: _html
yourself
]
]
{ #category : 'links' }
PPCommonMarkInlinesParser >> codeInLabel [
| brackets |
brackets := PPDelegateParser new
name: 'brackets';
yourself.
brackets setParser:
(escaped not, $[ asParser,
((inlineCode / (escaped not, $] asParser)) whileFalse: (brackets / #any asParser)),
$] asParser optional).
"while there is no link or ] -> consume brackets or any"
^ (brackets, inlineCode) and.
]
{ #category : 'entities' }
PPCommonMarkInlinesParser >> copyEntity [
^ '&copy;' asParser
map: [ :r |
PPCMText new
text: '©'
yourself
]
]
{ #category : 'entities' }
PPCommonMarkInlinesParser >> dcaron [
^ '&Dcaron;' asParser
map: [ :r |
PPCMText new
text: 'Ď'
yourself
]
]
{ #category : 'entities' }
PPCommonMarkInlinesParser >> decimalEntity [
^ '&#' asParser, #digit asParser plus flatten, $; asParser
map: [ :_prefix :_number :_end |
PPCMText new
text: (Character codePoint: _number asNumber) asString;
yourself
]
]
{ #category : 'support' }
PPCommonMarkInlinesParser >> decodeEntities: string [
^ PPCommonMarkUtils instance decodeEntities: string
]
{ #category : 'emphasis' }
PPCommonMarkInlinesParser >> delimiterRun [
^ starDelimiterRun / underscoreDelimiterRun
]
{ #category : 'emphasis' }
PPCommonMarkInlinesParser >> delimiterRunSpace [
^ space / newline
]
{ #category : 'lines and whitespace' }
PPCommonMarkInlinesParser >> documentEnd [
^ #eof asParser
]
{ #category : 'autolinks' }
PPCommonMarkInlinesParser >> emailAutolink [
^ $< asParser,
( ($> asParser / space / $\ asParser / $@ asParser) negate plus,
$@ asParser,
($> asParser / space / $\ asParser) negate plus
) flatten,
$> asParser
map: [ :start :content :end |
| label |
label := PPCMText new
text: (self encodeEntities: content);
yourself.
PPCMLink new
destination: 'mailto:', (self escapeUrl: content);
label: label;
yourself
]
]
{ #category : 'emphasis' }
PPCommonMarkInlinesParser >> emphasisR1 [
| start stop text children |
start := (leftDelimiterRun and, $* asParser, $* asParser not) wrapped name: 'start'; yourself.
stop := (rightDelimiterRun and, $* asParser, $* asParser not) wrapped name: 'stop'; yourself.
text := (stop / emphasize / link / inlineCode / hardBreak) negate plus flatten asPPCMText.
children := (text / emphasize / link / inlineCode / hardBreak) star.
^ start, children, stop
map: [ :_start :_children :_stop |
PPCMEmphasize new
addChildren: _children;
yourself
]
]
{ #category : 'emphasis' }
PPCommonMarkInlinesParser >> emphasisR2 [
| start stop text children |
start := (leftDelimiterRun and,
((rightDelimiterRun not) / (punctuation preceeds, rightDelimiterRun) and),
$_ asParser, $_ asParser not) wrapped name: 'start'; yourself.
stop := (rightDelimiterRun and,
((leftDelimiterRun not) / (leftDelimiterRun, punctuation) and),
$_ asParser, $_ asParser not) wrapped name: 'stop'; yourself.
text := (stop / emphasize / entity) negate plus flatten asPPCMText.
children := (text / emphasize / entity) star.
^ start, children, stop
map: [ :_start :_children :_stop |
PPCMEmphasize new
addChildren: _children;
yourself
]
]
{ #category : 'emphasis' }
PPCommonMarkInlinesParser >> emphasize [
^ emphasisR1 /
emphasisR2 /
strongR1 /
strongR2
]
{ #category : 'lines and whitespace' }
PPCommonMarkInlinesParser >> emptyLine [
^ space star, #endOfLine asParser ==> [ :e |
PPCMLine empty
]
]
{ #category : 'support' }
PPCommonMarkInlinesParser >> encodeEntities: string [
^ PPCommonMarkUtils instance encodeEntities: string
]
{ #category : 'entities' }
PPCommonMarkInlinesParser >> entity [
^ hexadecimalEntity / decimalEntity / namedEntity
]
{ #category : 'support' }
PPCommonMarkInlinesParser >> escape: string [
^ PPCommonMarkUtils instance escape: string
]
{ #category : 'support' }
PPCommonMarkInlinesParser >> escapeUrl: string [
^ PPCommonMarkUtils instance escapeUrl: string
]
{ #category : 'lines and whitespace' }
PPCommonMarkInlinesParser >> escaped [
" ^ ('\\' asParser preceeds: 2) not, '\' asParser preceeds"
"Might be a bit faster version:"
^ [ :context |
(context position == 0) ifTrue: [ PPFailure new ] ifFalse: [
context skip: -1.
context peek == $\ ifFalse: [ context skip: 1. PPFailure new ]
ifTrue: [
(context position == 0) ifTrue: [ context skip: 1. #escaped ] ifFalse: [
context skip: -1.
context peek == $\ ifTrue: [ context skip: 2. PPFailure new ]
ifFalse: [ context skip: 2. #escaped ]
]
]
]
] asParser
]
{ #category : 'lines and whitespace' }
PPCommonMarkInlinesParser >> escapedCharacter [
^ $\ asParser, (
$\ asParser /
$[ asParser /
$! asParser /
$# asParser /
$$ asParser /
$% asParser /
$' asParser /
$( asParser /
$) asParser /
$* asParser /
$+ asParser /
$, asParser /
$+ asParser /
$- asParser /
$. asParser /
$/ asParser /
$: asParser /
$; asParser /
$= asParser /
$? asParser /
$@ asParser /
$^ asParser /
$_ asParser /
$` asParser /
${ asParser /
$| asParser /
$} asParser /
$~ asParser /
$] asParser
) map: [ :_escape :_char |
PPCMText new
text: _char asString;
yourself
]
]
{ #category : 'emphasis' }
PPCommonMarkInlinesParser >> followingDelimiterRunSpace [
^ delimiterRunSpace and / #eof asParser
]
{ #category : 'entities' }
PPCommonMarkInlinesParser >> gt [
^ '>' asParser / '\>' asParser / '&gt;' asParser
map: [ :r |
PPCMText new
text: '&gt;'
yourself
]
]
{ #category : 'newlines' }
PPCommonMarkInlinesParser >> hardBreak [
^ ($\ asParser, newline and) / ((space min: 2), newline and)
==> [ :e | PPCMHardBreak new ]
]
{ #category : 'entities' }
PPCommonMarkInlinesParser >> hexadecimalEntity [
^ '&#x' asParser, (#hex asParser) plus flatten, $; asParser
map: [ :_prefix :_number :_end |
| text char |
char := Character codePoint: (Number readFrom: _number base: 16).
text := (char = $") ifTrue: [ '&quot;' ] ifFalse: [ char asString ].
PPCMText new
text: text;
yourself
]
]
{ #category : 'raw html' }
PPCommonMarkInlinesParser >> htmlDeclaration [
^ ('<!' asParser, '--' asParser not, ('>' asParser negate star), '>' asParser) flatten
map: [ :_html |
PPCMHtml new
text: _html
yourself
]
]
{ #category : 'intialization' }
PPCommonMarkInlinesParser >> initialize [
super initialize.
linkRefDefs := IdentityDictionary new.
]
{ #category : 'code' }
PPCommonMarkInlinesParser >> inlineCode [
^ inlineCodeStart, inlineCodeEnd negate star flatten, inlineCodeEnd
map: [ :_begin :_content :_end |
PPCMInlinedCode new
code: (self encodeEntities: _content);
yourself
]
]
{ #category : 'code' }
PPCommonMarkInlinesParser >> inlineCodeEnd [
^ [ :context |
context inlineCodeEnd parseOn: context
] asParser
]
{ #category : 'code' }
PPCommonMarkInlinesParser >> inlineCodeStart [
^ ((escaped / $` asParser preceeds) not, ($` asParser plus)) flatten >=> [ :context :cc |
| result |
result := cc value.
result isPetitFailure ifFalse: [
context inlineCodeEnd: (($` asParser preceeds) not, result asParser, $` asParser not).
].
result
]
]
{ #category : 'lines and whitespace' }
PPCommonMarkInlinesParser >> inlines [
^ emphasize / inlineCode / rawHtml / autolink / link / linkRef / hardBreak / softBreak / entity / escapedCharacter
]
{ #category : 'emphasis' }
PPCommonMarkInlinesParser >> leftDelimiterRun [
| a |
a := delimiterRun, delimiterRunSpace not, #any asParser and.
^ (a, punctuation not) / ((preceedingDelimiterRunSpace / punctuation preceeds), a)
]
{ #category : 'as yet unclassified' }
PPCommonMarkInlinesParser >> line [
| text |
text := inlines negate plus flatten asPPCMText.
^ (inlines / text) star
]
{ #category : 'lines and whitespace' }
PPCommonMarkInlinesParser >> lineEnd [
^ newline / documentEnd
]
{ #category : 'lines and whitespace' }
PPCommonMarkInlinesParser >> lineStart [
^ #startOfLine asParser
]
{ #category : 'links' }
PPCommonMarkInlinesParser >> link [
^ linkLabel, $( asParser, linkDestination optional trimSpaces, linkTitle optional trim, $) asParser
map: [ :_label :_open :_destination :_title :_close |
PPCMLink new
label: _label;
destination: _destination;
title: _title;
yourself
]
]
{ #category : 'links' }
PPCommonMarkInlinesParser >> linkDestination [
| parens escapedParen |
escapedParen := '\(' asParser / '\)' asParser.
parens := PPDelegateParser new
name: 'parens';
yourself.
"Parens cannot be nested!"
parens setParser: $( asParser, (($( asParser / $) asParser) not, (escapedParen / #any asParser)) star, $) asParser.
^ (($< asParser, ((($> asParser / newline) not, #any asParser) star) flatten, $> asParser)
==> [ :e | self escapeUrl: (self escape: e second) ]) /
((space / lineEnd / $) asParser) not, (parens / escapedParen / $( asParser negate)) plus flatten
==> [ :e | self escapeUrl: (self escape: e) ]
]
{ #category : 'links' }
PPCommonMarkInlinesParser >> linkInLabel [
| brackets |
brackets := PPDelegateParser new
name: 'brackets';
yourself.
brackets setParser:
(escaped not, $[ asParser,
((link / (escaped not, $] asParser)) whileFalse: (brackets / #any asParser)),
$] asParser optional).
"while there is no link or ] -> consume brackets or any"
^ ((link / $] asParser) whileFalse: (brackets / #any asParser)),
link and.
]
{ #category : 'links' }
PPCommonMarkInlinesParser >> linkInLinkLabel [
| brackets |
brackets := PPDelegateParser new
name: 'brackets';
yourself.
brackets setParser:
(escaped not, $[ asParser,
((link / (escaped not, $] asParser)) whileFalse: (brackets / #any asParser)),
$] asParser optional).
"while there is no link or ] -> consume brackets or any"
^ (link / $] asParser) whileFalse: (brackets / #any asParser),
link and.
]
{ #category : 'links' }
PPCommonMarkInlinesParser >> linkLabel [
| brackets labelText |
brackets := PPDelegateParser new
name: 'brackets';
yourself.
brackets setParser:
(escaped not, $[ asParser,
((escaped not, $] asParser) not, (brackets / #any asParser)) plus,
$] asParser) flatten asPPCMText.
labelText := (inlines / brackets / (escaped not, $] asParser)) negate plus flatten asPPCMText.
^ ((escaped not, $[ asParser) wrapped, (linkInLabel not),
(inlines / brackets / labelText) plus,
$] asParser
map: [ :_open :_pred :_content :_close |
| retval |
retval := PPCMLine new
addChildren: _content;
yourself.
]) "limitedBy: linkLabelContent"
]
{ #category : 'links' }
PPCommonMarkInlinesParser >> linkLabelContent [
| brackets |
brackets := PPDelegateParser new
name: 'brackets';
yourself.
brackets setParser:
(escaped not, $[ asParser,
((escaped not, $] asParser) not, (brackets / #any asParser)) plus,
$] asParser).
^ brackets
]
{ #category : 'links' }
PPCommonMarkInlinesParser >> linkRef [
^ linkLabel >=> [ :context :cc |
| retval memento |
memento := context position.
retval := cc value.
retval isPetitFailure ifFalse: [
(linkRefDefs includesKey: retval text asString asLowercase asSymbol) ifTrue: [
retval := PPCMLinkRef new
label: retval;
yourself
] ifFalse: [
context position: memento.
retval := PPFailure message: 'label not registered!' context: context
]
].
retval
]
]
{ #category : 'links' }
PPCommonMarkInlinesParser >> linkTitle [
^ ((space preceeds / lineStart),
(($" asParser,
((escaped not, $" asParser) / (lineEnd, emptyLine)) negate plus flatten,
$" asParser ==> #second) /
($' asParser,
((escaped not, $' asParser) / (lineEnd, emptyLine)) negate plus flatten,
$' asParser ==> #second) /
($( asParser,
($) asParser / (lineEnd, emptyLine)) negate plus flatten,
$) asParser ==> #second)))
==> [ :e | self decodeEntities: (self escape: (e second)) ]
]
{ #category : 'entities' }
PPCommonMarkInlinesParser >> lt [
^ '<' asParser / '\<' asParser / '&lt;' asParser
map: [ :r |
PPCMText new
text: '&lt;'
yourself
]
]
{ #category : 'entities' }
PPCommonMarkInlinesParser >> namedEntity [
^ lt / gt / quot / copyEntity / nbsp / aelig / ouml / dcaron / amp
]
{ #category : 'entities' }
PPCommonMarkInlinesParser >> nbsp [
^ '&nbsp;' asParser
map: [ :r |
PPCMText new
text: ' '
yourself
]
]
{ #category : 'lines and whitespace' }
PPCommonMarkInlinesParser >> newline [
^ #newline asParser
]
{ #category : 'autolinks' }
PPCommonMarkInlinesParser >> normalAutolink [
^
$< asParser,
(autolinkScheme, $: asParser,
($> asParser / space) negate plus) flatten,
$> asParser
map: [ :start :content :end |
| label |
label := PPCMText new
text: (self encodeEntities: content);
yourself.
PPCMLink new
destination: (self escapeUrl: content);
label: label;
yourself
]
]
{ #category : 'raw html' }
PPCommonMarkInlinesParser >> openTag [
^ ($< asParser,
#letter asParser,
(($> asParser) whileFalse: (#word asParser /
space /
newline /
$= asParser /
$_ asParser /
$: asParser /
rawHtmlAttribute)),
($/ asParser optional), $> asParser) flatten
map: [ :_html |
PPCMHtml new
text: _html
yourself
]
]
{ #category : 'entities' }
PPCommonMarkInlinesParser >> ouml [
^ '&ouml;' asParser
map: [ :r |
PPCMText new
text: 'ö'
yourself
]
]
{ #category : 'emphasis' }
PPCommonMarkInlinesParser >> preceedingDelimiterRunSpace [
^
delimiterRunSpace preceeds /
[:context | context position == 0 ifFalse: [
PPFailure message: 'start of file not found' ]
ifTrue: [ #startOfFile ]
] asParser
]
{ #category : 'raw html' }
PPCommonMarkInlinesParser >> processingInstruction [
^ ('<?' asParser, ('?>' asParser negate star), '?>' asParser) flatten
map: [ :_html |
PPCMHtml new
text: _html
yourself
]
]
{ #category : 'lines and whitespace' }
PPCommonMarkInlinesParser >> punctuation [
^ $. asParser /
$" asParser /
$, asParser /
$- asParser /
$( asParser /
$) asParser /
$] asParser
]
{ #category : 'entities' }
PPCommonMarkInlinesParser >> quot [
^ '"' asParser / '\"' asParser / '&quot;' asParser
map: [ :r |
PPCMText new
text: '&quot;'
yourself
]
]
{ #category : 'raw html' }
PPCommonMarkInlinesParser >> rawHtml [
^ closeTag / openTag / rawHtmlComment / processingInstruction / cdata / htmlDeclaration
]
{ #category : 'raw html' }
PPCommonMarkInlinesParser >> rawHtmlAttribute [
| pairedBrackets |
pairedBrackets := PPDelegateParser new name: 'brackets'; yourself.
pairedBrackets setParser: ( $< asParser,
($> asParser whileFalse: pairedBrackets / #any asParser),
$> asParser).
^
(
($' asParser, ($' asParser whileFalse: (pairedBrackets / #any asParser)), $' asParser) /
($" asParser, ($" asParser whileFalse: (pairedBrackets / #any asParser)), $" asParser)
),
(($> asParser / space / newline) and )
]
{ #category : 'raw html' }
PPCommonMarkInlinesParser >> rawHtmlComment [
^ ('<!--' asParser, $> asParser not, ('--' asParser negate star), '-->' asParser) flatten
map: [ :_html |
PPCMHtml new
text: _html
yourself
]
]
{ #category : 'links' }
PPCommonMarkInlinesParser >> registerLinkRefDef: node [
linkRefDefs at: node label asString asLowercase asSymbol
put: node
]
{ #category : 'emphasis' }
PPCommonMarkInlinesParser >> rightDelimiterRun [
| a |
a := (preceedingDelimiterRunSpace not), delimiterRun.
^ (punctuation preceeds not, a) / (a, (punctuation and / followingDelimiterRunSpace))
]
{ #category : 'newlines' }
PPCommonMarkInlinesParser >> softBreak [
^ space optional, Character cr asParser ==> [ :e |
PPCMSoftBreak new
]
]
{ #category : 'lines and whitespace' }
PPCommonMarkInlinesParser >> space [
^ Character space asParser
]
{ #category : 'emphasis' }
PPCommonMarkInlinesParser >> starDelimiterRun [
^ ($* asParser preceeds / escaped) not, $* asParser plus
]
{ #category : 'as yet unclassified' }
PPCommonMarkInlinesParser >> start [
^ line
]
{ #category : 'emphasis' }
PPCommonMarkInlinesParser >> strongR1 [
| start stop text children |
start := (leftDelimiterRun and, ($* asParser, $* asParser), $* asParser not) wrapped name: 'start'; yourself.
stop := (rightDelimiterRun and, ($* asParser, $* asParser), $* asParser not) wrapped name: 'stop'; yourself.
text := (stop / emphasize / entity) negate plus flatten asPPCMText.
children := (text / emphasize / entity) star.
^ start, children, stop
map: [ :_start :_children :_stop |
PPCMStrong new
addChildren: _children;
yourself
]
]
{ #category : 'emphasis' }
PPCommonMarkInlinesParser >> strongR2 [
| start stop text children |
start := (leftDelimiterRun and,
((rightDelimiterRun not) / (punctuation preceeds, rightDelimiterRun) and),
($_ asParser, $_ asParser), $_ asParser not) wrapped name: 'start'; yourself.
stop := (rightDelimiterRun and,
((leftDelimiterRun not) / (leftDelimiterRun, punctuation) and),
($_ asParser, $_ asParser), $_ asParser not) wrapped name: 'stop'; yourself.
text := (stop / emphasize) negate plus flatten asPPCMText.
children := (text / emphasize) star.
^ start, children, stop
map: [ :_start :_children :_stop |
PPCMStrong new
addChildren: _children;
yourself
]
]
{ #category : 'emphasis' }
PPCommonMarkInlinesParser >> underscoreDelimiterRun [
^ ($_ asParser preceeds / escaped not), $_ asParser plus
]
{ #category : 'support' }
PPCommonMarkInlinesParser >> utils [
^ PPCommonMarkUtils instance
]