s around + # "paragraphs" that are wrapped in non-block-level tags, such as anchors, + # phrase emphasis, and spans. The list of tags we're looking for is + # hard-coded: $block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|'. 'script|noscript|form|fieldset|iframe|math|ins|del'; $block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|'. 'script|noscript|form|fieldset|iframe|math'; + # First, look for nested blocks, e.g.: + #
tags around block-level tags.
$text = _HashHTMLBlocks($text);
$text = _FormParagraphs($text);
@@ -236,20 +355,32 @@ function _RunBlockGamut($text) {
function _RunSpanGamut($text) {
+#
+# These are all the transformations that occur *within* block-level
+# tags like paragraphs, headers, and list items.
+#
global $md_empty_element_suffix;
+
$text = _DoCodeSpans($text);
- # Fix unencoded ampersands and <'s:
- $text = _EncodeAmpsAndAngles($text);
+ $text = _EscapeSpecialChars($text);
# Process anchor and image tags. Images must come first,
# because ![foo][f] looks like an anchor.
$text = _DoImages($text);
$text = _DoAnchors($text);
+ # Make links out of things like ` Just type tags
+#
global $md_html_blocks;
# Strip leading and trailing lines:
$text = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $text);
$grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
- $count = count($grafs);
+ #
+ # Wrap <p> tags.
+ #
foreach ($grafs as $key => $value) {
if (!isset( $md_html_blocks[$value] )) {
$value = _RunSpanGamut($value);
@@ -726,6 +981,9 @@ function _FormParagraphs($text) {
}
}
+ #
+ # Unhashify HTML blocks
+ #
foreach ($grafs as $key => $value) {
if (isset( $md_html_blocks[$value] )) {
$grafs[$key] = $md_html_blocks[$value];
@@ -737,6 +995,10 @@ function _FormParagraphs($text) {
function _EncodeAmpsAndAngles($text) {
+# Smart processing for ampersands and angle brackets that need to be encoded.
+
+ # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
+ # http://bumppo.net/projects/amputator/
$text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
'&', $text);;
@@ -748,6 +1010,11 @@ function _EncodeAmpsAndAngles($text) {
function _EncodeBackslashEscapes($text) {
+#
+# Parameter: String.
+# Returns: The string, after processing the following backslash
+# escape sequences.
+#
global $md_escape_table, $md_backslash_escape_table;
# Must process escaped backslashes first.
return str_replace(array_keys($md_backslash_escape_table),
@@ -762,6 +1029,7 @@ function _DoAutoLinks($text) {
# Email addresses:
or tags.
+# $tags_to_skip = "!<(/?)(?:pre|code|kbd|script|math)[\s>]!";
+
foreach ($tokens as $cur_token) {
if ($cur_token[0] == 'tag') {
+ # Within tags, encode * and _ so they don't conflict
+ # with their use in Markdown for italics and strong.
+ # We're replacing each such character with its
+ # corresponding MD5 checksum value; this is likely
+ # overkill, but it should prevent us from colliding
+ # with the escape values by accident.
$cur_token[1] = str_replace(array('*', '_'),
array($md_escape_table['*'], $md_escape_table['_']),
$cur_token[1]);
@@ -278,6 +418,9 @@ function _EscapeSpecialChars($text) {
function _DoAnchors($text) {
+#
+# Turn Markdown link shortcuts into XHTML <a> tags.
+#
global $md_nested_brackets;
#
# First, handle reference-style links: [link text] [id]
@@ -298,6 +441,9 @@ function _DoAnchors($text) {
}xs",
'_DoAnchors_reference_callback', $text);
+ #
+ # Next, inline-style links: [link text](url "optional title")
+ #
$text = preg_replace_callback("{
( # wrap whole match in $1
\\[
@@ -305,7 +451,7 @@ function _DoAnchors($text) {
\\]
\\( # literal paren
[ \\t]*
- (.+?)>? # href = $3
+ (.*?)>? # href = $3
[ \\t]*
( # $4
(['\"]) # quote char = $5
@@ -352,10 +498,10 @@ function _DoAnchors_reference_callback($matches) {
}
function _DoAnchors_inline_callback($matches) {
global $md_escape_table;
- $whole_match = $matches[1];
- $link_text = $matches[2];
- $url = $matches[3];
- $title = $matches[6];
+ $whole_match = $matches[1];
+ $link_text = $matches[2];
+ $url = $matches[3];
+ $title =& $matches[6];
# We've got to encode these to avoid conflicting with italics/bold.
$url = str_replace(array('*', '_'),
@@ -363,7 +509,7 @@ function _DoAnchors_inline_callback($matches) {
$url);
$result = " tags.
+#
+ #
+ # First, handle reference-style labeled images: ![alt text][id]
+ #
$text = preg_replace_callback('{
( # wrap whole match in $1
!\[
@@ -456,12 +608,12 @@ function _DoImages_reference_callback($matches) {
}
function _DoImages_inline_callback($matches) {
global $md_empty_element_suffix, $md_escape_table;
- $whole_match = $matches[1];
- $alt_text = $matches[2];
- $url = $matches[3];
- $title = '';
+ $whole_match = $matches[1];
+ $alt_text = $matches[2];
+ $url = $matches[3];
+ $title = '';
if (isset($matches[6])) {
- $title = $matches[6];
+ $title = $matches[6];
}
$alt_text = str_replace('"', '"', $alt_text);
@@ -484,13 +636,27 @@ function _DoImages_inline_callback($matches) {
function _DoHeaders($text) {
+ # Setext-style headers:
+ # Header 1
+ # ========
+ #
+ # Header 2
+ # --------
+ #
$text = preg_replace(
- array("/(.+)[ \t]*\n=+[ \t]*\n+/e",
- "/(.+)[ \t]*\n-+[ \t]*\n+/e"),
+ array('{ ^(.+)[ \t]*\n=+[ \t]*\n+ }emx',
+ '{ ^(.+)[ \t]*\n-+[ \t]*\n+ }emx'),
array("'
'._RunSpanGamut(_UnslashQuotes('\\1')).'
\n\n'",
"''._RunSpanGamut(_UnslashQuotes('\\1')).'
\n\n'"),
$text);
+ # atx-style headers:
+ # # Header 1
+ # ## Header 2
+ # ## Header 2 with closing hashes ##
+ # ...
+ # ###### Header 6
+ #
$text = preg_replace("{
^(\\#{1,6}) # $1 = string of #'s
[ \\t]*
@@ -510,7 +676,7 @@ function _DoLists($text) {
#
# Form HTML ordered (numbered) and unordered (bulleted) lists.
#
- global $md_tab_width;
+ global $md_tab_width, $md_list_level;
$less_than_tab = $md_tab_width - 1;
# Re-usable patterns to match list item bullets and number markers:
@@ -518,27 +684,45 @@ function _DoLists($text) {
$marker_ol = '\d+[.]';
$marker_any = "(?:$marker_ul|$marker_ol)";
- $text = preg_replace_callback("{
- ( # $1
- ( # $2
- ^[ ]{0,$less_than_tab}
- ($marker_any) # $3 - first list item marker
- [ \\t]+
+ # Re-usable pattern to match any entire ul or ol list:
+ $whole_list = '
+ ( # $1 = whole list
+ ( # $2
+ [ ]{0,'.$less_than_tab.'}
+ ('.$marker_any.') # $3 = first list item marker
+ [ \t]+
+ )
+ (?s:.+?)
+ ( # $4
+ \z
+ |
+ \n{2,}
+ (?=\S)
+ (?! # Negative lookahead for another list item marker
+ [ \t]*
+ '.$marker_any.'[ \t]+
)
- (?s:.+?)
- ( # $4
- \\z
- |
- \\n{2,}
- (?=\\S)
- (?! # Negative lookahead for another list item marker
- [ \\t]*
- {$marker_any}[ \\t]+
- )
- )
- )
- }xm",
- '_DoLists_callback', $text);
+ )
+ )
+ '; // mx
+
+ # We use a different prefix before nested lists than top-level lists.
+ # See extended comment in _ProcessListItems().
+
+ if ($md_list_level) {
+ $text = preg_replace_callback('{
+ ^
+ '.$whole_list.'
+ }mx',
+ '_DoLists_callback', $text);
+ }
+ else {
+ $text = preg_replace_callback('{
+ (?:(?<=\n\n)|\A\n?)
+ '.$whole_list.'
+ }mx',
+ '_DoLists_callback', $text);
+ }
return $text;
}
@@ -549,17 +733,46 @@ function _DoLists_callback($matches) {
$marker_any = "(?:$marker_ul|$marker_ol)";
$list = $matches[1];
- $list_type = preg_match('/[*+-]/', $matches[3]) ? "ul" : "ol";
+ $list_type = preg_match("/$marker_ul/", $matches[3]) ? "ul" : "ol";
# Turn double returns into triple returns, so that we can make a
# paragraph for the last item in a list, if necessary:
$list = preg_replace("/\n{2,}/", "\n\n\n", $list);
$result = _ProcessListItems($list, $marker_any);
- $result = "<$list_type>\n" . $result . "$list_type>\n\n";
+ $result = "<$list_type>\n" . $result . "$list_type>\n";
return $result;
}
function _ProcessListItems($list_str, $marker_any) {
+#
+# Process the contents of a single ordered or unordered list, splitting it
+# into individual list items.
+#
+ global $md_list_level;
+
+ # The $md_list_level global keeps track of when we're inside a list.
+ # Each time we enter a list, we increment it; when we leave a list,
+ # we decrement. If it's zero, we're not in a list anymore.
+ #
+ # We do this because when we're not inside a list, we want to treat
+ # something like this:
+ #
+ # I recommend upgrading to version
+ # 8. Oops, now this line is treated
+ # as a sub-list.
+ #
+ # As a single paragraph, despite the fact that the second line starts
+ # with a digit-period-space sequence.
+ #
+ # Whereas when we're inside a list (or sub-list), that line will be
+ # treated as the start of a sub-list. What a kludge, huh? This is
+ # an aspect of Markdown's syntax that's hard to parse perfectly
+ # without resorting to mind-reading. Perhaps the solution is to
+ # change the syntax rules such that sub-lists must start with a
+ # starting cardinal number; e.g. "1." or "a.".
+
+ $md_list_level++;
+
# trim trailing blank lines:
$list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
@@ -573,16 +786,16 @@ function _ProcessListItems($list_str, $marker_any) {
}xm',
'_ProcessListItems_callback', $list_str);
+ $md_list_level--;
return $list_str;
}
function _ProcessListItems_callback($matches) {
$item = $matches[4];
- $leading_line = $matches[1];
- $leading_space = $matches[2];
+ $leading_line =& $matches[1];
+ $leading_space =& $matches[2];
if ($leading_line || preg_match('/\n{2,}/', $item)) {
$item = _RunBlockGamut(_Outdent($item));
- #$item =~ s/\n+/\n/g;
}
else {
# Recursion for sub-lists:
@@ -596,6 +809,9 @@ function _ProcessListItems_callback($matches) {
function _DoCodeBlocks($text) {
+#
+# Process Markdown `<pre><code>` blocks.
+#
global $md_tab_width;
$text = preg_replace_callback("{
(?:\\n\\n|\\A)
@@ -615,7 +831,7 @@ function _DoCodeBlocks_callback($matches) {
$codeblock = $matches[1];
$codeblock = _EncodeCode(_Outdent($codeblock));
- $codeblock = _Detab($codeblock);
+// $codeblock = _Detab($codeblock);
# trim leading newlines and trailing whitespace
$codeblock = preg_replace(array('/\A\n+/', '/\s+\z/'), '', $codeblock);
@@ -626,6 +842,30 @@ function _DoCodeBlocks_callback($matches) {
function _DoCodeSpans($text) {
+#
+# * Backtick quotes are used for <code></code> spans.
+#
+# * You can use multiple backticks as the delimiters if you want to
+# include literal backticks in the code span. So, this input:
+#
+# Just type ``foo `bar` baz`` at the prompt.
+#
+# Will translate to:
+#
+#
foo `bar` baz
at the prompt.`bar`
...
+#
$text = preg_replace_callback("@
(`+) # $1 = Opening run of `
(.+?) # $2 = The code block
@@ -647,13 +887,22 @@ function _DoCodeSpans_callback($matches) {
function _EncodeCode($_) {
+#
+# Encode/escape certain characters inside Markdown code runs.
+# The point is that in code, these characters are literals,
+# and lose their special Markdown meanings.
+#
global $md_escape_table;
+ # Encode all ampersands; HTML entities are not
+ # entities within a Markdown code span.
$_ = str_replace('&', '&', $_);
+ # Do the angle bracket song and dance:
$_ = str_replace(array('<', '>'),
array('<', '>'), $_);
+ # Now, escape characters that are magic in Markdown:
$_ = str_replace(array_keys($md_escape_table),
array_values($md_escape_table), $_);
@@ -663,7 +912,7 @@ function _EncodeCode($_) {
function _DoItalicsAndBold($text) {
# must go first:
- $text = preg_replace('{ (\*\*|__) (?=\S) (.+?) (?<=\S) \1 }sx',
+ $text = preg_replace('{ (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 }sx',
'\2', $text);
# Then :
$text = preg_replace('{ (\*|_) (?=\S) (.+?) (?<=\S) \1 }sx',
@@ -709,14 +958,20 @@ function _DoBlockQuotes_callback2($matches) {
function _FormParagraphs($text) {
+#
+# Params:
+# $text - string to process with html as well).
+
+For more information about Markdown's syntax, see:
+
+