Updating Markdown to 1.0.1 - http://mosquito.wordpress.org/view.php?id=730

git-svn-id: http://svn.automattic.com/wordpress/trunk@2128 1a063a9b-81f0-0310-95a4-ce76da25c4cd
2005-01-24 06:30:16 +00:00 · 2005-01-24 06:30:16 +00:00 · 9ddeb1b1a3
parent 804f3227da
commit 9ddeb1b1a3
1 changed files with 514 additions and 89 deletions
--- a/wp-content/plugins/markdown.php
+++ b/wp-content/plugins/markdown.php
@ -6,22 +6,19 @@
 # Copyright (c) 2004 John Gruber  
 # <http://daringfireball.net/projects/markdown/>
 #
-# Copyright (c) 2004 Michel Fortin - Translation to PHP  
+# Copyright (c) 2004 Michel Fortin - PHP Port  
 # <http://www.michelf.com/projects/php-markdown/>
 #

-# This version has been modified for inclusion in WordPress
-# For the original please see Michel's site
-

 global	$MarkdownPHPVersion, $MarkdownSyntaxVersion,
 		$md_empty_element_suffix, $md_tab_width,
 		$md_nested_brackets_depth, $md_nested_brackets, 
-		$md_escape_table, $md_backslash_escape_table;
+		$md_escape_table, $md_backslash_escape_table, 
+		$md_list_level;

-
-$MarkdownPHPVersion    = '1.0'; # Sat 21 Aug 2004
-$MarkdownSyntaxVersion = '1.0'; # Fri 20 Aug 2004
+$MarkdownPHPVersion    = '1.0.1'; # Fri 17 Dec 2004
+$MarkdownSyntaxVersion = '1.0.1'; # Sun 12 Dec 2004


 #
@ -34,9 +31,9 @@ $md_tab_width = 4;
 # -- WordPress Plugin Interface -----------------------------------------------
 /*
 Plugin Name: Markdown
-Plugin URI: http://codex.wordpress.org/Plugin:Markdown
+Plugin URI: http://www.michelf.com/projects/php-markdown/
 Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>
-Version: 1.0
+Version: 1.0.1
 Author: Michel Fortin
 Author URI: http://www.michelf.com/
 */
@ -51,15 +48,56 @@ if (isset($wp_version)) {
 	add_filter('comment_text', 'Markdown', 6);
 }

+
+# -- bBlog Plugin Info --------------------------------------------------------
+function identify_modifier_markdown() {
+	global $MarkdownPHPVersion;
+	return array(
+		'name'			=> 'markdown',
+		'type'			=> 'modifier',
+		'nicename'		=> 'Markdown',
+		'description'	=> 'A text-to-HTML conversion tool for web writers',
+		'authors'		=> 'Michel Fortin and John Gruber',
+		'licence'		=> 'GPL',
+		'version'		=> $MarkdownPHPVersion,
+		'help'			=> '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>'
+	);
+}
+
+# -- Smarty Modifier Interface ------------------------------------------------
 function smarty_modifier_markdown($text) {
 	return Markdown($text);
 }

+# -- Textile Compatibility Mode -----------------------------------------------
+# Rename this file to "classTextile.php" and it can replace Textile anywhere.
+if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) {
+	# Try to include PHP SmartyPants. Should be in the same directory.
+	@include_once 'smartypants.php';
+	# Fake Textile class. It calls Markdown instead.
+	class Textile {
+		function TextileThis($text, $lite='', $encode='', $noimage='', $strict='') {
+			if ($lite == '' && $encode == '')   $text = Markdown($text);
+			if (function_exists('SmartyPants')) $text = SmartyPants($text);
+			return $text;
+		}
+	}
+}
+
+
+
+#
+# Globals:
+#
+
+# Regex to match balanced [brackets].
+# Needed to insert a maximum bracket depth while converting to PHP.
 $md_nested_brackets_depth = 6;
 $md_nested_brackets = 
 	str_repeat('(?>[^\[\]]+|\[', $md_nested_brackets_depth).
 	str_repeat('\])*', $md_nested_brackets_depth);

+# Table of hash values for escaped characters:
 $md_escape_table = array(
 	"\\" => md5("\\"),
 	"`" => md5("`"),
@ -71,7 +109,10 @@ $md_escape_table = array(
 	"]" => md5("]"),
 	"(" => md5("("),
 	")" => md5(")"),
+	">" => md5(">"),
 	"#" => md5("#"),
+	"+" => md5("+"),
+	"-" => md5("-"),
 	"." => md5("."),
 	"!" => md5("!")
 );
@ -82,25 +123,43 @@ foreach ($md_escape_table as $key => $char)


 function Markdown($text) {
+#
+# Main function. The order in which other subs are called here is
+# essential. Link and image substitutions need to happen before
+# _EscapeSpecialChars(), so that any *'s or _'s in the <a>
+# and <img> tags get encoded.
+#
+	# Clear the global hashes. If we don't clear these, you get conflicts
+	# from other articles when generating a page which contains more than
+	# one article (e.g. an index page that shows the N most recent
+	# articles):
 	global $md_urls, $md_titles, $md_html_blocks;
 	$md_urls = array();
 	$md_titles = array();
 	$md_html_blocks = array();

+	# Standardize line endings:
+	#   DOS to Unix and Mac to Unix
 	$text = str_replace(array("\r\n", "\r"), "\n", $text);

+	# Make sure $text ends with a couple of newlines:
 	$text .= "\n\n";

+	# Convert all tabs to spaces.
 	$text = _Detab($text);

+	# Strip any lines consisting only of spaces and tabs.
+	# This makes subsequent regexen easier to write, because we can
+	# match consecutive blank lines with /\n+/ instead of something
+	# contorted like /[ \t]*\n+/ .
 	$text = preg_replace('/^[ \t]+$/m', '', $text);

+	# Turn block-level HTML blocks into hash entries
 	$text = _HashHTMLBlocks($text);

+	# Strip link definitions, store in hashes.
 	$text = _StripLinkDefinitions($text);

-	$text = _EscapeSpecialChars($text);
-
 	$text = _RunBlockGamut($text);

 	$text = _UnescapeSpecialChars($text);
@ -110,8 +169,16 @@ function Markdown($text) {


 function _StripLinkDefinitions($text) {
+#
+# Strips link definitions from text, stores the URLs and titles in
+# hash references.
+#
+	global $md_tab_width;
+	$less_than_tab = $md_tab_width - 1;
+
+	# Link defs are in the form: ^[id]: url "optional title"
 	$text = preg_replace_callback('{
-						^[ \t]*\[(.+)\]:	# id = $1
+						^[ ]{0,'.$less_than_tab.'}\[(.+)\]:	# id = $1
 						  [ \t]*
 						  \n?				# maybe *one* newline
 						  [ \t]*
@ -120,7 +187,7 @@ function _StripLinkDefinitions($text) {
 						  \n?				# maybe one newline
 						  [ \t]*
 						(?:
-							# Todo: Titles are delimited by "quotes" or (parens).
+							(?<=\s)			# lookbehind for whitespace
 							["(]
 							(.+?)			# title = $3
 							[")]
@ -137,17 +204,37 @@ function _StripLinkDefinitions_callback($matches) {
 	$link_id = strtolower($matches[1]);
 	$md_urls[$link_id] = _EncodeAmpsAndAngles($matches[2]);
 	if (isset($matches[3]))
-		$md_titles[$link_id] = htmlentities($matches[3]);
+		$md_titles[$link_id] = str_replace('"', '&quot;', $matches[3]);
 	return ''; # String that will replace the block
 }


 function _HashHTMLBlocks($text) {
+	global $md_tab_width;
+	$less_than_tab = $md_tab_width - 1;
+
+	# Hashify HTML blocks:
+	# We only want to do this for block-level HTML tags, such as headers,
+	# lists, and tables. That's because we still want to wrap <p>s around
+	# "paragraphs" that are wrapped in non-block-level tags, such as anchors,
+	# phrase emphasis, and spans. The list of tags we're looking for is
+	# hard-coded:
 	$block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|'.
 					'script|noscript|form|fieldset|iframe|math|ins|del';
 	$block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|'.
 					'script|noscript|form|fieldset|iframe|math';

+	# First, look for nested blocks, e.g.:
+	# 	<div>
+	# 		<div>
+	# 		tags for inner block must be indented.
+	# 		</div>
+	# 	</div>
+	#
+	# The outermost tags must start at the left margin for this to match, and
+	# the inner nested divs must be indented.
+	# We need to do this before the next, more liberal match, because the next
+	# match will start at the first `<div>` and stop at the first `</div>`.
 	$text = preg_replace_callback("{
 				(						# save in $1
 					^					# start of line  (with /m)
@ -162,6 +249,9 @@ function _HashHTMLBlocks($text) {
 		'_HashHTMLBlocks_callback',
 		$text);

+	#
+	# Now match more liberally, simply from `\n<tag>` to `</tag>\n`
+	#
 	$text = preg_replace_callback("{
 				(						# save in $1
 					^					# start of line  (with /m)
@ -176,6 +266,8 @@ function _HashHTMLBlocks($text) {
 		'_HashHTMLBlocks_callback',
 		$text);

+	# Special case just for <hr />. It was easier to make a special case than
+	# to make the other regex more complicated.
 	$text = preg_replace_callback('{
 				(?:
 					(?<=\n\n)		# Starting after a blank line
@ -183,17 +275,39 @@ function _HashHTMLBlocks($text) {
 					\A\n?			# the beginning of the doc
 				)
 				(						# save in $1
-					[ \t]*
+					[ ]{0,'.$less_than_tab.'}
 					<(hr)				# start tag = $2
 					\b					# word break
 					([^<>])*?			# 
 					/?>					# the matching end tag
+					[ \t]*
 					(?=\n{2,}|\Z)		# followed by a blank line or end of document
 				)
 		}x',
 		'_HashHTMLBlocks_callback',
 		$text);

+	# Special case for standalone HTML comments:
+	$text = preg_replace_callback('{
+				(?:
+					(?<=\n\n)		# Starting after a blank line
+					|				# or
+					\A\n?			# the beginning of the doc
+				)
+				(						# save in $1
+					[ ]{0,'.$less_than_tab.'}
+					(?s:
+						<!
+						(--.*?--\s*)+
+						>
+					)
+					[ \t]*
+					(?=\n{2,}|\Z)		# followed by a blank line or end of document
+				)
+			}x',
+			'_HashHTMLBlocks_callback',
+			$text);
+
 	return $text;
 }
 function _HashHTMLBlocks_callback($matches) {
@ -206,15 +320,19 @@ function _HashHTMLBlocks_callback($matches) {


 function _RunBlockGamut($text) {
+#
+# These are all the transformations that form block-level
+# tags like paragraphs, headers, and list items.
+#
 	global $md_empty_element_suffix;

 	$text = _DoHeaders($text);

 	# Do Horizontal Rules:
 	$text = preg_replace(
-		array('/^( ?\* ?){3,}$/m',
-			  '/^( ?- ?){3,}$/m',
-			  '/^( ?_ ?){3,}$/m'),
+		array('{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}mx',
+			  '{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}mx',
+			  '{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}mx'),
 		"\n<hr$md_empty_element_suffix\n", 
 		$text);

@ -224,9 +342,10 @@ function _RunBlockGamut($text) {

 	$text = _DoBlockQuotes($text);

-	# Make links out of things like `<http://example.com/>`
-	$text = _DoAutoLinks($text);
-
+	# We already ran _HashHTMLBlocks() before, in Markdown(), but that
+	# was to escape raw HTML in the original Markdown source. This time,
+	# we're escaping the markup we've just created, so that we don't wrap
+	# <p> tags around block-level tags.
 	$text = _HashHTMLBlocks($text);

 	$text = _FormParagraphs($text);
@ -236,20 +355,32 @@ function _RunBlockGamut($text) {


 function _RunSpanGamut($text) {
+#
+# These are all the transformations that occur *within* block-level
+# tags like paragraphs, headers, and list items.
+#
 	global $md_empty_element_suffix;
+
 	$text = _DoCodeSpans($text);

-	# Fix unencoded ampersands and <'s:
-	$text = _EncodeAmpsAndAngles($text);
+	$text = _EscapeSpecialChars($text);

 	# Process anchor and image tags. Images must come first,
 	# because ![foo][f] looks like an anchor.
 	$text = _DoImages($text);
 	$text = _DoAnchors($text);

+	# Make links out of things like `<http://example.com/>`
+	# Must come after _DoAnchors(), because you can use < and >
+	# delimiters in inline links like [this](<url>).
+	$text = _DoAutoLinks($text);
+
+	# Fix unencoded ampersands and <'s:
+	$text = _EncodeAmpsAndAngles($text);

 	$text = _DoItalicsAndBold($text);

+	# Do hard breaks:
 	$text = preg_replace('/ {2,}\n/', "<br$md_empty_element_suffix\n", $text);

 	return $text;
@ -261,8 +392,17 @@ function _EscapeSpecialChars($text) {
 	$tokens = _TokenizeHTML($text);

 	$text = '';   # rebuild $text from the tokens
+#	$in_pre = 0;  # Keep track of when we're inside <pre> or <code> tags.
+#	$tags_to_skip = "!<(/?)(?:pre|code|kbd|script|math)[\s>]!";
+
 	foreach ($tokens as $cur_token) {
 		if ($cur_token[0] == 'tag') {
+			# Within tags, encode * and _ so they don't conflict
+			# with their use in Markdown for italics and strong.
+			# We're replacing each such character with its
+			# corresponding MD5 checksum value; this is likely
+			# overkill, but it should prevent us from colliding
+			# with the escape values by accident.
 			$cur_token[1] = str_replace(array('*', '_'),
 				array($md_escape_table['*'], $md_escape_table['_']),
 				$cur_token[1]);
@ -278,6 +418,9 @@ function _EscapeSpecialChars($text) {


 function _DoAnchors($text) {
+#
+# Turn Markdown link shortcuts into XHTML <a> tags.
+#
 	global $md_nested_brackets;
 	#
 	# First, handle reference-style links: [link text] [id]
@ -298,6 +441,9 @@ function _DoAnchors($text) {
 		}xs",
 		'_DoAnchors_reference_callback', $text);

+	#
+	# Next, inline-style links: [link text](url "optional title")
+	#
 	$text = preg_replace_callback("{
 		(				# wrap whole match in $1
 		  \\[
@ -305,7 +451,7 @@ function _DoAnchors($text) {
 		  \\]
 		  \\(			# literal paren
 			[ \\t]*
-			<?(.+?)>?	# href = $3
+			<?(.*?)>?	# href = $3
 			[ \\t]*
 			(			# $4
 			  (['\"])	# quote char = $5
@ -352,10 +498,10 @@ function _DoAnchors_reference_callback($matches) {
 }
 function _DoAnchors_inline_callback($matches) {
 	global $md_escape_table;
-	$whole_match = $matches[1];
-	$link_text   = $matches[2];
-	$url	  		= $matches[3];
-	$title		= $matches[6];
+	$whole_match	= $matches[1];
+	$link_text		= $matches[2];
+	$url			= $matches[3];
+	$title			=& $matches[6];

 	# We've got to encode these to avoid conflicting with italics/bold.
 	$url = str_replace(array('*', '_'),
@ -363,7 +509,7 @@ function _DoAnchors_inline_callback($matches) {
 					   $url);
 	$result = "<a href=\"$url\"";
 	if (isset($title)) {
-		$title = str_replace('"', '&quot', $title);
+		$title = str_replace('"', '&quot;', $title);
 		$title = str_replace(array('*', '_'),
 							 array($md_escape_table['*'], $md_escape_table['_']),
 							 $title);
@ -377,6 +523,12 @@ function _DoAnchors_inline_callback($matches) {


 function _DoImages($text) {
+#
+# Turn Markdown image shortcuts into <img> tags.
+#
+	#
+	# First, handle reference-style labeled images: ![alt text][id]
+	#
 	$text = preg_replace_callback('{
 		(				# wrap whole match in $1
 		  !\[
@ -456,12 +608,12 @@ function _DoImages_reference_callback($matches) {
 }
 function _DoImages_inline_callback($matches) {
 	global $md_empty_element_suffix, $md_escape_table;
-	$whole_match = $matches[1];
-	$alt_text    = $matches[2];
-	$url	  		= $matches[3];
-	$title		= '';
+	$whole_match	= $matches[1];
+	$alt_text		= $matches[2];
+	$url			= $matches[3];
+	$title			= '';
 	if (isset($matches[6])) {
-		$title = $matches[6];
+		$title		= $matches[6];
 	}

 	$alt_text = str_replace('"', '&quot;', $alt_text);
@ -484,13 +636,27 @@ function _DoImages_inline_callback($matches) {


 function _DoHeaders($text) {
+	# Setext-style headers:
+	#	  Header 1
+	#	  ========
+	#  
+	#	  Header 2
+	#	  --------
+	#
 	$text = preg_replace(
-		array("/(.+)[ \t]*\n=+[ \t]*\n+/e",
-			  "/(.+)[ \t]*\n-+[ \t]*\n+/e"),
+		array('{ ^(.+)[ \t]*\n=+[ \t]*\n+ }emx',
+			  '{ ^(.+)[ \t]*\n-+[ \t]*\n+ }emx'),
 		array("'<h1>'._RunSpanGamut(_UnslashQuotes('\\1')).'</h1>\n\n'",
 			  "'<h2>'._RunSpanGamut(_UnslashQuotes('\\1')).'</h2>\n\n'"),
 		$text);

+	# atx-style headers:
+	#	# Header 1
+	#	## Header 2
+	#	## Header 2 with closing hashes ##
+	#	...
+	#	###### Header 6
+	#
 	$text = preg_replace("{
 			^(\\#{1,6})	# $1 = string of #'s
 			[ \\t]*
@ -510,7 +676,7 @@ function _DoLists($text) {
 #
 # Form HTML ordered (numbered) and unordered (bulleted) lists.
 #
-	global $md_tab_width;
+	global $md_tab_width, $md_list_level;
 	$less_than_tab = $md_tab_width - 1;

 	# Re-usable patterns to match list item bullets and number markers:
@ -518,27 +684,45 @@ function _DoLists($text) {
 	$marker_ol  = '\d+[.]';
 	$marker_any = "(?:$marker_ul|$marker_ol)";

-	$text = preg_replace_callback("{
-			(								# $1
-			  (								# $2
-				^[ ]{0,$less_than_tab}
-			    ($marker_any)				# $3 - first list item marker
-				[ \\t]+
+	# Re-usable pattern to match any entire ul or ol list:
+	$whole_list = '
+		(								# $1 = whole list
+		  (								# $2
+			[ ]{0,'.$less_than_tab.'}
+			('.$marker_any.')				# $3 = first list item marker
+			[ \t]+
+		  )
+		  (?s:.+?)
+		  (								# $4
+			  \z
+			|
+			  \n{2,}
+			  (?=\S)
+			  (?!						# Negative lookahead for another list item marker
+				[ \t]*
+				'.$marker_any.'[ \t]+
 			  )
-			  (?s:.+?)
-			  (								# $4
-				  \\z
-				|
-				  \\n{2,}
-				  (?=\\S)
-				  (?!						# Negative lookahead for another list item marker
-				  	[ \\t]*
-				  	{$marker_any}[ \\t]+
-				  )
-			  )
-			)
-		}xm",
-		'_DoLists_callback', $text);
+		  )
+		)
+	'; // mx
+	
+	# We use a different prefix before nested lists than top-level lists.
+	# See extended comment in _ProcessListItems().
+
+	if ($md_list_level) {
+		$text = preg_replace_callback('{
+				^
+				'.$whole_list.'
+			}mx',
+			'_DoLists_callback', $text);
+	}
+	else {
+		$text = preg_replace_callback('{
+				(?:(?<=\n\n)|\A\n?)
+				'.$whole_list.'
+			}mx',
+			'_DoLists_callback', $text);
+	}

 	return $text;
 }
@ -549,17 +733,46 @@ function _DoLists_callback($matches) {
 	$marker_any = "(?:$marker_ul|$marker_ol)";
 	
 	$list = $matches[1];
-	$list_type = preg_match('/[*+-]/', $matches[3]) ? "ul" : "ol";
+	$list_type = preg_match("/$marker_ul/", $matches[3]) ? "ul" : "ol";
 	# Turn double returns into triple returns, so that we can make a
 	# paragraph for the last item in a list, if necessary:
 	$list = preg_replace("/\n{2,}/", "\n\n\n", $list);
 	$result = _ProcessListItems($list, $marker_any);
-	$result = "<$list_type>\n" . $result . "</$list_type>\n\n";
+	$result = "<$list_type>\n" . $result . "</$list_type>\n";
 	return $result;
 }


 function _ProcessListItems($list_str, $marker_any) {
+#
+#	Process the contents of a single ordered or unordered list, splitting it
+#	into individual list items.
+#
+	global $md_list_level;
+	
+	# The $md_list_level global keeps track of when we're inside a list.
+	# Each time we enter a list, we increment it; when we leave a list,
+	# we decrement. If it's zero, we're not in a list anymore.
+	#
+	# We do this because when we're not inside a list, we want to treat
+	# something like this:
+	#
+	#		I recommend upgrading to version
+	#		8. Oops, now this line is treated
+	#		as a sub-list.
+	#
+	# As a single paragraph, despite the fact that the second line starts
+	# with a digit-period-space sequence.
+	#
+	# Whereas when we're inside a list (or sub-list), that line will be
+	# treated as the start of a sub-list. What a kludge, huh? This is
+	# an aspect of Markdown's syntax that's hard to parse perfectly
+	# without resorting to mind-reading. Perhaps the solution is to
+	# change the syntax rules such that sub-lists must start with a
+	# starting cardinal number; e.g. "1." or "a.".
+	
+	$md_list_level++;
+
 	# trim trailing blank lines:
 	$list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

@ -573,16 +786,16 @@ function _ProcessListItems($list_str, $marker_any) {
 		}xm',
 		'_ProcessListItems_callback', $list_str);

+	$md_list_level--;
 	return $list_str;
 }
 function _ProcessListItems_callback($matches) {
 	$item = $matches[4];
-	$leading_line = $matches[1];
-	$leading_space = $matches[2];
+	$leading_line =& $matches[1];
+	$leading_space =& $matches[2];

 	if ($leading_line || preg_match('/\n{2,}/', $item)) {
 		$item = _RunBlockGamut(_Outdent($item));
-		#$item =~ s/\n+/\n/g;
 	}
 	else {
 		# Recursion for sub-lists:
@ -596,6 +809,9 @@ function _ProcessListItems_callback($matches) {


 function _DoCodeBlocks($text) {
+#
+#	Process Markdown `<pre><code>` blocks.
+#
 	global $md_tab_width;
 	$text = preg_replace_callback("{
 			(?:\\n\\n|\\A)
@ -615,7 +831,7 @@ function _DoCodeBlocks_callback($matches) {
 	$codeblock = $matches[1];

 	$codeblock = _EncodeCode(_Outdent($codeblock));
-	$codeblock = _Detab($codeblock);
+//	$codeblock = _Detab($codeblock);
 	# trim leading newlines and trailing whitespace
 	$codeblock = preg_replace(array('/\A\n+/', '/\s+\z/'), '', $codeblock);

@ -626,6 +842,30 @@ function _DoCodeBlocks_callback($matches) {


 function _DoCodeSpans($text) {
+#
+# 	*	Backtick quotes are used for <code></code> spans.
+#
+# 	*	You can use multiple backticks as the delimiters if you want to
+# 		include literal backticks in the code span. So, this input:
+#
+#		  Just type ``foo `bar` baz`` at the prompt.
+#
+#	  	Will translate to:
+#
+#		  <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
+#
+#		There's no arbitrary limit to the number of backticks you
+#		can use as delimiters. If you need three consecutive backticks
+#		in your code, use four for delimiters, etc.
+#
+#	*	You can use spaces to get literal backticks at the edges:
+#
+#		  ... type `` `bar` `` ...
+#
+#	  	Turns to:
+#
+#		  ... type <code>`bar`</code> ...
+#
 	$text = preg_replace_callback("@
 			(`+)		# $1 = Opening run of `
 			(.+?)		# $2 = The code block
@ -647,13 +887,22 @@ function _DoCodeSpans_callback($matches) {


 function _EncodeCode($_) {
+#
+# Encode/escape certain characters inside Markdown code runs.
+# The point is that in code, these characters are literals,
+# and lose their special Markdown meanings.
+#
 	global $md_escape_table;

+	# Encode all ampersands; HTML entities are not
+	# entities within a Markdown code span.
 	$_ = str_replace('&', '&amp;', $_);

+	# Do the angle bracket song and dance:
 	$_ = str_replace(array('<',    '>'), 
 					 array('&lt;', '&gt;'), $_);

+	# Now, escape characters that are magic in Markdown:
 	$_ = str_replace(array_keys($md_escape_table), 
 					 array_values($md_escape_table), $_);

@ -663,7 +912,7 @@ function _EncodeCode($_) {

 function _DoItalicsAndBold($text) {
 	# <strong> must go first:
-	$text = preg_replace('{ (\*\*|__) (?=\S) (.+?) (?<=\S) \1 }sx',
+	$text = preg_replace('{ (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 }sx',
 		'<strong>\2</strong>', $text);
 	# Then <em>:
 	$text = preg_replace('{ (\*|_) (?=\S) (.+?) (?<=\S) \1 }sx',
@ -709,14 +958,20 @@ function _DoBlockQuotes_callback2($matches) {


 function _FormParagraphs($text) {
+#
+#	Params:
+#		$text - string to process with html <p> tags
+#
 	global $md_html_blocks;

 	# Strip leading and trailing lines:
 	$text = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $text);

 	$grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
-	$count = count($grafs);

+	#
+	# Wrap <p> tags.
+	#
 	foreach ($grafs as $key => $value) {
 		if (!isset( $md_html_blocks[$value] )) {
 			$value = _RunSpanGamut($value);
@ -726,6 +981,9 @@ function _FormParagraphs($text) {
 		}
 	}

+	#
+	# Unhashify HTML blocks
+	#
 	foreach ($grafs as $key => $value) {
 		if (isset( $md_html_blocks[$value] )) {
 			$grafs[$key] = $md_html_blocks[$value];
@ -737,6 +995,10 @@ function _FormParagraphs($text) {


 function _EncodeAmpsAndAngles($text) {
+# Smart processing for ampersands and angle brackets that need to be encoded.
+
+	# Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
+	#   http://bumppo.net/projects/amputator/
 	$text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', 
 						 '&amp;', $text);;

@ -748,6 +1010,11 @@ function _EncodeAmpsAndAngles($text) {


 function _EncodeBackslashEscapes($text) {
+#
+#	Parameter:  String.
+#	Returns:    The string, after processing the following backslash
+#				escape sequences.
+#
 	global $md_escape_table, $md_backslash_escape_table;
 	# Must process escaped backslashes first.
 	return str_replace(array_keys($md_backslash_escape_table),
@ -762,6 +1029,7 @@ function _DoAutoLinks($text) {
 	# Email addresses: <address@domain.foo>
 	$text = preg_replace('{
 		<
+        (?:mailto:)?
 		(
 			[-.\w]+
 			\@
@ -777,6 +1045,20 @@ function _DoAutoLinks($text) {


 function _EncodeEmailAddress($addr) {
+#
+#	Input: an email address, e.g. "foo@example.com"
+#
+#	Output: the email address as a mailto link, with each character
+#		of the address encoded as either a decimal or hex entity, in
+#		the hopes of foiling most address harvesting spam bots. E.g.:
+#
+#	  <a href="&#x6D;&#97;&#105;&#108;&#x74;&#111;:&#102;&#111;&#111;&#64;&#101;
+#		x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;">&#102;&#111;&#111;
+#		&#64;&#101;x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;</a>
+#
+#	Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
+#	mailing list: <http://tinyurl.com/yu7ue>
+#
 	$addr = "mailto:" . $addr;
 	$length = strlen($addr);

@ -802,56 +1084,199 @@ function _EncodeEmailAddress_callback($matches) {


 function _UnescapeSpecialChars($text) {
+#
+# Swap back in all the special characters we've hidden.
+#
 	global $md_escape_table;
 	return str_replace(array_values($md_escape_table), 
 					   array_keys($md_escape_table), $text);
 }


-if (!function_exists('_TokenizeHTML')) {
-	function _TokenizeHTML($str) {
-		$index = 0;
-		$tokens = array();
+# _TokenizeHTML is shared between PHP Markdown and PHP SmartyPants.
+# We only define it if it is not already defined.
+if (!function_exists('_TokenizeHTML')) :
+function _TokenizeHTML($str) {
+#
+#   Parameter:  String containing HTML markup.
+#   Returns:    An array of the tokens comprising the input
+#               string. Each token is either a tag (possibly with nested
+#               tags contained therein, such as <a href="<MTFoo>">), or a
+#               run of text between tags. Each element of the array is a
+#               two-element array; the first is either 'tag' or 'text';
+#               the second is the actual value.
+#
+#
+#   Regular expression derived from the _tokenize() subroutine in 
+#   Brad Choate's MTRegex plugin.
+#   <http://www.bradchoate.com/past/mtregex.php>
+#
+	$index = 0;
+	$tokens = array();

-		$depth = 6;
-		$nested_tags = str_repeat('(?:<[a-z\/!$](?:[^<>]|',$depth)
-					   .str_repeat(')*>)', $depth);
-		$match = "(?s:<!(?:--.*?--\s*)+>)|".  # comment
-				 "(?s:<\?.*?\?>)|".         # processing instruction
-				 "$nested_tags";            # nested tags
+	$match = '(?s:<!(?:--.*?--\s*)+>)|'.	# comment
+			 '(?s:<\?.*?\?>)|'.				# processing instruction
+			 '(?:</?[\w:$]+\b(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*>)'; # regular tags

-		$parts = preg_split("/($match)/", $str, -1, PREG_SPLIT_DELIM_CAPTURE);
+	$parts = preg_split("{($match)}", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

-		foreach ($parts as $part) {
-			if (++$index % 2 && $part != '') 
-				array_push($tokens, array('text', $part));
-			else
-				array_push($tokens, array('tag', $part));
-		}
-
-		return $tokens;
+	foreach ($parts as $part) {
+		if (++$index % 2 && $part != '') 
+			array_push($tokens, array('text', $part));
+		else
+			array_push($tokens, array('tag', $part));
 	}
+
+	return $tokens;
 }
+endif;


 function _Outdent($text) {
+#
+# Remove one level of line-leading tabs or spaces
+#
 	global $md_tab_width;
 	return preg_replace("/^(\\t|[ ]{1,$md_tab_width})/m", "", $text);
 }


 function _Detab($text) {
+#
+# Replace tabs with the appropriate amount of space.
+#
 	global $md_tab_width;
-	$text = preg_replace(
-		"/(.*?)\t/e",
-		"'\\1'.str_repeat(' ', $md_tab_width - strlen('\\1') % $md_tab_width)",
-		$text);
+
+	# For each line we separate the line in blocks delimited by
+	# tab characters. Then we reconstruct the line adding the appropriate
+	# number of space characters.
+	
+	$lines = explode("\n", $text);
+	$text = "";
+	
+	foreach ($lines as $line) {
+		# Split in blocks.
+		$blocks = explode("\t", $line);
+		# Add each block to the line.
+		$line = $blocks[0];
+		unset($blocks[0]); # Do not add first block twice.
+		foreach ($blocks as $block) {
+			# Calculate amount of space, insert spaces, insert block.
+			$amount = $md_tab_width - strlen($line) % $md_tab_width;
+			$line .= str_repeat(" ", $amount) . $block;
+		}
+		$text .= "$line\n";
+	}
 	return $text;
 }


 function _UnslashQuotes($text) {
+#
+#	This function is useful to remove automatically slashed double quotes
+#	when using preg_replace and evaluating an expression.
+#	Parameter:  String.
+#	Returns:    The string with any slash-double-quote (\") sequence replaced
+#				by a single double quote.
+#
 	return str_replace('\"', '"', $text);
 }

+
+/*
+
+PHP Markdown
+============
+
+Description
+-----------
+
+This is a PHP translation of the original Markdown formatter written in
+Perl by John Gruber.
+
+Markdown is a text-to-HTML filter; it translates an easy-to-read /
+easy-to-write structured text format into HTML. Markdown's text format
+is most similar to that of plain text email, and supports features such
+as headers, *emphasis*, code blocks, blockquotes, and links.
+
+Markdown's syntax is designed not as a generic markup language, but
+specifically to serve as a front-end to (X)HTML. You can use span-level
+HTML tags anywhere in a Markdown document, and you can use block level
+HTML tags (like <div> and <table>) as well.
+
+For more information about Markdown's syntax, see:
+
+<http://daringfireball.net/projects/markdown/>
+
+
+Bugs
+----
+
+To file bug reports please send email to:
+
+<michel.fortin@michelf.com>
+
+Please include with your report: (1) the example input; (2) the output you
+expected; (3) the output Markdown actually produced.
+
+
+Version History
+--------------- 
+
+See the readme file for detailed release notes for this version.
+
+1.0.1 - 17 Dec 2004
+
+1.0 - 21 Aug 2004
+
+
+Author & Contributors
+---------------------
+
+Original Perl version by John Gruber  
+<http://daringfireball.net/>
+
+PHP port and other contributions by Michel Fortin  
+<http://www.michelf.com/>
+
+
+Copyright and License
+---------------------
+
+Copyright (c) 2004 Michel Fortin  
+<http://www.michelf.com/>  
+All rights reserved.
+
+Copyright (c) 2003-2004 John Gruber   
+<http://daringfireball.net/>   
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+*	Redistributions of source code must retain the above copyright notice,
+	this list of conditions and the following disclaimer.
+
+*	Redistributions in binary form must reproduce the above copyright
+	notice, this list of conditions and the following disclaimer in the
+	documentation and/or other materials provided with the distribution.
+
+*	Neither the name "Markdown" nor the names of its contributors may
+	be used to endorse or promote products derived from this software
+	without specific prior written permission.
+
+This software is provided by the copyright holders and contributors "as
+is" and any express or implied warranties, including, but not limited
+to, the implied warranties of merchantability and fitness for a
+particular purpose are disclaimed. In no event shall the copyright owner
+or contributors be liable for any direct, indirect, incidental, special,
+exemplary, or consequential damages (including, but not limited to,
+procurement of substitute goods or services; loss of use, data, or
+profits; or business interruption) however caused and on any theory of
+liability, whether in contract, strict liability, or tort (including
+negligence or otherwise) arising in any way out of the use of this
+software, even if advised of the possibility of such damage.
+
+*/
 ?>