From c747273ddd0656b348598b64c78b72d2d4ca23d1 Mon Sep 17 00:00:00 2001 From: wei <> Date: Thu, 1 Jun 2006 11:30:16 +0000 Subject: adding TMarkdown for markdown syntax --- framework/3rdParty/Markdown/License.text | 34 + framework/3rdParty/Markdown/MarkdownParser.php | 1257 ++++++++++++++++++++++++ 2 files changed, 1291 insertions(+) create mode 100644 framework/3rdParty/Markdown/License.text create mode 100644 framework/3rdParty/Markdown/MarkdownParser.php (limited to 'framework/3rdParty/Markdown') diff --git a/framework/3rdParty/Markdown/License.text b/framework/3rdParty/Markdown/License.text new file mode 100644 index 00000000..ea6a6a1a --- /dev/null +++ b/framework/3rdParty/Markdown/License.text @@ -0,0 +1,34 @@ +Copyright (c) 2004-2005, John Gruber + +All rights reserved. + +Copyright (c) 2004-2005, Michel Fortin + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +* Neither the name "Markdown" nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +This software is provided by the copyright holders and contributors "as +is" and any express or implied warranties, including, but not limited +to, the implied warranties of merchantability and fitness for a +particular purpose are disclaimed. 
In no event shall the copyright owner +or contributors be liable for any direct, indirect, incidental, special, +exemplary, or consequential damages (including, but not limited to, +procurement of substitute goods or services; loss of use, data, or +profits; or business interruption) however caused and on any theory of +liability, whether in contract, strict liability, or tort (including +negligence or otherwise) arising in any way out of the use of this +software, even if advised of the possibility of such damage. diff --git a/framework/3rdParty/Markdown/MarkdownParser.php b/framework/3rdParty/Markdown/MarkdownParser.php new file mode 100644 index 00000000..c0d2becf --- /dev/null +++ b/framework/3rdParty/Markdown/MarkdownParser.php @@ -0,0 +1,1257 @@ + +# +# Copyright (c) 2004-2005 Michel Fortin - PHP Port +# +# + +/** + * PHP5 version of the markdown parser. + * Usage: + * + * $markdown = new MarkdownParser; + * echo $markdown->parse($text); + * + */ +class MarkdownParser +{ + private static $md_nested_brackets; + private static $md_escape_table = array(); + private static $md_backslash_escape_table = array(); + private static $md_nested_brackets_depth = 6; + + protected $md_empty_element_suffix = " />"; # Change to ">" for HTML output + protected $md_tab_width = 4; + + private $md_list_level = 0; + private $md_urls = array(); + private $md_titles = array(); + private $md_html_blocks = array(); + + public function __construct() + { + if(is_null(self::$md_nested_brackets)) + $this->initialize(); + } + + private function initialize() + { + self::$md_nested_brackets = + str_repeat('(?>[^\[\]]+|\[', self::$md_nested_brackets_depth). 
+ str_repeat('\])*', self::$md_nested_brackets_depth); + + self::$md_escape_table = array( + "\\" => md5("\\"), + "`" => md5("`"), + "*" => md5("*"), + "_" => md5("_"), + "{" => md5("{"), + "}" => md5("}"), + "[" => md5("["), + "]" => md5("]"), + "(" => md5("("), + ")" => md5(")"), + ">" => md5(">"), + "#" => md5("#"), + "+" => md5("+"), + "-" => md5("-"), + "." => md5("."), + "!" => md5("!") + ); + + # Table of hash values for escaped characters: + # Create an identical table but for escaped characters. + foreach (self::$md_escape_table as $key => $char) + self::$md_backslash_escape_table["\\$key"] = $char; + } + + public function parse($text) + { + # + # Main function. The order in which other subs are called here is + # essential. Link and image substitutions need to happen before + # _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in the + # and tags get encoded. + # + # Clear the hashes. If we don't clear these, you get conflicts + # from other articles when generating a page which contains more than + # one article (e.g. an index page that shows the N most recent + # articles): + $this->md_urls = array(); + $this->md_titles = array(); + $this->md_html_blocks = array(); + + # Standardize line endings: + # DOS to Unix and Mac to Unix + $text = str_replace(array("\r\n", "\r"), "\n", $text); + + # Make sure $text ends with a couple of newlines: + $text .= "\n\n"; + + # Convert all tabs to spaces. + $text = $this->_Detab($text); + + # Strip any lines consisting only of spaces and tabs. + # This makes subsequent regexen easier to write, because we can + # match consecutive blank lines with /\n+/ instead of something + # contorted like /[ \t]*\n+/ . + $text = preg_replace('/^[ \t]+$/m', '', $text); + + # Turn block-level HTML blocks into hash entries + $text = $this->_HashHTMLBlocks($text); + + # Strip link definitions, store in hashes. 
+ $text = $this->_StripLinkDefinitions($text); + + $text = $this->_RunBlockGamut($text); + + $text = $this->_UnescapeSpecialChars($text); + + return $text . "\n"; + } + + + private function _StripLinkDefinitions($text) { + # + # Strips link definitions from text, stores the URLs and titles in + # hash references. + # + $less_than_tab = $this->md_tab_width - 1; + + # Link defs are in the form: ^[id]: url "optional title" + $text = preg_replace_callback('{ + ^[ ]{0,'.$less_than_tab.'}\[(.+)\]: # id = $1 + [ \t]* + \n? # maybe *one* newline + [ \t]* + ? # url = $2 + [ \t]* + \n? # maybe one newline + [ \t]* + (?: + (?<=\s) # lookbehind for whitespace + ["(] + (.+?) # title = $3 + [")] + [ \t]* + )? # title is optional + (?:\n+|\Z) + }xm', + array($this,'_StripLinkDefinitions_callback'), + $text); + return $text; + } + + private function _StripLinkDefinitions_callback($matches) { + $link_id = strtolower($matches[1]); + $this->md_urls[$link_id] = $this->_EncodeAmpsAndAngles($matches[2]); + if (isset($matches[3])) + $this->md_titles[$link_id] = str_replace('"', '"', $matches[3]); + return ''; # String that will replace the block + } + + + private function _HashHTMLBlocks($text) { + $less_than_tab = $this->md_tab_width - 1; + + # Hashify HTML blocks: + # We only want to do this for block-level HTML tags, such as headers, + # lists, and tables. That's because we still want to wrap

s around + # "paragraphs" that are wrapped in non-block-level tags, such as anchors, + # phrase emphasis, and spans. The list of tags we're looking for is + # hard-coded: + $block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|'. + 'script|noscript|form|fieldset|iframe|math|ins|del'; + $block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|'. + 'script|noscript|form|fieldset|iframe|math'; + + # First, look for nested blocks, e.g.: + #

+ #
+ # tags for inner block must be indented. + #
+ #
+ # + # The outermost tags must start at the left margin for this to match, and + # the inner nested divs must be indented. + # We need to do this before the next, more liberal match, because the next + # match will start at the first `
` and stop at the first `
`. + $text = preg_replace_callback("{ + ( # save in $1 + ^ # start of line (with /m) + <($block_tags_a) # start tag = $2 + \\b # word break + (.*\\n)*? # any number of lines, minimally matching + # the matching end tag + [ \\t]* # trailing spaces/tabs + (?=\\n+|\\Z) # followed by a newline or end of document + ) + }xm", + array($this,'_HashHTMLBlocks_callback'), + $text); + + # + # Now match more liberally, simply from `\n` to `\n` + # + $text = preg_replace_callback("{ + ( # save in $1 + ^ # start of line (with /m) + <($block_tags_b) # start tag = $2 + \\b # word break + (.*\\n)*? # any number of lines, minimally matching + .* # the matching end tag + [ \\t]* # trailing spaces/tabs + (?=\\n+|\\Z) # followed by a newline or end of document + ) + }xm", + array($this,'_HashHTMLBlocks_callback'), + $text); + + # Special case just for
. It was easier to make a special case than + # to make the other regex more complicated. + $text = preg_replace_callback('{ + (?: + (?<=\n\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in $1 + [ ]{0,'.$less_than_tab.'} + <(hr) # start tag = $2 + \b # word break + ([^<>])*? # + /?> # the matching end tag + [ \t]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + ) + }x', + array($this,'_HashHTMLBlocks_callback'), + $text); + + # Special case for standalone HTML comments: + $text = preg_replace_callback('{ + (?: + (?<=\n\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in $1 + [ ]{0,'.$less_than_tab.'} + (?s: + + ) + [ \t]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + ) + }x', + array($this,'_HashHTMLBlocks_callback'), + $text); + + return $text; + } + private function _HashHTMLBlocks_callback($matches) { + $text = $matches[1]; + $key = md5($text); + $this->md_html_blocks[$key] = $text; + return "\n\n$key\n\n"; # String that will replace the block + } + + + private function _RunBlockGamut($text) { + # + # These are all the transformations that form block-level + # tags like paragraphs, headers, and list items. + # + $text = $this->_DoHeaders($text); + + # Do Horizontal Rules: + $text = preg_replace( + array('{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}mx', + '{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}mx', + '{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}mx'), + "\nmd_empty_element_suffix}\n", + $text); + + $text = $this->_DoLists($text); + $text = $this->_DoCodeBlocks($text); + $text = $this->_DoBlockQuotes($text); + + # We already ran _HashHTMLBlocks() before, in Markdown(), but that + # was to escape raw HTML in the original Markdown source. This time, + # we're escaping the markup we've just created, so that we don't wrap + #

tags around block-level tags. + $text = $this->_HashHTMLBlocks($text); + $text = $this->_FormParagraphs($text); + + return $text; + } + + + private function _RunSpanGamut($text) { + # + # These are all the transformations that occur *within* block-level + # tags like paragraphs, headers, and list items. + # + + $text = $this->_DoCodeSpans($text); + + $text = $this->_EscapeSpecialChars($text); + + # Process anchor and image tags. Images must come first, + # because ![foo][f] looks like an anchor. + $text = $this->_DoImages($text); + $text = $this->_DoAnchors($text); + + # Make links out of things like `` + # Must come after _DoAnchors(), because you can use < and > + # delimiters in inline links like [this](). + $text = $this->_DoAutoLinks($text); + $text = $this->_EncodeAmpsAndAngles($text); + $text = $this->_DoItalicsAndBold($text); + + # Do hard breaks: + $text = preg_replace('/ {2,}\n/', "md_empty_element_suffix}\n", $text); + + return $text; + } + + + private function _EscapeSpecialChars($text) { + $tokens = $this->_TokenizeHTML($text); + + $text = ''; # rebuild $text from the tokens + # $in_pre = 0; # Keep track of when we're inside

 or  tags.
+	#	$tags_to_skip = "!<(/?)(?:pre|code|kbd|script|math)[\s>]!";
+
+		foreach ($tokens as $cur_token) {
+			if ($cur_token[0] == 'tag') {
+				# Within tags, encode * and _ so they don't conflict
+				# with their use in Markdown for italics and strong.
+				# We're replacing each such character with its
+				# corresponding MD5 checksum value; this is likely
+				# overkill, but it should prevent us from colliding
+				# with the escape values by accident.
+				$cur_token[1] = str_replace(array('*', '_'),
+					array(self::$md_escape_table['*'], self::$md_escape_table['_']),
+					$cur_token[1]);
+				$text .= $cur_token[1];
+			} else {
+				$t = $cur_token[1];
+				$t = $this->_EncodeBackslashEscapes($t);
+				$text .= $t;
+			}
+		}
+		return $text;
+	}
+
+
+	private function _DoAnchors($text) {
+	#
+	# Turn Markdown link shortcuts into XHTML <a> tags.
+	#
+	# Parameter: $text - the text to scan for link shortcuts.
+	# Returns:   $text after the two passes below; every match is replaced
+	#            by whatever the corresponding callback returns.
+	#
+		# Shared sub-pattern used for the bracketed link text in both passes.
+		$bracket = self::$md_nested_brackets;
+
+		#
+		# First, handle reference-style links: [link text] [id]
+		#
+		$text = preg_replace_callback("{
+			(					# wrap whole match in $1
+			  \\[
+				({$bracket})	# link text = $2
+			  \\]
+
+			  [ ]?				# one optional space
+			  (?:\\n[ ]*)?		# one optional newline followed by spaces
+
+			  \\[
+				(.*?)		# id = $3
+			  \\]
+			)
+			}xs",
+			array($this,'_DoAnchors_reference_callback'), $text);
+
+		#
+		# Next, inline-style links: [link text](url "optional title")
+		#
+		# The href may optionally be wrapped in angle brackets; the brackets
+		# themselves stay outside capture group $3.
+		$text = preg_replace_callback("{
+			(				# wrap whole match in $1
+			  \\[
+				({$bracket})	# link text = $2
+			  \\]
+			  \\(			# literal paren
+				[ \\t]*
+				<?(.*?)>?	# href = $3
+				[ \\t]*
+				(			# $4
+				  (['\"])	# quote char = $5
+				  (.*?)		# Title = $6
+				  \\5		# matching quote
+				)?			# title is optional
+			  \\)
+			)
+			}xs",
+			array($this,'_DoAnchors_inline_callback'), $text);
+
+		return $text;
+	}
+	private function _DoAnchors_reference_callback($matches) {
+	#
+	# preg_replace_callback() handler for reference-style links.
+	#
+	# Parameter: $matches - regex groups: [1] whole match, [2] link text,
+	#            [3] link id (may be empty).
+	# Returns:   an <a> element when the id is present in $this->md_urls,
+	#            otherwise the matched text unchanged.
+	#
+		$whole_match = $matches[1];
+		$link_text   = $matches[2];
+		$link_id     = strtolower($matches[3]);
+
+		if ($link_id == "") {
+			$link_id = strtolower($link_text); # for shortcut links like [this][].
+		}
+
+		if (isset($this->md_urls[$link_id])) {
+			$url = $this->md_urls[$link_id];
+			# We've got to encode these to avoid conflicting with italics/bold.
+			$url = str_replace(array('*', '_'),
+							   array(self::$md_escape_table['*'], self::$md_escape_table['_']),
+							   $url);
+			$result = "<a href=\"$url\"";
+			if ( isset( $this->md_titles[$link_id] ) ) {
+				$title = $this->md_titles[$link_id];
+				$title = str_replace(array('*',     '_'),
+									 array(self::$md_escape_table['*'],
+										   self::$md_escape_table['_']), $title);
+				$result .=  " title=\"$title\"";
+			}
+			$result .= ">$link_text</a>";
+		}
+		else {
+			# If there's no such link ID, leave intact:
+			$result = $whole_match;
+		}
+		return $result;
+	}
+	private function _DoAnchors_inline_callback($matches) {
+	#
+	# preg_replace_callback() handler for inline-style links.
+	#
+	# Parameter: $matches - regex groups: [1] whole match, [2] link text,
+	#            [3] href, [6] title (absent when no title was given).
+	# Returns:   the <a> element for the link.
+	#
+	# NOTE(review): the lines from the "<a href" literal down to the header
+	# of _DoImages() were reconstructed after markup stripping removed them;
+	# confirm against the original file.
+	#
+		$whole_match	= $matches[1];
+		$link_text		= $matches[2];
+		$url			= $matches[3];
+		$title			=& $matches[6];
+
+		# We've got to encode these to avoid conflicting with italics/bold.
+		$url = str_replace(array('*', '_'),
+						   array(self::$md_escape_table['*'], self::$md_escape_table['_']),
+						   $url);
+		$result = "<a href=\"$url\"";
+		if (isset($title)) {
+			$title = str_replace('"', '&quot;', $title);
+			$title = str_replace(array('*', '_'),
+								 array(self::$md_escape_table['*'], self::$md_escape_table['_']),
+								 $title);
+			$result .=  " title=\"$title\"";
+		}
+
+		$result .= ">$link_text</a>";
+
+		return $result;
+	}
+
+
+	private function _DoImages($text) {
+	#
+	# Turn Markdown image shortcuts into <img> tags.
+	#
+	# Parameter: $text - the text to scan for image shortcuts.
+	# Returns:   $text after the two passes below; every match is replaced
+	#            by whatever the corresponding callback returns.
+	#
+		#
+		# First, handle reference-style labeled images: ![alt text][id]
+		#
+		$text = preg_replace_callback('{
+			(				# wrap whole match in $1
+			  !\[
+				('.self::$md_nested_brackets.')		# alt text = $2
+			  \]
+
+			  [ ]?				# one optional space
+			  (?:\n[ ]*)?		# one optional newline followed by spaces
+
+			  \[
+				(.*?)		# id = $3
+			  \]
+
+			)
+			}xs',
+			array($this,'_DoImages_reference_callback'), $text);
+
+		#
+		# Next, handle inline images:  ![alt text](url "optional title")
+		# Don't forget: encode * and _
+		#
+		# The src url may optionally be wrapped in angle brackets; the
+		# brackets themselves stay outside capture group $3.
+		$text = preg_replace_callback('{
+			(				# wrap whole match in $1
+			  !\[
+				('.self::$md_nested_brackets.')		# alt text = $2
+			  \]
+			  \(			# literal paren
+				[ \t]*
+				<?(\S+?)>?	# src url = $3
+				[ \t]*
+				(			# $4
+				  ([\'"])	# quote char = $5
+				  (.*?)		# title = $6
+				  \5		# matching quote
+				  [ \t]*
+				)?			# title is optional
+			  \)
+			)
+			}xs',
+			array($this,'_DoImages_inline_callback'), $text);
+
+		return $text;
+	}
+	private function _DoImages_reference_callback($matches) {
+	#
+	# preg_replace_callback() handler for reference-style images.
+	#
+	# Parameter: $matches - regex groups: [1] whole match, [2] alt text,
+	#            [3] link id (may be empty).
+	# Returns:   an <img> element when the id is present in $this->md_urls,
+	#            otherwise the matched text unchanged.
+	#
+		$whole_match = $matches[1];
+		$alt_text    = $matches[2];
+		$link_id     = strtolower($matches[3]);
+
+		if ($link_id == "") {
+			$link_id = strtolower($alt_text); # for shortcut links like ![this][].
+		}
+
+		# The alt text ends up inside a double-quoted attribute, so literal
+		# double quotes must become entities.
+		$alt_text = str_replace('"', '&quot;', $alt_text);
+		if (isset($this->md_urls[$link_id])) {
+			$url = $this->md_urls[$link_id];
+			# We've got to encode these to avoid conflicting with italics/bold.
+			$url = str_replace(array('*', '_'),
+							   array(self::$md_escape_table['*'], self::$md_escape_table['_']),
+							   $url);
+			$result = "<img src=\"$url\" alt=\"$alt_text\"";
+			if (isset($this->md_titles[$link_id])) {
+				$title = $this->md_titles[$link_id];
+				$title = str_replace(array('*', '_'),
+									 array(self::$md_escape_table['*'],
+										   self::$md_escape_table['_']), $title);
+				$result .=  " title=\"$title\"";
+			}
+			$result .= $this->md_empty_element_suffix;
+		}
+		else {
+			# If there's no such link ID, leave intact:
+			$result = $whole_match;
+		}
+
+		return $result;
+	}
+	private function _DoImages_inline_callback($matches) {
+	#
+	# preg_replace_callback() handler for inline-style images.
+	#
+	# Parameter: $matches - regex groups: [1] whole match, [2] alt text,
+	#            [3] src url, [6] title (absent when no title was given).
+	# Returns:   the <img> element, closed with $this->md_empty_element_suffix.
+	#
+	# NOTE(review): the title block between the "<img" literal and the
+	# closing suffix was reconstructed after markup stripping removed it;
+	# confirm against the original file.
+	#
+		$whole_match	= $matches[1];
+		$alt_text		= $matches[2];
+		$url			= $matches[3];
+		$title			= '';
+		if (isset($matches[6])) {
+			$title		= $matches[6];
+		}
+
+		# Both values end up inside double-quoted attributes, so literal
+		# double quotes must become entities.
+		$alt_text = str_replace('"', '&quot;', $alt_text);
+		$title    = str_replace('"', '&quot;', $title);
+		# We've got to encode these to avoid conflicting with italics/bold.
+		$url = str_replace(array('*', '_'),
+						   array(self::$md_escape_table['*'], self::$md_escape_table['_']),
+						   $url);
+		$result = "<img src=\"$url\" alt=\"$alt_text\"";
+		# $title always holds a string at this point (it defaults to ''),
+		# so the title attribute is emitted even when it is empty.
+		if (isset($title)) {
+			$title = str_replace(array('*', '_'),
+								 array(self::$md_escape_table['*'], self::$md_escape_table['_']),
+								 $title);
+			$result .=  " title=\"$title\""; # $title already quoted
+		}
+		$result .= $this->md_empty_element_suffix;
+
+		return $result;
+	}
+
+
+	private function _DoHeaders($text) {
+		# Setext-style headers:
+		#	  Header 1
+		#	  ========
+		#  
+		#	  Header 2
+		#	  --------
+		#
+		$text = preg_replace(
+			array('{ ^(.+)[ \t]*\n=+[ \t]*\n+ }emx',
+				  '{ ^(.+)[ \t]*\n-+[ \t]*\n+ }emx'),
+			array("'

'.\$this->_RunSpanGamut(\$this->_UnslashQuotes('\\1')).'

\n\n'", + "'

'.\$this->_RunSpanGamut(\$this->_UnslashQuotes('\\1')).'

\n\n'"), + $text); + + # atx-style headers: + # # Header 1 + # ## Header 2 + # ## Header 2 with closing hashes ## + # ... + # ###### Header 6 + # + $text = preg_replace("{ + ^(\\#{1,6}) # $1 = string of #'s + [ \\t]* + (.+?) # $2 = Header text + [ \\t]* + \\#* # optional closing #'s (not counted) + \\n+ + }xme", + "''.\$this->_RunSpanGamut(\$this->_UnslashQuotes('\\2')).'\n\n'", + $text); + + return $text; + } + + + private function _DoLists($text) { + # + # Form HTML ordered (numbered) and unordered (bulleted) lists. + # + $less_than_tab = $this->md_tab_width - 1; + + # Re-usable patterns to match list item bullets and number markers: + $marker_ul = '[*+-]'; + $marker_ol = '\d+[.]'; + $marker_any = "(?:$marker_ul|$marker_ol)"; + + $markers = array($marker_ul, $marker_ol); + + foreach ($markers as $marker) { + # Re-usable pattern to match any entirel ul or ol list: + $whole_list = ' + ( # $1 = whole list + ( # $2 + [ ]{0,'.$less_than_tab.'} + ('.$marker.') # $3 = first list item marker + [ \t]+ + ) + (?s:.+?) + ( # $4 + \z + | + \n{2,} + (?=\S) + (?! # Negative lookahead for another list item marker + [ \t]* + '.$marker.'[ \t]+ + ) + ) + ) + '; // mx + + # We use a different prefix before nested lists than top-level lists. + # See extended comment in _ProcessListItems(). + + if ($this->md_list_level) { + $text = preg_replace_callback('{ + ^ + '.$whole_list.' + }mx', + array($this,'_DoLists_callback_top'), $text); + } + else { + $text = preg_replace_callback('{ + (?:(?<=\n\n)|\A\n?) + '.$whole_list.' + }mx', + array($this,'_DoLists_callback_nested'), $text); + } + } + + return $text; + } + private function _DoLists_callback_top($matches) { + # Re-usable patterns to match list item bullets and number markers: + $marker_ul = '[*+-]'; + $marker_ol = '\d+[.]'; + $marker_any = "(?:$marker_ul|$marker_ol)"; + + $list = $matches[1]; + $list_type = preg_match("/$marker_ul/", $matches[3]) ? "ul" : "ol"; + + $marker_any = ( $list_type == "ul" ? 
$marker_ul : $marker_ol ); + + # Turn double returns into triple returns, so that we can make a + # paragraph for the last item in a list, if necessary: + $list = preg_replace("/\n{2,}/", "\n\n\n", $list); + $result = $this->_ProcessListItems($list, $marker_any); + + # Trim any trailing whitespace, to put the closing `` + # up on the preceding line, to get it past the current stupid + # HTML block parser. This is a hack to work around the terrible + # hack that is the HTML block parser. + $result = rtrim($result); + $result = "<$list_type>" . $result . "\n"; + return $result; + } + private function _DoLists_callback_nested($matches) { + # Re-usable patterns to match list item bullets and number markers: + $marker_ul = '[*+-]'; + $marker_ol = '\d+[.]'; + $marker_any = "(?:$marker_ul|$marker_ol)"; + + $list = $matches[1]; + $list_type = preg_match("/$marker_ul/", $matches[3]) ? "ul" : "ol"; + + $marker_any = ( $list_type == "ul" ? $marker_ul : $marker_ol ); + + # Turn double returns into triple returns, so that we can make a + # paragraph for the last item in a list, if necessary: + $list = preg_replace("/\n{2,}/", "\n\n\n", $list); + $result = $this->_ProcessListItems($list, $marker_any); + $result = "<$list_type>\n" . $result . "\n"; + return $result; + } + + + private function _ProcessListItems($list_str, $marker_any) { + # + # Process the contents of a single ordered or unordered list, splitting it + # into individual list items. + # + + # The $md_list_level keeps track of when we're inside a list. + # Each time we enter a list, we increment it; when we leave a list, + # we decrement. If it's zero, we're not in a list anymore. + # + # We do this because when we're not inside a list, we want to treat + # something like this: + # + # I recommend upgrading to version + # 8. Oops, now this line is treated + # as a sub-list. + # + # As a single paragraph, despite the fact that the second line starts + # with a digit-period-space sequence. 
+ # + # Whereas when we're inside a list (or sub-list), that line will be + # treated as the start of a sub-list. What a kludge, huh? This is + # an aspect of Markdown's syntax that's hard to parse perfectly + # without resorting to mind-reading. Perhaps the solution is to + # change the syntax rules such that sub-lists must start with a + # starting cardinal number; e.g. "1." or "a.". + + $this->md_list_level++; + + # trim trailing blank lines: + $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str); + + $list_str = preg_replace_callback('{ + (\n)? # leading line = $1 + (^[ \t]*) # leading whitespace = $2 + ('.$marker_any.') [ \t]+ # list marker = $3 + ((?s:.+?) # list item text = $4 + (\n{1,2})) + (?= \n* (\z | \2 ('.$marker_any.') [ \t]+)) + }xm', + array($this,'_ProcessListItems_callback'), $list_str); + + $this->md_list_level--; + return $list_str; + } + private function _ProcessListItems_callback($matches) { + $item = $matches[4]; + $leading_line =& $matches[1]; + $leading_space =& $matches[2]; + + if ($leading_line || preg_match('/\n{2,}/', $item)) { + $item = $this->_RunBlockGamut($this->_Outdent($item)); + } + else { + # Recursion for sub-lists: + $item = $this->_DoLists($this->_Outdent($item)); + $item = preg_replace('/\n+$/', '', $item); + $item = $this->_RunSpanGamut($item); + } + + return "
  • " . $item . "
  • \n"; + } + + + private function _DoCodeBlocks($text) { + # + # Process Markdown `
    ` blocks.
    +	#
    +		$text = preg_replace_callback('{
    +				(?:\n\n|\A)
    +				(	            # $1 = the code block -- one or more lines, starting with a space/tab
    +				  (?:
    +					(?:[ ]{'.$this->md_tab_width.'} | \t)  # Lines must start with a tab or a tab-width of spaces
    +					.*\n+
    +				  )+
    +				)
    +				((?=^[ ]{0,'.$this->md_tab_width.'}\S)|\Z)	# Lookahead for non-space at line-start, or end of doc
    +			}xm',
    +			array($this,'_DoCodeBlocks_callback'), $text);
    +
    +		return $text;
    +	}
    +	private function _DoCodeBlocks_callback($matches) {
    +		$codeblock = $matches[1];
    +
    +		$codeblock = $this->_EncodeCode($this->_Outdent($codeblock));
    +	//	$codeblock = _Detab($codeblock);
    +		# trim leading newlines and trailing whitespace
    +		$codeblock = preg_replace(array('/\A\n+/', '/\s+\z/'), '', $codeblock);
    +
    +		$result = "\n\n
    " . $codeblock . "\n
    \n\n"; + + return $result; + } + + + private function _DoCodeSpans($text) { + # + # * Backtick quotes are used for spans. + # + # * You can use multiple backticks as the delimiters if you want to + # include literal backticks in the code span. So, this input: + # + # Just type ``foo `bar` baz`` at the prompt. + # + # Will translate to: + # + #

    Just type foo `bar` baz at the prompt.

    + # + # There's no arbitrary limit to the number of backticks you + # can use as delimters. If you need three consecutive backticks + # in your code, use four for delimiters, etc. + # + # * You can use spaces to get literal backticks at the edges: + # + # ... type `` `bar` `` ... + # + # Turns to: + # + # ... type `bar` ... + # + $text = preg_replace_callback('@ + (?_EncodeCode($c); + return "$c"; + } + + + private function _EncodeCode($_) { + # + # Encode/escape certain characters inside Markdown code runs. + # The point is that in code, these characters are literals, + # and lose their special Markdown meanings. + # + # Encode all ampersands; HTML entities are not + # entities within a Markdown code span. + $_ = str_replace('&', '&', $_); + + # Do the angle bracket song and dance: + $_ = str_replace(array('<', '>'), + array('<', '>'), $_); + + # Now, escape characters that are magic in Markdown: + $_ = str_replace(array_keys(self::$md_escape_table), + array_values(self::$md_escape_table), $_); + + return $_; + } + + + private function _DoItalicsAndBold($text) { + # must go first: + $text = preg_replace('{ + ( # $1: Marker + (?\2', $text); + # Then : + $text = preg_replace( + '{ ( (?\2', $text); + + return $text; + } + + + private function _DoBlockQuotes($text) { + $text = preg_replace_callback('/ + ( # Wrap whole match in $1 + ( + ^[ \t]*>[ \t]? # ">" at the start of a line + .+\n # rest of the first line + (.+\n)* # subsequent consecutive lines + \n* # blanks + )+ + ) + /xm', + array($this,'_DoBlockQuotes_callback'), $text); + + return $text; + } + private function _DoBlockQuotes_callback($matches) { + $bq = $matches[1]; + # trim one level of quoting - trim whitespace-only lines + $bq = preg_replace(array('/^[ \t]*>[ \t]?/m', '/^[ \t]+$/m'), '', $bq); + $bq = $this->_RunBlockGamut($bq); # recurse + + $bq = preg_replace('/^/m', " ", $bq); + # These leading spaces screw with
     content, so we need to fix that:
    +		$bq = preg_replace_callback('{(\s*
    .+?
    )}sx', + array($this,'_DoBlockQuotes_callback2'), $bq); + + return "
    \n$bq\n
    \n\n"; + } + private function _DoBlockQuotes_callback2($matches) { + $pre = $matches[1]; + $pre = preg_replace('/^ /m', '', $pre); + return $pre; + } + + + private function _FormParagraphs($text) { + # + # Params: + # $text - string to process with html

    tags + # + # Strip leading and trailing lines: + $text = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $text); + + $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY); + + # + # Wrap

    tags. + # + foreach ($grafs as $key => $value) { + if (!isset( $this->md_html_blocks[$value] )) { + $value = $this->_RunSpanGamut($value); + $value = preg_replace('/^([ \t]*)/', '

    ', $value); + $value .= "

    "; + $grafs[$key] = $value; + } + } + + # + # Unhashify HTML blocks + # + foreach ($grafs as $key => $value) { + if (isset( $this->md_html_blocks[$value] )) { + $grafs[$key] = $this->md_html_blocks[$value]; + } + } + + return implode("\n\n", $grafs); + } + + + private function _EncodeAmpsAndAngles($text) { + # Smart processing for ampersands and angle brackets that need to be encoded. + + # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin: + # http://bumppo.net/projects/amputator/ + $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', + '&', $text);; + + # Encode naked <'s + $text = preg_replace('{<(?![a-z/?\$!])}i', '<', $text); + + return $text; + } + + + private function _EncodeBackslashEscapes($text) { + # + # Parameter: String. + # Returns: The string, with after processing the following backslash + # escape sequences. + # + # Must process escaped backslashes first. + return str_replace(array_keys(self::$md_backslash_escape_table), + array_values(self::$md_backslash_escape_table), $text); + } + + + private function _DoAutoLinks($text) { + $text = preg_replace("!<((https?|ftp):[^'\">\\s]+)>!", + '
    \1', $text); + + # Email addresses: + $text = preg_replace('{ + < + (?:mailto:)? + ( + [-.\w]+ + \@ + [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+ + ) + > + }exi', + "\$this->_EncodeEmailAddress(\$this->_UnescapeSpecialChars(\$this->_UnslashQuotes('\\1')))", + $text); + + return $text; + } + + + private function _EncodeEmailAddress($addr) { + # + # Input: an email address, e.g. "foo@example.com" + # + # Output: the email address as a mailto link, with each character + # of the address encoded as either a decimal or hex entity, in + # the hopes of foiling most address harvesting spam bots. E.g.: + # + # foo + # @example.com + # + # Based by a filter by Matthew Wickline, posted to the BBEdit-Talk + # mailing list: + # + $addr = "mailto:" . $addr; + $length = strlen($addr); + + # leave ':' alone (to spot mailto: later) + $addr = preg_replace_callback('/([^\:])/', + array($this,'_EncodeEmailAddress_callback'), $addr); + + $addr = "$addr"; + # strip the mailto: from the visible part + $addr = preg_replace('/">.+?:/', '">', $addr); + + return $addr; + } + private function _EncodeEmailAddress_callback($matches) { + $char = $matches[1]; + $r = rand(0, 100); + # roughly 10% raw, 45% hex, 45% dec + # '@' *must* be encoded. I insist. + if ($r > 90 && $char != '@') return $char; + if ($r < 45) return '&#x'.dechex(ord($char)).';'; + return '&#'.ord($char).';'; + } + + + private function _UnescapeSpecialChars($text) { + # + # Swap back in all the special characters we've hidden. + # + return str_replace(array_values(self::$md_escape_table), + array_keys(self::$md_escape_table), $text); + } + + + # _TokenizeHTML is shared between PHP Markdown and PHP SmartyPants. + # We only define it if it is not already defined. + + private function _TokenizeHTML($str) { + # + # Parameter: String containing HTML markup. + # Returns: An array of the tokens comprising the input + # string. 
Each token is either a tag (possibly with nested, + # tags contained therein, such as , or a + # run of text between tags. Each element of the array is a + # two-element array; the first is either 'tag' or 'text'; + # the second is the actual value. + # + # + # Regular expression derived from the _tokenize() subroutine in + # Brad Choate's MTRegex plugin. + # + # + $index = 0; + $tokens = array(); + + $match = '(?s:)|'. # comment + '(?s:<\?.*?\?>)|'. # processing instruction + # regular tags + '(?:<[/!$]?[-a-zA-Z0-9:]+\b(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*>)'; + + $parts = preg_split("{($match)}", $str, -1, PREG_SPLIT_DELIM_CAPTURE); + + foreach ($parts as $part) { + if (++$index % 2 && $part != '') + $tokens[] = array('text', $part); + else + $tokens[] = array('tag', $part); + } + + return $tokens; + } + + private function _Outdent($text) { + # + # Remove one level of line-leading tabs or spaces + # + return preg_replace("/^(\\t|[ ]{1,".$this->md_tab_width."})/m", "", $text); + } + + + private function _Detab($text) { + # + # Replace tabs with the appropriate amount of space. + # + # For each line we separate the line in blocks delemited by + # tab characters. Then we reconstruct every line by adding the + # appropriate number of space between each blocks. + + $lines = explode("\n", $text); + $text = ""; + + foreach ($lines as $line) { + # Split in blocks. + $blocks = explode("\t", $line); + # Add each blocks to the line. + $line = $blocks[0]; + unset($blocks[0]); # Do not add first block twice. + foreach ($blocks as $block) { + # Calculate amount of space, insert spaces, insert block. + $amount = $this->md_tab_width - strlen($line) % $this->md_tab_width; + $line .= str_repeat(" ", $amount) . $block; + } + $text .= "$line\n"; + } + return $text; + } + + + private function _UnslashQuotes($text) { + # + # This function is useful to remove automaticaly slashed double quotes + # when using preg_replace and evaluating an expression. + # Parameter: String. 
+ # Returns: The string with any slash-double-quote (\") sequence replaced + # by a single double quote. + # + return str_replace('\"', '"', $text); + } +} + +/* + +PHP Markdown +============ + +Description +----------- + +This is a PHP translation of the original Markdown formatter written in +Perl by John Gruber. + +Markdown is a text-to-HTML filter; it translates an easy-to-read / +easy-to-write structured text format into HTML. Markdown's text format +is most similar to that of plain text email, and supports features such +as headers, *emphasis*, code blocks, blockquotes, and links. + +Markdown's syntax is designed not as a generic markup language, but +specifically to serve as a front-end to (X)HTML. You can use span-level +HTML tags anywhere in a Markdown document, and you can use block level +HTML tags (like
    and as well). + +For more information about Markdown's syntax, see: + + + + +Bugs +---- + +To file bug reports please send email to: + + + +Please include with your report: (1) the example input; (2) the output you +expected; (3) the output Markdown actually produced. + + +Version History +--------------- + +See the readme file for detailed release notes for this version. + +1.0.1c - 9 Dec 2005 + +1.0.1b - 6 Jun 2005 + +1.0.1a - 15 Apr 2005 + +1.0.1 - 16 Dec 2004 + +1.0 - 21 Aug 2004 + + +Author & Contributors +--------------------- + +Original Perl version by John Gruber + + +PHP port and other contributions by Michel Fortin + + + +Copyright and License +--------------------- + +Copyright (c) 2004-2005 Michel Fortin + +All rights reserved. + +Copyright (c) 2003-2004 John Gruber + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +* Neither the name "Markdown" nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +This software is provided by the copyright holders and contributors "as +is" and any express or implied warranties, including, but not limited +to, the implied warranties of merchantability and fitness for a +particular purpose are disclaimed. 
In no event shall the copyright owner +or contributors be liable for any direct, indirect, incidental, special, +exemplary, or consequential damages (including, but not limited to, +procurement of substitute goods or services; loss of use, data, or +profits; or business interruption) however caused and on any theory of +liability, whether in contract, strict liability, or tort (including +negligence or otherwise) arising in any way out of the use of this +software, even if advised of the possibility of such damage. + +*/ +?> -- cgit v1.2.3