1 <?php
2 #
3 # Markdown - A text-to-HTML conversion tool for web writers
4 #
5 # PHP Markdown
6 # Copyright (c) 2004-2012 Michel Fortin
7 # <http://michelf.com/projects/php-markdown/>
8 #
9 # Original Markdown
10 # Copyright (c) 2004-2006 John Gruber
11 # <http://daringfireball.net/projects/markdown/>
12 #
13
define('MARKDOWN_VERSION', "1.0.1o"); # Sun 8 Jan 2012


#
# Global default settings:
#
# Each setting below is defined only when the including application has not
# defined it already, so any of them can be overridden by defining the
# constant before this file is loaded. An explicit defined() check is used
# instead of silencing define() with the @ operator.
#

# Change to ">" for HTML output
if (!defined('MARKDOWN_EMPTY_ELEMENT_SUFFIX')) {
	define('MARKDOWN_EMPTY_ELEMENT_SUFFIX', " />");
}

# Define the width of a tab for code blocks.
if (!defined('MARKDOWN_TAB_WIDTH')) {
	define('MARKDOWN_TAB_WIDTH', 4);
}

# Optional title attribute for footnote links and backlinks.
if (!defined('MARKDOWN_FN_LINK_TITLE')) {
	define('MARKDOWN_FN_LINK_TITLE', "");
}
if (!defined('MARKDOWN_FN_BACKLINK_TITLE')) {
	define('MARKDOWN_FN_BACKLINK_TITLE', "");
}

# Optional class attribute for footnote links and backlinks.
if (!defined('MARKDOWN_FN_LINK_CLASS')) {
	define('MARKDOWN_FN_LINK_CLASS', "");
}
if (!defined('MARKDOWN_FN_BACKLINK_CLASS')) {
	define('MARKDOWN_FN_BACKLINK_CLASS', "");
}


#
# WordPress settings:
#

# Change to false to remove Markdown from posts and/or comments.
if (!defined('MARKDOWN_WP_POSTS')) {
	define('MARKDOWN_WP_POSTS', true);
}
if (!defined('MARKDOWN_WP_COMMENTS')) {
	define('MARKDOWN_WP_COMMENTS', true);
}



### Standard Function Interface ###

# Name of the parser class instantiated by the function interface.
if (!defined('MARKDOWN_PARSER_CLASS')) {
	define('MARKDOWN_PARSER_CLASS', 'MarkdownExtra_Parser');
}
52
function Markdown($text) {
	#
	# Convert $text with a shared parser and return the result of the
	# parser's transform() method.
	#
	# The parser is an instance of the class named by MARKDOWN_PARSER_CLASS.
	# It is created on the first call and reused by every later call.
	#
	static $shared_parser = null;

	if ($shared_parser === null) {
		$class_name = MARKDOWN_PARSER_CLASS;
		$shared_parser = new $class_name;
	}

	return $shared_parser->transform($text);
}
67
68
69 ### WordPress Plugin Interface ###
70
71 /*
72 Plugin Name: Markdown Extra
73 Plugin URI: http://michelf.com/projects/php-markdown/
74 Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://michelf.com/projects/php-markdown/">More...</a>
75 Version: 1.2.4
76 Author: Michel Fortin
77 Author URI: http://michelf.com/
78 */
79
80 if (isset($wp_version)) {
81 # More details about how it works here:
82 # <http://michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/>
83
84 # Post content and excerpts
85 # - Remove WordPress paragraph generator.
86 # - Run Markdown on excerpt, then remove all tags.
87 # - Add paragraph tag around the excerpt, but remove it for the excerpt rss.
if (MARKDOWN_WP_POSTS) {
	# Post content: drop the WordPress paragraph generator, run Markdown
	# early (priority 6) and balance tags on output (priority 50).
	remove_filter('the_content', 'wpautop');
	add_filter('the_content', 'mdwp_MarkdownPost', 6);
	add_filter('the_content', 'balanceTags', 50);

	# Same treatment for the RSS version of the content.
	remove_filter('the_content_rss', 'wpautop');
	add_filter('the_content_rss', 'mdwp_MarkdownPost', 6);

	# Excerpts: run Markdown, trim, then balance tags; paragraph tags are
	# added for display and stripped again for the RSS excerpt.
	remove_filter('the_excerpt', 'wpautop');
	add_filter('get_the_excerpt', 'mdwp_MarkdownPost', 6);
	add_filter('get_the_excerpt', 'trim', 7);
	add_filter('get_the_excerpt', 'balanceTags', 9);
	add_filter('the_excerpt', 'mdwp_add_p');
	add_filter('the_excerpt_rss', 'mdwp_strip_p');

	# Tag balancing is removed from the save-time hooks, since it is
	# applied at display time above.
	remove_filter('content_save_pre', 'balanceTags', 50);
	remove_filter('excerpt_save_pre', 'balanceTags', 50);
}
104
105 # Add a footnote id prefix to posts when inside a loop.
function mdwp_MarkdownPost($text) {
	#
	# Transform post text with a parser kept across calls.
	#
	# The parser's fn_id_prefix is left empty on single posts, pages and
	# feeds; anywhere else it is set to the current post ID followed by a
	# dot.
	#
	static $post_parser = null;

	if (!isset($post_parser)) {
		$class_name = MARKDOWN_PARSER_CLASS;
		$post_parser = new $class_name;
	}

	$standalone = is_single() || is_page() || is_feed();
	$post_parser->fn_id_prefix = $standalone ? "" : get_the_ID() . ".";

	return $post_parser->transform($text);
}
119
120 # Comments
121 # - Remove WordPress paragraph generator.
122 # - Remove WordPress auto-link generator.
123 # - Scramble important tags before passing them to the kses filter.
124 # - Run Markdown on excerpt then remove paragraph tags.
if (MARKDOWN_WP_COMMENTS) {
	# Tag/placeholder tables used by mdwp_hide_tags() and mdwp_show_tags().
	# The placeholder list is stored rot13-encoded in this file.
	global $mdwp_hidden_tags, $mdwp_placeholders;
	$mdwp_hidden_tags = explode(' ',
		'<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>');
	$mdwp_placeholders = explode(' ', str_rot13(
		'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR '.
		'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli'));

	# Displayed comment text: no WordPress paragraph or auto-link generator.
	remove_filter('comment_text', 'wpautop', 30);
	remove_filter('comment_text', 'make_clickable');

	# Saved comments: run Markdown (6), swap the listed tags for
	# placeholders (8), and swap them back later (12).
	add_filter('pre_comment_content', 'Markdown', 6);
	add_filter('pre_comment_content', 'mdwp_hide_tags', 8);
	add_filter('pre_comment_content', 'mdwp_show_tags', 12);

	# Comment text and excerpts: run Markdown; excerpts also lose <p> tags.
	add_filter('get_comment_text', 'Markdown', 6);
	add_filter('get_comment_excerpt', 'Markdown', 6);
	add_filter('get_comment_excerpt', 'mdwp_strip_p', 7);
}
142
function mdwp_add_p($text) {
	#
	# Wrap $text in paragraph tags unless it is empty or already starts
	# with a <p>, <ul>, <ol>, <dl>, <pre> or <blockquote> tag. Runs of two
	# or more newlines inside wrapped text become paragraph boundaries.
	#
	$starts_with_block = preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text);
	if ($starts_with_block) {
		return $text;
	}

	$wrapped = '<p>' . $text . '</p>';
	return preg_replace('{\n{2,}}', "</p>\n\n<p>", $wrapped);
}
150
function mdwp_strip_p($t) {
	# Remove every opening and closing <p> tag (any letter case) from $t.
	$paragraph_tag_re = '{</?p>}i';
	return preg_replace($paragraph_tag_re, '', $t);
}
152
function mdwp_hide_tags($text) {
	# Replace each tag listed in the global $mdwp_hidden_tags with the
	# placeholder at the same position in the global $mdwp_placeholders.
	global $mdwp_hidden_tags, $mdwp_placeholders;

	$hidden = str_replace($mdwp_hidden_tags, $mdwp_placeholders, $text);
	return $hidden;
}
function mdwp_show_tags($text) {
	# Inverse of mdwp_hide_tags(): replace each placeholder listed in the
	# global $mdwp_placeholders with the tag at the same position in the
	# global $mdwp_hidden_tags.
	global $mdwp_hidden_tags, $mdwp_placeholders;

	$restored = str_replace($mdwp_placeholders, $mdwp_hidden_tags, $text);
	return $restored;
}
161 }
162
163
164 ### bBlog Plugin Info ###
165
function identify_modifier_markdown() {
	#
	# Return the bBlog plugin description array for this modifier.
	#
	# The reported version is MARKDOWNEXTRA_VERSION when that constant is
	# defined. Otherwise it falls back to MARKDOWN_VERSION, because reading
	# an undefined constant is a fatal error on PHP 8.
	#
	$version = defined('MARKDOWNEXTRA_VERSION')
		? MARKDOWNEXTRA_VERSION
		: MARKDOWN_VERSION;

	return array(
		'name'			=> 'markdown',
		'type'			=> 'modifier',
		'nicename'		=> 'PHP Markdown Extra',
		'description'	=> 'A text-to-HTML conversion tool for web writers',
		'authors'		=> 'Michel Fortin and John Gruber',
		'licence'		=> 'GPL',
		'version'		=> $version,
		'help'			=> '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://michelf.com/projects/php-markdown/">More...</a>',
	);
}
178
179
180 ### Smarty Modifier Interface ###
181
function smarty_modifier_markdown($text) {
	# Smarty modifier entry point: hand $text to Markdown() unchanged and
	# return whatever it produces.
	$converted = Markdown($text);
	return $converted;
}
185
186
187 ### Textile Compatibility Mode ###
188
189 # Rename this file to "classTextile.php" and it can replace Textile everywhere.
190
if (strcasecmp(substr(__FILE__, -16), "classTextile.php") === 0) {
	# PHP SmartyPants is optional; the include is attempted silently.
	@include_once 'smartypants.php';

	# Stand-in for the Textile class. Its methods call Markdown instead.
	class Textile {
		# Run Markdown on $text only when both $lite and $encode are empty,
		# then SmartyPants whenever that function exists.
		function TextileThis($text, $lite='', $encode='') {
			$full_conversion = ($lite == '' && $encode == '');
			if ($full_conversion) {
				$text = Markdown($text);
			}
			if (function_exists('SmartyPants')) {
				$text = SmartyPants($text);
			}
			return $text;
		}

		# Restrictions are not supported for now: $noimage is ignored and
		# the call is forwarded to TextileThis().
		function TextileRestricted($text, $lite='', $noimage='') {
			return $this->TextileThis($text, $lite);
		}

		# Workaround to ensure compatibility with TextPattern 4.0.3:
		# returns $text untouched.
		function blockLite($text) {
			return $text;
		}
	}
}
209
210
211
212 #
213 # Markdown Parser Class
214 #
215
216 class Markdown_Parser {
217
# Balanced [brackets]: maximum nesting depth, and the regex fragment that
# the constructor builds from it.
public $nested_brackets_depth = 6;
public $nested_brackets_re;

# Same pair for parentheses nested inside URLs.
public $nested_url_parenthesis_depth = 4;
public $nested_url_parenthesis_re;

# Characters that can be escaped, and the character-class regex that the
# constructor builds from them.
public $escape_chars = '\`*_{}[]()>#+-.!';
public $escape_chars_re;

# Suffix closing empty elements. Change to ">" for HTML output.
public $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX;

# Width of a tab, in spaces.
public $tab_width = MARKDOWN_TAB_WIDTH;

# Change to `true` to disallow markup or entities.
public $no_markup = false;
public $no_entities = false;

# Predefined urls and titles for reference links and images.
public $predef_urls = array();
public $predef_titles = array();
241
242
function __construct() {
	#
	# Constructor function. Initialize appropriate member variables.
	#
	# Calls _initDetab() and prepareItalicsAndBold() (both defined outside
	# this excerpt), builds the nested-bracket, nested-parenthesis and
	# escape-character regex fragments from the configured depths and
	# characters, then sorts every gamut by ascending priority.
	#
	# PHP 8 no longer treats a method named after its class as the
	# constructor, so the initialization lives here.
	#
	$this->_initDetab();
	$this->prepareItalicsAndBold();

	$this->nested_brackets_re =
		str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
		str_repeat('\])*', $this->nested_brackets_depth);

	$this->nested_url_parenthesis_re =
		str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
		str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);

	$this->escape_chars_re = '['.preg_quote($this->escape_chars).']';

	# Sort document, block, and span gamut in ascendent priority order.
	asort($this->early_gamut); // @olvlv
	asort($this->document_gamut);
	asort($this->block_gamut);
	asort($this->span_gamut);
}

function Markdown_Parser() {
	#
	# PHP 4 style constructor name, kept so that explicit calls such as
	# parent::Markdown_Parser() keep working. The call is bound to this
	# class with self:: so that a subclass overriding __construct() cannot
	# cause infinite recursion.
	#
	self::__construct();
}
266
267
# Per-transformation state: link URLs and titles, and the table mapping
# hash tokens to the HTML they stand for.
public $urls = array();
public $titles = array();
public $html_hashes = array();

# Set while an anchor is being built, to avoid invalid nesting.
public $in_anchor = false;
275
276
function setup() {
	#
	# Called before the transformation process starts to setup parser
	# states.
	#
	# Link URLs and titles start from the predefined tables, the HTML hash
	# table starts empty and the anchor-nesting flag is cleared.
	#
	$this->urls = $this->predef_urls;
	$this->titles = $this->predef_titles;
	$this->html_hashes = array();

	# Reset the member flag. The previous code assigned a local variable,
	# which left the flag untouched.
	$this->in_anchor = false;
}
289
function teardown() {
	#
	# Called after the transformation process: empties the URL, title and
	# HTML hash tables, which may otherwise take up memory unnecessarily.
	#
	foreach (array('urls', 'titles', 'html_hashes') as $table) {
		$this->$table = array();
	}
}
299
300
function transform($text) {
	#
	# Main function. Preprocesses the input text, runs it through the early
	# gamut, hashes block-level HTML, runs the document gamut and returns
	# the result followed by a newline.
	#
	$this->setup();

	# Remove UTF-8 BOM and marker character in input, if present.
	$text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);

	# Standardize line endings (DOS to Unix and Mac to Unix), then make
	# sure the text ends with a couple of newlines.
	$text = preg_replace('{\r\n?}', "\n", $text) . "\n\n";

	# Convert all tabs to spaces.
	$text = $this->detab($text);

	# Run early gamut methods. // @olvlvl
	foreach (array_keys($this->early_gamut) as $early_method) {
		$text = $this->$early_method($text);
	}

	# Turn block-level HTML blocks into hash entries
	$text = $this->hashHTMLBlocks($text);

	# Strip any lines consisting only of spaces. This makes subsequent
	# regexen easier to write, because consecutive blank lines can be
	# matched with /\n+/ instead of something contorted like /[ ]*\n+/ .
	$text = preg_replace('/^[ ]+$/m', '', $text);

	# Run document gamut methods.
	foreach (array_keys($this->document_gamut) as $document_method) {
		$text = $this->$document_method($text);
	}

	$this->teardown();

	return $text . "\n";
}
344
# Methods run by transform() before HTML blocks are hashed, keyed by method
# name with a priority as value.
public $early_gamut = array(
	'doFencedCodeBlocks' => 5
);

# Methods run by transform() on the whole document.
public $document_gamut = array(
	# Strip link definitions, store in hashes.
	"stripLinkDefinitions" => 20,

	"runBasicBlockGamut"   => 30,
);

/**
 * @var array These are all the transformations that form block-level tags like paragraphs,
 * headers, and list items.
 */
public $block_gamut = array(
	"doHeaders"         => 10,
	"doHorizontalRules" => 20,

	"doLists"           => 40,
	"doCodeBlocks"      => 50,
	"doBlockQuotes"     => 60,
);
371
function stripLinkDefinitions($text) {
	#
	# Strips link definitions from text. Each definition is handed to
	# _stripLinkDefinitions_callback(), which stores its URL and title and
	# replaces it with an empty string.
	#
	$max_indent = $this->tab_width - 1;

	# Link defs are in the form: ^[id]: url "optional title"
	$definition_re = '{
						^[ ]{0,'.$max_indent.'}\[(.+)\][ ]?:	# id = $1
						  [ ]*
						  \n?				# maybe *one* newline
						  [ ]*
						(?:
						  <(.+?)>			# url = $2
						|
						  (\S+?)			# url = $3
						)
						  [ ]*
						  \n?				# maybe one newline
						  [ ]*
						(?:
							(?<=\s)			# lookbehind for whitespace
							["(]
							(.*?)			# title = $4
							[")]
							[ ]*
						)?	# title is optional
						(?:\n+|\Z)
		}xm';

	return preg_replace_callback(
		$definition_re,
		array($this, '_stripLinkDefinitions_callback'),
		$text);
}
function _stripLinkDefinitions_callback($matches) {
	#
	# Record one link definition under its lower-cased id. The URL is the
	# bracketed form ($2) when present, the bare form ($3) otherwise; the
	# title is stored as null when the definition has none.
	#
	$id = strtolower($matches[1]);

	if ($matches[2] == '') {
		$this->urls[$id] = $matches[3];
	} else {
		$this->urls[$id] = $matches[2];
	}

	$this->titles[$id] = isset($matches[4]) ? $matches[4] : null;

	return ''; # String that will replace the block
}
413
414
function hashHTMLBlocks($text) {
	#
	# Replace block-level HTML in $text with hash tokens (through
	# _hashHTMLBlocks_callback()). Returns $text untouched when no_markup
	# is set.
	#
	if ($this->no_markup)  return $text;

	$less_than_tab = $this->tab_width - 1;

	# Hashify HTML blocks:
	# We only want to do this for block-level HTML tags, such as headers,
	# lists, and tables. That's because we still want to wrap <p>s around
	# "paragraphs" that are wrapped in non-block-level tags, such as anchors,
	# phrase emphasis, and spans. The list of tags we're looking for is
	# hard-coded:
	#
	# *  List "a" is made of tags which can be both inline or block-level.
	#    These will be treated block-level when the start tag is alone on
	#    its line, otherwise they're not matched here and will be taken as
	#    inline later.
	# *  List "b" is made of tags which are always block-level;
	#
	$block_tags_a_re = 'ins|del';
	$block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
					   'script|noscript|form|fieldset|iframe|math';

	# Regular expression for the content of a block tag.
	$nested_tags_level = 4;
	$attr = '
		(?>				# optional tag attributes
		  \s			# starts with whitespace
		  (?>
			[^>"/]+		# text outside quotes
		  |
			/+(?!>)		# slash not followed by ">"
		  |
			"[^"]*"		# text inside double quotes (tolerate ">")
		  |
			\'[^\']*\'	# text inside single quotes (tolerate ">")
		  )*
		)?
		';
	$content =
		str_repeat('
			(?>
			  [^<]+			# content without tag
			|
			  <\2			# nested opening tag
				'.$attr.'	# attributes
				(?>
				  />
				|
				  >', $nested_tags_level).	# end of opening tag
				  '.*?'.					# last level nested tag content
		str_repeat('
				  </\2\s*>	# closing nested tag
				)
			  |
				<(?!/\2\s*>	# other tags with a different name
			  )
			)*',
			$nested_tags_level);
	$content2 = str_replace('\2', '\3', $content);

	# First, look for nested blocks, e.g.:
	# 	<div>
	# 		<div>
	# 		tags for inner block must be indented.
	# 		</div>
	# 	</div>
	#
	# The outermost tags must start at the left margin for this to match, and
	# the inner nested divs must be indented.
	# We need to do this before the next, more liberal match, because the next
	# match will start at the first `<div>` and stop at the first `</div>`.
	#
	# Capture groups of the pattern below: $1 whole block, $2 group "b" tag,
	# $3 group "a" tag, $4 hr, $5 processing-instruction delimiter. The
	# processing-instruction branch must back-reference $5; it previously
	# used \2, which is unset in that branch and therefore never matched.
	$text = preg_replace_callback('{(?>
		(?>
			(?<=\n\n)		# Starting after a blank line
			|				# or
			\A\n?			# the beginning of the doc
		)
		(						# save in $1

		  # Match from `\n<tag>` to `</tag>\n`, handling nested tags
		  # in between.

					[ ]{0,'.$less_than_tab.'}
					<('.$block_tags_b_re.')# start tag = $2
					'.$attr.'>			# attributes followed by > and \n
					'.$content.'		# content, support nesting
					</\2>				# the matching end tag
					[ ]*				# trailing spaces/tabs
					(?=\n+|\Z)	# followed by a newline or end of document

		| # Special version for tags of group a.

					[ ]{0,'.$less_than_tab.'}
					<('.$block_tags_a_re.')# start tag = $3
					'.$attr.'>[ ]*\n	# attributes followed by >
					'.$content2.'		# content, support nesting
					</\3>				# the matching end tag
					[ ]*				# trailing spaces/tabs
					(?=\n+|\Z)	# followed by a newline or end of document

		| # Special case just for <hr />. It was easier to make a special
		  # case than to make the other regex more complicated.

					[ ]{0,'.$less_than_tab.'}
					<(hr)				# start tag = $4
					'.$attr.'			# attributes
					/?>					# the matching end tag
					[ ]*
					(?=\n{2,}|\Z)		# followed by a blank line or end of document

		| # Special case for standalone HTML comments:

				[ ]{0,'.$less_than_tab.'}
				(?s:
					<!-- .*? -->
				)
				[ ]*
				(?=\n{2,}|\Z)		# followed by a blank line or end of document

		| # PHP and ASP-style processor instructions (<? and <%)

				[ ]{0,'.$less_than_tab.'}
				(?s:
					<([?%])			# $5
					.*?
					\5>
				)
				[ ]*
				(?=\n{2,}|\Z)		# followed by a blank line or end of document

		)
		)}Sxmi',
		array($this, '_hashHTMLBlocks_callback'),
		$text);

	return $text;
}
function _hashHTMLBlocks_callback($matches) {
	# Hash the matched HTML block ($1) and return its token surrounded by
	# blank lines.
	$token = $this->hashBlock($matches[1]);
	return "\n\n" . $token . "\n\n";
}
557
558
function hashPart($text, $boundary = 'X') {
	#
	# Called whenever a tag must be hashed when a function inserts an atomic
	# element in the text stream. Passing $text through this function gives
	# a unique text-token which will be reverted back when calling unhash.
	#
	# The $boundary argument specifies what character should be used to
	# surround the token. By convention, "B" is used for block elements that
	# need not be wrapped into paragraph tags at the end, ":" is used for
	# elements that are word separators and "X" is used in the general case.
	#
	static $serial = 0;

	# Swap back any tag hash found in $text so we do not have to `unhash`
	# multiple times at the end.
	$restored = $this->unhash($text);

	# Then hash the block under a new token.
	$serial++;
	$token = $boundary . "\x1A" . $serial . $boundary;
	$this->html_hashes[$token] = $restored;

	return $token; # String that will replace the tag.
}
580
581
function hashBlock($text) {
	# Shortcut for hashPart() with the block-level boundary "B".
	$block_boundary = 'B';
	return $this->hashPart($text, $block_boundary);
}
588
function runBlockGamut($text) {
	#
	# Run block gamut transformations, hashing HTML blocks first.
	#
	# Raw HTML in the Markdown source must be escaped before doing anything
	# else. This needs to be done for each block, and not only at the
	# beginning in the Markdown function, since hashed blocks can be part of
	# list items and could have been indented. Indented blocks would have
	# been seen as a code block in a previous pass of hashHTMLBlocks.
	#
	$hashed = $this->hashHTMLBlocks($text);

	return $this->runBasicBlockGamut($hashed);
}
602
/**
 * Run block gamut transformations without hashing HTML blocks first, then form paragraphs.
 * This is useful when HTML blocks are known to be already hashed, like in the first
 * whole-document pass.
 *
 * @return string
 */
function runBasicBlockGamut($text)
{
	foreach (array_keys($this->block_gamut) as $block_method)
	{
		$text = $this->$block_method($text);
	}

	# Finally form paragraph and restore hashed blocks.
	$paragraphs = $this->formParagraphs($text);

	return $paragraphs;
}
622
623
function doHorizontalRules($text) {
	#
	# Do Horizontal Rules: replace each rule line with the hash token of
	# an <hr> element. The token is created once, before matching, and
	# reused for every rule found.
	#
	$rule_re = '{
			^[ ]{0,3}	# Leading space
			([-*_])		# $1: First marker
			(?>			# Repeated marker group
				[ ]{0,2}	# Zero, one, or two spaces.
				\1			# Marker character
			){2,}		# Group repeated at least twice
			[ ]*		# Tailing spaces
			$			# End of line.
		}mx';

	$hr_token = $this->hashBlock('<hr' . $this->empty_element_suffix);

	return preg_replace($rule_re, "\n" . $hr_token . "\n", $text);
}
640
641
#
# These are all the transformations that occur *within* block-level
# tags like paragraphs, headers, and list items, keyed by method name
# with a priority as value.
#
public $span_gamut = array(
	# Process character escapes, code spans, and inline HTML
	# in one shot.
	"parseSpan"           => -30,

	# Process anchor and image tags. Images must come first,
	# because ![foo][f] looks like an anchor.
	"doImages"            =>  10,
	"doAnchors"           =>  20,

	# Make links out of things like `<http://example.com/>`
	# Must come after doAnchors, because you can use < and >
	# delimiters in inline links like [this](<url>).
	"doAutoLinks"         =>  30,
	"encodeAmpsAndAngles" =>  40,

	"doItalicsAndBold"    =>  50,
	"doHardBreaks"        =>  60,
);
665
function runSpanGamut($text) {
	# Run span gamut transformations: feed $text through every method
	# named in $this->span_gamut, in the array's order.
	foreach (array_keys($this->span_gamut) as $span_method) {
		$text = $this->$span_method($text);
	}

	return $text;
}
676
677
function doHardBreaks($text) {
	# Do hard breaks: two or more spaces followed by a newline are
	# replaced through _doHardBreaks_callback().
	$hard_break_re = '/ {2,}\n/';

	return preg_replace_callback(
		$hard_break_re,
		array($this, '_doHardBreaks_callback'),
		$text);
}
function _doHardBreaks_callback($matches) {
	# Return the hash token of a <br> element followed by a newline.
	$line_break = '<br' . $this->empty_element_suffix . "\n";
	return $this->hashPart($line_break);
}
686
687
function doAnchors($text) {
	#
	# Turn Markdown link shortcuts into XHTML <a> tags.
	#
	# Does nothing while an anchor is already being built; the in_anchor
	# flag is raised for the duration of the three passes below.
	#
	if ($this->in_anchor) return $text;
	$this->in_anchor = true;

	$reference_callback = array($this, '_doAnchors_reference_callback');
	$inline_callback    = array($this, '_doAnchors_inline_callback');

	#
	# First, handle reference-style links: [link text] [id]
	#
	$reference_re = '{
		(					# wrap whole match in $1
		  \[
			('.$this->nested_brackets_re.')	# link text = $2
		  \]

		  [ ]?				# one optional space
		  (?:\n[ ]*)?		# one optional newline followed by spaces

		  \[
			(.*?)		# id = $3
		  \]
		)
		}xs';
	$text = preg_replace_callback($reference_re, $reference_callback, $text);

	#
	# Next, inline-style links: [link text](url "optional title")
	#
	$inline_re = '{
		(				# wrap whole match in $1
		  \[
			('.$this->nested_brackets_re.')	# link text = $2
		  \]
		  \(			# literal paren
			[ \n]*
			(?:
				<(.+?)>	# href = $3
			|
				('.$this->nested_url_parenthesis_re.')	# href = $4
			)
			[ \n]*
			(			# $5
			  ([\'"])	# quote char = $6
			  (.*?)		# Title = $7
			  \6		# matching quote
			  [ \n]*	# ignore any spaces/tabs between closing quote and )
			)?			# title is optional
		  \)
		)
		}xs';
	$text = preg_replace_callback($inline_re, $inline_callback, $text);

	#
	# Last, handle reference-style shortcuts: [link text]
	# These must come last in case you've also got [link text][1]
	# or [link text](/foo)
	#
	$shortcut_re = '{
		(					# wrap whole match in $1
		  \[
			([^\[\]]+)		# link text = $2; can\'t contain [ or ]
		  \]
		)
		}xs';
	$text = preg_replace_callback($shortcut_re, $reference_callback, $text);

	$this->in_anchor = false;
	return $text;
}
function _doAnchors_reference_callback($matches) {
	#
	# Build the hashed <a> tag for a reference-style link, or return the
	# matched text untouched when the link id has no known URL.
	#
	$label = $matches[2];
	$id    = isset($matches[3]) ? $matches[3] : '';

	if ($id == "") {
		# for shortcut links like [this][] or [this].
		$id = $label;
	}

	# lower-case and turn embedded newlines into spaces
	$id = preg_replace('{[ ]?\n}', ' ', strtolower($id));

	if (!isset($this->urls[$id])) {
		return $matches[1];
	}

	$href = $this->encodeAttribute($this->urls[$id]);
	$tag  = "<a href=\"$href\"";

	if (isset($this->titles[$id])) {
		$title_attr = $this->encodeAttribute($this->titles[$id]);
		$tag .= " title=\"$title_attr\"";
	}

	$label = $this->runSpanGamut($label);
	$tag  .= ">$label</a>";

	return $this->hashPart($tag);
}
function _doAnchors_inline_callback($matches) {
	#
	# Build the hashed <a> tag for an inline-style link. The href is the
	# bracketed form ($3) when present, the bare form ($4) otherwise; the
	# title ($7) is optional.
	#
	$label = $this->runSpanGamut($matches[2]);

	if ($matches[3] == '') {
		$href = $matches[4];
	} else {
		$href = $matches[3];
	}
	$href = $this->encodeAttribute($href);

	$tag = "<a href=\"$href\"";
	if (isset($matches[7])) {
		$title_attr = $this->encodeAttribute($matches[7]);
		$tag .= " title=\"$title_attr\"";
	}

	# NOTE(review): the link text goes through the span gamut a second
	# time here; kept as is to preserve existing behavior.
	$label = $this->runSpanGamut($label);
	$tag  .= ">$label</a>";

	return $this->hashPart($tag);
}
811
812
function doImages($text) {
	#
	# Turn Markdown image shortcuts into <img> tags.
	#

	#
	# First, handle reference-style labeled images: ![alt text][id]
	#
	$reference_re = '{
		(				# wrap whole match in $1
		  !\[
			('.$this->nested_brackets_re.')		# alt text = $2
		  \]

		  [ ]?				# one optional space
		  (?:\n[ ]*)?		# one optional newline followed by spaces

		  \[
			(.*?)		# id = $3
		  \]

		)
		}xs';
	$text = preg_replace_callback(
		$reference_re,
		array($this, '_doImages_reference_callback'),
		$text);

	#
	# Next, handle inline images:  ![alt text](url "optional title")
	# Don't forget: encode * and _
	#
	$inline_re = '{
		(				# wrap whole match in $1
		  !\[
			('.$this->nested_brackets_re.')		# alt text = $2
		  \]
		  \s?			# One optional whitespace character
		  \(			# literal paren
			[ \n]*
			(?:
				<(\S*)>	# src url = $3
			|
				('.$this->nested_url_parenthesis_re.')	# src url = $4
			)
			[ \n]*
			(			# $5
			  ([\'"])	# quote char = $6
			  (.*?)		# title = $7
			  \6		# matching quote
			  [ \n]*
			)?			# title is optional
		  \)
		)
		}xs';

	return preg_replace_callback(
		$inline_re,
		array($this, '_doImages_inline_callback'),
		$text);
}
function _doImages_reference_callback($matches) {
	#
	# Build the hashed <img> tag for a reference-style image, or return
	# the matched text untouched when the link id has no known URL.
	#
	$alt = $matches[2];
	$id  = strtolower($matches[3]);

	if ($id == "") {
		$id = strtolower($alt); # for shortcut links like ![this][].
	}

	$alt = $this->encodeAttribute($alt);

	if (!isset($this->urls[$id])) {
		# If there's no such link ID, leave intact:
		return $matches[1];
	}

	$src = $this->encodeAttribute($this->urls[$id]);
	$tag = "<img src=\"$src\" alt=\"$alt\"";

	if (isset($this->titles[$id])) {
		$title_attr = $this->encodeAttribute($this->titles[$id]);
		$tag .= " title=\"$title_attr\"";
	}

	$tag .= $this->empty_element_suffix;

	return $this->hashPart($tag);
}
function _doImages_inline_callback($matches) {
	#
	# Build the hashed <img> tag for an inline image. The source is the
	# bracketed form ($3) when present, the bare form ($4) otherwise; the
	# title ($7) is optional.
	#
	$alt = $this->encodeAttribute($matches[2]);

	if ($matches[3] == '') {
		$src = $matches[4];
	} else {
		$src = $matches[3];
	}
	$src = $this->encodeAttribute($src);

	$tag = "<img src=\"$src\" alt=\"$alt\"";
	if (isset($matches[7])) {
		$title_attr = $this->encodeAttribute($matches[7]);
		$tag .= " title=\"$title_attr\"";
	}
	$tag .= $this->empty_element_suffix;

	return $this->hashPart($tag);
}
913
914
function doHeaders($text) {
	#
	# Convert Setext-style and atx-style headers through their callbacks.
	#

	# Setext-style headers:
	#	  Header 1
	#	  ========
	#
	#	  Header 2
	#	  --------
	#
	$setext_re = '{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx';
	$text = preg_replace_callback(
		$setext_re,
		array($this, '_doHeaders_callback_setext'),
		$text);

	# atx-style headers:
	#	# Header 1
	#	## Header 2
	#	## Header 2 with closing hashes ##
	#	...
	#	###### Header 6
	#
	$atx_re = '{
			^(\#{1,6})	# $1 = string of #\'s
			[ ]*
			(.+?)		# $2 = Header text
			[ ]*
			\#*			# optional closing #\'s (not counted)
			\n+
		}xm';

	return preg_replace_callback(
		$atx_re,
		array($this, '_doHeaders_callback_atx'),
		$text);
}
function _doHeaders_callback_setext($matches) {
	#
	# Turn one Setext-style header match into a hashed <h1> (underlined
	# with "=") or <h2> (underlined with "-") block.
	#
	# $matches[1] is the header text, $matches[2] the underline.
	#

	# Terrible hack to check we haven't found an empty list item.
	if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
		return $matches[0];

	# Square-bracket offset: the curly-brace form is a parse error on PHP 8.
	$level = $matches[2][0] == '=' ? 1 : 2;
	$block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
	return "\n" . $this->hashBlock($block) . "\n\n";
}
function _doHeaders_callback_atx($matches) {
	# Turn one atx-style header match into a hashed <hN> block, where N is
	# the number of leading "#" characters ($1) and $2 is the header text.
	$depth   = strlen($matches[1]);
	$content = $this->runSpanGamut($matches[2]);
	$token   = $this->hashBlock("<h$depth>" . $content . "</h$depth>");

	return "\n" . $token . "\n\n";
}
959
960
function doLists($text) {
	#
	# Form HTML ordered (numbered) and unordered (bulleted) lists.
	#
	# Two passes are made over $text: bulleted lists first, then numbered
	# lists. In each pass the other marker kind ends the list.
	#
	$less_than_tab = $this->tab_width - 1;

	# Re-usable patterns to match list item bullets and number markers:
	$marker_ul_re  = '[*+-]';
	$marker_ol_re  = '\d+[\.]';

	$marker_pairs = array(
		array($marker_ul_re, $marker_ol_re),
		array($marker_ol_re, $marker_ul_re),
	);

	foreach ($marker_pairs as $pair) {
		list($marker_re, $other_marker_re) = $pair;

		# Re-usable pattern to match any entire ul or ol list:
		$whole_list_re = '
			(								# $1 = whole list
			  (								# $2
				([ ]{0,'.$less_than_tab.'})	# $3 = number of spaces
				('.$marker_re.')			# $4 = first list item marker
				[ ]+
			  )
			  (?s:.+?)
			  (								# $5
				  \z
				|
				  \n{2,}
				  (?=\S)
				  (?!						# Negative lookahead for another list item marker
					[ ]*
					'.$marker_re.'[ ]+
				  )
				|
				  (?=						# Lookahead for another kind of list
				    \n
					\3						# Must have the same indentation
					'.$other_marker_re.'[ ]+
				  )
			  )
			)
		'; // mx

		# We use a different prefix before nested lists than top-level lists.
		# See extended comment in _ProcessListItems().

		if (!$this->list_level) {
			$text = preg_replace_callback('{
					(?:(?<=\n)\n|\A\n?) # Must eat the newline
					'.$whole_list_re.'
				}mx',
				array($this, '_doLists_callback'), $text);
		}
		else {
			$text = preg_replace_callback('{
					^
					'.$whole_list_re.'
				}mx',
				array($this, '_doLists_callback'), $text);
		}
	}

	return $text;
}
function _doLists_callback($matches) {
	#
	# Turn one whole-list match into a hashed <ul> or <ol> block. The list
	# type is decided by the first item marker ($4), and only markers of
	# that type are used to split the list into items.
	#
	$marker_ul_re = '[*+-]';
	$marker_ol_re = '\d+[\.]';

	if (preg_match("/$marker_ul_re/", $matches[4])) {
		$tag_name  = "ul";
		$marker_re = $marker_ul_re;
	} else {
		$tag_name  = "ol";
		$marker_re = $marker_ol_re;
	}

	$items = $this->processListItems($matches[1] . "\n", $marker_re);
	$token = $this->hashBlock("<$tag_name>\n" . $items . "</$tag_name>");

	return "\n" . $token . "\n\n";
}
1044
# Current list nesting depth; zero when not inside a list.
public $list_level = 0;

function processListItems($list_str, $marker_any_re) {
	#
	# Process the contents of a single ordered or unordered list, splitting it
	# into individual list items.
	#
	# $this->list_level keeps track of when we're inside a list. It is
	# incremented on entry and decremented on exit; when it is zero, we're
	# not in a list anymore.
	#
	# We do this because when we're not inside a list, we want to treat
	# something like this:
	#
	#		I recommend upgrading to version
	#		8. Oops, now this line is treated
	#		as a sub-list.
	#
	# As a single paragraph, despite the fact that the second line starts
	# with a digit-period-space sequence.
	#
	# Whereas when we're inside a list (or sub-list), that line will be
	# treated as the start of a sub-list. What a kludge, huh? This is
	# an aspect of Markdown's syntax that's hard to parse perfectly
	# without resorting to mind-reading. Perhaps the solution is to
	# change the syntax rules such that sub-lists must start with a
	# starting cardinal number; e.g. "1." or "a.".
	#
	$this->list_level++;

	# trim trailing blank lines:
	$trimmed = preg_replace("/\n{2,}\\z/", "\n", $list_str);

	$item_re = '{
		(\n)?							# leading line = $1
		(^[ ]*)							# leading whitespace = $2
		('.$marker_any_re.'				# list marker and space = $3
			(?:[ ]+|(?=\n))	# space only required if item is not empty
		)
		((?s:.*?))						# list item text   = $4
		(?:(\n+(?=\n))|\n)				# tailing blank line = $5
		(?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
		}xm';

	$items = preg_replace_callback(
		$item_re,
		array($this, '_processListItems_callback'),
		$trimmed);

	$this->list_level--;
	return $items;
}
function _processListItems_callback($matches) {
	#
	# Render one list item as <li>...</li>.
	#
	# An item with a leading blank line ($1), a tailing blank line ($5) or
	# a blank line inside its text ($4) goes through the block gamut; any
	# other item is checked for sub-lists and then goes through the span
	# gamut.
	#
	$body          = $matches[4];
	$leading_line  = isset($matches[1]) ? $matches[1] : null;
	$leading_space = isset($matches[2]) ? $matches[2] : null;
	$marker_width  = strlen($matches[3]);
	$tailing_blank = isset($matches[5]) ? $matches[5] : null;

	$is_block_item = $leading_line
		|| $tailing_blank
		|| preg_match('/\n{2,}/', $body);

	if ($is_block_item) {
		# Replace marker with the appropriate whitespace indentation
		$body = $leading_space . str_repeat(' ', $marker_width) . $body;
		$body = $this->runBlockGamut($this->outdent($body) . "\n");
	}
	else {
		# Recursion for sub-lists:
		$body = $this->doLists($this->outdent($body));
		$body = preg_replace('/\n+$/', '', $body);
		$body = $this->runSpanGamut($body);
	}

	return "<li>" . $body . "</li>\n";
}
1116
1117
function doCodeBlocks($text) {
    #
    # Locate indented code blocks in $text and replace each one with the
    # result of _doCodeBlocks_callback.
    #
    $width = $this->tab_width;

    $code_block_re = '{
            (?:\n\n|\A\n?)
            ( # $1 = the code block -- one or more lines, starting with a space/tab
              (?>
                [ ]{'.$width.'} # Lines must start with a tab or a tab-width of spaces
                .*\n+
              )+
            )
            ((?=^[ ]{0,'.$width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
        }xm';

    return preg_replace_callback(
        $code_block_re,
        array($this, '_doCodeBlocks_callback'),
        $text
    );
}
1136
function _doCodeBlocks_callback($matches) {
    #
    # Turn one matched code block ($matches[1]) into a hashed
    # `<pre><code>` element.
    #
    # When the block starts with a ``` marker, the non-space text right
    # after the marker becomes the class of the <pre> element and the
    # rest of the code is handed to format_codeblock(); otherwise the
    # code is simply HTML-escaped.
    #
    $codeblock = $this->outdent($matches[1]);

    # trim leading newlines and trailing newlines
    $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);

    $class = ''; // @olvlv: support code class

    if (preg_match('#^```([\S]*)\s*#', $codeblock, $format_match)) {
        $class = $format_match[1];
        # Cast: substr() returns false on older PHP versions when
        # nothing is left after the marker.
        $codeblock = (string) substr($codeblock, strlen($format_match[0]));
        $codeblock = $this->format_codeblock($codeblock, $class);
    }
    else {
        $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
    }

    # The class name is taken verbatim from the document, so it must be
    # escaped before it is placed inside a double-quoted attribute;
    # otherwise a quote or angle bracket in it would break out of the
    # attribute.
    $class_attr = htmlspecialchars($class, ENT_QUOTES);

    $codeblock = "<pre class=\"$class_attr\"><code>$codeblock\n</code></pre>";
    return "\n\n".$this->hashBlock($codeblock)."\n\n";
}
1161
/**
 * Build the markup for an inline code span. Called from handleSpanToken.
 *
 * The code is trimmed, HTML-escaped (quotes are left untouched) and
 * wrapped in a <code> element, which is then passed through hashPart.
 *
 * @param string $code text found between the code span markers
 * @return mixed whatever hashPart returns for the generated element
 */
function makeCodeSpan($code)
{
    $escaped = htmlspecialchars(trim($code), ENT_NOQUOTES);
    $markup = "<code>" . $escaped . "</code>";

    return $this->hashPart($markup);
}
1172
1173
#
# Regular expression fragments used to find emphasis markers.
#
# Each list is keyed by the marker that is currently open for that kind of
# emphasis; the '' key is used when none is open. The '' expressions match
# a marker followed by a non-space character (or end of text) and not by
# punctuation plus whitespace; the other expressions match the same marker
# preceded by a non-space character (or start of text).
#
# Single-character markers (* or _).
var $em_relist = array(
    '' => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?=\S|$)(?![\.,:;]\s)',
    '*' => '(?<=\S|^)(?<!\*)\*(?!\*)',
    '_' => '(?<=\S|^)(?<!_)_(?!_)',
    );
# Two-character markers (** or __).
var $strong_relist = array(
    '' => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?=\S|$)(?![\.,:;]\s)',
    '**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)',
    '__' => '(?<=\S|^)(?<!_)__(?!_)',
    );
# Three-character markers (*** or ___).
var $em_strong_relist = array(
    '' => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?=\S|$)(?![\.,:;]\s)',
    '***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)',
    '___' => '(?<=\S|^)(?<!_)___(?!_)',
    );
# Complete expressions built by prepareItalicsAndBold, indexed by the
# concatenation of an $em_relist key and a $strong_relist key.
var $em_strong_prepared_relist;
1190
function prepareItalicsAndBold() {
    #
    # Build one search expression for every pairing of an em_relist key
    # with a strong_relist key and store it in em_strong_prepared_relist
    # under the concatenated key.
    #
    foreach ($this->em_relist as $em_key => $em_pattern) {
        foreach ($this->strong_relist as $strong_key => $strong_pattern) {
            $state = $em_key . $strong_key;

            # The three-character expression, when one exists for this
            # state, goes first so it is tried before the shorter markers.
            $alternatives = array($em_pattern, $strong_pattern);
            if (isset($this->em_strong_relist[$state])) {
                array_unshift($alternatives, $this->em_strong_relist[$state]);
            }

            # Wrap the alternatives in one capturing group.
            $this->em_strong_prepared_relist[$state] =
                '{(' . implode('|', $alternatives) . ')}';
        }
    }
}
1212
function doItalicsAndBold($text) {
    #
    # Convert the emphasis markers found in $text (*, _, **, __, ***, ___)
    # into <em> and <strong> elements and return the resulting text.
    #
    # The text is consumed one marker at a time. $token_stack holds the
    # markers that are currently open and $text_stack the text gathered
    # for each of them, most recent first; index 0 is always the span
    # being filled. $em and $strong hold the marker that opened the
    # current emphasis / strong span ('' when none is open) and select
    # the prepared expression used to find the next marker.
    #
    $token_stack = array('');
    $text_stack = array('');
    $em = '';
    $strong = '';
    $three_char_em = false;

    while (1) {
        #
        # Get prepared regular expression for searching emphasis tokens
        # in current context.
        #
        $token_re = $this->em_strong_prepared_relist["$em$strong"];

        #
        # Each loop iteration searches for the next emphasis token.
        #
        $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
        $text_stack[0] .= $parts[0];
        # References are used so that a missing index yields null
        # instead of a notice when no further token was found.
        $token =& $parts[1];
        $text =& $parts[2];

        if (empty($token)) {
            # Reached end of text span: empty stack without emitting
            # any more emphasis. Each unclosed marker is put back as
            # literal text in front of the text it collected.
            while ($token_stack[0]) {
                $text_stack[1] .= array_shift($token_stack);
                $text_stack[0] .= array_shift($text_stack);
            }
            break;
        }

        $token_len = strlen($token);
        if ($three_char_em) {
            # Reached closing marker while inside a three-char emphasis.
            if ($token_len == 3) {
                # Three-char closing marker, close em and strong.
                array_shift($token_stack);
                $span = array_shift($text_stack);
                $span = $this->runSpanGamut($span);
                $span = "<strong><em>$span</em></strong>";
                $text_stack[0] .= $this->hashPart($span);
                $em = '';
                $strong = '';
            } else {
                # Other closing marker: close one em or strong and
                # change current token state to match the other.
                # ($token[0]: square brackets, because the curly-brace
                # offset syntax no longer exists in PHP 8.)
                $token_stack[0] = str_repeat($token[0], 3-$token_len);
                $tag = $token_len == 2 ? "strong" : "em";
                $span = $text_stack[0];
                $span = $this->runSpanGamut($span);
                $span = "<$tag>$span</$tag>";
                $text_stack[0] = $this->hashPart($span);
                if ($tag == "strong") {
                    $strong = '';
                } else {
                    $em = '';
                }
            }
            $three_char_em = false;
        } else if ($token_len == 3) {
            if ($em) {
                # Reached closing marker for both em and strong.
                # Close the two innermost spans, whichever order they
                # were opened in.
                for ($i = 0; $i < 2; ++$i) {
                    $shifted_token = array_shift($token_stack);
                    $tag = strlen($shifted_token) == 2 ? "strong" : "em";
                    $span = array_shift($text_stack);
                    $span = $this->runSpanGamut($span);
                    $span = "<$tag>$span</$tag>";
                    $text_stack[0] .= $this->hashPart($span);
                    if ($tag == "strong") {
                        $strong = '';
                    } else {
                        $em = '';
                    }
                }
            } else {
                # Reached opening three-char emphasis marker. Push on token
                # stack; will be handled by the special condition above.
                $em = $token[0];
                $strong = "$em$em";
                array_unshift($token_stack, $token);
                array_unshift($text_stack, '');
                $three_char_em = true;
            }
        } else if ($token_len == 2) {
            if ($strong) {
                # Unwind any dangling emphasis marker:
                if (strlen($token_stack[0]) == 1) {
                    $text_stack[1] .= array_shift($token_stack);
                    $text_stack[0] .= array_shift($text_stack);
                }
                # Closing strong marker:
                array_shift($token_stack);
                $span = array_shift($text_stack);
                $span = $this->runSpanGamut($span);
                $span = "<strong>$span</strong>";
                $text_stack[0] .= $this->hashPart($span);
                $strong = '';
            } else {
                # Opening strong marker.
                array_unshift($token_stack, $token);
                array_unshift($text_stack, '');
                $strong = $token;
            }
        } else {
            # Here $token_len == 1
            if ($em) {
                if (strlen($token_stack[0]) == 1) {
                    # Closing emphasis marker:
                    array_shift($token_stack);
                    $span = array_shift($text_stack);
                    $span = $this->runSpanGamut($span);
                    $span = "<em>$span</em>";
                    $text_stack[0] .= $this->hashPart($span);
                    $em = '';
                } else {
                    # The innermost open span is not the emphasis one:
                    # keep the marker as literal text.
                    $text_stack[0] .= $token;
                }
            } else {
                # Opening emphasis marker.
                array_unshift($token_stack, $token);
                array_unshift($text_stack, '');
                $em = $token;
            }
        }
    }
    return $text_stack[0];
}
1334
1335
function doBlockQuotes($text) {
    #
    # Find runs of lines quoted with ">" and replace each run with the
    # result of _doBlockQuotes_callback.
    #
    $blockquote_re = '/
          ( # Wrap whole match in $1
            (?>
              ^[ ]*>[ ]? # ">" at the start of a line
                .+\n # rest of the first line
              (.+\n)* # subsequent consecutive lines
              \n* # blanks
            )+
          )
        /xm';

    return preg_replace_callback(
        $blockquote_re,
        array($this, '_doBlockQuotes_callback'),
        $text
    );
}
function _doBlockQuotes_callback($matches) {
    #
    # Render one matched blockquote ($matches[1]) as a hashed
    # <blockquote> element, running the block gamut on its content.
    #
    $bq = $matches[1];
    # trim one level of quoting - trim whitespace-only lines
    $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
    $bq = $this->runBlockGamut($bq); # recurse

    # Indent the quoted content.
    $bq = preg_replace('/^/m', " ", $bq);
    # These leading spaces cause problem with <pre> content,
    # so we need to fix that. The opening tag is matched with any
    # attributes it may carry, since _doCodeBlocks_callback always emits
    # `<pre class="...">` and a bare `<pre>` pattern would never match
    # the code blocks produced by this parser.
    $bq = preg_replace_callback('{(\s*<pre\b[^>]*>.+?</pre>)}sx',
        array(&$this, '_doBlockQuotes_callback2'), $bq);

    return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
}
function _doBlockQuotes_callback2($matches) {
    #
    # Remove the leading space that blockquote indentation put at the
    # start of every line of the matched <pre> block ($matches[1]).
    #
    return preg_replace('/^ /m', '', $matches[1]);
}
1370
1371
function formParagraphs($text) {
    #
    # Params:
    # $text - string to process with html <p> tags
    #
    # Splits $text on blank lines. Chunks that consist of a single block
    # hash are replaced by the stored block; every other chunk is run
    # through the span gamut and wrapped in <p> tags.
    #
    # Strip leading and trailing lines:
    $text = preg_replace('/\A\n+|\n+\z/', '', $text);

    $chunks = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

    foreach ($chunks as $index => $chunk) {
        if (preg_match('/^B\x1A[0-9]+B$/', $chunk)) {
            # Is a block: swap the hash for the stored content.
            # (Special handling of <div markdown="1"> blocks used to
            # live here and is disabled.)
            $chunks[$index] = $this->html_hashes[$chunk];
            continue;
        }

        # Is a paragraph: leading spaces are replaced by the opening tag.
        $html = $this->runSpanGamut($chunk);
        $html = preg_replace('/^([ ]*)/', "<p>", $html) . "</p>";
        $chunks[$index] = $this->unhash($html);
    }

    return implode("\n\n", $chunks);
}
1440
1441
function encodeAttribute($text) {
    #
    # Encode text for a double-quoted HTML attribute. This function
    # is *not* suitable for attributes enclosed in single quotes.
    #
    # Ampersands and angle brackets are handled by encodeAmpsAndAngles;
    # double quotes are then turned into the &quot; entity so the value
    # cannot terminate the attribute early.
    #
    $text = $this->encodeAmpsAndAngles($text);
    $text = str_replace('"', '&quot;', $text);
    return $text;
}
1451
1452
function encodeAmpsAndAngles($text) {
    #
    # Smart processing for ampersands and angle brackets that need to
    # be encoded. Valid character entities are left alone unless the
    # no-entities mode is set.
    #
    # Returns $text with "&" replaced by "&amp;" and "<" by "&lt;".
    #
    if ($this->no_entities) {
        # Every ampersand is encoded, including those of entities.
        $text = str_replace('&', '&amp;', $text);
    } else {
        # Ampersand-encoding based entirely on Nat Irons's Amputator
        # MT plugin: <http://bumppo.net/projects/amputator/>
        # Only ampersands that do not start something shaped like an
        # entity (named, decimal or hexadecimal) are encoded.
        $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
            '&amp;', $text);
    }
    # Encode remaining <'s
    $text = str_replace('<', '&lt;', $text);

    return $text;
}
1472
1473
function doAutoLinks($text) {
    #
    # Replace <url> and <email address> autolinks found in $text, using
    # _doAutoLinks_url_callback and _doAutoLinks_email_callback.
    #
    $url_re = '{<((https?|ftp|dict):[^\'">\s]+)>}i';

    # Email addresses: <address@domain.foo>
    $email_re = '{
        <
        (?:mailto:)?
        (
            (?:
                [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
            |
                ".*?"
            )
            \@
            (?:
                [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
            |
                \[[\d.a-fA-F:]+\] # IPv4 & IPv6
            )
        )
        >
        }xi';

    $with_urls = preg_replace_callback(
        $url_re, array($this, '_doAutoLinks_url_callback'), $text);

    return preg_replace_callback(
        $email_re, array($this, '_doAutoLinks_email_callback'), $with_urls);
}
function _doAutoLinks_url_callback($matches) {
    #
    # Build a hashed link whose target and text are both the matched
    # URL ($matches[1]), encoded with encodeAttribute.
    #
    $href = $this->encodeAttribute($matches[1]);
    return $this->hashPart("<a href=\"$href\">$href</a>");
}
function _doAutoLinks_email_callback($matches) {
    #
    # Build a hashed mailto link for the matched address ($matches[1])
    # using encodeEmailAddress.
    #
    return $this->hashPart($this->encodeEmailAddress($matches[1]));
}
1511
1512
function encodeEmailAddress($addr) {
    #
    # Input: an email address, e.g. "foo@example.com"
    #
    # Output: the email address as a mailto link, with most characters
    # of the address written as a decimal or hexadecimal character
    # reference, in the hopes of foiling most address harvesting spam
    # bots. Illustrative shape of the result:
    #
    #   <a href="&#109;&#x61;i&#x6c;...">&#x66;o&#111;&#64;...</a>
    #
    # Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
    # With some optimizations by Milian Wolff.
    #
    $addr = "mailto:" . $addr;
    $pieces = preg_split('/(?<!^)(?!$)/', $addr);
    # Seed derived from the address itself, so the same address always
    # produces the same output.
    $seed = (int)abs(crc32($addr) / strlen($addr));

    $encoded = array();
    foreach ($pieces as $index => $piece) {
        $code = ord($piece);

        # Non-ascii bytes are passed through untouched.
        if ($code >= 128) {
            $encoded[$index] = $piece;
            continue;
        }

        # Pseudo-random value in 0..99 deciding the representation:
        # roughly 10% raw, 45% hex, 45% dec.
        $roll = ($seed * (1 + $index)) % 100;

        if ($roll > 90 && $piece !== '@') {
            # Left raw. '@' is never left raw.
            $encoded[$index] = $piece;
        } elseif ($roll < 45) {
            $encoded[$index] = '&#x'.dechex($code).';';
        } else {
            $encoded[$index] = '&#'.$code.';';
        }
    }

    $href = implode('', $encoded);
    # The link text is the same sequence without the 7 pieces of `mailto:`.
    $label = implode('', array_slice($encoded, 7));

    return "<a href=\"$href\">$label</a>";
}
1552
1553
function parseSpan($str) {
    #
    # Walk through $str token by token. Escaped characters, code span
    # markers and (unless no_markup is set) embedded HTML are handed to
    # handleSpanToken; the text in between is copied to the output as is.
    #
    $markup_re = $this->no_markup ? '' : '
            |
                <!-- .*? --> # comment
            |
                <\?.*?\?> | <%.*?%> # processing instruction
            |
                <[/!$]?[-a-zA-Z0-9:_]+ # regular tags
                (?>
                    \s
                    (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
                )?
                >
        ';

    $span_re = '{
            (
                \\\\'.$this->escape_chars_re.'
            |
                (?<![`\\\\])
                `+ # code span marker
        '.$markup_re.'
            )
            }xs';

    $output = '';

    for (;;) {
        # Split at the next token: text before it, the token itself and
        # the remaining text.
        $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);

        # Text preceding the token goes out unchanged.
        $output .= $parts[0];

        # No token left: the end has been reached.
        if (!isset($parts[1])) {
            break;
        }

        # The remaining text is passed by reference, so handleSpanToken
        # may consume part of it.
        $output .= $this->handleSpanToken($parts[1], $parts[2]);
        $str = $parts[2];
    }

    return $output;
}
1608
1609
/**
 * Handle $token provided by parseSpan by determining its nature and
 * returning the corresponding value that should replace it.
 *
 * @param string $token the matched token: a backslash escape, a run of
 *                      backticks, or anything else matched by parseSpan
 * @param string $str   text following the token, passed by reference;
 *                      when a code span is closed it is replaced by the
 *                      text that follows the closing marker
 * @return mixed replacement for the token
 */
protected function handleSpanToken($token, &$str)
{
    # Square-bracket offsets: the curly-brace form ($token{0}) is a
    # fatal error as of PHP 8.
    switch ($token[0]) {
        case "\\":
            # Escaped character: emit it as a numeric character reference.
            return $this->hashPart("&#". ord($token[1]). ";");
        case "`":
            # Search for end marker in remaining text.
            if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
                $str, $matches))
            {
                $str = $matches[2];
                $codespan = $this->makeCodeSpan($matches[1]);
                return $this->hashPart($codespan);
            }
            return $token; // return as text since no ending marker found.
        default:
            return $this->hashPart($token);
    }
}
1633
1634
function outdent($text) {
    #
    # Strip one level of indentation from the start of every line: either
    # one tab or from one up to tab_width spaces.
    #
    $indent_re = '/^(\t|[ ]{1,'.$this->tab_width.'})/m';

    return preg_replace($indent_re, '', $text);
}
1641
1642
# String length function used by the detab callback. `_initDetab` will
# create a function to handle UTF-8 if the default function does not exist.
var $utf8_strlen = 'mb_strlen';
1646
function detab($text) {
    #
    # Replace tabs with the appropriate amount of space.
    #
    # Only lines that contain a tab are touched; each of them is rebuilt
    # by _detab_callback.
    #
    return preg_replace_callback(
        '/^.*\t.*$/m',
        array($this, '_detab_callback'),
        $text
    );
}
function _detab_callback($matches) {
    #
    # Rebuild one line ($matches[0]): the pieces between tabs are joined
    # again with enough spaces to reach the next multiple of tab_width.
    #
    $measure = $this->utf8_strlen; # strlen function for UTF-8.

    $segments = explode("\t", $matches[0]);

    # The line starts with the first segment; the others are appended
    # one by one after their padding.
    $line = array_shift($segments);
    foreach ($segments as $segment) {
        $padding = $this->tab_width -
            $measure($line, 'UTF-8') % $this->tab_width;
        $line .= str_repeat(" ", $padding) . $segment;
    }

    return $line;
}
function _initDetab() {
    #
    # Check for the availability of the function in the `utf8_strlen` property
    # (initially `mb_strlen`). If the function is not available, install a
    # function that will loosely count the number of UTF-8 characters with a
    # regular expression.
    #
    $current = $this->utf8_strlen;

    # Nothing to do when the fallback is already installed (a closure must
    # not be handed to function_exists) or the named function exists.
    if ($current instanceof Closure || function_exists($current)) return;

    # An anonymous function is used because create_function() no longer
    # exists in PHP 8. It is called with the same arguments as mb_strlen;
    # the extra encoding argument is simply ignored.
    $this->utf8_strlen = function ($text) {
        return preg_match_all(
            '/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/',
            $text, $m);
    };
}
1689
1690
function unhash($text) {
    #
    # Swap back in all the tags hashed by _HashHTMLBlocks: every hash
    # token found in $text is replaced through _unhash_callback.
    #
    $hash_re = '/(.)\x1A[0-9]+\1/';

    return preg_replace_callback(
        $hash_re, array($this, '_unhash_callback'), $text);
}
function _unhash_callback($matches) {
    # Look up the content stored in html_hashes for the matched token.
    $token = $matches[0];
    return $this->html_hashes[$token];
}
1701
1702 }
1703
1704 /*
1705
1706 PHP Markdown
1707 ============
1708
1709 Description
1710 -----------
1711
1712 This is a PHP translation of the original Markdown formatter written in
1713 Perl by John Gruber.
1714
1715 Markdown is a text-to-HTML filter; it translates an easy-to-read /
1716 easy-to-write structured text format into HTML. Markdown's text format
1717 is most similar to that of plain text email, and supports features such
1718 as headers, *emphasis*, code blocks, blockquotes, and links.
1719
1720 Markdown's syntax is designed not as a generic markup language, but
1721 specifically to serve as a front-end to (X)HTML. You can use span-level
HTML tags anywhere in a Markdown document, and you can use block-level
HTML tags (like <div> and <table>) as well.
1724
1725 For more information about Markdown's syntax, see:
1726
1727 <http://daringfireball.net/projects/markdown/>
1728
1729
1730 Bugs
1731 ----
1732
1733 To file bug reports please send email to:
1734
1735 <michel.fortin@michelf.com>
1736
1737 Please include with your report: (1) the example input; (2) the output you
1738 expected; (3) the output Markdown actually produced.
1739
1740
1741 Version History
1742 ---------------
1743
1744 See the readme file for detailed release notes for this version.
1745
1746
1747 Copyright and License
1748 ---------------------
1749
1750 PHP Markdown
1751 Copyright (c) 2004-2009 Michel Fortin
1752 <http://michelf.com/>
1753 All rights reserved.
1754
1755 Based on Markdown
1756 Copyright (c) 2003-2006 John Gruber
1757 <http://daringfireball.net/>
1758 All rights reserved.
1759
1760 Redistribution and use in source and binary forms, with or without
1761 modification, are permitted provided that the following conditions are
1762 met:
1763
1764 * Redistributions of source code must retain the above copyright notice,
1765 this list of conditions and the following disclaimer.
1766
1767 * Redistributions in binary form must reproduce the above copyright
1768 notice, this list of conditions and the following disclaimer in the
1769 documentation and/or other materials provided with the distribution.
1770
1771 * Neither the name "Markdown" nor the names of its contributors may
1772 be used to endorse or promote products derived from this software
1773 without specific prior written permission.
1774
1775 This software is provided by the copyright holders and contributors "as
1776 is" and any express or implied warranties, including, but not limited
1777 to, the implied warranties of merchantability and fitness for a
1778 particular purpose are disclaimed. In no event shall the copyright owner
1779 or contributors be liable for any direct, indirect, incidental, special,
1780 exemplary, or consequential damages (including, but not limited to,
1781 procurement of substitute goods or services; loss of use, data, or
1782 profits; or business interruption) however caused and on any theory of
1783 liability, whether in contract, strict liability, or tort (including
1784 negligence or otherwise) arising in any way out of the use of this
1785 software, even if advised of the possibility of such damage.
1786
1787 */