1 <?php
2 #
3 # Markdown Extra - A text-to-HTML conversion tool for web writers
4 #
5 # PHP Markdown & Extra
6 # Copyright (c) 2004-2012 Michel Fortin
7 # <http://michelf.com/projects/php-markdown/>
8 #
9 # Original Markdown
10 # Copyright (c) 2004-2006 John Gruber
11 # <http://daringfireball.net/projects/markdown/>
12 #
13
14 /**
15 * Markdown Extra Parser Class
16 */
17 class MarkdownExtra_Parser extends Markdown_Parser
18 {
# String prepended to every footnote id.
public $fn_id_prefix = "";

# Optional title attributes for footnote links and backlinks. A "%%"
# placeholder in either value is replaced by the footnote number.
public $fn_link_title = MARKDOWN_FN_LINK_TITLE;
public $fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE;

# Optional class attributes for footnote links and backlinks.
public $fn_link_class = MARKDOWN_FN_LINK_CLASS;
public $fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS;

# Predefined abbreviations: abbreviated word => description.
public $predef_abbr = array();
32
33
function __construct() {
    #
    # Constructor. Registers the Extra-specific escapable characters and
    # transformations, then runs the parent initializer.
    #
    # Declared as __construct() because PHP 8 no longer treats a method
    # named after its class as a constructor; without it `new` would skip
    # this initialization entirely.
    #
    # Add extra escapable characters before the parent initializer
    # builds the escape table.
    $this->escape_chars .= ':|';

    # Insert extra document, block, and span transformations.
    # The parent initializer does the sorting.
    $this->document_gamut += array(
        //"doFencedCodeBlocks" => 5,
        "stripFootnotes"     => 15,
        "stripAbbreviations" => 25,
        "appendFootnotes"    => 50,
    );
    $this->block_gamut += array(
        //"doFencedCodeBlocks" => 5,
        "doTables"   => 15,
        "doDefLists" => 45,
    );
    $this->span_gamut += array(
        "doFootnotes"     => 5,
        "doAbbreviations" => 70,
    );

    parent::Markdown_Parser();
}

function MarkdownExtra_Parser() {
    #
    # PHP 4-style constructor name, kept so existing code that calls it
    # explicitly (e.g. parent::MarkdownExtra_Parser() in a subclass) keeps
    # working. self:: is used rather than $this-> so that a subclass
    # overriding __construct() cannot cause infinite recursion.
    #
    self::__construct();
}
62
63
# Per-document state used by the Extra transformations.
# $footnotes:         footnote id => text of definitions not yet referenced.
# $footnotes_ordered: footnote id => text, in order of first reference.
# $abbr_desciptions:  abbreviated word => description.
# $abbr_word_re:      regex alternation of all known abbreviated words.
public $footnotes = array();
public $footnotes_ordered = array();
public $abbr_desciptions = array();
public $abbr_word_re = '';

# Number that will be given to the next referenced footnote.
public $footnote_counter = 1;
72
73
function setup() {
    #
    # Set up the Extra-specific variables before a document is processed.
    #
    # Resets the footnote and abbreviation state, then seeds the
    # abbreviation table and its matching regex from $predef_abbr.
    #
    parent::setup();

    $this->footnotes = array();
    $this->footnotes_ordered = array();
    $this->abbr_desciptions = array();
    $this->footnote_counter = 1;

    # Collect the quoted words and join them once. Joining avoids testing
    # the partially built regex for truthiness, which treated a leading
    # abbreviation of "0" as "nothing added yet" and dropped the "|".
    $quoted_words = array();
    foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
        $quoted_words[] = preg_quote((string) $abbr_word);
        $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
    }
    $this->abbr_word_re = implode('|', $quoted_words);
}
93
function teardown() {
    #
    # Release the Extra-specific per-document state, then let the parent
    # class clean up its own.
    #
    $this->abbr_word_re = '';
    $this->abbr_desciptions = array();
    $this->footnotes_ordered = array();
    $this->footnotes = array();

    parent::teardown();
}
105
106
107 ### HTML Block Parser ###
108
# Tags that are always treated as block tags.
public $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend';

# Tags treated as block tags only if the opening tag is alone on its line.
public $context_block_tags_re = 'script|noscript|math|ins|del';

# Tags whose content defaults to span mode when they carry markdown="1".
public $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';

# Tags whose contents must never be modified, wherever they appear.
public $clean_tags_re = 'script|math';

# Tags that do not need to be closed.
public $auto_close_tags_re = 'hr|img';
124
125
function hashHTMLBlocks($text) {
    #
    # Hashify HTML blocks and "clean tags" found in $text.
    #
    # Only block-level HTML tags (headers, lists, tables, ...) are hashed,
    # because text wrapped in non-block-level tags such as anchors, phrase
    # emphasis and spans must still end up inside <p> elements. The list
    # of tags looked for is hard-coded.
    #
    # The work is done by two mutually recursive helpers:
    # _hashHTMLBlocks_inMarkdown calls _hashHTMLBlocks_inHTML whenever it
    # meets a block tag, and _hashHTMLBlocks_inHTML calls back into
    # _hashHTMLBlocks_inMarkdown when a tag carries the markdown="1"
    # attribute, so the Markdown inside that tag gets processed.
    #
    # Returns the processed text; the "remaining text" half of the
    # helper's result is discarded.
    #
    $result = $this->_hashHTMLBlocks_inMarkdown($text);

    return $result[0];
}
function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
                                    $enclosing_tag_re = '', $span = false)
{
    #
    # Parse Markdown text, calling _hashHTMLBlocks_inHTML for block tags.
    #
    # *   $indent is the number of spaces to be ignored when checking for
    #     code blocks. This is important because if we don't take the indent
    #     into account, something like this (which looks right) won't work
    #     as expected:
    #
    #     <div>
    #         <div markdown="1">
    #         Hello World.  <-- Is this a Markdown code block or text?
    #         </div>  <-- Is this a Markdown code block or a real tag?
    #     <div>
    #
    #     If you don't like this, just don't indent the tag on which
    #     you apply the markdown="1" attribute.
    #
    # *   If $enclosing_tag_re is not empty, stops at the first unmatched
    #     closing tag with that name. Nested tags are supported.
    #
    # *   If $span is true, text inside must be treated as span content:
    #     an empty span-level hash is inserted around every newline so that
    #     no block element can be triggered.
    #
    # Returns an array of the form: ( processed text , remaining text )
    #
    # String offsets use [] throughout; the {} offset syntax is a parse
    # error on PHP 8.
    #
    if ($text === '') return array('', '');

    # Regex to check for the presence of newlines around a block tag.
    $newline_before_re = '/(?:^\n?|\n\n)*$/';
    $newline_after_re =
        '{
            ^ # Start of text following the tag.
            (?>[ ]*<!--.*?-->)? # Optional comment.
            [ ]*\n # Must be followed by newline.
        }xs';

    # Regex to match any tag.
    $block_tag_re =
        '{
            ( # $2: Capture hole tag.
                </? # Any opening or closing tag.
                (?> # Tag name.
                    '.$this->block_tags_re.' |
                    '.$this->context_block_tags_re.' |
                    '.$this->clean_tags_re.' |
                    (?!\s)'.$enclosing_tag_re.'
                )
                (?:
                    (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name.
                    (?>
                        ".*?" | # Double quotes (can contain `>`)
                        \'.*?\' | # Single quotes (can contain `>`)
                        .+? # Anything but quotes and `>`.
                    )*?
                )?
                > # End of tag.
            |
                <!-- .*? --> # HTML Comment
            |
                <\?.*?\?> | <%.*?%> # Processing instruction
            |
                <!\[CDATA\[.*?\]\]> # CData Block
            |
                # Code span marker
                `+
            '. ( !$span ? ' # If not in span.
            |
                # Indented code block
                (?: ^[ ]*\n | ^ | \n[ ]*\n )
                [ ]{'.($indent+4).'}[^\n]* \n
                (?>
                    (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n
                )*
            |
                # Fenced code block marker
                (?> ^ | \n )
                [ ]{0,'.($indent).'}~~~+[ ]*\n
            ' : '' ). ' # End (if not is span).
            )
        }xs';


    $depth = 0;     # Current depth inside the tag tree.
    $parsed = "";   # Parsed text that will be returned.

    #
    # Loop through every tag until we find the closing tag of the parent
    # or loop until reaching the end of text if no parent tag specified.
    #
    do {
        #
        # Split the text using the first $block_tag_re match found.
        # Text before the match will be first in the array, text after
        # it will be at the end, and between will be the captured tag.
        #
        $parts = preg_split($block_tag_re, $text, 2,
                            PREG_SPLIT_DELIM_CAPTURE);

        # If in Markdown span mode, add an empty-string span-level hash
        # after each newline to prevent triggering any block element.
        if ($span) {
            $void = $this->hashPart("", ':');
            $newline = "$void\n";
            $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
        }

        $parsed .= $parts[0]; # Text before current tag.

        # If end of $text has been reached. Stop loop.
        if (count($parts) < 3) {
            $text = "";
            break;
        }

        $tag  = $parts[1]; # Tag to handle.
        $text = $parts[2]; # Remaining text after current tag.

        #
        # Check for: Code span marker
        #
        if ($tag[0] == "`") {
            # Find corresponding end marker.
            $tag_re = preg_quote($tag);
            if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}',
                $text, $matches))
            {
                # End marker found: pass text unchanged until marker.
                $parsed .= $tag . $matches[0];
                $text = substr($text, strlen($matches[0]));
            }
            else {
                # Unmatched marker: just skip it.
                $parsed .= $tag;
            }
        }
        #
        # Check for: Fenced code block marker.
        #
        else if (preg_match('{^\n?[ ]{0,'.($indent+3).'}~}', $tag)) {
            # Fenced code block marker: find matching end marker.
            $tag_re = preg_quote(trim($tag));
            if (preg_match('{^(?>.*\n)+?[ ]{0,'.($indent).'}'.$tag_re.'[ ]*\n}', $text,
                $matches))
            {
                # End marker found: pass text unchanged until marker.
                $parsed .= $tag . $matches[0];
                $text = substr($text, strlen($matches[0]));
            }
            else {
                # No end marker: just skip it.
                $parsed .= $tag;
            }
        }
        #
        # Check for: Indented code block.
        #
        else if ($tag[0] == "\n" || $tag[0] == " ") {
            # Indented code block: pass it unchanged, will be handled
            # later.
            $parsed .= $tag;
        }
        #
        # Check for: Opening Block level tag or
        #            Opening Context Block tag (like ins and del)
        #               used as a block tag (tag is alone on its line).
        #
        else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) ||
            (   preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) &&
                preg_match($newline_before_re, $parsed) &&
                preg_match($newline_after_re, $text)    )
            )
        {
            # Need to parse tag and following text using the HTML parser.
            list($block_text, $text) =
                $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);

            # Make sure it stays outside of any paragraph by adding newlines.
            $parsed .= "\n\n$block_text\n\n";
        }
        #
        # Check for: Clean tag (like script, math)
        #            HTML Comments, processing instructions.
        #
        else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) ||
            $tag[1] == '!' || $tag[1] == '?')
        {
            # Need to parse tag and following text using the HTML parser.
            # (don't check for markdown attribute)
            list($block_text, $text) =
                $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);

            $parsed .= $block_text;
        }
        #
        # Check for: Tag with same name as enclosing tag.
        #
        else if ($enclosing_tag_re !== '' &&
            # Same name as enclosing tag.
            preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag))
        {
            #
            # Increase/decrease nested tag count. A self-closing tag
            # (ending in "/>") leaves the depth unchanged.
            #
            if ($tag[1] == '/')                     $depth--;
            else if ($tag[strlen($tag)-2] != '/')   $depth++;

            if ($depth < 0) {
                #
                # Going out of parent element. Clean up and break so we
                # return to the calling function.
                #
                $text = $tag . $text;
                break;
            }

            $parsed .= $tag;
        }
        else {
            $parsed .= $tag;
        }
    } while ($depth >= 0);

    return array($parsed, $text);
}
function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
    #
    # Parse HTML, calling _hashHTMLBlocks_inMarkdown for block tags.
    #
    # *   Calls $hash_method (a method name on this object) to convert any
    #     blocks.
    # *   Stops when the first opening tag closes.
    # *   $md_attr indicates if the use of the `markdown="1"` attribute is
    #     allowed (it is not inside clean tags).
    #
    # Returns an array of the form: ( processed text , remaining text )
    #
    # If the end of the text is reached with unbalanced tags, only the
    # first character of the original text is returned as processed and the
    # rest as remaining, so the caller makes progress instead of looping.
    #
    if ($text === '') return array('', '');

    # Regex to match `markdown` attribute inside of a tag.
    $markdown_attr_re = '
        {
            \s* # Eat whitespace before the `markdown` attribute
            markdown
            \s*=\s*
            (?>
                (["\']) # $1: quote delimiter
                (.*?) # $2: attribute value
                \1 # matching delimiter
            |
                ([^\s>]*) # $3: unquoted attribute value
            )
            () # $4: make $3 always defined (avoid warnings)
        }xs';

    # Regex to match any tag.
    $tag_re = '{
            ( # $2: Capture hole tag.
                </? # Any opening or closing tag.
                [\w:$]+ # Tag name.
                (?:
                    (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name.
                    (?>
                        ".*?" | # Double quotes (can contain `>`)
                        \'.*?\' | # Single quotes (can contain `>`)
                        .+? # Anything but quotes and `>`.
                    )*?
                )?
                > # End of tag.
            |
                <!-- .*? --> # HTML Comment
            |
                <\?.*?\?> | <%.*?%> # Processing instruction
            |
                <!\[CDATA\[.*?\]\]> # CData Block
            )
        }xs';

    $original_text = $text; # Save original text in case of failure.

    $depth      = 0;    # Current depth inside the tag tree.
    $block_text = "";   # Temporary text holder for current text.
    $parsed     = "";   # Parsed text that will be returned.

    #
    # Get the name of the starting tag.
    # (This pattern makes $base_tag_name_re safe without quoting.)
    # Defaults to '' so the variable is always defined when the text does
    # not start with a named tag (e.g. a comment).
    #
    $base_tag_name_re = '';
    if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
        $base_tag_name_re = $matches[1];

    #
    # Loop through every tag until we find the corresponding closing tag.
    #
    do {
        #
        # Split the text using the first $tag_re match found.
        # Text before the match will be first in the array, text after
        # it will be at the end, and between will be the captured tag.
        #
        $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);

        if (count($parts) < 3) {
            #
            # End of $text reached with unbalanced tag(s).
            # In that case, we return original text unchanged and pass the
            # first character as filtered to prevent an infinite loop in the
            # parent function.
            #
            return array($original_text[0], substr($original_text, 1));
        }

        $block_text .= $parts[0]; # Text before current tag.
        $tag         = $parts[1]; # Tag to handle.
        $text        = $parts[2]; # Remaining text after current tag.

        #
        # Check for: Auto-close tag (like <hr/>)
        #            Comments and Processing Instructions.
        #
        if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) ||
            $tag[1] == '!' || $tag[1] == '?')
        {
            # Just add the tag to the block as if it was text.
            $block_text .= $tag;
        }
        else {
            #
            # Increase/decrease nested tag count. Only do so if
            # the tag's name matches the base tag's. A self-closing tag
            # (ending in "/>") leaves the depth unchanged.
            #
            if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) {
                if ($tag[1] == '/')                     $depth--;
                else if ($tag[strlen($tag)-2] != '/')   $depth++;
            }

            #
            # Check for `markdown="1"` attribute and handle it.
            # The alternation is grouped so that the anchors apply to all
            # three accepted values; ungrouped, "^1|block|span$" also
            # accepted values such as "10" or "blockquote".
            #
            if ($md_attr &&
                preg_match($markdown_attr_re, $tag, $attr_m) &&
                preg_match('/^(?:1|block|span)$/', $attr_m[2] . $attr_m[3]))
            {
                # Remove `markdown` attribute from opening tag.
                $tag = preg_replace($markdown_attr_re, '', $tag);

                # Check if text inside this tag must be parsed in span mode.
                $this->mode = $attr_m[2] . $attr_m[3];
                $span_mode = $this->mode == 'span' || $this->mode != 'block' &&
                    preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag);

                # Calculate indent before tag.
                if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
                    $strlen = $this->utf8_strlen;
                    $indent = $strlen($matches[1], 'UTF-8');
                } else {
                    $indent = 0;
                }

                # End preceding block with this tag.
                $block_text .= $tag;
                $parsed .= $this->$hash_method($block_text);

                # Get enclosing tag name for the ParseMarkdown function.
                # (This pattern makes $tag_name_re safe without quoting.)
                preg_match('/^<([\w:$]*)\b/', $tag, $matches);
                $tag_name_re = $matches[1];

                # Parse the content using the HTML-in-Markdown parser.
                list ($block_text, $text)
                    = $this->_hashHTMLBlocks_inMarkdown($text, $indent,
                        $tag_name_re, $span_mode);

                # Outdent markdown text.
                if ($indent > 0) {
                    $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
                                                $block_text);
                }

                # Append tag content to parsed text.
                if (!$span_mode)    $parsed .= "\n\n$block_text\n\n";
                else                $parsed .= "$block_text";

                # Start over a new block.
                $block_text = "";
            }
            else $block_text .= $tag;
        }

    } while ($depth > 0);

    #
    # Hash last block text that wasn't processed inside the loop.
    #
    $parsed .= $this->$hash_method($block_text);

    return array($parsed, $text);
}
551
552
function hashClean($text) {
    #
    # Hash a "clean" tag. Text passed through here is replaced by a hash
    # of type 'C', which keeps it from being touched again and blocks
    # invalid nested overlap.
    #
    $hashed = $this->hashPart($text, 'C');

    return $hashed;
}
561
562
function doHeaders($text) {
    #
    # Redefined to add support for an id attribute, written as a
    # trailing {#id} on the header line.
    #
    # Setext-style headers:
    #     Header 1  {#header1}
    #     ========
    #
    #     Header 2  {#header2}
    #     --------
    #
    $setext_re = '{
            (^.+?) # $1: Header text
            (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # $2: Id attribute
            [ ]*\n(=+|-+)[ ]*\n+ # $3: Header footer
        }mx';
    $text = preg_replace_callback($setext_re,
        array($this, '_doHeaders_callback_setext'), $text);

    # atx-style headers:
    #   # Header 1        {#header1}
    #   ## Header 2       {#header2}
    #   ## Header 2 with closing hashes ##  {#header3}
    #   ...
    #   ###### Header 6   {#header2}
    #
    $atx_re = '{
            ^(\#{1,6}) # $1 = string of #\'s
            [ ]*
            (.+?) # $2 = Header text
            [ ]*
            \#* # optional closing #\'s (not counted)
            (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # id attribute
            [ ]*
            \n+
        }xm';

    return preg_replace_callback($atx_re,
        array($this, '_doHeaders_callback_atx'), $text);
}
function _doHeaders_attr($attr) {
    #
    # Build the id attribute for a header.
    #
    # $attr is the id captured from a {#id} suffix, or null/'' when the
    # header has none. Returns ' id="..."' (with a leading space) or ''.
    #
    # Explicit comparisons are used instead of empty(): empty('0') is
    # true, which silently dropped the otherwise valid id "0".
    #
    if ($attr === null || $attr === false || $attr === '') return "";
    return " id=\"$attr\"";
}
function _doHeaders_callback_setext($matches) {
    #
    # preg_replace_callback handler for Setext-style headers.
    #
    # $matches[1] is the header text, $matches[2] the optional id and
    # $matches[3] the underline ("=" run => <h1>, "-" run => <h2>).
    # Returns the hashed header block surrounded by newlines.
    #
    # A single "-" under a line starting with "- " is left untouched
    # (the whole match is returned unchanged).
    if ($matches[3] == '-' && preg_match('{^- }', $matches[1]))
        return $matches[0];

    # [] string offset: the {} offset syntax is a parse error on PHP 8.
    $level = $matches[3][0] == '=' ? 1 : 2;
    $id = isset($matches[2]) ? $matches[2] : '';
    $attr = $this->_doHeaders_attr($id);
    $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";
    return "\n" . $this->hashBlock($block) . "\n\n";
}
function _doHeaders_callback_atx($matches) {
    #
    # preg_replace_callback handler for atx-style headers.
    #
    # $matches[1] is the run of "#" (its length is the header level),
    # $matches[2] the header text and $matches[3] the optional id, which
    # is absent from the array when the header has no {#id} suffix.
    #
    $hashes = $matches[1];
    $id = isset($matches[3]) ? $matches[3] : null;

    $level = strlen($hashes);
    $attr = $this->_doHeaders_attr($id);
    $content = $this->runSpanGamut($matches[2]);

    return "\n" . $this->hashBlock("<h$level$attr>" . $content . "</h$level>") . "\n\n";
}
621
622
function doTables($text) {
    #
    # Form HTML tables.
    #
    # Two passes are made over $text: one for tables whose rows start with
    # a pipe, one for tables without leading pipes. Each match is handed to
    # a callback that builds and hashes the <table> markup.
    #
    $less_than_tab = $this->tab_width - 1;
    #
    # Find tables with leading pipe.
    #
    #   | Header 1 | Header 2
    #   | -------- | --------
    #   | Cell 1   | Cell 2
    #   | Cell 3   | Cell 4
    #
    $text = preg_replace_callback('
        {
            ^ # Start of a line
            [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
            [|] # Optional leading pipe (present)
            (.+) \n # $1: Header row (at least one pipe)

            [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
            [|] ([ ]*[-:]+[-| :]*) \n # $2: Header underline

            ( # $3: Cells
                (?>
                    [ ]* # Allowed whitespace.
                    [|] .* \n # Row content.
                )*
            )
            (?=\n|\Z) # Stop at final double newline.
        }xm',
        array(&$this, '_doTable_leadingPipe_callback'), $text);

    #
    # Find tables without leading pipe.
    #
    #   Header 1 | Header 2
    #   -------- | --------
    #   Cell 1   | Cell 2
    #   Cell 3   | Cell 4
    #
    # The callback name below is spelled exactly like the method it refers
    # to (_doTable_callback) rather than relying on PHP's case-insensitive
    # method lookup.
    #
    $text = preg_replace_callback('
        {
            ^ # Start of a line
            [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
            (\S.*[|].*) \n # $1: Header row (at least one pipe)

            [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
            ([-:]+[ ]*[|][-| :]*) \n # $2: Header underline

            ( # $3: Cells
                (?>
                    .* [|] .* \n # Row content
                )*
            )
            (?=\n|\Z) # Stop at final double newline.
        }xm',
        array(&$this, '_doTable_callback'), $text);

    return $text;
}
function _doTable_leadingPipe_callback($matches) {
    #
    # Handler for tables written with a leading pipe on every row.
    #
    # The header row ($matches[1]) and underline ($matches[2]) were already
    # captured without their leading pipe; only the body rows still carry
    # one. It is stripped here and the rest is delegated to
    # _doTable_callback.
    #
    $rows = preg_replace('/^ *[|]/m', '', $matches[3]);

    $normalized = array($matches[0], $matches[1], $matches[2], $rows);

    return $this->_doTable_callback($normalized);
}
function _doTable_callback($matches) {
    #
    # Build the HTML for one table and return it as a hashed block.
    #
    # $matches[1] is the header row, $matches[2] the header underline
    # (which carries the column alignment) and $matches[3] the body rows,
    # one per line, all without leading pipes.
    #
    $head       = $matches[1];
    $underline  = $matches[2];
    $content    = $matches[3];

    # Remove any trailing pipes for each line.
    $head       = preg_replace('/[|] *$/m', '', $head);
    $underline  = preg_replace('/[|] *$/m', '', $underline);
    $content    = preg_replace('/[|] *$/m', '', $content);

    # Read alignment from the header underline.
    $attr = array();
    $separators = preg_split('/ *[|] */', $underline);
    foreach ($separators as $n => $s) {
        if (preg_match('/^ *-+: *$/', $s))          $attr[$n] = ' align="right"';
        else if (preg_match('/^ *:-+: *$/', $s))    $attr[$n] = ' align="center"';
        else if (preg_match('/^ *:-+ *$/', $s))     $attr[$n] = ' align="left"';
        else                                        $attr[$n] = '';
    }

    # Parsing span elements, including code spans, character escapes,
    # and inline HTML tags, so that pipes inside those get ignored.
    $head       = $this->parseSpan($head);
    $headers    = preg_split('/ *[|] */', $head);
    $col_count  = count($headers);

    # The underline may describe fewer columns than the header row has;
    # give the extra columns an empty alignment so every $attr[$n] used
    # below is defined.
    $attr       = array_pad($attr, $col_count, '');

    # Write column headers.
    $text = "<table>\n";
    $text .= "<thead>\n";
    $text .= "<tr>\n";
    foreach ($headers as $n => $header)
        $text .= " <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n";
    $text .= "</tr>\n";
    $text .= "</thead>\n";

    # Split content by row.
    $rows = explode("\n", trim($content, "\n"));

    $text .= "<tbody>\n";
    foreach ($rows as $row) {
        # Parsing span elements, including code spans, character escapes,
        # and inline HTML tags, so that pipes inside those get ignored.
        $row = $this->parseSpan($row);

        # Split row by cell: at most $col_count cells, padded with empty
        # cells when the row is short.
        $row_cells = preg_split('/ *[|] */', $row, $col_count);
        $row_cells = array_pad($row_cells, $col_count, '');

        $text .= "<tr>\n";
        foreach ($row_cells as $n => $cell)
            $text .= " <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n";
        $text .= "</tr>\n";
    }
    $text .= "</tbody>\n";
    $text .= "</table>";

    return $this->hashBlock($text) . "\n";
}
751
752
function doDefLists($text) {
    #
    # Form HTML definition lists.
    #
    # Every complete definition list found at the start of the text or
    # right after a blank line is handed to _doDefLists_callback.
    #
    $less_than_tab = $this->tab_width - 1;

    # Pattern matching one entire definition list (used with the m and x
    # modifiers below).
    $list_re = '(?>
            ( # $1 = whole list
              ( # $2
                [ ]{0,'.$less_than_tab.'}
                ((?>.*\S.*\n)+) # $3 = defined term
                \n?
                [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
              )
              (?s:.+?)
              ( # $4
                  \z
                |
                  \n{2,}
                  (?=\S)
                  (?! # Negative lookahead for another term
                    [ ]{0,'.$less_than_tab.'}
                    (?: \S.*\n )+? # defined term
                    \n?
                    [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
                  )
                  (?! # Negative lookahead for another definition
                    [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
                  )
              )
            )
        )'; // mx

    $pattern = '{
            (?>\A\n?|(?<=\n\n))
            '.$list_re.'
        }mx';

    return preg_replace_callback($pattern,
        array($this, '_doDefLists_callback'), $text);
}
function _doDefLists_callback($matches) {
    #
    # Turn one matched definition list ($matches[1]) into a hashed
    # <dl> block followed by a blank line.
    #
    $items = trim($this->processDefListItems($matches[1]));

    $html = "<dl>\n" . $items . "\n</dl>";

    return $this->hashBlock($html) . "\n\n";
}
805
806
function processDefListItems($list_str) {
    #
    # Process the contents of a single definition list, splitting it
    # into individual term and definition list items.
    #
    # Terms are converted first (to <dt> elements), then definitions
    # (to <dd> elements); the definition pattern relies on the <dt> tags
    # produced by the first pass to know where a definition ends.
    #
    $less_than_tab = $this->tab_width - 1;

    # trim trailing blank lines:
    $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

    # Process definition terms.
    # The lookahead for the definition mark uses $less_than_tab like every
    # other indent check here, instead of a hard-coded 3 that only matched
    # the other checks when tab_width is 4.
    $list_str = preg_replace_callback('{
            (?>\A\n?|\n\n+) # leading line
            ( # definition terms = $1
                [ ]{0,'.$less_than_tab.'} # leading whitespace
                (?![:][ ]|[ ]) # negative lookahead for a definition
                # mark (colon) or more whitespace.
                (?> \S.* \n)+? # actual term (not whitespace).
            )
            (?=\n?[ ]{0,'.$less_than_tab.'}:[ ]) # lookahead for following line feed
            # with a definition mark.
        }xm',
        array(&$this, '_processDefListItems_callback_dt'), $list_str);

    # Process actual definitions.
    $list_str = preg_replace_callback('{
            \n(\n+)? # leading line = $1
            ( # marker space = $2
                [ ]{0,'.$less_than_tab.'} # whitespace before colon
                [:][ ]+ # definition mark (colon)
            )
            ((?s:.+?)) # definition text = $3
            (?= \n+ # stop at next definition mark,
                (?: # next term or end of text
                    [ ]{0,'.$less_than_tab.'} [:][ ] |
                    <dt> | \z
                )
            )
        }xm',
        array(&$this, '_processDefListItems_callback_dd'), $list_str);

    return $list_str;
}
function _processDefListItems_callback_dt($matches) {
    #
    # Convert a group of term lines ($matches[1], one term per line)
    # into one <dt> element per term.
    #
    $html = '';
    foreach (explode("\n", trim($matches[1])) as $line) {
        $html .= "\n<dt>" . $this->runSpanGamut(trim($line)) . "</dt>";
    }
    $html .= "\n";

    return $html;
}
function _processDefListItems_callback_dd($matches) {
    #
    # Convert one definition into a <dd> element.
    #
    # $matches[1] holds any blank lines preceding the definition,
    # $matches[2] the marker (leading spaces, colon, spaces) and
    # $matches[3] the definition text.
    #
    $blank_before = $matches[1];
    $marker = $matches[2];
    $body = $matches[3];

    $is_block = $blank_before || preg_match('/\n{2,}/', $body);

    if (!$is_block) {
        # Compact definition: span-level content only.
        $inline = $this->runSpanGamut($this->outdent(rtrim($body)));
        return "\n<dd>" . $inline . "</dd>\n";
    }

    # Definition preceded by a blank line, or containing one: treat it as
    # block content. The marker is replaced by the same amount of
    # whitespace before outdenting.
    $padded = str_repeat(' ', strlen($marker)) . $body;
    $blocks = $this->runBlockGamut($this->outdent($padded . "\n\n"));

    return "\n<dd>" . "\n" . $blocks . "\n" . "</dd>\n";
}
877
/**
 * Adding the fenced code block syntax to regular Markdown:
 *
 * ~~~
 * Code block
 * ~~~
 *
 * ```php
 * <?php echo "php!"
 * ```
 *
 * The opening marker is three or more tildes or backticks, optionally
 * followed by a block type (any run of non-space characters). The block
 * ends at a line made of the same marker. Capture groups handed to the
 * callback: 1 = marker, 2 = block type, 3 = content.
 *
 * @param string $text
 *
 * @return string The text with every fenced code block replaced by the
 *                result of _doFencedCodeBlocks_callback().
 */
protected function doFencedCodeBlocks($text)
{
    $text = preg_replace_callback('{
            (?:\n|\A)
            # 1: Opening marker
            (
                [~`]{3,} # Marker: three tilde or more. // @olvlv: ~ or `
            )
            (\S*)[ ]* \n # Whitespace and newline following marker. // @olvlv code block type

            # 2: Content
            (
                (?>
                    (?!\1 [ ]* \n) # Not a closing marker.
                    .*\n+
                )+
            )

            # Closing marker.
            \1 [ ]* \n
        }xm',
        array($this, '_doFencedCodeBlocks_callback'), $text);

    return $text;
}
918
/**
 * Build the HTML for one fenced code block.
 *
 * When the block declares a type, the code is passed to format_codeblock()
 * and unhashed; otherwise it is HTML-escaped and its leading newlines are
 * turned into line breaks.
 *
 * @param array $matches Regex match: [2] is the block type (may be empty),
 *                       [3] is the code.
 *
 * @return string The hashed <pre><code> block surrounded by blank lines.
 */
protected function _doFencedCodeBlocks_callback($matches) // @olvlv update to support "type"
{
    $class = $matches[2];
    $codeblock = $matches[3];

    # Compare against '' rather than testing truthiness, so that a type of
    # "0" is not mistaken for "no type".
    $has_class = ($class !== '');

    if ($has_class)
    {
        $codeblock = $this->format_codeblock($codeblock, $class);
        $codeblock = $this->unhash($codeblock);
    }
    else
    {
        $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
        $codeblock = preg_replace_callback('/^\n+/', array($this, '_doFencedCodeBlocks_newlines'), $codeblock);
    }

    # The type comes straight from the document and may contain quotes or
    # angle brackets; encode it before placing it in an attribute value.
    $class_attr = $has_class ? ' class="' . $this->encodeAttribute($class) . '"' : '';

    $codeblock = '<pre' . $class_attr . '><code>' . $codeblock . '</code></pre>';

    return "\n\n".$this->hashBlock($codeblock)."\n\n";
}
939
function _doFencedCodeBlocks_newlines($matches)
{
    # Replace a run of newlines ($matches[0]) with one <br> element per
    # newline.
    $line_break = "<br" . $this->empty_element_suffix;
    $count = strlen($matches[0]);

    return str_repeat($line_break, $count);
}
944
945
#
# Emphasis marker patterns, redefined so that emphasis by underscore does
# not work in the middle of a word.
#
# NOTE(review): presumably the '' entry matches an opening marker and the
# other entries match the closing marker for the token used as key; the
# code consuming these tables is not in this part of the file - confirm.
#
public $em_relist = array(
    ''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?=\S|$)(?![\.,:;]\s)',
    '*' => '(?<=\S|^)(?<!\*)\*(?!\*)',
    '_' => '(?<=\S|^)(?<!_)_(?![a-zA-Z0-9_])',
    );
public $strong_relist = array(
    ''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?=\S|$)(?![\.,:;]\s)',
    '**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)',
    '__' => '(?<=\S|^)(?<!_)__(?![a-zA-Z0-9_])',
    );
public $em_strong_relist = array(
    ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?=\S|$)(?![\.,:;]\s)',
    '***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)',
    '___' => '(?<=\S|^)(?<!_)___(?![a-zA-Z0-9_])',
    );
965
966
function formParagraphs($text) {
    #
    # Wrap paragraphs in <p> tags and restore hashed HTML.
    #
    # Params:
    #   $text - string to process with html <p> tags
    #
    # Leading and trailing blank lines are dropped first, then the text is
    # cut into paragraphs at every run of two or more newlines.
    $stripped = preg_replace('/\A\n+|\n+\z/', '', $text);
    $grafs = preg_split('/\n{2,}/', $stripped, -1, PREG_SPLIT_NO_EMPTY);

    $blocks = array();
    foreach ($grafs as $graf) {
        $graf = trim($this->runSpanGamut($graf));

        # Clean tag hashes & block tag hashes are left alone; everything
        # else becomes a paragraph.
        # NOTE(review): the trailing "$" binds only to the clean-tag
        # alternative, so a paragraph that merely *starts* with a block
        # hash is also left unwrapped - confirm this is intended.
        $is_hash = preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $graf);

        $blocks[] = $is_hash ? $graf : "<p>$graf</p>";
    }

    # Join the paragraphs back together, then replace any hash still
    # present in the result.
    return $this->unhash(implode("\n\n", $blocks));
}
1001
1002
1003 ### Footnotes
1004
function stripFootnotes($text) {
    #
    # Strips footnote definitions from text and stores their content in
    # $this->footnotes, keyed by footnote id.
    #
    $less_than_tab = $this->tab_width - 1;

    # Footnote definitions are in the form: [^id]: footnote text
    # The text may continue on following lines; it stops at the next
    # footnote marker or at a blank line followed by non-indented content.
    # "Non-indented" uses $less_than_tab like the marker check on the
    # first line, instead of a hard-coded 3 that only agreed with it when
    # tab_width is 4.
    $text = preg_replace_callback('{
            ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?: # note_id = $1
              [ ]*
              \n? # maybe *one* newline
            ( # text = $2 (no blank lines allowed)
                (?:
                    .+ # actual text
                |
                    \n # newlines but
                    (?!\[\^.+?\]:\s)# negative lookahead for footnote marker.
                    (?!\n+[ ]{0,'.$less_than_tab.'}\S)# ensure line is not blank and followed
                    # by non-indented content
                )*
            )
        }xm',
        array(&$this, '_stripFootnotes_callback'),
        $text);
    return $text;
}
function _stripFootnotes_callback($matches) {
    #
    # Record one footnote definition: $matches[1] is its id, $matches[2]
    # its text. The definition itself is removed from the document.
    #
    $key = $this->fn_id_prefix . $matches[1];
    $body = $this->outdent($matches[2]);

    $this->footnotes[$key] = $body;

    return ''; # The whole definition is replaced by nothing.
}
1037
1038
function doFootnotes($text) {
    #
    # Replace footnote references [^id] in $text with a special text-token,
    # which appendFootnotes later replaces by the actual footnote marker.
    #
    # Nothing is replaced while inside an anchor.
    #
    if ($this->in_anchor) {
        return $text;
    }

    return preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
}
1049
1050
function appendFootnotes($text) {
    #
    # Append footnote list to text.
    #
    # Footnote tokens left by doFootnotes are first replaced by their
    # markers; every footnote referenced that way is then rendered, in
    # order of first reference, inside a <div class="footnotes"> list.
    # Footnotes referenced from within other footnotes are picked up while
    # the list is being built.
    #
    $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
        array(&$this, '_appendFootnotes_callback'), $text);

    if (!empty($this->footnotes_ordered)) {
        $text .= "\n\n";
        $text .= "<div class=\"footnotes\">\n";
        $text .= "<hr". $this->empty_element_suffix ."\n";
        $text .= "<ol>\n\n";

        # Backlink attributes shared by all footnotes. The title may
        # contain a "%%" placeholder for the footnote number, which is
        # filled in per footnote below.
        $attr = " rev=\"footnote\"";
        if ($this->fn_backlink_class != "") {
            $class = $this->fn_backlink_class;
            $class = $this->encodeAttribute($class);
            $attr .= " class=\"$class\"";
        }
        if ($this->fn_backlink_title != "") {
            $title = $this->fn_backlink_title;
            $title = $this->encodeAttribute($title);
            $attr .= " title=\"$title\"";
        }
        $num = 0;

        while (!empty($this->footnotes_ordered)) {
            $footnote = reset($this->footnotes_ordered);
            $note_id = key($this->footnotes_ordered);
            unset($this->footnotes_ordered[$note_id]);

            $footnote .= "\n"; # Need to append newline before parsing.
            $footnote = $this->runBlockGamut("$footnote\n");
            $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
                array(&$this, '_appendFootnotes_callback'), $footnote);

            # Fill the placeholder into a per-footnote copy. Writing the
            # result back into $attr consumed the "%%" on the first
            # footnote, so every later backlink showed number 1.
            $note_attr = str_replace("%%", (string) ++$num, $attr);
            $note_id = $this->encodeAttribute($note_id);

            # Add backlink to last paragraph; create new paragraph if needed.
            $backlink = "<a href=\"#fnref:$note_id\"$note_attr>↩</a>";
            if (preg_match('{</p>$}', $footnote)) {
                $footnote = substr($footnote, 0, -4) . " $backlink</p>";
            } else {
                $footnote .= "\n\n<p>$backlink</p>";
            }

            $text .= "<li id=\"fn:$note_id\">\n";
            $text .= $footnote . "\n";
            $text .= "</li>\n\n";
        }

        $text .= "</ol>\n";
        $text .= "</div>";
    }
    return $text;
}
function _appendFootnotes_callback($matches) {
	#
	# preg_replace_callback handler for footnote tokens; $matches[1] is the
	# footnote id as written in the reference.
	#
	# Returns the <sup> footnote marker when a footnote with that id (after
	# prepending fn_id_prefix) is still waiting in $this->footnotes;
	# otherwise returns the reference in its original [^id] form.
	#
	$raw_id = $matches[1];
	$full_id = $this->fn_id_prefix . $raw_id;

	# No footnote left under this id: either none was defined or an earlier
	# marker already claimed it. Restore the literal reference text.
	if (!isset($this->footnotes[$full_id])) {
		return "[^".$raw_id."]";
	}

	# Move the footnote content over to the ordered list so it is emitted
	# by appendFootnotes and cannot be claimed by another marker.
	$this->footnotes_ordered[$full_id] = $this->footnotes[$full_id];
	unset($this->footnotes[$full_id]);

	$number = $this->footnote_counter++;

	$link_attr = " rel=\"footnote\"";
	if ($this->fn_link_class != "") {
		$link_attr .= " class=\"" . $this->encodeAttribute($this->fn_link_class) . "\"";
	}
	if ($this->fn_link_title != "") {
		$link_attr .= " title=\"" . $this->encodeAttribute($this->fn_link_title) . "\"";
	}

	# "%%" in the configured class/title stands for the footnote number.
	$link_attr = str_replace("%%", $number, $link_attr);
	$safe_id = $this->encodeAttribute($full_id);

	return "<sup id=\"fnref:$safe_id\">"
		. "<a href=\"#fn:$safe_id\"$link_attr>$number</a>"
		. "</sup>";
}
1142
1143
1144 ### Abbreviations ###
1145
function stripAbbreviations($text) {
	#
	# Remove abbreviation definitions from $text. Each definition found is
	# handed to _stripAbbreviations_callback, and the definition itself is
	# replaced by whatever that callback returns.
	#
	# Abbreviation definitions are in the form: *[abbr]: description
	# They may be indented by up to (tab_width - 1) spaces, and the
	# description runs to the end of the line.
	#
	$max_indent = $this->tab_width - 1;

	$definition_re = '{
		^[ ]{0,'.$max_indent.'}\*\[(.+?)\][ ]?: # abbr_id = $1
		(.*) # text = $2 (no blank lines allowed)
		}xm';

	return preg_replace_callback($definition_re,
		array(&$this, '_stripAbbreviations_callback'), $text);
}
function _stripAbbreviations_callback($matches) {
	#
	# preg_replace_callback handler for stripAbbreviations. $matches[1] is
	# the abbreviation and $matches[2] its description.
	#
	# Appends the (regex-quoted) abbreviation to the alternation kept in
	# $this->abbr_word_re, stores the trimmed description in
	# $this->abbr_desciptions, and returns the empty string so that the
	# definition disappears from the text.
	#
	$abbr_word = $matches[1];
	$abbr_desc = $matches[2];

	# Separate alternatives with "|". Compare against the empty string
	# rather than relying on truthiness: a pattern that so far consists of
	# just "0" is falsy in PHP and would otherwise be glued to the next
	# abbreviation without a separator.
	if ($this->abbr_word_re != "")
		$this->abbr_word_re .= '|';
	$this->abbr_word_re .= preg_quote($abbr_word);
	$this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
	return ''; # String that will replace the block
}
1170
1171
function doAbbreviations($text) {
	#
	# Find defined abbreviations in $text and wrap them in <abbr> elements.
	#
	# $this->abbr_word_re is the alternation of all known abbreviations.
	# Each occurrence that is neither preceded nor followed by a word
	# character or \x1A is passed to _doAbbreviations_callback. Returns the
	# resulting text; $text is returned unchanged when no abbreviation is
	# known.
	#
	# The check compares against the empty string instead of testing
	# truthiness: a pattern consisting only of "0" is falsy in PHP and
	# would otherwise cause the whole step to be skipped.
	if ($this->abbr_word_re != "") {
		// cannot use the /x modifier because abbr_word_re may
		// contain significant spaces:
		$text = preg_replace_callback('{'.
			'(?<![\w\x1A])'.
			'(?:'.$this->abbr_word_re.')'.
			'(?![\w\x1A])'.
			'}',
			array(&$this, '_doAbbreviations_callback'), $text);
	}
	return $text;
}
function _doAbbreviations_callback($matches) {
	#
	# preg_replace_callback handler for doAbbreviations. $matches[0] is the
	# abbreviation as found in the text.
	#
	# Returns the abbreviation wrapped in an <abbr> element (with a title
	# attribute when a non-empty description is stored for it), passed
	# through hashPart. If no description entry exists for the matched
	# text, the match is returned unchanged.
	#
	$abbr = $matches[0];
	if (!isset($this->abbr_desciptions[$abbr])) {
		return $abbr;
	}

	$desc = $this->abbr_desciptions[$abbr];

	# Compare with the empty string instead of calling empty(): empty("0")
	# is true, which would silently drop a description consisting of "0".
	if ($desc == "") {
		return $this->hashPart("<abbr>$abbr</abbr>");
	}

	$desc = $this->encodeAttribute($desc);
	return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");
}
1202
1203 }
1204
1205
1206 /*
1207
1208 PHP Markdown Extra
1209 ==================
1210
1211 Description
1212 -----------
1213
1214 This is a PHP port of the original Markdown formatter written in Perl
1215 by John Gruber. This special "Extra" version of PHP Markdown features
1216 further enhancements to the syntax for making additional constructs
1217 such as tables and definition lists.
1218
1219 Markdown is a text-to-HTML filter; it translates an easy-to-read /
1220 easy-to-write structured text format into HTML. Markdown's text format
1221 is most similar to that of plain text email, and supports features such
1222 as headers, *emphasis*, code blocks, blockquotes, and links.
1223
1224 Markdown's syntax is designed not as a generic markup language, but
1225 specifically to serve as a front-end to (X)HTML. You can use span-level
1226 HTML tags anywhere in a Markdown document, and you can use block level
1227 HTML tags (like <div> and <table>) as well.
1228
1229 For more information about Markdown's syntax, see:
1230
1231 <http://daringfireball.net/projects/markdown/>
1232
1233
1234 Bugs
1235 ----
1236
1237 To file bug reports please send email to:
1238
1239 <michel.fortin@michelf.com>
1240
1241 Please include with your report: (1) the example input; (2) the output you
1242 expected; (3) the output Markdown actually produced.
1243
1244
1245 Version History
1246 ---------------
1247
1248 See the readme file for detailed release notes for this version.
1249
1250
1251 Copyright and License
1252 ---------------------
1253
1254 PHP Markdown & Extra
1255 Copyright (c) 2004-2009 Michel Fortin
1256 <http://michelf.com/>
1257 All rights reserved.
1258
1259 Based on Markdown
1260 Copyright (c) 2003-2006 John Gruber
1261 <http://daringfireball.net/>
1262 All rights reserved.
1263
1264 Redistribution and use in source and binary forms, with or without
1265 modification, are permitted provided that the following conditions are
1266 met:
1267
1268 * Redistributions of source code must retain the above copyright notice,
1269 this list of conditions and the following disclaimer.
1270
1271 * Redistributions in binary form must reproduce the above copyright
1272 notice, this list of conditions and the following disclaimer in the
1273 documentation and/or other materials provided with the distribution.
1274
1275 * Neither the name "Markdown" nor the names of its contributors may
1276 be used to endorse or promote products derived from this software
1277 without specific prior written permission.
1278
1279 This software is provided by the copyright holders and contributors "as
1280 is" and any express or implied warranties, including, but not limited
1281 to, the implied warranties of merchantability and fitness for a
1282 particular purpose are disclaimed. In no event shall the copyright owner
1283 or contributors be liable for any direct, indirect, incidental, special,
1284 exemplary, or consequential damages (including, but not limited to,
1285 procurement of substitute goods or services; loss of use, data, or
1286 profits; or business interruption) however caused and on any theory of
1287 liability, whether in contract, strict liability, or tort (including
1288 negligence or otherwise) arising in any way out of the use of this
1289 software, even if advised of the possibility of such damage.
1290
1291 */