File vendor/icybee/patron/lib/markdown/markdown_extras.php

   1 <?php
   2 #
   3 # Markdown Extra  -  A text-to-HTML conversion tool for web writers
   4 #
   5 # PHP Markdown & Extra
   6 # Copyright (c) 2004-2012 Michel Fortin  
   7 # <http://michelf.com/projects/php-markdown/>
   8 #
   9 # Original Markdown
  10 # Copyright (c) 2004-2006 John Gruber  
  11 # <http://daringfireball.net/projects/markdown/>
  12 #
  13 
  14 /**
  15  * Markdown Extra Parser Class
  16  */
  17 class MarkdownExtra_Parser extends Markdown_Parser
  18 {
  19     # Prefix for footnote ids.
  20     var $fn_id_prefix = "";
  21 
  22     # Optional title attribute for footnote links and backlinks.
  23     var $fn_link_title = MARKDOWN_FN_LINK_TITLE;
  24     var $fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE;
  25 
  26     # Optional class attribute for footnote links and backlinks.
  27     var $fn_link_class = MARKDOWN_FN_LINK_CLASS;
  28     var $fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS;
  29 
  30     # Predefined abbreviations.
  31     var $predef_abbr = array();
  32 
  33 
  34     function __construct() {
  35     #
  36     # Constructor. Initialize the parser object with the Extra features.
         #
         # Declared as __construct() because PHP 7 deprecates, and PHP 8 no
         # longer recognizes, a constructor named after its class. The old name
         # is kept below as a plain method for code that calls it explicitly.
  37     #
  38         # Add extra escapable characters before the parent constructor
  39         # initializes the table.
  40         $this->escape_chars .= ':|';
  41
  42         # Insert extra document, block, and span transformations.
  43         # Parent constructor will do the sorting.
  44         $this->document_gamut += array(
  45             //"doFencedCodeBlocks" => 5,
  46             "stripFootnotes"     => 15,
  47             "stripAbbreviations" => 25,
  48             "appendFootnotes"    => 50,
  49             );
  50         $this->block_gamut += array(
  51             //"doFencedCodeBlocks" => 5,
  52             "doTables"           => 15,
  53             "doDefLists"         => 45,
  54             );
  55         $this->span_gamut += array(
  56             "doFootnotes"        => 5,
  57             "doAbbreviations"    => 70,
  58             );
  59
  60         parent::Markdown_Parser();
  61     }

         function MarkdownExtra_Parser() {
         #
         # PHP 4 style constructor name, kept for backward compatibility.
         # Uses self:: so that a subclass constructor calling this method
         # does not re-enter its own __construct().
         #
             self::__construct();
         }
  62 
  63 
  64     # Extra variables used during extra transformations.
  65     var $footnotes = array();
  66     var $footnotes_ordered = array();
  67     var $abbr_desciptions = array();
  68     var $abbr_word_re = '';
  69 
  70     # Give the current footnote number.
  71     var $footnote_counter = 1;
  72 
  73 
  74     function setup() {
  75     #
  76     # Setting up Extra-specific variables: runs the parent setup, resets
         # the footnote and abbreviation state, then registers every predefined
         # abbreviation from $this->predef_abbr (word => description).
  77     #
  78         parent::setup();
  79
  80         $this->footnotes = array();
  81         $this->footnotes_ordered = array();
  82         $this->abbr_desciptions = array();
  83         $this->abbr_word_re = '';
  84         $this->footnote_counter = 1;
  85
  86         foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
                 # Compare against '' explicitly: a plain truthiness test treats
                 # the pattern "0" as empty and would drop the '|' separator.
  87             if ($this->abbr_word_re !== '')
  88                 $this->abbr_word_re .= '|';
  89             $this->abbr_word_re .= preg_quote($abbr_word);
  90             $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
  91         }
  92     }
  93 
  94     function teardown() {
  95     #
  96     # Release the Extra-specific state, then run the parent teardown.
  97     #
  98         $this->abbr_word_re = '';
  99         $this->abbr_desciptions = array();
 100         $this->footnotes_ordered = array();
 101         $this->footnotes = array();
 102
 103         parent::teardown();
 104     }
 105 
 106 
 107     ### HTML Block Parser ###
 108 
 109     # Tags that are always treated as block tags:
 110     var $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend';
 111 
 112     # Tags treated as block tags only if the opening tag is alone on its line:
 113     var $context_block_tags_re = 'script|noscript|math|ins|del';
 114 
 115     # Tags where markdown="1" default to span mode:
 116     var $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
 117 
 118     # Tags which must not have their contents modified, no matter where
 119     # they appear:
 120     var $clean_tags_re = 'script|math';
 121 
 122     # Tags that do not need to be closed.
 123     var $auto_close_tags_re = 'hr|img';
 124 
 125 
 126     function hashHTMLBlocks($text) {
 127     #
 128     # Replace HTML blocks and "clean tags" in $text with hashes.
 129     #
 130     # Only block-level HTML tags (headers, lists, tables, ...) get this
 131     # treatment: "paragraphs" wrapped in non-block-level tags such as
 132     # anchors, phrase emphasis, and spans must still end up inside <p>
 133     # tags. The list of recognized tags is hard-coded.
 134     #
 135     # Two mutually recursive helpers do the work. _hashHTMLBlocks_inMarkdown
 136     # hands every block tag it encounters to _hashHTMLBlocks_inHTML, which
 137     # calls back _hashHTMLBlocks_inMarkdown for the content of a tag that
 138     # carries the markdown="1" attribute.
 139     #
 140         #
 141         # Start with the HTML-in-Markdown hasher. It returns the pair
 142         # ( processed text , remaining text ); only the processed text
 143         # is of interest here.
 144         #
 145         $result = $this->_hashHTMLBlocks_inMarkdown($text);
 146
 147         return $result[0];
 148     }
 149     function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
 150                                         $enclosing_tag_re = '', $span = false)
 151     {
 152     #
 153     # Parse markdown text, calling _hashHTMLBlocks_inHTML for block tags.
 154     #
 155     # *   $indent is the number of spaces to be ignored when checking for code
 156     #     blocks. This is important because if we don't take the indent into
 157     #     account, something like this (which looks right) won't work as expected:
 158     #
 159     #     <div>
 160     #         <div markdown="1">
 161     #         Hello World.  <-- Is this a Markdown code block or text?
 162     #         </div>  <-- Is this a Markdown code block or a real tag?
 163     #     </div>
 164     #
 165     #     If you don't like this, just don't indent the tag on which
 166     #     you apply the markdown="1" attribute.
 167     #
 168     # *   If $enclosing_tag_re is not empty, stops at the first unmatched closing
 169     #     tag with that name. Nested tags supported.
 170     #
 171     # *   If $span is true, text inside must be treated as span: an empty
 172     #     span-level hash is added after each newline so that the text cannot
 173     #     trigger any block element.
 174     #
 175     # Returns an array of that form: ( processed text , remaining text )
 176     #
         # String offsets are read with [] rather than {}: the curly brace form
         # is deprecated in PHP 7.4 and is a parse error in PHP 8.
         #
 177         if ($text === '') return array('', '');
 178
 179         # Regex to check for the presence of newlines around a block tag.
 180         $newline_before_re = '/(?:^\n?|\n\n)*$/';
 181         $newline_after_re =
 182             '{
 183                 ^                       # Start of text following the tag.
 184                 (?>[ ]*<!--.*?-->)?     # Optional comment.
 185                 [ ]*\n                  # Must be followed by newline.
 186             }xs';
 187
 188         # Regex to match any tag.
 189         $block_tag_re =
 190             '{
 191                 (                   # $2: Capture hole tag.
 192                     </?                 # Any opening or closing tag.
 193                         (?>             # Tag name.
 194                             '.$this->block_tags_re.'            |
 195                             '.$this->context_block_tags_re.'    |
 196                             '.$this->clean_tags_re.'            |
 197                             (?!\s)'.$enclosing_tag_re.'
 198                         )
 199                         (?:
 200                             (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
 201                             (?>
 202                                 ".*?"       |   # Double quotes (can contain `>`)
 203                                 \'.*?\'     |   # Single quotes (can contain `>`)
 204                                 .+?             # Anything but quotes and `>`.
 205                             )*?
 206                         )?
 207                     >                   # End of tag.
 208                 |
 209                     <!--    .*?     --> # HTML Comment
 210                 |
 211                     <\?.*?\?> | <%.*?%> # Processing instruction
 212                 |
 213                     <!\[CDATA\[.*?\]\]> # CData Block
 214                 |
 215                     # Code span marker
 216                     `+
 217                 '. ( !$span ? ' # If not in span.
 218                 |
 219                     # Indented code block
 220                     (?: ^[ ]*\n | ^ | \n[ ]*\n )
 221                     [ ]{'.($indent+4).'}[^\n]* \n
 222                     (?>
 223                         (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n
 224                     )*
 225                 |
 226                     # Fenced code block marker
 227                     (?> ^ | \n )
 228                     [ ]{0,'.($indent).'}~~~+[ ]*\n
 229                 ' : '' ). ' # End (if not is span).
 230                 )
 231             }xs';
 232
 233
 234         $depth = 0;     # Current depth inside the tag tree.
 235         $parsed = "";   # Parsed text that will be returned.
 236
 237         #
 238         # Loop through every tag until we find the closing tag of the parent
 239         # or loop until reaching the end of text if no parent tag specified.
 240         #
 241         do {
 242             #
 243             # Split the text using the first $block_tag_re match found.
 244             # Text before the match will be first in the array, text after
 245             # the match will be at the end, and between will be any catches
 246             # made by the pattern.
 247             #
 248             $parts = preg_split($block_tag_re, $text, 2,
 249                                 PREG_SPLIT_DELIM_CAPTURE);
 250
 251             # If in Markdown span mode, add an empty-string span-level hash
 252             # after each newline to prevent triggering any block element.
 253             if ($span) {
 254                 $void = $this->hashPart("", ':');
 255                 $newline = "$void\n";
 256                 $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
 257             }
 258
 259             $parsed .= $parts[0]; # Text before current tag.
 260
 261             # If end of $text has been reached. Stop loop.
 262             if (count($parts) < 3) {
 263                 $text = "";
 264                 break;
 265             }
 266
 267             $tag  = $parts[1]; # Tag to handle.
 268             $text = $parts[2]; # Remaining text after current tag.
 270
 271             #
 272             # Check for: Code span marker
 273             #
 274             if ($tag[0] == "`") {
 275                 # Find corresponding end marker.
 276                 $tag_re = preg_quote($tag);
 277                 if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}',
 278                     $text, $matches))
 279                 {
 280                     # End marker found: pass text unchanged until marker.
 281                     $parsed .= $tag . $matches[0];
 282                     $text = substr($text, strlen($matches[0]));
 283                 }
 284                 else {
 285                     # Unmatched marker: just skip it.
 286                     $parsed .= $tag;
 287                 }
 288             }
 289             #
 290             # Check for: Fenced code block marker.
 291             #
 292             else if (preg_match('{^\n?[ ]{0,'.($indent+3).'}~}', $tag)) {
 293                 # Fenced code block marker: find matching end marker.
 294                 $tag_re = preg_quote(trim($tag));
 295                 if (preg_match('{^(?>.*\n)+?[ ]{0,'.($indent).'}'.$tag_re.'[ ]*\n}', $text,
 296                     $matches))
 297                 {
 298                     # End marker found: pass text unchanged until marker.
 299                     $parsed .= $tag . $matches[0];
 300                     $text = substr($text, strlen($matches[0]));
 301                 }
 302                 else {
 303                     # No end marker: just skip it.
 304                     $parsed .= $tag;
 305                 }
 306             }
 307             #
 308             # Check for: Indented code block.
 309             #
 310             else if ($tag[0] == "\n" || $tag[0] == " ") {
 311                 # Indented code block: pass it unchanged, will be handled
 312                 # later.
 313                 $parsed .= $tag;
 314             }
 315             #
 316             # Check for: Opening Block level tag or
 317             #            Opening Context Block tag (like ins and del)
 318             #               used as a block tag (tag is alone on its line).
 319             #
 320             else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) ||
 321                 (   preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) &&
 322                     preg_match($newline_before_re, $parsed) &&
 323                     preg_match($newline_after_re, $text)    )
 324                 )
 325             {
 326                 # Need to parse tag and following text using the HTML parser.
 327                 list($block_text, $text) =
 328                     $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);
 329
 330                 # Make sure it stays outside of any paragraph by adding newlines.
 331                 $parsed .= "\n\n$block_text\n\n";
 332             }
 333             #
 334             # Check for: Clean tag (like script, math)
 335             #            HTML Comments, processing instructions.
 336             #
 337             else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) ||
 338                 $tag[1] == '!' || $tag[1] == '?')
 339             {
 340                 # Need to parse tag and following text using the HTML parser.
 341                 # (don't check for markdown attribute)
 342                 list($block_text, $text) =
 343                     $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);
 344
 345                 $parsed .= $block_text;
 346             }
 347             #
 348             # Check for: Tag with same name as enclosing tag.
 349             #
 350             else if ($enclosing_tag_re !== '' &&
 351                 # Same name as enclosing tag.
 352                 preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag))
 353             {
 354                 #
 355                 # Increase/decrease nested tag count. A self-closing tag
 356                 # (one ending in "/>") leaves the depth unchanged.
 357                 if ($tag[1] == '/')                     $depth--;
 358                 else if ($tag[strlen($tag)-2] != '/')   $depth++;
 359
 360                 if ($depth < 0) {
 361                     #
 362                     # Going out of parent element. Clean up and break so we
 363                     # return to the calling function.
 364                     #
 365                     $text = $tag . $text;
 366                     break;
 367                 }
 368
 369                 $parsed .= $tag;
 370             }
 371             else {
 372                 $parsed .= $tag;
 373             }
 374         } while ($depth >= 0);
 375
 376         return array($parsed, $text);
 377     }
 378     function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
 379     #
 380     # Parse HTML, calling _hashHTMLBlocks_inMarkdown for block tags.
 381     #
 382     # *   Calls $hash_method to convert any blocks.
 383     # *   Stops when the first opening tag closes.
 384     # *   $md_attr indicates if the use of the `markdown="1"` attribute is
 385     #     allowed (it is not inside clean tags).
 386     #
 387     # Returns an array of that form: ( processed text , remaining text )
 388     #
         # String offsets are read with [] rather than {}: the curly brace form
         # is deprecated in PHP 7.4 and is a parse error in PHP 8.
         #
 389         if ($text === '') return array('', '');
 390
 391         # Regex to match `markdown` attribute inside of a tag.
 392         $markdown_attr_re = '
 393             {
 394                 \s*         # Eat whitespace before the `markdown` attribute
 395                 markdown
 396                 \s*=\s*
 397                 (?>
 398                     (["\'])     # $1: quote delimiter
 399                     (.*?)       # $2: attribute value
 400                     \1          # matching delimiter
 401                 |
 402                     ([^\s>]*)   # $3: unquoted attribute value
 403                 )
 404                 ()              # $4: make $3 always defined (avoid warnings)
 405             }xs';
 406
 407         # Regex to match any tag.
 408         $tag_re = '{
 409                 (                   # $2: Capture hole tag.
 410                     </?                 # Any opening or closing tag.
 411                         [\w:$]+         # Tag name.
 412                         (?:
 413                             (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
 414                             (?>
 415                                 ".*?"       |   # Double quotes (can contain `>`)
 416                                 \'.*?\'     |   # Single quotes (can contain `>`)
 417                                 .+?             # Anything but quotes and `>`.
 418                             )*?
 419                         )?
 420                     >                   # End of tag.
 421                 |
 422                     <!--    .*?     --> # HTML Comment
 423                 |
 424                     <\?.*?\?> | <%.*?%> # Processing instruction
 425                 |
 426                     <!\[CDATA\[.*?\]\]> # CData Block
 427                 )
 428             }xs';
 429
 430         $original_text = $text;     # Save original text in case of failure.
 431
 432         $depth      = 0;    # Current depth inside the tag tree.
 433         $block_text = "";   # Temporary text holder for current text.
 434         $parsed     = "";   # Parsed text that will be returned.
 435
 436         #
 437         # Get the name of the starting tag.
 438         # (This pattern makes $base_tag_name_re safe without quoting.)
 439         # Falls back to '' (e.g. for a comment) so it is always defined.
 440         $base_tag_name_re = preg_match('/^<([\w:$]*)\b/', $text, $matches)
 441             ? $matches[1] : '';
 442
 443         #
 444         # Loop through every tag until we find the corresponding closing tag.
 445         #
 446         do {
 447             #
 448             # Split the text using the first $tag_re match found.
 449             # Text before the match will be first in the array, text after
 450             # the match will be at the end, and between will be any catches
 451             # made by the pattern.
 452             #
 453             $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
 454
 455             if (count($parts) < 3) {
 456                 #
 457                 # End of $text reached with unbalanced tag(s).
 458                 # In that case, we return original text unchanged and pass the
 459                 # first character as filtered to prevent an infinite loop in the
 460                 # parent function.
 461                 #
 462                 return array($original_text[0], substr($original_text, 1));
 463             }
 464
 465             $block_text .= $parts[0]; # Text before current tag.
 466             $tag         = $parts[1]; # Tag to handle.
 467             $text        = $parts[2]; # Remaining text after current tag.
 468
 469             #
 470             # Check for: Auto-close tag (like <hr/>)
 471             #            Comments and Processing Instructions.
 472             #
 473             if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) ||
 474                 $tag[1] == '!' || $tag[1] == '?')
 475             {
 476                 # Just add the tag to the block as if it was text.
 477                 $block_text .= $tag;
 478             }
 479             else {
 480                 #
 481                 # Increase/decrease nested tag count. Only do so if
 482                 # the tag's name match base tag's.
 483                 #
 484                 if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) {
 485                     if ($tag[1] == '/')                     $depth--;
 486                     else if ($tag[strlen($tag)-2] != '/')   $depth++;
 487                 }
 488
 489                 #
 490                 # Check for `markdown="1"` attribute and handle it.
                     # The value must be exactly `1`, `block` or `span`; the
                     # alternation is grouped so that both anchors apply to
                     # every alternative.
 491                 #
 492                 if ($md_attr &&
 493                     preg_match($markdown_attr_re, $tag, $attr_m) &&
 494                     preg_match('/^(?:1|block|span)$/', $attr_m[2] . $attr_m[3]))
 495                 {
 496                     # Remove `markdown` attribute from opening tag.
 497                     $tag = preg_replace($markdown_attr_re, '', $tag);
 498
 499                     # Check if text inside this tag must be parsed in span mode.
 500                     $this->mode = $attr_m[2] . $attr_m[3];
 501                     $span_mode = $this->mode == 'span' || $this->mode != 'block' &&
 502                         preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag);
 503
 504                     # Calculate indent before tag.
 505                     if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
 506                         $strlen = $this->utf8_strlen;
 507                         $indent = $strlen($matches[1], 'UTF-8');
 508                     } else {
 509                         $indent = 0;
 510                     }
 511
 512                     # End preceding block with this tag.
 513                     $block_text .= $tag;
 514                     $parsed .= $this->$hash_method($block_text);
 515
 516                     # Get enclosing tag name for the ParseMarkdown function.
 517                     # (This pattern makes $tag_name_re safe without quoting.)
 518                     preg_match('/^<([\w:$]*)\b/', $tag, $matches);
 519                     $tag_name_re = $matches[1];
 520
 521                     # Parse the content using the HTML-in-Markdown parser.
 522                     list ($block_text, $text)
 523                         = $this->_hashHTMLBlocks_inMarkdown($text, $indent,
 524                             $tag_name_re, $span_mode);
 525
 526                     # Outdent markdown text.
 527                     if ($indent > 0) {
 528                         $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
 529                                                     $block_text);
 530                     }
 531
 532                     # Append tag content to parsed text.
 533                     if (!$span_mode)    $parsed .= "\n\n$block_text\n\n";
 534                     else                $parsed .= "$block_text";
 535
 536                     # Start over a new block.
 537                     $block_text = "";
 538                 }
 539                 else $block_text .= $tag;
 540             }
 541
 542         } while ($depth > 0);
 543
 544         #
 545         # Hash last block text that wasn't processed inside the loop.
 546         #
 547         $parsed .= $this->$hash_method($block_text);
 548
 549         return array($parsed, $text);
 550     }
 551 
 552 
 553     function hashClean($text) {
 554     #
 555     # Hash a "clean" tag. Any function inserting such a tag in the text
 556     # passes it through here so it is escaped, blocking invalid nested overlap.
 557     #
 558         $hashed = $this->hashPart($text, 'C');
 559         return $hashed;
 560     }
 561 
 562 
 563     function doHeaders($text) {
 564     #
 565     # Header transformation, redefined to support an optional id attribute.
 566     #
 567         # First pass, Setext-style headers (underlined with = or -):
 568         #     Header 1  {#header1}
 569         #     ========
 570         #
 571         #     Header 2  {#header2}
 572         #     --------
 573         #
 574         $setext_re =
 575             '{
 576                 (^.+?)                              # $1: Header text
 577                 (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})?    # $2: Id attribute
 578                 [ ]*\n(=+|-+)[ ]*\n+                # $3: Header footer
 579             }mx';
 580         $text = preg_replace_callback($setext_re, array(&$this, '_doHeaders_callback_setext'), $text);
 581
 582         # Second pass, atx-style headers (prefixed with one to six #):
 583         #   # Header 1        {#header1}
 584         #   ## Header 2       {#header2}
 585         #   ## Header 2 with closing hashes ##  {#header3}
 586         #   ...
 587         #   ###### Header 6   {#header2}
 588         #
 589         $atx_re = '{
 590                 ^(\#{1,6})  # $1 = string of #\'s
 591                 [ ]*
 592                 (.+?)       # $2 = Header text
 593                 [ ]*
 594                 \#*         # optional closing #\'s (not counted)
 595                 (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # id attribute
 596                 [ ]*
 597                 \n+
 598             }xm';
 599
 600         return preg_replace_callback($atx_re,
 601             array(&$this, '_doHeaders_callback_atx'), $text);
 602     }
 603     function _doHeaders_attr($attr) {
         #
         # Build the id attribute for a header tag.
         #
         # Returns ' id="..."' (with a leading space) when $attr holds an id, or
         # '' when it is empty. The id "0" is kept: empty() alone would discard
         # it because PHP treats the string "0" as empty.
         #
 604         if (empty($attr) && $attr !== '0' && $attr !== 0)  return "";
 605         return " id=\"$attr\"";
 606     }
 607     function _doHeaders_callback_setext($matches) {
         #
         # Callback for the Setext-style pattern of doHeaders.
         #
         # $matches[1] is the header text, $matches[2] the optional id and
         # $matches[3] the underline. Returns the hashed <h1>/<h2> block
         # surrounded by newlines, or the matched text unchanged (see below).
         #
             # A single "-" under a line starting with "- " is left untouched
             # (presumably a list item rather than a header).
 608         if ($matches[3] == '-' && preg_match('{^- }', $matches[1]))
 609             return $matches[0];
             # "=" underline gives level 1, "-" gives level 2. The offset is read
             # with [] because the {} form is a parse error in PHP 8.
 610         $level = $matches[3][0] == '=' ? 1 : 2;
 611         $attr  = $this->_doHeaders_attr($id =& $matches[2]);
 612         $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";
 613         return "\n" . $this->hashBlock($block) . "\n\n";
 614     }
 615     function _doHeaders_callback_atx($matches) {
 616         $tag     = 'h' . strlen($matches[1]);   # One level per leading "#".
 617         $id_attr = $this->_doHeaders_attr($id =& $matches[3]);
 618         $html    = "<$tag$id_attr>" . $this->runSpanGamut($matches[2]) . "</$tag>";
 619         return "\n" . $this->hashBlock($html) . "\n\n";
 620     }
 621 
 622 
 623     function doTables($text) {
 624     #
 625     # Turn Markdown Extra tables into HTML tables.
 626     #
 627         $max_indent = $this->tab_width - 1;
 628         #
 629         # First pass: tables whose rows start with a pipe.
 630         #
 631         #   | Header 1 | Header 2
 632         #   | -------- | --------
 633         #   | Cell 1   | Cell 2
 634         #   | Cell 3   | Cell 4
 635         #
 636         $leading_pipe_re = '
 637             {
 638                 ^                           # Start of a line
 639                 [ ]{0,'.$max_indent.'}   # Allowed whitespace.
 640                 [|]                         # Optional leading pipe (present)
 641                 (.+) \n                     # $1: Header row (at least one pipe)
 642
 643                 [ ]{0,'.$max_indent.'}   # Allowed whitespace.
 644                 [|] ([ ]*[-:]+[-| :]*) \n   # $2: Header underline
 645
 646                 (                           # $3: Cells
 647                     (?>
 648                         [ ]*                # Allowed whitespace.
 649                         [|] .* \n           # Row content.
 650                     )*
 651                 )
 652                 (?=\n|\Z)                   # Stop at final double newline.
 653             }xm';
 654         $text = preg_replace_callback($leading_pipe_re, array(&$this, '_doTable_leadingPipe_callback'), $text);
 655
 656         #
 657         # Second pass: tables whose rows do not start with a pipe.
 658         #
 659         #   Header 1 | Header 2
 660         #   -------- | --------
 661         #   Cell 1   | Cell 2
 662         #   Cell 3   | Cell 4
 663         #
 664         $no_leading_pipe_re = '
 665             {
 666                 ^                           # Start of a line
 667                 [ ]{0,'.$max_indent.'}   # Allowed whitespace.
 668                 (\S.*[|].*) \n              # $1: Header row (at least one pipe)
 669
 670                 [ ]{0,'.$max_indent.'}   # Allowed whitespace.
 671                 ([-:]+[ ]*[|][-| :]*) \n    # $2: Header underline
 672
 673                 (                           # $3: Cells
 674                     (?>
 675                         .* [|] .* \n        # Row content
 676                     )*
 677                 )
 678                 (?=\n|\Z)                   # Stop at final double newline.
 679             }xm';
 680         $callback = array(&$this, '_DoTable_callback');
 681
 682         return preg_replace_callback($no_leading_pipe_re, $callback, $text);
 683     }
 684     function _doTable_leadingPipe_callback($matches) {
 685         # $matches: 0 = whole table, 1 = header row, 2 = underline, 3 = rows.
 686         # The leading pipe of the header row and underline is matched
 687         # outside of the captures (see doTables), so only the content
 688         # rows still carry one; strip it from each of them.
 689         $rows = preg_replace('/^ *[|]/m', '', $matches[3]);
 690
 691         $table = array($matches[0], $matches[1], $matches[2], $rows);
 692         return $this->_doTable_callback($table);
 693     }
 694     function _doTable_callback($matches) {
         #
         # Build the HTML for one table matched by doTables.
         #
         # $matches[1] is the header row, $matches[2] the header underline and
         # $matches[3] the content rows, all without a leading pipe.
         # Returns the hashed <table> block followed by a newline.
         #
 695         $head       = $matches[1];
 696         $underline  = $matches[2];
 697         $content    = $matches[3];
 698
 699         # Remove any trailing pipes for each line.
 700         $head       = preg_replace('/[|] *$/m', '', $head);
 701         $underline  = preg_replace('/[|] *$/m', '', $underline);
 702         $content    = preg_replace('/[|] *$/m', '', $content);
 703
 704         # Reading alignment from header underline.
             $attr       = array();
 705         $separators = preg_split('/ *[|] */', $underline);
 706         foreach ($separators as $n => $s) {
 707             if (preg_match('/^ *-+: *$/', $s))      $attr[$n] = ' align="right"';
 708             else if (preg_match('/^ *:-+: *$/', $s))$attr[$n] = ' align="center"';
 709             else if (preg_match('/^ *:-+ *$/', $s)) $attr[$n] = ' align="left"';
 710             else                                    $attr[$n] = '';
 711         }
 712
 713         # Parsing span elements, including code spans, character escapes,
 714         # and inline HTML tags, so that pipes inside those gets ignored.
 715         $head       = $this->parseSpan($head);
 716         $headers    = preg_split('/ *[|] */', $head);
 717         $col_count  = count($headers);
             # The underline may describe fewer columns than the header row;
             # pad the alignment list so every column index below is defined.
             $attr       = array_pad($attr, $col_count, '');
 718
 719         # Write column headers.
 720         $text = "<table>\n";
 721         $text .= "<thead>\n";
 722         $text .= "<tr>\n";
 723         foreach ($headers as $n => $header)
 724             $text .= "  <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n";
 725         $text .= "</tr>\n";
 726         $text .= "</thead>\n";
 727
 728         # Split content by row.
 729         $rows = explode("\n", trim($content, "\n"));
 730
 731         $text .= "<tbody>\n";
 732         foreach ($rows as $row) {
 733             # Parsing span elements, including code spans, character escapes,
 734             # and inline HTML tags, so that pipes inside those gets ignored.
 735             $row = $this->parseSpan($row);
 736
 737             # Split row by cell; extra cells are folded into the last column
 738             # and missing ones are filled with empty cells.
 739             $row_cells = preg_split('/ *[|] */', $row, $col_count);
 740             $row_cells = array_pad($row_cells, $col_count, '');
 741
 742             $text .= "<tr>\n";
 743             foreach ($row_cells as $n => $cell)
 744                 $text .= "  <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n";
 745             $text .= "</tr>\n";
 746         }
 747         $text .= "</tbody>\n";
 748         $text .= "</table>";
 749
 750         return $this->hashBlock($text) . "\n";
 751     }
 751 
 752 
 753     function doDefLists($text) {
 754     #
 755     # Form HTML definition lists.
 756     #
 757         $less_than_tab = $this->tab_width - 1;
 758 
 759         # Re-usable pattern to match any entire dl list:
 760         $whole_list_re = '(?>
 761             (                               # $1 = whole list
 762               (                             # $2
 763                 [ ]{0,'.$less_than_tab.'}
 764                 ((?>.*\S.*\n)+)             # $3 = defined term
 765                 \n?
 766                 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
 767               )
 768               (?s:.+?)
 769               (                             # $4
 770                   \z
 771                 |
 772                   \n{2,}
 773                   (?=\S)
 774                   (?!                       # Negative lookahead for another term
 775                     [ ]{0,'.$less_than_tab.'}
 776                     (?: \S.*\n )+?          # defined term
 777                     \n?
 778                     [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
 779                   )
 780                   (?!                       # Negative lookahead for another definition
 781                     [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
 782                   )
 783               )
 784             )
 785         )'; // mx
 786 
 787         $text = preg_replace_callback('{
 788                 (?>\A\n?|(?<=\n\n))
 789                 '.$whole_list_re.'
 790             }mx',
 791             array(&$this, '_doDefLists_callback'), $text);
 792 
 793         return $text;
 794     }
 795     function _doDefLists_callback($matches) {
 796         # Re-usable patterns to match list item bullets and number markers:
 797         $list = $matches[1];
 798 
 799         # Turn double returns into triple returns, so that we can make a
 800         # paragraph for the last item in a list, if necessary:
 801         $result = trim($this->processDefListItems($list));
 802         $result = "<dl>\n" . $result . "\n</dl>";
 803         return $this->hashBlock($result) . "\n\n";
 804     }
 805 
 806 
 807     function processDefListItems($list_str) {
 808     #
 809     #   Process the contents of a single definition list, splitting it
 810     #   into individual term and definition list items.
 811     #
 812         $less_than_tab = $this->tab_width - 1;
 813 
 814         # trim trailing blank lines:
 815         $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
 816 
 817         # Process definition terms.
 818         $list_str = preg_replace_callback('{
 819             (?>\A\n?|\n\n+)                 # leading line
 820             (                               # definition terms = $1
 821                 [ ]{0,'.$less_than_tab.'}   # leading whitespace
 822                 (?![:][ ]|[ ])              # negative lookahead for a definition
 823                                             #   mark (colon) or more whitespace.
 824                 (?> \S.* \n)+?              # actual term (not whitespace).
 825             )
 826             (?=\n?[ ]{0,3}:[ ])             # lookahead for following line feed
 827                                             #   with a definition mark.
 828             }xm',
 829             array(&$this, '_processDefListItems_callback_dt'), $list_str);
 830 
 831         # Process actual definitions.
 832         $list_str = preg_replace_callback('{
 833             \n(\n+)?                        # leading line = $1
 834             (                               # marker space = $2
 835                 [ ]{0,'.$less_than_tab.'}   # whitespace before colon
 836                 [:][ ]+                     # definition mark (colon)
 837             )
 838             ((?s:.+?))                      # definition text = $3
 839             (?= \n+                         # stop at next definition mark,
 840                 (?:                         # next term or end of text
 841                     [ ]{0,'.$less_than_tab.'} [:][ ]    |
 842                     <dt> | \z
 843                 )
 844             )
 845             }xm',
 846             array(&$this, '_processDefListItems_callback_dd'), $list_str);
 847 
 848         return $list_str;
 849     }
 850     function _processDefListItems_callback_dt($matches) {
 851         $terms = explode("\n", trim($matches[1]));
 852         $text = '';
 853         foreach ($terms as $term) {
 854             $term = $this->runSpanGamut(trim($term));
 855             $text .= "\n<dt>" . $term . "</dt>";
 856         }
 857         return $text . "\n";
 858     }
 859     function _processDefListItems_callback_dd($matches) {
 860         $leading_line   = $matches[1];
 861         $marker_space   = $matches[2];
 862         $def            = $matches[3];
 863 
 864         if ($leading_line || preg_match('/\n{2,}/', $def)) {
 865             # Replace marker with the appropriate whitespace indentation
 866             $def = str_repeat(' ', strlen($marker_space)) . $def;
 867             $def = $this->runBlockGamut($this->outdent($def . "\n\n"));
 868             $def = "\n". $def ."\n";
 869         }
 870         else {
 871             $def = rtrim($def);
 872             $def = $this->runSpanGamut($this->outdent($def));
 873         }
 874 
 875         return "\n<dd>" . $def . "</dd>\n";
 876     }
 877 
 878     /**
 879      * Adding the fenced code block syntax to regular Markdown:
 880      *
 881      * ~~~
 882      * Code block
 883      * ~~~
 884      *
 885      * ```php
 886      * <?php echo "php!"
 887      * ```
 888      *
 889      * @param string $text
 890      */
 891     protected function doFencedCodeBlocks($text)
 892     {
 893         $less_than_tab = $this->tab_width;
 894 
 895         $text = preg_replace_callback('{
 896                 (?:\n|\A)
 897                 # 1: Opening marker
 898                 (
 899                     [~`]{3,} # Marker: three tilde or more. // @olvlv: ~ or `
 900                 )
 901                 (\S*)[ ]* \n # Whitespace and newline following marker. // @olvlv code block type
 902 
 903                 # 2: Content
 904                 (
 905                     (?>
 906                         (?!\1 [ ]* \n)  # Not a closing marker.
 907                         .*\n+
 908                     )+
 909                 )
 910 
 911                 # Closing marker.
 912                 \1 [ ]* \n
 913             }xm',
 914             array($this, '_doFencedCodeBlocks_callback'), $text);
 915 
 916         return $text;
 917     }
 918 
 919     protected function _doFencedCodeBlocks_callback($matches) // @olvlv update to support "type"
 920     {
 921         $class = $matches[2];
 922         $codeblock = $matches[3];
 923 
 924         if ($class)
 925         {
 926             $codeblock = $this->format_codeblock($codeblock, $class);
 927             $codeblock = $this->unhash($codeblock);
 928         }
 929         else
 930         {
 931             $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
 932             $codeblock = preg_replace_callback('/^\n+/', array($this, '_doFencedCodeBlocks_newlines'), $codeblock);
 933         }
 934 
 935         $codeblock = '<pre' . ($class ? ' class="' . $class . '"' : '') . '><code>' . $codeblock . '</code></pre>';
 936 
 937         return "\n\n".$this->hashBlock($codeblock)."\n\n";
 938     }
 939 
 940     function _doFencedCodeBlocks_newlines($matches)
 941     {
 942         return str_repeat("<br$this->empty_element_suffix", strlen($matches[0]));
 943     }
 944 
 945 
    #
    # Redefining emphasis markers so that emphasis by underscore does not
    # work in the middle of a word.
    #
    # Each table maps a marker string to a regular-expression fragment.
    # NOTE(review): presumably the '' entry is the pattern for an opening
    # marker and the other entries are the patterns closing that marker;
    # the code consuming these tables is not in this part of the file, so
    # confirm against the parent parser.
    #
    # In the '' patterns an underscore marker must not be preceded by a
    # letter, digit or underscore; in the '_', '__' and '___' patterns it
    # must not be followed by one. The asterisk patterns have no such
    # restriction.
    #
    var $em_relist = array(
        ''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?=\S|$)(?![\.,:;]\s)',
        '*' => '(?<=\S|^)(?<!\*)\*(?!\*)',
        '_' => '(?<=\S|^)(?<!_)_(?![a-zA-Z0-9_])',
        );
    var $strong_relist = array(
        ''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?=\S|$)(?![\.,:;]\s)',
        '**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)',
        '__' => '(?<=\S|^)(?<!_)__(?![a-zA-Z0-9_])',
        );
    var $em_strong_relist = array(
        ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?=\S|$)(?![\.,:;]\s)',
        '***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)',
        '___' => '(?<=\S|^)(?<!_)___(?![a-zA-Z0-9_])',
        );
 965 
 966 
 967     function formParagraphs($text) {
 968     #
 969     #   Params:
 970     #       $text - string to process with html <p> tags
 971     #
 972         # Strip leading and trailing lines:
 973         $text = preg_replace('/\A\n+|\n+\z/', '', $text);
 974 
 975         $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
 976 
 977         #
 978         # Wrap <p> tags and unhashify HTML blocks
 979         #
 980         foreach ($grafs as $key => $value) {
 981             $value = trim($this->runSpanGamut($value));
 982 
 983             # Check if this should be enclosed in a paragraph.
 984             # Clean tag hashes & block tag hashes are left alone.
 985             $is_p = !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value);
 986 
 987             if ($is_p) {
 988                 $value = "<p>$value</p>";
 989             }
 990             $grafs[$key] = $value;
 991         }
 992 
 993         # Join grafs in one text, then unhash HTML tags.
 994         $text = implode("\n\n", $grafs);
 995 
 996         # Finish by removing any tag hashes still present in $text.
 997         $text = $this->unhash($text);
 998 
 999         return $text;
1000     }
1001 
1002 
1003     ### Footnotes
1004 
1005     function stripFootnotes($text) {
1006     #
1007     # Strips link definitions from text, stores the URLs and titles in
1008     # hash references.
1009     #
1010         $less_than_tab = $this->tab_width - 1;
1011 
1012         # Link defs are in the form: [^id]: url "optional title"
1013         $text = preg_replace_callback('{
1014             ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?:  # note_id = $1
1015               [ ]*
1016               \n?                   # maybe *one* newline
1017             (                       # text = $2 (no blank lines allowed)
1018                 (?:
1019                     .+              # actual text
1020                 |
1021                     \n              # newlines but
1022                     (?!\[\^.+?\]:\s)# negative lookahead for footnote marker.
1023                     (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
1024                                     # by non-indented content
1025                 )*
1026             )
1027             }xm',
1028             array(&$this, '_stripFootnotes_callback'),
1029             $text);
1030         return $text;
1031     }
1032     function _stripFootnotes_callback($matches) {
1033         $note_id = $this->fn_id_prefix . $matches[1];
1034         $this->footnotes[$note_id] = $this->outdent($matches[2]);
1035         return ''; # String that will replace the block
1036     }
1037 
1038 
1039     function doFootnotes($text) {
1040     #
1041     # Replace footnote references in $text [^id] with a special text-token
1042     # which will be replaced by the actual footnote marker in appendFootnotes.
1043     #
1044         if (!$this->in_anchor) {
1045             $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
1046         }
1047         return $text;
1048     }
1049 
1050 
1051     function appendFootnotes($text) {
1052     #
1053     # Append footnote list to text.
1054     #
1055         $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
1056             array(&$this, '_appendFootnotes_callback'), $text);
1057 
1058         if (!empty($this->footnotes_ordered)) {
1059             $text .= "\n\n";
1060             $text .= "<div class=\"footnotes\">\n";
1061             $text .= "<hr". $this->empty_element_suffix ."\n";
1062             $text .= "<ol>\n\n";
1063 
1064             $attr = " rev=\"footnote\"";
1065             if ($this->fn_backlink_class != "") {
1066                 $class = $this->fn_backlink_class;
1067                 $class = $this->encodeAttribute($class);
1068                 $attr .= " class=\"$class\"";
1069             }
1070             if ($this->fn_backlink_title != "") {
1071                 $title = $this->fn_backlink_title;
1072                 $title = $this->encodeAttribute($title);
1073                 $attr .= " title=\"$title\"";
1074             }
1075             $num = 0;
1076 
1077             while (!empty($this->footnotes_ordered)) {
1078                 $footnote = reset($this->footnotes_ordered);
1079                 $note_id = key($this->footnotes_ordered);
1080                 unset($this->footnotes_ordered[$note_id]);
1081 
1082                 $footnote .= "\n"; # Need to append newline before parsing.
1083                 $footnote = $this->runBlockGamut("$footnote\n");
1084                 $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
1085                     array(&$this, '_appendFootnotes_callback'), $footnote);
1086 
1087                 $attr = str_replace("%%", ++$num, $attr);
1088                 $note_id = $this->encodeAttribute($note_id);
1089 
1090                 # Add backlink to last paragraph; create new paragraph if needed.
1091                 $backlink = "<a href=\"#fnref:$note_id\"$attr>&#8617;</a>";
1092                 if (preg_match('{</p>$}', $footnote)) {
1093                     $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
1094                 } else {
1095                     $footnote .= "\n\n<p>$backlink</p>";
1096                 }
1097 
1098                 $text .= "<li id=\"fn:$note_id\">\n";
1099                 $text .= $footnote . "\n";
1100                 $text .= "</li>\n\n";
1101             }
1102 
1103             $text .= "</ol>\n";
1104             $text .= "</div>";
1105         }
1106         return $text;
1107     }
1108     function _appendFootnotes_callback($matches) {
1109         $node_id = $this->fn_id_prefix . $matches[1];
1110 
1111         # Create footnote marker only if it has a corresponding footnote *and*
1112         # the footnote hasn't been used by another marker.
1113         if (isset($this->footnotes[$node_id])) {
1114             # Transfert footnote content to the ordered list.
1115             $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
1116             unset($this->footnotes[$node_id]);
1117 
1118             $num = $this->footnote_counter++;
1119             $attr = " rel=\"footnote\"";
1120             if ($this->fn_link_class != "") {
1121                 $class = $this->fn_link_class;
1122                 $class = $this->encodeAttribute($class);
1123                 $attr .= " class=\"$class\"";
1124             }
1125             if ($this->fn_link_title != "") {
1126                 $title = $this->fn_link_title;
1127                 $title = $this->encodeAttribute($title);
1128                 $attr .= " title=\"$title\"";
1129             }
1130 
1131             $attr = str_replace("%%", $num, $attr);
1132             $node_id = $this->encodeAttribute($node_id);
1133 
1134             return
1135                 "<sup id=\"fnref:$node_id\">".
1136                 "<a href=\"#fn:$node_id\"$attr>$num</a>".
1137                 "</sup>";
1138         }
1139 
1140         return "[^".$matches[1]."]";
1141     }
1142 
1143 
1144     ### Abbreviations ###
1145 
1146     function stripAbbreviations($text) {
1147     #
1148     # Strips abbreviations from text, stores titles in hash references.
1149     #
1150         $less_than_tab = $this->tab_width - 1;
1151 
1152         # Link defs are in the form: [id]*: url "optional title"
1153         $text = preg_replace_callback('{
1154             ^[ ]{0,'.$less_than_tab.'}\*\[(.+?)\][ ]?:  # abbr_id = $1
1155             (.*)                    # text = $2 (no blank lines allowed)
1156             }xm',
1157             array(&$this, '_stripAbbreviations_callback'),
1158             $text);
1159         return $text;
1160     }
1161     function _stripAbbreviations_callback($matches) {
1162         $abbr_word = $matches[1];
1163         $abbr_desc = $matches[2];
1164         if ($this->abbr_word_re)
1165             $this->abbr_word_re .= '|';
1166         $this->abbr_word_re .= preg_quote($abbr_word);
1167         $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
1168         return ''; # String that will replace the block
1169     }
1170 
1171 
1172     function doAbbreviations($text) {
1173     #
1174     # Find defined abbreviations in text and wrap them in <abbr> elements.
1175     #
1176         if ($this->abbr_word_re) {
1177             // cannot use the /x modifier because abbr_word_re may
1178             // contain significant spaces:
1179             $text = preg_replace_callback('{'.
1180                 '(?<![\w\x1A])'.
1181                 '(?:'.$this->abbr_word_re.')'.
1182                 '(?![\w\x1A])'.
1183                 '}',
1184                 array(&$this, '_doAbbreviations_callback'), $text);
1185         }
1186         return $text;
1187     }
1188     function _doAbbreviations_callback($matches) {
1189         $abbr = $matches[0];
1190         if (isset($this->abbr_desciptions[$abbr])) {
1191             $desc = $this->abbr_desciptions[$abbr];
1192             if (empty($desc)) {
1193                 return $this->hashPart("<abbr>$abbr</abbr>");
1194             } else {
1195                 $desc = $this->encodeAttribute($desc);
1196                 return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");
1197             }
1198         } else {
1199             return $matches[0];
1200         }
1201     }
1202 
1203 }
1204 
1205 
1206 /*
1207 
1208 PHP Markdown Extra
1209 ==================
1210 
1211 Description
1212 -----------
1213 
1214 This is a PHP port of the original Markdown formatter written in Perl
1215 by John Gruber. This special "Extra" version of PHP Markdown features
1216 further enhancements to the syntax for making additional constructs
1217 such as tables and definition lists.
1218 
1219 Markdown is a text-to-HTML filter; it translates an easy-to-read /
1220 easy-to-write structured text format into HTML. Markdown's text format
1221 is most similar to that of plain text email, and supports features such
1222 as headers, *emphasis*, code blocks, blockquotes, and links.
1223 
1224 Markdown's syntax is designed not as a generic markup language, but
1225 specifically to serve as a front-end to (X)HTML. You can use span-level
1226 HTML tags anywhere in a Markdown document, and you can use block level
1227 HTML tags (like <div> and <table>) as well.
1228 
1229 For more information about Markdown's syntax, see:
1230 
1231 <http://daringfireball.net/projects/markdown/>
1232 
1233 
1234 Bugs
1235 ----
1236 
1237 To file bug reports please send email to:
1238 
1239 <michel.fortin@michelf.com>
1240 
1241 Please include with your report: (1) the example input; (2) the output you
1242 expected; (3) the output Markdown actually produced.
1243 
1244 
1245 Version History
1246 ---------------
1247 
1248 See the readme file for detailed release notes for this version.
1249 
1250 
1251 Copyright and License
1252 ---------------------
1253 
1254 PHP Markdown & Extra
1255 Copyright (c) 2004-2009 Michel Fortin
1256 <http://michelf.com/>
1257 All rights reserved.
1258 
1259 Based on Markdown
1260 Copyright (c) 2003-2006 John Gruber
1261 <http://daringfireball.net/>
1262 All rights reserved.
1263 
1264 Redistribution and use in source and binary forms, with or without
1265 modification, are permitted provided that the following conditions are
1266 met:
1267 
1268 *   Redistributions of source code must retain the above copyright notice,
1269     this list of conditions and the following disclaimer.
1270 
1271 *   Redistributions in binary form must reproduce the above copyright
1272     notice, this list of conditions and the following disclaimer in the
1273     documentation and/or other materials provided with the distribution.
1274 
1275 *   Neither the name "Markdown" nor the names of its contributors may
1276     be used to endorse or promote products derived from this software
1277     without specific prior written permission.
1278 
1279 This software is provided by the copyright holders and contributors "as
1280 is" and any express or implied warranties, including, but not limited
1281 to, the implied warranties of merchantability and fitness for a
1282 particular purpose are disclaimed. In no event shall the copyright owner
1283 or contributors be liable for any direct, indirect, incidental, special,
1284 exemplary, or consequential damages (including, but not limited to,
1285 procurement of substitute goods or services; loss of use, data, or
1286 profits; or business interruption) however caused and on any theory of
1287 liability, whether in contract, strict liability, or tort (including
1288 negligence or otherwise) arising in any way out of the use of this
1289 software, even if advised of the possibility of such damage.
1290 
1291 */
Namespaces

Classes

Functions