Autodoc
  • Namespace
  • Class
  • Tree

Namespaces

  • BlueTihi
    • Context
  • Brickrouge
    • Element
      • Nodes
    • Renderer
    • Widget
  • ICanBoogie
    • ActiveRecord
    • AutoConfig
    • CLDR
    • Composer
    • Core
    • Event
    • Exception
    • HTTP
      • Dispatcher
      • Request
    • I18n
      • Translator
    • Mailer
    • Modules
      • Taxonomy
        • Support
      • Thumbnailer
        • Versions
    • Object
    • Operation
      • Dispatcher
    • Prototype
    • Routes
    • Routing
      • Dispatcher
    • Session
  • Icybee
    • ActiveRecord
      • Model
    • ConfigOperation
    • Document
    • EditBlock
    • Element
      • ActionbarContextual
      • ActionbarSearch
      • ActionbarToolbar
    • FormBlock
    • Installer
    • ManageBlock
    • Modules
      • Articles
      • Cache
        • Collection
        • ManageBlock
      • Comments
        • ManageBlock
      • Contents
        • ManageBlock
      • Dashboard
      • Editor
        • Collection
      • Files
        • File
        • ManageBlock
      • Forms
        • Form
        • ManageBlock
      • I18n
      • Images
        • ManageBlock
      • Members
      • Modules
        • ManageBlock
      • Nodes
        • ManageBlock
        • Module
      • Pages
        • BreadcrumbElement
        • LanguagesElement
        • ManageBlock
        • NavigationBranchElement
        • NavigationElement
        • Page
        • PageController
      • Registry
      • Search
      • Seo
      • Sites
        • ManageBlock
      • Taxonomy
        • Terms
          • ManageBlock
        • Vocabulary
          • ManageBlock
      • Users
        • ManageBlock
        • NonceLogin
        • Roles
      • Views
        • ActiveRecordProvider
        • Collection
        • View
    • Operation
      • ActiveRecord
      • Constructor
      • Module
      • Widget
    • Rendering
  • None
  • Patron
  • PHP

Classes

  • AdminDecorator
  • AdminIndexController
  • BlockController
  • BlockDecorator
  • ConfigBlock
  • ConfigController
  • ConfigOperation
  • Core
  • DeleteBlock
  • DeleteController
  • Document
  • DocumentDecorator
  • EditBlock
  • EditController
  • FormBlock
  • Hooks
  • InterlockBlock
  • Kses
  • ManageBlock
  • Module
  • Modules
  • StatsDecorator

Constants

  • OPERATION_SAVE_MODE
  • OPERATION_SAVE_MODE_CONTINUE
  • OPERATION_SAVE_MODE_DISPLAY
  • OPERATION_SAVE_MODE_LIST
  • OPERATION_SAVE_MODE_NEW

Functions

  • slugize
  • start
  • strip_stopwords
   1 <?php
   2 
   3 namespace Icybee;
   4 
   5 /**
   6  * HTML/XHTML filter that only allows some elements and attributes
   7  *
   8  * Added wp_ prefix to avoid conflicts with existing kses users
   9  *
  10  * @version 0.2.2
  11  * @copyright (C) 2002, 2003, 2005
  12  * @author Ulf Harnhammar <metaur@users.sourceforge.net>
  13  *
  14  * @package External
  15  * @subpackage KSES
  16  *
  17  * @internal
  18  * *** CONTACT INFORMATION ***
  19  * E-mail:      metaur at users dot sourceforge dot net
  20  * Web page:    http://sourceforge.net/projects/kses
  21  * Paper mail:  Ulf Harnhammar
  22  *              Ymergatan 17 C
  23  *              753 25  Uppsala
  24  *              SWEDEN
  25  *
  26  * [kses strips evil scripts!]
  27  */
  28 
  29 
  30 class Kses
  31 {
  32     const VERSION = '0.2.2';
  33 
  34     /**
  35      * Kses allowed HTML elements.
  36      *
  37      * @global array $default_allowed_comment_tags
  38      * @since 1.0.0
  39      */
  40 
  41     static public $default_allowed_comment_tags = array
  42     (
  43         'a' => array
  44         (
  45             'href' => array(),
  46             'title' => array()
  47         ),
  48 
  49         'abbr' => array
  50         (
  51             'title' => array()
  52         ),
  53 
  54         'acronym' => array
  55         (
  56             'title' => array ()
  57         ),
  58 
  59         'b' => array(),
  60 
  61         'blockquote' => array
  62         (
  63             'cite' => array()
  64         ),
  65 
  66         'br' => array(),
  67         'cite' => array (),
  68         'code' => array
  69         (
  70             'class' => array()
  71         ),
  72         'del' => array
  73         (
  74             'datetime' => array ()
  75         ),
  76         //  'dd' => array(),
  77         //  'dl' => array(),
  78         //  'dt' => array(),
  79         'em' => array (),
  80         'i' => array (),
  81         'img' => array
  82         (
  83             'alt' => array (),
  84             'align' => array (),
  85             //'border' => array (),
  86             'class' => array (),
  87             'height' => array (),
  88             //'hspace' => array (),
  89             //'longdesc' => array (),
  90             //'vspace' => array (),
  91             'src' => array (),
  92             //'style' => array (),
  93             'width' => array ()
  94         ),
  95 
  96         'ins' => array
  97         (
  98             'datetime' => array(), 'cite' => array()
  99         ),
 100         'li' => array(),
 101         'ol' => array(),
 102         'p' => array(),
 103         'pre' => array('class' => array()),
 104         'q' => array
 105         (
 106             'cite' => array ()
 107         ),
 108         'span' => array
 109         (
 110             'class' => array()
 111         ),
 112         'strike' => array(),
 113         'strong' => array(),
 114         //  'sub' => array(),
 115         //  'sup' => array(),
 116         //  'u' => array(),
 117         'ul' => array()
 118     );
 119 
 120     /**
  121      * Default allowed HTML elements and attributes for article content.
  122      *
  123      * @var array $default_allowed_article_tags
 124      * @since 2.0.0
 125      */
 126 
 127     static public $default_allowed_article_tags = array
 128     (
 129         'address' => array(),
 130         'a' => array(
 131             'class' => array (),
 132             'href' => array (),
 133             'id' => array (),
 134             'title' => array (),
 135             'rel' => array (),
 136             'rev' => array (),
 137             'name' => array (),
 138             'target' => array()),
 139         'abbr' => array(
 140             'class' => array (),
 141             'title' => array ()),
 142         'acronym' => array(
 143             'title' => array ()),
 144         'b' => array(),
 145         'big' => array(),
 146         'blockquote' => array(
 147             'id' => array (),
 148             'cite' => array (),
 149             'class' => array(),
 150             'lang' => array(),
 151             'xml:lang' => array()),
 152         'br' => array (
 153             'class' => array ()),
 154         'button' => array(
 155             'disabled' => array (),
 156             'name' => array (),
 157             'type' => array (),
 158             'value' => array ()),
 159         'caption' => array(
 160             'align' => array (),
 161             'class' => array ()),
 162         'cite' => array (
 163             'class' => array(),
 164             'dir' => array(),
 165             'lang' => array(),
 166             'title' => array ()),
 167         'code' => array (
 168             'style' => array()),
 169         'col' => array(
 170             'align' => array (),
 171             'char' => array (),
 172             'charoff' => array (),
 173             'span' => array (),
 174             'dir' => array(),
 175             'style' => array (),
 176             'valign' => array (),
 177             'width' => array ()),
 178         'del' => array(
 179             'datetime' => array ()),
 180         'dd' => array(),
 181         'div' => array(
 182             'align' => array (),
 183             'class' => array (),
 184             'dir' => array (),
 185             'lang' => array(),
 186             'style' => array (),
 187             'xml:lang' => array()),
 188         'dl' => array(),
 189         'dt' => array(),
 190         'em' => array(),
 191         'fieldset' => array(),
 192         'font' => array(
 193             'color' => array (),
 194             'face' => array (),
 195             'size' => array ()),
 196         'form' => array(
 197             'action' => array (),
 198             'accept' => array (),
 199             'accept-charset' => array (),
 200             'enctype' => array (),
 201             'method' => array (),
 202             'name' => array (),
 203             'target' => array ()),
 204         'h1' => array(
 205             'align' => array (),
 206             'class' => array (),
 207             'id'    => array (),
 208             'style' => array ()),
 209         'h2' => array (
 210             'align' => array (),
 211             'class' => array (),
 212             'id'    => array (),
 213             'style' => array ()),
 214         'h3' => array (
 215             'align' => array (),
 216             'class' => array (),
 217             'id'    => array (),
 218             'style' => array ()),
 219         'h4' => array (
 220             'align' => array (),
 221             'class' => array (),
 222             'id'    => array (),
 223             'style' => array ()),
 224         'h5' => array (
 225             'align' => array (),
 226             'class' => array (),
 227             'id'    => array (),
 228             'style' => array ()),
 229         'h6' => array (
 230             'align' => array (),
 231             'class' => array (),
 232             'id'    => array (),
 233             'style' => array ()),
 234         'hr' => array (
 235             'align' => array (),
 236             'class' => array (),
 237             'noshade' => array (),
 238             'size' => array (),
 239             'width' => array ()),
 240         'i' => array(),
 241         'img' => array(
 242             'alt' => array (),
 243             'align' => array (),
 244             'border' => array (),
 245             'class' => array (),
 246             'height' => array (),
 247             'hspace' => array (),
 248             'longdesc' => array (),
 249             'vspace' => array (),
 250             'src' => array (),
 251             'style' => array (),
 252             'width' => array ()),
 253         'ins' => array(
 254             'datetime' => array (),
 255             'cite' => array ()),
 256         'kbd' => array(),
 257         'label' => array(
 258             'for' => array ()),
 259         'legend' => array(
 260             'align' => array ()),
 261         'li' => array (
 262             'align' => array (),
 263             'class' => array ()),
 264         'p' => array(
 265             'class' => array (),
 266             'align' => array (),
 267             'dir' => array(),
 268             'lang' => array(),
 269             'style' => array (),
 270             'xml:lang' => array()),
 271         'param' => array
 272         (
 273             'name' => true,
 274             'value' => true
 275         ),
 276         'pre' => array(
 277             'style' => array(),
 278             'width' => array ()),
 279         'q' => array(
 280             'cite' => array ()),
 281         's' => array(),
 282         'span' => array (
 283             'class' => array (),
 284             'dir' => array (),
 285             'align' => array (),
 286             'lang' => array (),
 287             'style' => array (),
 288             'title' => array (),
 289             'xml:lang' => array()),
 290         'strike' => array(),
 291         'strong' => array(),
 292         'sub' => array(),
 293         'sup' => array(),
 294         'table' => array(
 295             'align' => array (),
 296             'bgcolor' => array (),
 297             'border' => array (),
 298             'cellpadding' => array (),
 299             'cellspacing' => array (),
 300             'class' => array (),
 301             'dir' => array(),
 302             'id' => array(),
 303             'rules' => array (),
 304             'style' => array (),
 305             'summary' => array (),
 306             'width' => array ()),
 307         'tbody' => array(
 308             'align' => array (),
 309             'char' => array (),
 310             'charoff' => array (),
 311             'valign' => array ()),
 312         'td' => array(
 313             'abbr' => array (),
 314             'align' => array (),
 315             'axis' => array (),
 316             'bgcolor' => array (),
 317             'char' => array (),
 318             'charoff' => array (),
 319             'class' => array (),
 320             'colspan' => array (),
 321             'dir' => array(),
 322             'headers' => array (),
 323             'height' => array (),
 324             'nowrap' => array (),
 325             'rowspan' => array (),
 326             'scope' => array (),
 327             'style' => array (),
 328             'valign' => array (),
 329             'width' => array ()),
 330         'textarea' => array(
 331             'cols' => array (),
 332             'rows' => array (),
 333             'disabled' => array (),
 334             'name' => array (),
 335             'readonly' => array ()),
 336         'tfoot' => array(
 337             'align' => array (),
 338             'char' => array (),
 339             'class' => array (),
 340             'charoff' => array (),
 341             'valign' => array ()),
 342         'th' => array(
 343             'abbr' => array (),
 344             'align' => array (),
 345             'axis' => array (),
 346             'bgcolor' => array (),
 347             'char' => array (),
 348             'charoff' => array (),
 349             'class' => array (),
 350             'colspan' => array (),
 351             'headers' => array (),
 352             'height' => array (),
 353             'nowrap' => array (),
 354             'rowspan' => array (),
 355             'scope' => array (),
 356             'valign' => array (),
 357             'width' => array ()),
 358         'thead' => array(
 359             'align' => array (),
 360             'char' => array (),
 361             'charoff' => array (),
 362             'class' => array (),
 363             'valign' => array ()),
 364         'title' => array(),
 365         'tr' => array(
 366             'align' => array (),
 367             'bgcolor' => array (),
 368             'char' => array (),
 369             'charoff' => array (),
 370             'class' => array (),
 371             'style' => array (),
 372             'valign' => array ()),
 373         'tt' => array(),
 374         'u' => array(),
 375         'ul' => array (
 376             'class' => array (),
 377             'style' => array (),
 378             'type' => array ()),
 379         'object' => array
 380         (
 381             'width' => array(),
 382             'height' => array(),
 383             'align' => array(),
 384             'data' => array()
 385         ),
 386         'ol' => array (
 387             'class' => array (),
 388             'start' => array (),
 389             'style' => array (),
 390             'type' => array ()),
 391         'var' => array ());
 392 
 393     /**
 394      * Filters content and keeps only allowable HTML elements.
 395      *
 396      * This function makes sure that only the allowed HTML element names, attribute
 397      * names and attribute values plus only sane HTML entities will occur in
 398      * $string. You have to remove any slashes from PHP's magic quotes before you
 399      * call this function.
 400      *
  401      * The default allowed protocols are 'http', 'https', 'ftp', 'ftps', 'mailto',
  402      * 'news', 'irc', 'gopher', 'nntp', 'feed', and finally 'telnet'. This covers all common
 403      * link protocols, except for 'javascript' which should not be allowed for
 404      * untrusted users.
 405      *
 406      * @since 1.0.0
 407      *
 408      * @param string $string Content to filter through kses
 409      * @param array $allowed_html List of allowed HTML elements
 410      * @param array $allowed_protocols Optional. Allowed protocol in links.
 411      * @return string Filtered content with only allowed HTML elements
 412      */
 413 
 414     static public function sanitize($string, $allowed_html=array(), $allowed_protocols = array('http', 'https', 'ftp', 'ftps', 'mailto', 'news', 'irc', 'gopher', 'nntp', 'feed', 'telnet'))
 415     {
 416         $string = self::no_null($string);
 417         $string = self::js_entities($string);
 418         $string = self::normalize_entities($string);
 419         $allowed_html_fixed = self::array_lc_keys($allowed_html);
 420 
 421         return self::split($string, $allowed_html_fixed, $allowed_protocols);
 422     }
 423 
 424     static public function sanitizeComment($str)
 425     {
 426         return self::sanitize($str, self::$default_allowed_comment_tags, array('http', 'https'));
 427         }
 428 
 429     static public function sanitizeArticle($str)
 430     {
 431         return self::sanitize($str, self::$default_allowed_article_tags, array('http', 'https'));
 432     }
 433 
 434     /**
 435      * Removes any NULL characters in $string.
 436      *
 437      * @since 1.0.0
 438      *
 439      * @param string $string
 440      * @return string
 441      */
 442 
 443     static function no_null($string)
 444     {
 445         $string = preg_replace('/\0+/', '', $string);
 446         $string = preg_replace('/(\\\\0)+/', '', $string);
 447 
 448         return $string;
 449     }
 450 
 451     /**
 452      * Removes the HTML JavaScript entities found in early versions of Netscape 4.
 453      *
 454      * @since 1.0.0
 455      *
 456      * @param string $string
 457      * @return string
 458      */
 459 
 460     static function js_entities($string)
 461     {
 462         return preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
 463     }
 464 
 465     /**
 466      * Converts and fixes HTML entities.
 467      *
 468      * This function normalizes HTML entities. It will convert "AT&T" to the correct
 469      * "AT&amp;T", "&#00058;" to "&#58;", "&#XYZZY;" to "&amp;#XYZZY;" and so on.
 470      *
 471      * @since 1.0.0
 472      *
 473      * @param string $string Content to normalize entities
 474      * @return string Content with normalized entities
 475      */
 476 
 477     static protected function normalize_entities($string)
 478     {
 479         # Disarm all entities by converting & to &amp;
 480 
 481         $string = str_replace('&', '&amp;', $string);
 482 
 483         # Change back the allowed entities in our entity whitelist
 484 
 485         $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]{0,19});/', '&\\1;', $string);
 486         $string = preg_replace_callback('/&amp;#0*([0-9]{1,5});/', array(__CLASS__, 'normalize_entities_callback1'), $string);
 487         $string = preg_replace_callback('/&amp;#([Xx])0*(([0-9A-Fa-f]{2}){1,2});/', array(__CLASS__, 'normalize_entities_callback2'), $string);
 488 
 489         return $string;
 490     }
 491 
 492     /**
 493      * Callback for normalize_entities() regular expression.
 494      *
 495      * This function helps normalize_entities() to only accept 16 bit values
 496      * and nothing more for &#number; entities.
 497      *
 498      * @access private
 499      * @since 1.0.0
 500      *
 501      * @param array $matches preg_replace_callback() matches array
 502      * @return string Correctly encoded entity
 503      */
 504 
 505     static protected function normalize_entities_callback1($matches)
 506     {
 507         if (! isset($matches[1]) || empty($matches[1]) )
 508             return '';
 509 
 510         $i = $matches[1];
 511 
 512         return ( ( ! self::isUnicodeValid($i) ) || ($i > 65535) ? "&amp;#$i;" : "&#$i;" );
 513     }
 514 
 515     /**
 516      * Callback for normalize_entities() for regular expression.
 517      *
 518      * This function helps normalize_entities() to only accept valid Unicode
 519      * numeric entities in hex form.
 520      *
 521      * @access private
 522      *
 523      * @param array $matches preg_replace_callback() matches array
 524      * @return string Correctly encoded entity
 525      */
 526 
 527     static protected function normalize_entities_callback2($matches)
 528     {
 529         if ( ! isset($matches[2]) || empty($matches[2]) )
 530             return '';
 531 
 532         $hexchars = $matches[2];
 533 
 534         return ( ( ! self::isUnicodeValid(hexdec($hexchars)) ) ? "&amp;#x$hexchars;" : "&#x$hexchars;" );
 535     }
 536 
 537     /**
 538      * Handles parsing errors in hair().
 539      *
 540      * The general plan is to remove everything to and including some whitespace,
 541      * but it deals with quotes and apostrophes as well.
 542      *
 543      * @since 1.0.0
 544      *
 545      * @param string $string
 546      * @return string
 547      */
 548 
 549     static function html_error($string)
 550     {
 551         return preg_replace('/^("[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*/', '', $string);
 552     }
 553 
 554     /**
 555      * Goes through an array and changes the keys to all lower case.
 556      *
 557      * @since 1.0.0
 558      *
 559      * @param array $inarray Unfiltered array
 560      * @return array Fixed array with all lowercase keys
 561      */
 562 
 563     static public function array_lc_keys($inarray)
 564     {
 565         $outarray = array ();
 566 
 567         foreach ( (array) $inarray as $inkey => $inval) {
 568             $outkey = strtolower($inkey);
 569             $outarray[$outkey] = array ();
 570 
 571             foreach ( (array) $inval as $inkey2 => $inval2) {
 572                 $outkey2 = strtolower($inkey2);
 573                 $outarray[$outkey][$outkey2] = $inval2;
 574             } # foreach $inval
 575         } # foreach $inarray
 576 
 577         return $outarray;
 578     }
 579 
 580     /**
 581      * Helper function to determine if a Unicode value is valid.
 582      *
 583      * @param int $i Unicode value
 584      * @return bool true if the value was a valid Unicode number
 585      */
 586 
 587     static public function isUnicodeValid($i)
 588     {
 589         return ( $i == 0x9 || $i == 0xa || $i == 0xd ||
 590                 ($i >= 0x20 && $i <= 0xd7ff) ||
 591                 ($i >= 0xe000 && $i <= 0xfffd) ||
 592                 ($i >= 0x10000 && $i <= 0x10ffff) );
 593     }
 594 
 595     /**
 596      * Searches for HTML tags, no matter how malformed.
 597      *
 598      * It also matches stray ">" characters.
 599      *
 600      * @since 1.0.0
 601      *
 602      * @param string $string Content to filter
 603      * @param array $allowed_html Allowed HTML elements
 604      * @param array $allowed_protocols Allowed protocols to keep
 605      * @return string Content with fixed HTML tags
 606      */
 607 
 608     static protected $pass_allowed_html;
 609     static protected $pass_allowed_protocols;
 610 
 611     static function split($string, $allowed_html, $allowed_protocols)
 612     {
 613         self::$pass_allowed_html = $allowed_html;
 614         self::$pass_allowed_protocols = $allowed_protocols;
 615 
 616         return preg_replace_callback('%((<!--.*?(-->|$))|(<[^>]*(>|$)|>))%', array(__CLASS__, 'split_callback'), $string);
 617     }
 618 
 619     /**
  620      * Callback for split() for fixing malformed HTML tags.
 621      *
 622      * This function does a lot of work. It rejects some very malformed things like
 623      * <:::>. It returns an empty string, if the element isn't allowed (look ma, no
 624      * strip_tags()!). Otherwise it splits the tag into an element and an attribute
 625      * list.
 626      *
 627      * After the tag is split into an element and an attribute list, it is run
 628      * through another filter which will remove illegal attributes and once that is
 629      * completed, will be returned.
 630      *
 631      * @access private
 632      * @since 1.0.0
 633      * @uses attr()
 634      *
  635      * @param array $matches preg_replace_callback() matches array; the allowed
  636      * HTML elements and the allowed protocols are read from the static
  637      * properties set by split()
 638      * @return string Fixed HTML element
 639      */
 640 
 641     static protected function split_callback($matches)
 642     {
 643         $string = $matches[1];
 644 
 645         $allowed_html = self::$pass_allowed_html;
 646         $allowed_protocols = self::$pass_allowed_protocols;
 647 
 648         $string = self::stripslashes($string);
 649 
 650         if (substr($string, 0, 1) != '<')
 651         {
 652             return '&gt;';
 653         }
 654 
 655         # It matched a ">" character
 656 
 657         if (preg_match('%^<!--(.*?)(-->)?$%', $string, $matches))
 658         {
 659             $string = str_replace(array('<!--', '-->'), '', $matches[1]);
 660             while ( $string != $newstring = self::sanitize($string, $allowed_html, $allowed_protocols) )
 661                 $string = $newstring;
 662             if ( $string == '' )
 663                 return '';
 664             // prevent multiple dashes in comments
 665             $string = preg_replace('/--+/', '-', $string);
 666             // prevent three dashes closing a comment
 667             $string = preg_replace('/-$/', '', $string);
 668             return "<!--{$string}-->";
 669         }
 670         # Allow HTML comments
 671 
 672         if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $matches))
 673             return '';
 674         # It's seriously malformed
 675 
 676         $slash = trim($matches[1]);
 677         $elem = $matches[2];
 678         $attrlist = $matches[3];
 679 
 680         if (!isset($allowed_html[strtolower($elem)]))
 681         {
 682             return '';
 683         }
 684 
 685         # They are using a not allowed HTML element
 686 
 687         if ($slash != '')
 688         {
 689             return "<$slash$elem>";
 690         }
 691 
 692         # No attributes are allowed for closing elements
 693 
 694         return self::attr("$slash$elem", $attrlist, $allowed_html, $allowed_protocols);
 695     }
 696 
 697     /**
 698      * Removes all attributes, if none are allowed for this element.
 699      *
 700      * If some are allowed it calls hair() to split them further, and then
 701      * it builds up new HTML code from the data that kses_hair() returns. It also
 702      * removes "<" and ">" characters, if there are any left. One more thing it does
 703      * is to check if the tag has a closing XHTML slash, and if it does, it puts one
 704      * in the returned code as well.
 705      *
 706      * @since 1.0.0
 707      *
 708      * @param string $element HTML element/tag
 709      * @param string $attr HTML attributes from HTML element to closing HTML element tag
 710      * @param array $allowed_html Allowed HTML elements
 711      * @param array $allowed_protocols Allowed protocols to keep
 712      * @return string Sanitized HTML element
 713      */
 714 
 715     static function attr($element, $attr, $allowed_html, $allowed_protocols)
 716     {
 717         # Is there a closing XHTML slash at the end of the attributes?
 718 
 719         $xhtml_slash = '';
 720         if (preg_match('%\s/\s*$%', $attr))
 721             $xhtml_slash = ' /';
 722 
 723         # Are any attributes allowed at all for this element?
 724 
 725         if (@ count($allowed_html[strtolower($element)]) == 0)
 726             return "<$element$xhtml_slash>";
 727 
 728         # Split it
 729 
 730         $attrarr = self::hair($attr, $allowed_protocols);
 731 
 732         # Go through $attrarr, and save the allowed attributes for this element
 733         # in $attr2
 734 
 735         $attr2 = '';
 736 
 737         foreach ($attrarr as $arreach) {
 738             if (!@ isset ($allowed_html[strtolower($element)][strtolower($arreach['name'])]))
 739                 continue; # the attribute is not allowed
 740 
 741             $current = $allowed_html[strtolower($element)][strtolower($arreach['name'])];
 742             if ($current == '')
 743                 continue; # the attribute is not allowed
 744 
 745             if (!is_array($current))
 746                 $attr2 .= ' '.$arreach['whole'];
 747             # there are no checks
 748 
 749             else {
 750                 # there are some checks
 751                 $ok = true;
 752                 foreach ($current as $currkey => $currval)
 753                     if (!self::check_attr_val($arreach['value'], $arreach['vless'], $currkey, $currval)) {
 754                         $ok = false;
 755                         break;
 756                     }
 757 
 758                 if ( $arreach['name'] == 'style' ) {
 759                     $orig_value = $arreach['value'];
 760 
 761                     $value = self::safecss_filter_attr($orig_value);
 762 
 763                     if ( empty($value) )
 764                         continue;
 765 
 766                     $arreach['value'] = $value;
 767 
 768                     $arreach['whole'] = str_replace($orig_value, $value, $arreach['whole']);
 769                 }
 770 
 771                 if ($ok)
 772                     $attr2 .= ' '.$arreach['whole']; # it passed them
 773             } # if !is_array($current)
 774         } # foreach
 775 
 776         # Remove any "<" or ">" characters
 777 
 778         $attr2 = preg_replace('/[<>]/', '', $attr2);
 779 
 780         return "<$element$attr2$xhtml_slash>";
 781     }
 782 
 783     /**
 784      * Performs different checks for attribute values.
 785      *
 786      * The currently implemented checks are "maxlen", "minlen", "maxval", "minval"
 787      * and "valueless" with even more checks to come soon.
 788      *
 789      * @since 1.0.0
 790      *
 791      * @param string $value Attribute value
 792      * @param string $vless Whether the value is valueless or not. Use 'y' or 'n'
 793      * @param string $checkname What $checkvalue is checking for.
 794      * @param mixed $checkvalue What constraint the value should pass
 795      * @return bool Whether check passes (true) or not (false)
 796      */
 797 
 798     static function check_attr_val($value, $vless, $checkname, $checkvalue)
 799     {
 800         $ok = true;
 801 
 802         switch (strtolower($checkname)) {
 803             case 'maxlen' :
 804                 # The maxlen check makes sure that the attribute value has a length not
 805                 # greater than the given value. This can be used to avoid Buffer Overflows
 806                 # in WWW clients and various Internet servers.
 807 
 808                 if (strlen($value) > $checkvalue)
 809                     $ok = false;
 810                 break;
 811 
 812             case 'minlen' :
 813                 # The minlen check makes sure that the attribute value has a length not
 814                 # smaller than the given value.
 815 
 816                 if (strlen($value) < $checkvalue)
 817                     $ok = false;
 818                 break;
 819 
 820             case 'maxval' :
 821                 # The maxval check does two things: it checks that the attribute value is
 822                 # an integer from 0 and up, without an excessive amount of zeroes or
 823                 # whitespace (to avoid Buffer Overflows). It also checks that the attribute
 824                 # value is not greater than the given value.
 825                 # This check can be used to avoid Denial of Service attacks.
 826 
 827                 if (!preg_match('/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value))
 828                     $ok = false;
 829                 if ($value > $checkvalue)
 830                     $ok = false;
 831                 break;
 832 
 833             case 'minval' :
 834                 # The minval check checks that the attribute value is a positive integer,
 835                 # and that it is not smaller than the given value.
 836 
 837                 if (!preg_match('/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value))
 838                     $ok = false;
 839                 if ($value < $checkvalue)
 840                     $ok = false;
 841                 break;
 842 
 843             case 'valueless' :
 844                 # The valueless check checks if the attribute has a value
 845                 # (like <a href="blah">) or not (<option selected>). If the given value
 846                 # is a "y" or a "Y", the attribute must not have a value.
 847                 # If the given value is an "n" or an "N", the attribute must have one.
 848 
 849                 if (strtolower($checkvalue) != $vless)
 850                     $ok = false;
 851                 break;
 852         } # switch
 853 
 854         return $ok;
 855     }
 856 
 857     /**
 858      * Builds an attribute list from string containing attributes.
 859      *
 860      * This function does a lot of work. It parses an attribute list into an array
 861      * with attribute data, and tries to do the right thing even if it gets weird
 862      * input. It will add quotes around attribute values that don't have any quotes
 863      * or apostrophes around them, to make it easier to produce HTML code that will
 864      * conform to W3C's HTML specification. It will also remove bad URL protocols
 865      * from attribute values.  It also reduces duplicate attributes by using the
 866      * attribute defined first (foo='bar' foo='baz' will result in foo='bar').
 867      *
 868      * @since 1.0.0
 869      *
 870      * @param string $attr Attribute list from HTML element to closing HTML element tag
 871      * @param array $allowed_protocols Allowed protocols to keep
 872      * @return array List of attributes after parsing
 873      */
 874 
 875     static function hair($attr, $allowed_protocols)
 876     {
 877         $attrarr = array ();
 878         $mode = 0;
 879         $attrname = '';
 880         $uris = array('xmlns', 'profile', 'href', 'src', 'cite', 'classid', 'codebase', 'data', 'usemap', 'longdesc', 'action');
 881 
 882         # Loop through the whole attribute list
 883 
 884         while (strlen($attr) != 0) {
 885             $working = 0; # Was the last operation successful?
 886 
 887             switch ($mode) {
 888                 case 0 : # attribute name, href for instance
 889 
 890                     if (preg_match('/^([-a-zA-Z]+)/', $attr, $match)) {
 891                         $attrname = $match[1];
 892                         $working = $mode = 1;
 893                         $attr = preg_replace('/^[-a-zA-Z]+/', '', $attr);
 894                     }
 895 
 896                     break;
 897 
 898                 case 1 : # equals sign or valueless ("selected")
 899 
 900                     if (preg_match('/^\s*=\s*/', $attr)) # equals sign
 901                         {
 902                         $working = 1;
 903                         $mode = 2;
 904                         $attr = preg_replace('/^\s*=\s*/', '', $attr);
 905                         break;
 906                     }
 907 
 908                     if (preg_match('/^\s+/', $attr)) # valueless
 909                         {
 910                         $working = 1;
 911                         $mode = 0;
 912                         if(FALSE === array_key_exists($attrname, $attrarr)) {
 913                             $attrarr[$attrname] = array ('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y');
 914                         }
 915                         $attr = preg_replace('/^\s+/', '', $attr);
 916                     }
 917 
 918                     break;
 919 
 920                 case 2 : # attribute value, a URL after href= for instance
 921 
 922                     if (preg_match('/^"([^"]*)"(\s+|$)/', $attr, $match))
 923                         # "value"
 924                         {
 925                         $thisval = $match[1];
 926                         if ( in_array($attrname, $uris) )
 927                             $thisval = self::bad_protocol($thisval, $allowed_protocols);
 928 
 929                         if(FALSE === array_key_exists($attrname, $attrarr)) {
 930                             $attrarr[$attrname] = array ('name' => $attrname, 'value' => $thisval, 'whole' => "$attrname=\"$thisval\"", 'vless' => 'n');
 931                         }
 932                         $working = 1;
 933                         $mode = 0;
 934                         $attr = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr);
 935                         break;
 936                     }
 937 
 938                     if (preg_match("/^'([^']*)'(\s+|$)/", $attr, $match))
 939                         # 'value'
 940                     {
 941                         $thisval = $match[1];
 942                         if ( in_array($attrname, $uris) )
 943                             $thisval = self::bad_protocol($thisval, $allowed_protocols);
 944 
 945                         if (array_key_exists($attrname, $attrarr) === false)
 946                         {
 947                             $attrarr[$attrname] = array ('name' => $attrname, 'value' => $thisval, 'whole' => "$attrname='$thisval'", 'vless' => 'n');
 948                         }
 949 
 950                         $working = 1;
 951                         $mode = 0;
 952                         $attr = preg_replace("/^'[^']*'(\s+|$)/", '', $attr);
 953                         break;
 954                     }
 955 
 956                     if (preg_match("%^([^\s\"']+)(\s+|$)%", $attr, $match))
 957                         # value
 958                     {
 959                         $thisval = $match[1];
 960                         if ( in_array($attrname, $uris) )
 961                             $thisval = self::bad_protocol($thisval, $allowed_protocols);
 962 
 963                         if(FALSE === array_key_exists($attrname, $attrarr)) {
 964                             $attrarr[$attrname] = array ('name' => $attrname, 'value' => $thisval, 'whole' => "$attrname=\"$thisval\"", 'vless' => 'n');
 965                         }
 966                         # We add quotes to conform to W3C's HTML spec.
 967                         $working = 1;
 968                         $mode = 0;
 969                         $attr = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attr);
 970                     }
 971 
 972                 break;
 973             } # switch
 974 
 975             if ($working == 0) # not well formed, remove and try again
 976             {
 977                 $attr = self::html_error($attr);
 978                 $mode = 0;
 979             }
 980         } # while
 981 
 982         if ($mode == 1 && FALSE === array_key_exists($attrname, $attrarr))
 983             # special case, for when the attribute list ends with a valueless
 984             # attribute like "selected"
 985             $attrarr[$attrname] = array ('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y');
 986 
 987         return $attrarr;
 988     }
 989 
 990     /**
 991      * Sanitize string from bad protocols.
 992      *
 993      * This function removes all non-allowed protocols from the beginning of
 994      * $string. It ignores whitespace and the case of the letters, and it does
 995      * understand HTML entities. It does its work in a while loop, so it won't be
 996      * fooled by a string like "javascript:javascript:alert(57)".
 997      *
 998      * @since 1.0.0
 999      *
1000      * @param string $string Content to filter bad protocols from
1001      * @param array $allowed_protocols Allowed protocols to keep
1002      * @return string Filtered content
1003      */
1004 
1005     static function bad_protocol($string, $allowed_protocols)
1006     {
1007         $string = self::no_null($string);
1008         $string2 = $string.'a';
1009 
1010         while ($string != $string2) {
1011             $string2 = $string;
1012             $string = self::bad_protocol_once($string, $allowed_protocols);
1013         } # while
1014 
1015         return $string;
1016     }
1017 
1018     /**
1019      * Sanitizes content from bad protocols and other characters.
1020      *
1021      * This function searches for URL protocols at the beginning of $string, while
1022      * handling whitespace and HTML entities.
1023      *
1024      * @since 1.0.0
1025      *
1026      * @param string $string Content to check for bad protocols
1027      * @param string $allowed_protocols Allowed protocols
1028      * @return string Sanitized content
1029      */
1030 
1031     static protected $_kses_allowed_protocols;
1032 
1033     static protected function bad_protocol_once($string, $allowed_protocols)
1034     {
1035         self::$_kses_allowed_protocols = $allowed_protocols;
1036 
1037         $string2 = preg_split('/:|&#58;|&#x3a;/i', $string, 2);
1038         if ( isset($string2[1]) && !preg_match('%/\?%', $string2[0]) )
1039             $string = self::bad_protocol_once2($string2[0]) . trim($string2[1]);
1040         else
1041             $string = preg_replace_callback('/^((&[^;]*;|[\sA-Za-z0-9])*)'.'(:|&#58;|&#[Xx]3[Aa];)\s*/', array(__CLASS__, 'bad_protocol_once2'), $string);
1042 
1043         return $string;
1044     }
1045 
1046     /**
1047      * Callback for bad_protocol_once() regular expression.
1048      *
1049      * This function processes URL protocols, checks to see if they're in the
1050      * white-list or not, and returns different data depending on the answer.
1051      *
1052      * @access private
1053      * @since 1.0.0
1054      *
1055      * @param mixed $matches string or preg_replace_callback() matches array to check for bad protocols
1056      * @return string Sanitized content
1057      */
1058 
1059     static protected function bad_protocol_once2($matches)
1060     {
1061         if ( is_array($matches) ) {
1062             if ( ! isset($matches[1]) || empty($matches[1]) )
1063                 return '';
1064 
1065             $string = $matches[1];
1066         } else {
1067             $string = $matches;
1068         }
1069 
1070         $string2 = self::decode_entities($string);
1071         $string2 = preg_replace('/\s/', '', $string2);
1072         $string2 = self::no_null($string2);
1073         $string2 = strtolower($string2);
1074 
1075         $allowed = false;
1076 
1077         foreach ( (array) self::$_kses_allowed_protocols as $one_protocol)
1078         {
1079             if (strtolower($one_protocol) == $string2)
1080             {
1081                 $allowed = true;
1082                 break;
1083             }
1084         }
1085 
1086         if ($allowed)
1087             return "$string2:";
1088         else
1089             return '';
1090     }
1091 
1092     /**
1093      * Strips slashes from in front of quotes.
1094      *
1095      * This function changes the character sequence  \"  to just  ". It leaves all
1096      * other slashes alone. It's really weird, but the quoting from
1097      * preg_replace(//e) seems to require this.
1098      *
1099      * @since 1.0.0
1100      *
1101      * @param string $string String to strip slashes
1102      * @return string Fixed strings with quoted slashes
1103      */
1104 
1105     static protected function stripslashes($string)
1106     {
1107         return preg_replace('%\\\\"%', '"', $string);
1108     }
1109 
1110     /**
1111      * Convert all entities to their character counterparts.
1112      *
1113      * This function decodes numeric HTML entities (&#65; and &#x41;). It doesn't do
1114      * anything with other entities like &auml;, but we don't need them in the URL
1115      * protocol whitelisting system anyway.
1116      *
1117      * @since 1.0.0
1118      *
1119      * @param string $string Content to change entities
1120      * @return string Content after decoded entities
1121      */
1122 
1123     static protected function decode_entities($string)
1124     {
1125         $string = preg_replace_callback('/&#([0-9]+);/', array(__CLASS__, 'decode_entities_chr'), $string);
1126         $string = preg_replace_callback('/&#[Xx]([0-9A-Fa-f]+);/', array(__CLASS__, 'decode_entities_chr_hexdec'), $string);
1127 
1128         return $string;
1129     }
1130 
1131     /**
1132      * Regex callback for decode_entities(): decodes a decimal entity
1133      *
1134      * @param array $match preg match
1135      * @return string
1136      */
1137 
1138     static protected function decode_entities_chr($match)
1139     {
1140         return chr($match[1]);
1141     }
1142 
1143     /**
1144      * Regex callback for decode_entities(): decodes a hexadecimal entity
1145      *
1146      * @param array $match preg match
1147      * @return string
1148      */
1149 
1150     static protected function decode_entities_chr_hexdec($match)
1151     {
1152         return chr(hexdec($match[1]));
1153     }
1154 
1155     static function safecss_filter_attr( $css, $deprecated = '' )
1156     {
1157         $css = self::no_null($css);
1158         $css = str_replace(array("\n","\r","\t"), '', $css);
1159 
1160         if ( preg_match( '%[\\(&]|/\*%', $css ) ) // remove any inline css containing \ ( & or comments
1161             return '';
1162 
1163         $css_array = explode( ';', trim( $css ) );
1164         $allowed_attr = array( 'text-align', 'margin', 'color', 'float',
1165         'border', 'background', 'background-color', 'border-bottom', 'border-bottom-color',
1166         'border-bottom-style', 'border-bottom-width', 'border-collapse', 'border-color', 'border-left',
1167         'border-left-color', 'border-left-style', 'border-left-width', 'border-right', 'border-right-color',
1168         'border-right-style', 'border-right-width', 'border-spacing', 'border-style', 'border-top',
1169         'border-top-color', 'border-top-style', 'border-top-width', 'border-width', 'caption-side',
1170         'clear', 'cursor', 'direction', 'font', 'font-family', 'font-size', 'font-style',
1171         'font-variant', 'font-weight', 'height', 'letter-spacing', 'line-height', 'margin-bottom',
1172         'margin-left', 'margin-right', 'margin-top', 'overflow', 'padding', 'padding-bottom',
1173         'padding-left', 'padding-right', 'padding-top', 'text-decoration', 'text-indent', 'vertical-align',
1174         'width');
1175 
1176         if ( empty($allowed_attr) )
1177             return $css;
1178 
1179         $css = '';
1180         foreach ( $css_array as $css_item ) {
1181             if ( $css_item == '' )
1182                 continue;
1183             $css_item = trim( $css_item );
1184             $found = false;
1185             if ( strpos( $css_item, ':' ) === false ) {
1186                 $found = true;
1187             } else {
1188                 $parts = explode( ':', $css_item );
1189                 if ( in_array( trim( $parts[0] ), $allowed_attr ) )
1190                     $found = true;
1191             }
1192             if ( $found ) {
1193                 if( $css != '' )
1194                     $css .= ';';
1195                 $css .= $css_item;
1196             }
1197         }
1198 
1199         return $css;
1200     }
1201 }
Autodoc API documentation generated by ApiGen 2.8.0