Autodoc
  • Namespace
  • Class
  • Tree

Namespaces

  • BlueTihi
    • Context
  • Brickrouge
    • Element
      • Nodes
    • Renderer
    • Widget
  • ICanBoogie
    • ActiveRecord
    • AutoConfig
    • CLDR
    • Composer
    • Core
    • Event
    • Exception
    • HTTP
      • Dispatcher
      • Request
    • I18n
      • Translator
    • Mailer
    • Modules
      • Taxonomy
        • Support
      • Thumbnailer
        • Versions
    • Object
    • Operation
      • Dispatcher
    • Prototype
    • Routes
    • Routing
      • Dispatcher
    • Session
  • Icybee
    • ActiveRecord
      • Model
    • ConfigOperation
    • Document
    • EditBlock
    • Element
      • ActionbarContextual
      • ActionbarSearch
      • ActionbarToolbar
    • FormBlock
    • Installer
    • ManageBlock
    • Modules
      • Articles
      • Cache
        • Collection
        • ManageBlock
      • Comments
        • ManageBlock
      • Contents
        • ManageBlock
      • Dashboard
      • Editor
        • Collection
      • Files
        • File
        • ManageBlock
      • Forms
        • Form
        • ManageBlock
      • I18n
      • Images
        • ManageBlock
      • Members
      • Modules
        • ManageBlock
      • Nodes
        • ManageBlock
        • Module
      • Pages
        • BreadcrumbElement
        • LanguagesElement
        • ManageBlock
        • NavigationBranchElement
        • NavigationElement
        • Page
        • PageController
      • Registry
      • Search
      • Seo
      • Sites
        • ManageBlock
      • Taxonomy
        • Terms
          • ManageBlock
        • Vocabulary
          • ManageBlock
      • Users
        • ManageBlock
        • NonceLogin
        • Roles
      • Views
        • ActiveRecordProvider
        • Collection
        • View
    • Operation
      • ActiveRecord
      • Constructor
      • Module
      • Widget
    • Rendering
  • None
  • Patron
  • PHP

Classes

  • Compiler
  • ControlNode
  • Engine
  • EvaluateNode
  • ExpressionNode
  • Hook
  • Hooks
  • HTMLParser
  • Node
  • Template
  • TextHole
  • TextNode
  • TranslateNode
  • URLNode

Functions

  • by_columns
  • render
  • tr
  1 <?php
  2 
  3 /*
  4  * This file is part of the Patron package.
  5  *
  6  * (c) Olivier Laviale <olivier.laviale@gmail.com>
  7  *
  8  * For the full copyright and license information, please view the LICENSE
  9  * file that was distributed with this source code.
 10  */
 11 
 12 namespace Patron;
 13 
 14 class HTMLParser
 15 {
 16     const T_ERROR_HANDLER = 'error-handler';
 17 
 18     private $encoding;
 19     private $matches;
 20     private $escaped;
 21     private $opened = array();
 22 
 23     protected $error_handler;
 24     protected $namespace;
 25 
 26     public function __construct(array $tags=array())
 27     {
 28         $tags += array
 29         (
 30             self::T_ERROR_HANDLER => function($str, $args) {
 31 
 32                 trigger_error(\ICanBoogie\format($str, $args));
 33 
 34             }
 35         );
 36 
 37         $this->error_handler = $tags[self::T_ERROR_HANDLER];
 38     }
 39 
 40     public function parse($html, $namespace=null, $encoding='utf-8')
 41     {
 42         $this->encoding = $encoding;
 43         $this->namespace = $namespace;
 44 
 45         #
 46         # we take care of escaping comments and processing options. they will not be parsed
 47         # and will end as text nodes
 48         #
 49 
 50         $html = $this->escapeSpecials($html);
 51 
 52         #
 53         # in order to create a tree, we first need to split the HTML using the markups,
 54         # creating a nice flat array of texts and opening and closing markups.
 55         #
 56         # the array can be read as follows :
 57         #
 58         # i+0 => some text
 59         # i+1 => '/' for closing markups, nothing otherwise
 60         # i+2 => the markup it self, without the '<' '>'
 61         #
 62         # note that i+2 might end with a '/' indicating an auto-closing markup
 63         #
 64 
 65         $this->matches = preg_split
 66         (
 67             '#<(/?)' . $namespace . '([^>]*)>#', $html, -1, PREG_SPLIT_DELIM_CAPTURE
 68         );
 69 
 70         #
 71         # the flat representation is now ready, we can create our tree
 72         #
 73 
 74         $tree = $this->buildTree();
 75 
 76         #
 77         # if comments or processing options where escaped, we can
 78         # safely unescape them now
 79         #
 80 
 81         if ($this->escaped)
 82         {
 83             $tree = $this->unescapeSpecials($tree);
 84         }
 85 
 86         return $tree;
 87     }
 88 
 89     protected function escapeSpecials($html)
 90     {
 91         #
 92         # here we escape comments
 93         #
 94 
 95         $html = preg_replace_callback('#<\!--.+-->#sU', array($this, 'escapeSpecials_callback'), $html);
 96 
 97         #
 98         # and processing options
 99         #
100 
101         $html = preg_replace_callback('#<\?.+\?>#sU', array($this, 'escapeSpecials_callback'), $html);
102 
103         return $html;
104     }
105 
106     protected function escapeSpecials_callback($m)
107     {
108         $this->escaped = true;
109 
110         $text = $m[0];
111 
112         $text = str_replace
113         (
114             array('<', '>'),
115             array("\x01", "\x02"),
116             $text
117         );
118 
119         return $text;
120     }
121 
122     protected function unescapeSpecials($tree)
123     {
124         return is_array($tree) ? array_map(array($this, 'unescapeSpecials'), $tree) : str_replace
125         (
126             array("\x01", "\x02"),
127             array('<', '>'),
128             $tree
129         );
130     }
131 
132     protected function buildTree()
133     {
134         $nodes = array();
135 
136         $i = 0;
137         $text = null;
138 
139         while (($value = array_shift($this->matches)) !== null)
140         {
141             switch ($i++ % 3)
142             {
143                 case 0:
144                 {
145                     #
146                     # if the trimed value is not empty we preserve the value,
147                     # otherwise we discard it.
148                     #
149 
150                     if (trim($value))
151                     {
152                         $nodes[] = $value;
153                     }
154                 }
155                 break;
156 
157                 case 1:
158                 {
159                     $closing = ($value == '/');
160                 }
161                 break;
162 
163                 case 2:
164                 {
165                     if (substr($value, -1, 1) == '/')
166                     {
167                         #
168                         # auto closing
169                         #
170 
171                         $nodes[] = $this->parseMarkup(substr($value, 0, -1));
172                     }
173                     else if ($closing)
174                     {
175                         #
176                         # closing markup
177                         #
178 
179                         $open = array_pop($this->opened);
180 
181                         if ($value != $open)
182                         {
183                             $this->error($value, $open);
184                         }
185 
186                         return $nodes;
187                     }
188                     else
189                     {
190                         #
191                         # this is an open markup with possible children
192                         #
193 
194                         $node = $this->parseMarkup($value);
195 
196                         #
197                         # push the markup name into the opened markups
198                         #
199 
200                         $this->opened[] = $node['name'];
201 
202                         #
203                         # create the node and parse its children
204                         #
205 
206                         $node['children'] = $this->buildTree($this->matches);
207 
208                         $nodes[] = $node;
209                     }
210                 }
211             }
212         }
213 
214         return $nodes;
215     }
216 
217     protected function parseMarkup($markup)
218     {
219         #
220         # get markup's name
221         #
222 
223         preg_match('#^[^\s]+#', $markup, $matches);
224 
225         $name = $matches[0];
226 
227         #
228         # get markup's arguments
229         #
230 
231         preg_match_all('#\s+([^=]+)\s*=\s*"([^"]+)"#', $markup, $matches, PREG_SET_ORDER);
232 
233         #
234         # transform the matches into a nice key/value array
235         #
236 
237         $args = array();
238 
239         foreach ($matches as $m)
240         {
241             #
242             # we unescape the html entities of the argument's value
243             #
244 
245             $args[$m[1]] = html_entity_decode($m[2], ENT_QUOTES, $this->encoding);
246         }
247 
248         return array('name' => $name, 'args' => $args);
249     }
250 
251     protected function error($markup, $expected)
252     {
253         $this->malformed = true;
254 
255         call_user_func
256         (
257             $this->error_handler, $expected
258             ? 'unexpected closing markup %markup, should be %expected'
259             : 'unexpected closing markup %markup, when none was opened', array
260             (
261                 '%markup' => $this->namespace . $markup, '%expected' => $expected
262             )
263         );
264     }
265 
266     static public function collectMarkup($nodes, $markup)
267     {
268         $collected = array();
269 
270         foreach ($nodes as $node)
271         {
272             if (!is_array($node))
273             {
274                 continue;
275             }
276 
277             if ($node['name'] == $markup)
278             {
279                 $collected[] = $node;
280             }
281 
282             if (isset($node['children']))
283             {
284                 $collected = array_merge($collected, self::collectMarkup($node['children'], $markup));
285             }
286         }
287 
288         return $collected;
289     }
290 }
Autodoc API documentation generated by ApiGen 2.8.0