1 <?php
2
3 /*
4 * This file is part of the ICanBoogie package.
5 *
6 * (c) Olivier Laviale <olivier.laviale@gmail.com>
7 *
8 * For the full copyright and license information, please view the LICENSE
9 * file that was distributed with this source code.
10 */
11
12 namespace ICanBoogie\HTTP;
13
14 use ICanBoogie\PropertyNotDefined;
15
/**
 * Representation of a header parameter.
 *
 * A parameter is an `attribute=value` pair such as `filename="report.pdf"`. Values that cannot
 * be represented as a token or a quoted string are parsed from, and rendered to, the extended
 * notation `attribute*=charset'language'percent-encoded-value`.
 *
 * @property-read string $attribute The attribute of the parameter.
 * @property-read string $charset The charset of the parameter's value.
 *
 * @see http://tools.ietf.org/html/rfc2231
 * @see http://tools.ietf.org/html/rfc5987
 * @see http://greenbytes.de/tech/tc2231/#attwithfn2231utf8
 */
class HeaderParameter
{
	/**
	 * Token of the parameter.
	 *
	 * @var string
	 */
	protected $attribute;

	/**
	 * Value of the parameter.
	 *
	 * @var string
	 */
	public $value;

	/**
	 * Language of the value.
	 *
	 * @var string
	 */
	public $language;

	/**
	 * Creates a {@link HeaderParameter} instance from the provided source.
	 *
	 * The source is either an instance of the class, which is returned as is, or a string such
	 * as `filename="a.txt"` or `filename*=UTF-8'en'a%20b.txt`. A source without an equal sign
	 * is considered as an attribute without value.
	 *
	 * @param mixed $source
	 *
	 * @return \ICanBoogie\HTTP\HeaderParameter
	 */
	static public function from($source)
	{
		if ($source instanceof self)
		{
			return $source;
		}

		$source = (string) $source;
		$equal_pos = strpos($source, '=');

		if ($equal_pos === false)
		{
			// No value at all: indexing with `false` would garble attribute and value.

			return new static($source);
		}

		$language = null;
		$value = (string) substr($source, $equal_pos + 1);

		if ($equal_pos > 0 && $source[$equal_pos - 1] === '*')
		{
			#
			# extended notation: [charset] "'" [language] "'" percent-encoded-value
			#

			$attribute = substr($source, 0, $equal_pos - 1);
			$charset = '';

			if (preg_match('#^(?:([a-zA-Z0-9_\-]*)\'([a-zA-Z0-9\-]*)\')?"?([^"]*)"?$#D', $value, $matches))
			{
				$charset = $matches[1];

				if ($matches[2] !== '')
				{
					$language = $matches[2];
				}

				$value = $matches[3];
			}

			// The value is percent-encoded, not form-encoded: a literal "+" must stay a "+".

			$value = rawurldecode($value);

			// Charset names are case-insensitive.

			if (strcasecmp($charset, 'iso-8859-1') === 0)
			{
				$value = mb_convert_encoding($value, 'UTF-8', 'ISO-8859-1');
			}
		}
		else
		{
			#
			# token or quoted string
			#

			$attribute = (string) substr($source, 0, $equal_pos);

			if ($value !== '' && $value[0] === '"')
			{
				$value = (string) substr($value, 1, -1);
			}
		}

		$value = mb_convert_encoding($value, 'UTF-8');

		return new static($attribute, $value, $language);
	}

	/**
	 * Checks if the provided string is a token.
	 *
	 * <pre>
	 * token          = 1*<any CHAR except CTLs or separators>
	 * separators     = "(" | ")" | "<" | ">" | "@"
	 *                | "," | ";" | ":" | "\" | <">
	 *                | "/" | "[" | "]" | "?" | "="
	 *                | "{" | "}" | SP | HT
	 * CHAR           = <any US-ASCII character (octets 0 - 127)>
	 * CTL            = <any US-ASCII control character (octets 0 - 31) and DEL (127)>
	 * SP             = <US-ASCII SP, space (32)>
	 * HT             = <US-ASCII HT, horizontal-tab (9)>
	 * </pre>
	 *
	 * @param string $str
	 *
	 * @return boolean `true` if the provided string is a token, `false` otherwise. The empty
	 * string is not a token because a token requires at least one character.
	 */
	static public function is_token($str)
	{
		// Excluded: CTLs and SP (\x00-\x20), DEL and non-ASCII octets (\x7f-\xff), separators.
		// `+` enforces the "1*" of the grammar, `\z` refuses a trailing newline.

		return preg_match('#^[^\x00-\x20\x7f-\xff()<>@,;:\\\\"/\[\]?={}]+\z#', (string) $str) === 1;
	}

	/**
	 * Converts a string to the ASCII charset.
	 *
	 * Accents are converted using {@link \ICanBoogie\remove_accents()}. Characters that are not
	 * in the printable ASCII range are discarded.
	 *
	 * @param string $str The string to convert.
	 *
	 * @return string
	 */
	static public function to_ascii($str)
	{
		$str = \ICanBoogie\remove_accents($str);
		$str = preg_replace('/[^\x20-\x7F]+/', '', $str);

		return $str;
	}

	/**
	 * Detects the charset of a value, falling back to `ISO-8859-1` when detection fails.
	 *
	 * @param string $value
	 *
	 * @return string
	 */
	static private function detect_charset($value)
	{
		return mb_detect_encoding((string) $value) ?: 'ISO-8859-1';
	}

	/**
	 * Initializes the {@link $attribute}, {@link $value} and {@link $language} properties.
	 *
	 * @param string $attribute
	 * @param string $value
	 * @param string|null $language
	 */
	public function __construct($attribute, $value=null, $language=null)
	{
		$this->attribute = $attribute;
		$this->value = $value;
		$this->language = $language;
	}

	/**
	 * Handles the {@link $attribute} and {@link $charset} magic properties.
	 *
	 * @param string $property
	 *
	 * @throws PropertyNotDefined in attempt to get an undefined property.
	 *
	 * @return mixed
	 */
	public function __get($property)
	{
		switch ($property)
		{
			case 'attribute': return $this->attribute;
			case 'charset': return self::detect_charset($this->value);
		}

		throw new PropertyNotDefined(array($property, $this));
	}

	/**
	 * Renders the attribute and value into a string.
	 *
	 * The simplest suitable notation is used: a token, then a quoted string, then the extended
	 * notation preceded by an ASCII fallback.
	 *
	 * <pre>
	 * A string of text is parsed as a single word if it is quoted using
	 * double-quote marks.
	 *
	 *     quoted-string  = ( <"> *(qdtext | quoted-pair ) <"> )
	 *     qdtext         = <any TEXT except <">>
	 *
	 * The backslash character ("\") MAY be used as a single-character
	 * quoting mechanism only within quoted-string and comment constructs.
	 *
	 *     quoted-pair    = "\" CHAR
	 * </pre>
	 *
	 * @return string|null The rendered parameter, or `null` if the parameter has no value.
	 */
	public function render()
	{
		$value = $this->value;

		// Only a missing or empty value is skipped; "0" is a legitimate value.

		if ($value === null || (string) $value === '')
		{
			return null;
		}

		$value = (string) $value;
		$attribute = $this->attribute;

		#
		# token
		#

		if (self::is_token($value))
		{
			return "{$attribute}={$value}";
		}

		#
		# quoted string
		#
		# Values with control characters (CR, LF...) are never emitted raw, they go through the
		# percent-encoded notation below.
		#

		$encoding = self::detect_charset($value);

		if (($encoding === 'ASCII' || $encoding === 'ISO-8859-1')
		&& strpos($value, '"') === false
		&& !preg_match('/[\x00-\x1f\x7f]/', $value))
		{
			return "{$attribute}=\"{$value}\"";
		}

		#
		# escaped, with fallback
		#
		# @see http://greenbytes.de/tech/tc2231/#encoding-2231-fb
		#

		if ($encoding !== 'UTF-8')
		{
			$value = mb_convert_encoding($value, 'UTF-8', $encoding);
		}

		$normalized_value = self::to_ascii($value);
		$normalized_value = str_replace(array('"', ';'), '', $normalized_value);

		// The value is UTF-8 at this point (ASCII is a subset), the label must say so.

		return "{$attribute}=\"{$normalized_value}\"; {$attribute}*=UTF-8'{$this->language}'" . rawurlencode($value);
	}

	/**
	 * Returns the value of the parameter.
	 *
	 * Note: Use {@link render()} to render the attribute and value of the parameter.
	 *
	 * @return string
	 */
	public function __toString()
	{
		return (string) $this->value;
	}
}