<?php
# Copyright (C) 2009 Aryeh Gregor
# http://www.mediawiki.org/
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# http://www.gnu.org/copyleft/gpl.html

/**
 * Static helpers that build HTML/XHTML markup from an element name, an
 * attribute array and (optionally) contents.
 *
 * The exact serialization depends on globals that the methods read:
 *   - $wgHtml5: when false, some HTML5-only attributes are stripped and
 *     default-valued attributes are not dropped.
 *   - $wgWellFormedXml: when true, every attribute value is quoted, void
 *     elements are closed with " />" and "<" is escaped in attribute values.
 *   - $wgJsMimeType: the "type" emitted on script elements when $wgHtml5
 *     is false.
 */
class Html {
	# List of void elements from HTML5, section 9.1.2 as of 2009-08-10
	private static $voidElements = array(
		'area',
		'base',
		'br',
		'col',
		'command',
		'embed',
		'hr',
		'img',
		'input',
		'keygen',
		'link',
		'meta',
		'param',
		'source',
	);

	# Boolean attributes, which may have the value omitted entirely. Manually
	# collected from the HTML5 spec as of 2009-08-10.
	private static $boolAttribs = array(
		'async',
		'autobuffer',
		'autofocus',
		'autoplay',
		'checked',
		'controls',
		'defer',
		'disabled',
		'formnovalidate',
		'hidden',
		'ismap',
		'loop',
		'multiple',
		'novalidate',
		'open',
		'readonly',
		'required',
		'reversed',
		'scoped',
		'seamless',
	);

	/**
	 * Build a complete element whose contents are inserted verbatim.
	 *
	 * For void elements $contents is ignored and no closing tag is written;
	 * when $wgWellFormedXml is set the tag is self-closed with " />".
	 *
	 * @param $element  string Element name; it is lowercased in the output.
	 * @param $attribs  array  Attributes, in the format accepted by
	 *   expandAttributes().
	 * @param $contents string Raw HTML placed between the tags. The caller
	 *   is responsible for escaping it.
	 * @return string Raw HTML
	 */
	public static function rawElement( $element, $attribs = array(), $contents = '' ) {
		global $wgWellFormedXml;

		# openElement() lowercases the tag name. Normalize it here as well so
		# that the void-element check and the closing tag always agree with
		# the opening tag, whatever casing the caller used.
		$element = strtolower( $element );

		$start = self::openElement( $element, $attribs );
		if ( in_array( $element, self::$voidElements, true ) ) {
			if ( $wgWellFormedXml ) {
				# Silly XML: replace the trailing ">" with " />".
				return substr( $start, 0, -1 ) . ' />';
			}
			return $start;
		}

		return "$start$contents</$element>";
	}

	/**
	 * Same as rawElement(), but "&" and "<" in $contents are escaped first,
	 * so $contents is treated as text rather than markup.
	 *
	 * @param $element  string Element name.
	 * @param $attribs  array  Attributes, see expandAttributes().
	 * @param $contents string Text content.
	 * @return string Raw HTML
	 */
	public static function element( $element, $attribs = array(), $contents = '' ) {
		return self::rawElement( $element, $attribs, strtr( $contents, array(
			# There's no point in escaping quotes, >, etc. in the contents of
			# elements.
			'&' => '&amp;',
			'<' => '&lt;'
		) ) );
	}

	/**
	 * Build only the opening tag of an element.
	 *
	 * Besides serializing the attributes, this filters them:
	 *   - on "input", a "type" outside the whitelist below is removed, and
	 *     type="search" is removed when $wgHtml5 is false;
	 *   - on "input", the listed HTML5 attributes are always removed;
	 *   - on "textarea", "maxlength" is removed when $wgHtml5 is false;
	 *   - attributes equal to their default are removed by dropDefaults().
	 *
	 * @param $element string Element name; it is lowercased in the output.
	 * @param $attribs array  Attributes, see expandAttributes().
	 * @return string Raw HTML, e.g. '<a href="/">'
	 */
	public static function openElement( $element, $attribs = array() ) {
		global $wgHtml5;
		$attribs = (array)$attribs;
		# This is not required in HTML5, but let's do it anyway, for
		# consistency and better compression.
		$element = strtolower( $element );

		# Remove HTML5-only attributes if we aren't doing HTML5, and disable
		# form validation regardless (see bug 23769 and the more detailed
		# comment in expandAttributes())
		if ( $element == 'input' ) {
			# Whitelist of types that don't cause validation. All except
			# 'search' are valid in XHTML1.
			$validTypes = array(
				'hidden',
				'text',
				'password',
				'checkbox',
				'radio',
				'file',
				'submit',
				'image',
				'reset',
				'button',
				'search',
			);
			if ( isset( $attribs['type'] )
				&& !in_array( $attribs['type'], $validTypes ) ) {
				unset( $attribs['type'] );
			}
			if ( isset( $attribs['type'] ) && $attribs['type'] == 'search'
				&& !$wgHtml5 ) {
				unset( $attribs['type'] );
			}
			# Here we're blacklisting some HTML5-only attributes...
			# Note that this runs whether or not $wgHtml5 is set.
			$html5attribs = array(
				'autocomplete',
				'autofocus',
				'max',
				'min',
				'multiple',
				'pattern',
				'placeholder',
				'required',
				'step',
				'spellcheck',
			);
			foreach ( $html5attribs as $badAttr ) {
				unset( $attribs[$badAttr] );
			}
		}
		if ( !$wgHtml5 && $element == 'textarea' && isset( $attribs['maxlength'] ) ) {
			unset( $attribs['maxlength'] );
		}

		return "<$element" . self::expandAttributes(
			self::dropDefaults( $element, $attribs ) ) . '>';
	}

	/**
	 * Remove attributes whose value equals the default listed for the
	 * element, so they need not be written out. Also removes an empty
	 * "class". Does nothing unless $wgHtml5 is set.
	 *
	 * @param $element string Element name (matched case-insensitively).
	 * @param $attribs array  Attributes, see expandAttributes().
	 * @return array The attribute array with default-valued entries removed.
	 */
	private static function dropDefaults( $element, $attribs ) {
		# Don't bother doing anything if we aren't outputting HTML5; it's too
		# much of a pain to maintain two sets of defaults.
		global $wgHtml5;
		if ( !$wgHtml5 ) {
			return $attribs;
		}

		# NOTE(review): 'action' => 'GET' and 'formaction' => 'GET' look like
		# they were meant to describe the method/formmethod default; as
		# written, an action of exactly "GET" is dropped. Left unchanged --
		# confirm before altering.
		static $attribDefaults = array(
			'area' => array( 'shape' => 'rect' ),
			'button' => array(
				'formaction' => 'GET',
				'formenctype' => 'application/x-www-form-urlencoded',
				'type' => 'submit',
			),
			'canvas' => array(
				'height' => '150',
				'width' => '300',
			),
			'command' => array( 'type' => 'command' ),
			'form' => array(
				'action' => 'GET',
				'autocomplete' => 'on',
				'enctype' => 'application/x-www-form-urlencoded',
			),
			'input' => array(
				'formaction' => 'GET',
				'type' => 'text',
				'value' => '',
			),
			'keygen' => array( 'keytype' => 'rsa' ),
			'link' => array( 'media' => 'all' ),
			'menu' => array( 'type' => 'list' ),
			# Note: the use of text/javascript here instead of other JavaScript
			# MIME types follows the HTML5 spec.
			'script' => array( 'type' => 'text/javascript' ),
			'style' => array(
				'media' => 'all',
				'type' => 'text/css',
			),
			'textarea' => array( 'wrap' => 'soft' ),
		);

		$element = strtolower( $element );

		foreach ( $attribs as $attrib => $value ) {
			$lcattrib = strtolower( $attrib );
			$value = strval( $value );

			# Simple checks using $attribDefaults
			if ( isset( $attribDefaults[$element][$lcattrib] ) &&
				$attribDefaults[$element][$lcattrib] == $value ) {
				unset( $attribs[$attrib] );
			}

			if ( $lcattrib == 'class' && $value == '' ) {
				unset( $attribs[$attrib] );
			}
		}

		# More subtle checks
		if ( $element === 'link' && isset( $attribs['type'] )
			&& strval( $attribs['type'] ) == 'text/css' ) {
			unset( $attribs['type'] );
		}
		if ( $element === 'select' && isset( $attribs['size'] ) ) {
			# The comparison must be strict: with a loose in_array(), any
			# attribute whose value is true (or 0 on older PHP) compares equal
			# to 'multiple' and the select is mistaken for a multi-select.
			$isMultiple = in_array( 'multiple', $attribs, true )
				|| ( isset( $attribs['multiple'] ) && $attribs['multiple'] !== false );

			# The default size is 4 for a multi-select and 1 otherwise.
			$defaultSize = $isMultiple ? '4' : '1';
			if ( strval( $attribs['size'] ) == $defaultSize ) {
				unset( $attribs['size'] );
			}
		}

		return $attribs;
	}

	/**
	 * Serialize an attribute array into a string that can be placed directly
	 * after an element name in an opening tag.
	 *
	 * Rules applied, in order, to each entry:
	 *   - a value of exactly false skips the attribute;
	 *   - an integer key whose value names a boolean attribute is treated
	 *     as that boolean attribute, so array( 'checked' ) works;
	 *   - the key is lowercased;
	 *   - max, min, pattern, required and step are always skipped (bug 23769);
	 *   - boolean attributes are written as "key", key="" or key="key"
	 *     depending on $wgWellFormedXml and $wgHtml5, ignoring the value;
	 *   - any other value is entity-escaped, and quoted when required.
	 *
	 * @param $attribs array Map of attribute name to value.
	 * @return string Empty, or the attributes each preceded by one space.
	 */
	public static function expandAttributes( $attribs ) {
		global $wgHtml5, $wgWellFormedXml;

		$ret = '';
		$attribs = (array)$attribs;
		foreach ( $attribs as $key => $value ) {
			if ( $value === false ) {
				continue;
			}

			# For boolean attributes, support array( 'foo' ) instead of
			# requiring array( 'foo' => 'meaningless' ).
			if ( is_int( $key )
				&& in_array( strtolower( $value ), self::$boolAttribs, true ) ) {
				$key = $value;
			}

			# Not technically required in HTML5, but required in XHTML 1.0,
			# and we'd like consistency and better compression anyway.
			$key = strtolower( $key );

			# Bug 23769: Blacklist all form validation attributes for now. Current
			# (June 2010) WebKit has no UI, so the form just refuses to submit
			# without telling the user why, which is much worse than failing
			# server-side validation. Opera is the only other implementation at
			# this time, and has ugly UI, so just kill the feature entirely until
			# we have at least one good implementation.
			if ( in_array( $key, array( 'max', 'min', 'pattern', 'required', 'step' ), true ) ) {
				continue;
			}

			# See the "Attributes" section in the HTML syntax part of HTML5,
			# 9.1.2.3 as of 2009-08-10. Most attributes can have quotation
			# marks omitted, but not all. (Although a literal " is not
			# permitted, we don't check for that, since it will be escaped
			# anyway.)
			#
			# See also research done on further characters that need to be
			# escaped: http://code.google.com/p/html5lib/issues/detail?id=93
			$badChars = "\\x00- '=<>`/\x{00a0}\x{1680}\x{180e}\x{180F}\x{2000}\x{2001}"
				. "\x{2002}\x{2003}\x{2004}\x{2005}\x{2006}\x{2007}\x{2008}\x{2009}"
				. "\x{200A}\x{2028}\x{2029}\x{202F}\x{205F}\x{3000}";
			if ( $wgWellFormedXml || $value === ''
				|| preg_match( "![$badChars]!u", $value ) ) {
				$quote = '"';
			} else {
				$quote = '';
			}

			if ( in_array( $key, self::$boolAttribs, true ) ) {
				# In XHTML 1.0 Transitional, the value needs to be equal to the
				# key. In HTML5, we can leave the value empty instead. If we
				# don't need well-formed XML, we can omit the = entirely.
				if ( !$wgWellFormedXml ) {
					$ret .= " $key";
				} elseif ( $wgHtml5 ) {
					$ret .= " $key=\"\"";
				} else {
					$ret .= " $key=\"$key\"";
				}
			} else {
				# Apparently we need to entity-encode \n, \r, \t, although the
				# spec doesn't mention that. Since we're doing strtr() anyway,
				# and we don't need <> escaped here, we may as well not call
				# htmlspecialchars(). FIXME: verify that we actually need to
				# escape \n\r\t here, and explain why, exactly.
				#
				# We could call Sanitizer::encodeAttribute() for this, but we
				# don't because we're stubborn and like our marginal savings on
				# byte size from not having to encode unnecessary quotes.
				#
				# The right-hand sides must be character references: mapping a
				# character to itself would leave '"' able to terminate the
				# quoted value.
				$map = array(
					'&' => '&amp;',
					'"' => '&quot;',
					"\n" => '&#10;',
					"\r" => '&#13;',
					"\t" => '&#9;'
				);
				if ( $wgWellFormedXml ) {
					# This is allowed per spec: <http://www.w3.org/TR/xml/#NT-AttValue>
					# But reportedly it breaks some XML tools? FIXME: is this
					# really true?
					$map['<'] = '&lt;';
				}
				$ret .= " $key=$quote" . strtr( $value, $map ) . $quote;
			}
		}
		return $ret;
	}

	/**
	 * Build a script element with the given code inline.
	 *
	 * When $wgWellFormedXml is set and the code contains "<" or "&", the
	 * code is wrapped in a CDATA section whose markers are hidden inside
	 * script comments. A "type" of $wgJsMimeType is added when $wgHtml5 is
	 * false.
	 *
	 * @param $contents string Script source; inserted unescaped.
	 * @return string Raw HTML
	 */
	public static function inlineScript( $contents ) {
		global $wgHtml5, $wgJsMimeType, $wgWellFormedXml;

		$attrs = array();
		if ( !$wgHtml5 ) {
			$attrs['type'] = $wgJsMimeType;
		}
		if ( $wgWellFormedXml && preg_match( '/[<&]/', $contents ) ) {
			$contents = "/*<![CDATA[*/$contents/*]]>*/";
		}
		return self::rawElement( 'script', $attrs, $contents );
	}

	/**
	 * Build an empty script element that loads the given URL via "src".
	 * A "type" of $wgJsMimeType is added when $wgHtml5 is false.
	 *
	 * @param $url string Value for the src attribute.
	 * @return string Raw HTML
	 */
	public static function linkedScript( $url ) {
		global $wgHtml5, $wgJsMimeType;

		$attrs = array( 'src' => $url );
		if ( !$wgHtml5 ) {
			$attrs['type'] = $wgJsMimeType;
		}
		return self::element( 'script', $attrs );
	}

	/**
	 * Build a style element with the given CSS inline.
	 *
	 * When $wgWellFormedXml is set and the CSS contains "<" or "&", the CSS
	 * is wrapped in a CDATA section whose markers are hidden inside CSS
	 * comments.
	 *
	 * @param $contents string CSS source; inserted unescaped.
	 * @param $media    string Value for the media attribute.
	 * @return string Raw HTML
	 */
	public static function inlineStyle( $contents, $media = 'all' ) {
		global $wgWellFormedXml;

		if ( $wgWellFormedXml && preg_match( '/[<&]/', $contents ) ) {
			$contents = "/*<![CDATA[*/$contents/*]]>*/";
		}
		return self::rawElement( 'style', array(
			'type' => 'text/css',
			'media' => $media,
		), $contents );
	}

	/**
	 * Build a link element with rel="stylesheet" pointing at the given URL.
	 *
	 * @param $url   string Value for the href attribute.
	 * @param $media string Value for the media attribute.
	 * @return string Raw HTML
	 */
	public static function linkedStyle( $url, $media = 'all' ) {
		return self::element( 'link', array(
			'rel' => 'stylesheet',
			'href' => $url,
			'type' => 'text/css',
			'media' => $media,
		) );
	}

	/**
	 * Build an input element. The $name, $value and $type arguments
	 * overwrite any "name", "value" or "type" entries in $attribs.
	 *
	 * @param $name    string Value for the name attribute.
	 * @param $value   string Value for the value attribute.
	 * @param $type    string Value for the type attribute.
	 * @param $attribs array  Further attributes, see expandAttributes().
	 * @return string Raw HTML
	 */
	public static function input( $name, $value = '', $type = 'text', $attribs = array() ) {
		$attribs['type'] = $type;
		$attribs['value'] = $value;
		$attribs['name'] = $name;

		return self::element( 'input', $attribs );
	}

	/**
	 * Build an input element of type "hidden"; shortcut for input().
	 *
	 * @param $name    string Value for the name attribute.
	 * @param $value   string Value for the value attribute.
	 * @param $attribs array  Further attributes, see expandAttributes().
	 * @return string Raw HTML
	 */
	public static function hidden( $name, $value, $attribs = array() ) {
		return self::input( $name, $value, 'hidden', $attribs );
	}

	/**
	 * Build a textarea element whose text content is escaped.
	 *
	 * When $wgHtml5 is false, "cols" and "rows" are set to the empty string
	 * if the caller did not supply them.
	 *
	 * @param $name    string Value for the name attribute; overwrites any
	 *   "name" entry in $attribs.
	 * @param $value   string Text content of the textarea.
	 * @param $attribs array  Further attributes, see expandAttributes().
	 * @return string Raw HTML
	 */
	public static function textarea( $name, $value = '', $attribs = array() ) {
		global $wgHtml5;
		$attribs['name'] = $name;
		if ( !$wgHtml5 ) {
			if ( !isset( $attribs['cols'] ) ) {
				$attribs['cols'] = "";
			}
			if ( !isset( $attribs['rows'] ) ) {
				$attribs['rows'] = "";
			}
		}
		return self::element( 'textarea', $attribs, $value );
	}
}