00001 <?php
00002 # Copyright (C) 2009 Aryeh Gregor
00003 # http://www.mediawiki.org/
00004 #
00005 # This program is free software; you can redistribute it and/or modify
00006 # it under the terms of the GNU General Public License as published by
00007 # the Free Software Foundation; either version 2 of the License, or
00008 # (at your option) any later version.
00009 #
00010 # This program is distributed in the hope that it will be useful,
00011 # but WITHOUT ANY WARRANTY; without even the implied warranty of
00012 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00013 # GNU General Public License for more details.
00014 #
00015 # You should have received a copy of the GNU General Public License along
00016 # with this program; if not, write to the Free Software Foundation, Inc.,
00017 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00018 # http://www.gnu.org/copyleft/gpl.html
00019
/**
 * Static helpers that build HTML/XHTML markup from an element name, an
 * attribute array and (optionally) contents.
 *
 * The exact output depends on these globals, which the methods read directly:
 *  - $wgHtml5: when false, HTML5-only input attributes/types are removed,
 *    default-valued attributes are not dropped, and script elements get an
 *    explicit type attribute.
 *  - $wgWellFormedXml: when true, void elements are self-closed, attribute
 *    values are always quoted, boolean attributes are given a value, '<' is
 *    escaped inside attribute values, and inline script/style contents that
 *    contain '<' or '&' are wrapped in a CDATA section.
 *  - $wgJsMimeType: the MIME type used for script elements when not
 *    outputting HTML5.
 */
class Html {
	# List of void elements from HTML5, section 9.1.2 as of 2009-08-10
	private static $voidElements = array(
		'area',
		'base',
		'br',
		'col',
		'command',
		'embed',
		'hr',
		'img',
		'input',
		'keygen',
		'link',
		'meta',
		'param',
		'source',
	);

	# Boolean attributes, which may have the value omitted entirely.  Manually
	# collected from the HTML5 spec as of 2009-08-10.
	private static $boolAttribs = array(
		'async',
		'autobuffer',
		'autofocus',
		'autoplay',
		'checked',
		'controls',
		'defer',
		'disabled',
		'formnovalidate',
		'hidden',
		'ismap',
		'loop',
		'multiple',
		'novalidate',
		'open',
		'readonly',
		'required',
		'reversed',
		'scoped',
		'seamless',
	);

	/**
	 * Build a complete element whose contents are inserted verbatim.
	 *
	 * The caller is responsible for escaping $contents; use element() if the
	 * contents are plain text.
	 *
	 * @param $element  string Element name; matched case-insensitively and
	 *   emitted in lower case.
	 * @param $attribs  array  Attributes, in the format accepted by
	 *   expandAttributes().
	 * @param $contents string Raw HTML placed between the tags.  Ignored for
	 *   void elements, which have no closing tag.
	 * @return string Raw HTML
	 */
	public static function rawElement( $element, $attribs = array(), $contents = '' ) {
		global $wgWellFormedXml;

		# openElement() lower-cases the name it prints, so normalise it here as
		# well.  Otherwise 'BR' would miss the (lower-case) void element list
		# and the closing tag would not match the opening one.
		$element = strtolower( $element );
		$start = self::openElement( $element, $attribs );

		if ( !in_array( $element, self::$voidElements, true ) ) {
			return "$start$contents</$element>";
		}

		if ( $wgWellFormedXml ) {
			# Silly XML: turn the trailing '>' into ' />'.
			return substr( $start, 0, -1 ) . ' />';
		}
		return $start;
	}

	/**
	 * Build a complete element whose contents are plain text.
	 *
	 * Identical to rawElement(), except that '&' and '<' in $contents are
	 * escaped first.
	 *
	 * @param $element  string Element name
	 * @param $attribs  array  Attributes, in the format accepted by
	 *   expandAttributes().
	 * @param $contents string Unescaped text for the element body
	 * @return string Raw HTML
	 */
	public static function element( $element, $attribs = array(), $contents = '' ) {
		# There's no point in escaping quotes, >, etc. in the contents of
		# elements; only the characters that can start markup matter.
		$escaped = strtr( $contents, array(
			'&' => '&amp;',
			'<' => '&lt;',
		) );
		return self::rawElement( $element, $attribs, $escaped );
	}

	/**
	 * Build only the opening tag of an element.
	 *
	 * Before the attributes are serialised, attributes that are unsuitable
	 * for the current output mode are removed and (in HTML5 mode) attributes
	 * that merely restate a default are dropped.
	 *
	 * @param $element string Element name; emitted in lower case.
	 * @param $attribs array  Attributes, in the format accepted by
	 *   expandAttributes().  Anything else is cast to an array.
	 * @return string The opening tag, always ending in '>'
	 */
	public static function openElement( $element, $attribs = array() ) {
		global $wgHtml5;
		$attribs = (array)$attribs;
		# This is not required in HTML5, but let's do it anyway, for
		# consistency and better compression.
		$element = strtolower( $element );

		# Remove HTML5-only attributes if we aren't doing HTML5, and disable
		# form validation regardless (see bug 23769 and the more detailed
		# comment in expandAttributes())
		if ( $element == 'input' ) {
			# Whitelist of types that don't cause validation.  All except
			# 'search' are valid in XHTML1.
			$validTypes = array(
				'hidden',
				'text',
				'password',
				'checkbox',
				'radio',
				'file',
				'submit',
				'image',
				'reset',
				'button',
				'search',
			);
			if ( isset( $attribs['type'] )
				&& !in_array( strval( $attribs['type'] ), $validTypes, true )
			) {
				unset( $attribs['type'] );
			}
			if ( !$wgHtml5 && isset( $attribs['type'] ) && $attribs['type'] == 'search' ) {
				unset( $attribs['type'] );
			}

			if ( !$wgHtml5 ) {
				# HTML5-only attributes; these are only a problem when we are
				# not outputting HTML5.  The validation-related ones (max, min,
				# pattern, required, step) are removed in every mode by
				# expandAttributes(), so harmless ones such as placeholder or
				# autofocus survive in HTML5 mode.
				$html5attribs = array(
					'autocomplete',
					'autofocus',
					'max',
					'min',
					'multiple',
					'pattern',
					'placeholder',
					'required',
					'step',
					'spellcheck',
				);
				foreach ( $html5attribs as $badAttr ) {
					unset( $attribs[$badAttr] );
				}
			}
		}
		if ( !$wgHtml5 && $element == 'textarea' && isset( $attribs['maxlength'] ) ) {
			unset( $attribs['maxlength'] );
		}

		return "<$element" . self::expandAttributes(
			self::dropDefaults( $element, $attribs ) ) . '>';
	}

	/**
	 * Remove attributes whose value equals the element's default, so that
	 * the output is shorter.  Also removes empty class attributes.
	 *
	 * Does nothing unless $wgHtml5 is true.
	 *
	 * @param $element string Element name
	 * @param $attribs array  Attributes, in the format accepted by
	 *   expandAttributes().
	 * @return array The attribute array with redundant entries removed
	 */
	private static function dropDefaults( $element, $attribs ) {
		# Don't bother doing anything if we aren't outputting HTML5; it's too
		# much of a pain to maintain two sets of defaults.
		global $wgHtml5;
		if ( !$wgHtml5 ) {
			return $attribs;
		}

		# Note that the attribute whose default is GET is (form)method, not
		# (form)action: the latter is a URL and must never be dropped just
		# because it happens to spell "GET".
		static $attribDefaults = array(
			'area' => array( 'shape' => 'rect' ),
			'button' => array(
				'formmethod' => 'get',
				'formenctype' => 'application/x-www-form-urlencoded',
				'type' => 'submit',
			),
			'canvas' => array(
				'height' => '150',
				'width' => '300',
			),
			'command' => array( 'type' => 'command' ),
			'form' => array(
				'method' => 'get',
				'autocomplete' => 'on',
				'enctype' => 'application/x-www-form-urlencoded',
			),
			'input' => array(
				'formmethod' => 'get',
				'type' => 'text',
				'value' => '',
			),
			'keygen' => array( 'keytype' => 'rsa' ),
			'link' => array( 'media' => 'all' ),
			'menu' => array( 'type' => 'list' ),
			# Note: the use of text/javascript here instead of other JavaScript
			# MIME types follows the HTML5 spec.
			'script' => array( 'type' => 'text/javascript' ),
			'style' => array(
				'media' => 'all',
				'type' => 'text/css',
			),
			'textarea' => array( 'wrap' => 'soft' ),
		);

		$element = strtolower( $element );

		foreach ( $attribs as $attrib => $value ) {
			$lcattrib = strtolower( $attrib );
			$value = strval( $value );

			# Simple checks using $attribDefaults.  The comparison is strict so
			# that numeric-looking strings such as '150.0' are not mistaken
			# for the default '150'.
			if ( isset( $attribDefaults[$element][$lcattrib] )
				&& $attribDefaults[$element][$lcattrib] === $value
			) {
				unset( $attribs[$attrib] );
			}

			if ( $lcattrib === 'class' && $value === '' ) {
				unset( $attribs[$attrib] );
			}
		}

		# More subtle checks
		if ( $element === 'link' && isset( $attribs['type'] )
			&& strval( $attribs['type'] ) === 'text/css'
		) {
			unset( $attribs['type'] );
		}
		if ( $element === 'select' && isset( $attribs['size'] ) ) {
			# The default size is 4 for a multi-select and 1 otherwise.
			$defaultSize = self::hasMultipleAttrib( $attribs ) ? '4' : '1';
			if ( strval( $attribs['size'] ) === $defaultSize ) {
				unset( $attribs['size'] );
			}
		}

		return $attribs;
	}

	/**
	 * Tell whether an attribute array will produce a "multiple" attribute.
	 *
	 * Attribute names are resolved the same way expandAttributes() resolves
	 * them: entries whose value is false are ignored, an integer key means
	 * the value is the attribute name, and names are case-insensitive.
	 *
	 * @param $attribs array Attributes, in the format accepted by
	 *   expandAttributes().
	 * @return bool
	 */
	private static function hasMultipleAttrib( $attribs ) {
		foreach ( $attribs as $key => $value ) {
			if ( $value === false ) {
				continue;
			}
			$name = is_int( $key ) ? $value : $key;
			if ( is_string( $name ) && strtolower( $name ) === 'multiple' ) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Serialise an attribute array into a string suitable for insertion
	 * after the element name in an opening tag.
	 *
	 * Rules applied to each entry:
	 *  - An entry whose value is exactly false is omitted.
	 *  - array( 'checked' ) is accepted as shorthand for a boolean attribute.
	 *  - Attribute names are lower-cased.
	 *  - The form validation attributes max, min, pattern, required and step
	 *    are always omitted (bug 23769).
	 *  - Boolean attributes are printed without a value where the output mode
	 *    allows it; the value supplied by the caller is ignored.
	 *  - Other values are escaped, and quoted when required.
	 *
	 * @param $attribs array Map of attribute name to value.  Anything else is
	 *   cast to an array.
	 * @return string Empty, or a string that begins with a space
	 */
	public static function expandAttributes( $attribs ) {
		global $wgHtml5, $wgWellFormedXml;

		# Bug 23769: Blacklist all form validation attributes for now.  Current
		# (June 2010) WebKit has no UI, so the form just refuses to submit
		# without telling the user why, which is much worse than failing
		# server-side validation.  Opera is the only other implementation at
		# this time, and has ugly UI, so just kill the feature entirely until
		# we have at least one good implementation.
		$validationAttribs = array( 'max', 'min', 'pattern', 'required', 'step' );

		# See the "Attributes" section in the HTML syntax part of HTML5,
		# 9.1.2.3 as of 2009-08-10.  Most attributes can have quotation
		# marks omitted, but not all.  (Although a literal " is not
		# permitted, we don't check for that, since it will be escaped
		# anyway.)
		#
		# See also research done on further characters that need to be
		# escaped: http://code.google.com/p/html5lib/issues/detail?id=93
		$badChars = "\\x00- '=<>`/\x{00a0}\x{1680}\x{180e}\x{180F}\x{2000}\x{2001}"
			. "\x{2002}\x{2003}\x{2004}\x{2005}\x{2006}\x{2007}\x{2008}\x{2009}"
			. "\x{200A}\x{2028}\x{2029}\x{202F}\x{205F}\x{3000}";

		# Apparently we need to entity-encode \n, \r, \t, although the
		# spec doesn't mention that.  Since we're doing strtr() anyway,
		# and we don't need <> escaped here, we may as well not call
		# htmlspecialchars().  FIXME: verify that we actually need to
		# escape \n\r\t here, and explain why, exactly.
		#
		# We could call Sanitizer::encodeAttribute() for this, but we
		# don't because we're stubborn and like our marginal savings on
		# byte size from not having to encode unnecessary quotes.
		$map = array(
			'&' => '&amp;',
			'"' => '&quot;',
			"\n" => '&#10;',
			"\r" => '&#13;',
			"\t" => '&#9;',
		);
		if ( $wgWellFormedXml ) {
			# This is allowed per spec: <http://www.w3.org/TR/xml/#NT-AttValue>
			# But reportedly it breaks some XML tools?  FIXME: is this
			# really true?
			$map['<'] = '&lt;';
		}

		$ret = '';
		foreach ( (array)$attribs as $key => $value ) {
			if ( $value === false ) {
				continue;
			}

			# For boolean attributes, support array( 'foo' ) instead of
			# requiring array( 'foo' => 'meaningless' ).
			if ( is_int( $key )
				&& in_array( strtolower( $value ), self::$boolAttribs, true )
			) {
				$key = $value;
			}

			# Not technically required in HTML5, but required in XHTML 1.0,
			# and we'd like consistency and better compression anyway.
			$key = strtolower( $key );

			if ( in_array( $key, $validationAttribs, true ) ) {
				continue;
			}

			if ( in_array( $key, self::$boolAttribs, true ) ) {
				# In XHTML 1.0 Transitional, the value needs to be equal to the
				# key.  In HTML5, we can leave the value empty instead.  If we
				# don't need well-formed XML, we can omit the = entirely.
				if ( !$wgWellFormedXml ) {
					$ret .= " $key";
				} elseif ( $wgHtml5 ) {
					$ret .= " $key=\"\"";
				} else {
					$ret .= " $key=\"$key\"";
				}
				continue;
			}

			$value = strval( $value );

			# preg_match() returns false (not 0) when it cannot process the
			# subject, e.g. for invalid UTF-8.  Quote in that case too: a
			# quoted value is always acceptable, an unquoted one might not be.
			$needsQuotes = $wgWellFormedXml
				|| $value === ''
				|| preg_match( "![$badChars]!u", $value ) !== 0;
			$quote = $needsQuotes ? '"' : '';

			$ret .= " $key=$quote" . strtr( $value, $map ) . $quote;
		}
		return $ret;
	}

	/**
	 * Build a script element containing the given code.
	 *
	 * A type attribute ($wgJsMimeType) is added unless outputting HTML5.  If
	 * $wgWellFormedXml is set and the code contains '<' or '&', the code is
	 * wrapped in a CDATA section hidden inside JavaScript comments.
	 *
	 * @param $contents string JavaScript, inserted without escaping
	 * @return string Raw HTML
	 */
	public static function inlineScript( $contents ) {
		global $wgHtml5, $wgJsMimeType, $wgWellFormedXml;

		$attrs = array();
		if ( !$wgHtml5 ) {
			$attrs['type'] = $wgJsMimeType;
		}
		if ( $wgWellFormedXml && preg_match( '/[<&]/', $contents ) ) {
			$contents = "/*<![CDATA[*/$contents/*]]>*/";
		}
		return self::rawElement( 'script', $attrs, $contents );
	}

	/**
	 * Build an empty script element that loads the given URL.
	 *
	 * A type attribute ($wgJsMimeType) is added unless outputting HTML5.
	 *
	 * @param $url string Value for the src attribute
	 * @return string Raw HTML
	 */
	public static function linkedScript( $url ) {
		global $wgHtml5, $wgJsMimeType;

		$attrs = array( 'src' => $url );
		if ( !$wgHtml5 ) {
			$attrs['type'] = $wgJsMimeType;
		}
		return self::element( 'script', $attrs );
	}

	/**
	 * Build a style element containing the given CSS.
	 *
	 * If $wgWellFormedXml is set and the CSS contains '<' or '&', it is
	 * wrapped in a CDATA section hidden inside CSS comments.
	 *
	 * @param $contents string CSS, inserted without escaping
	 * @param $media    string Value for the media attribute
	 * @return string Raw HTML
	 */
	public static function inlineStyle( $contents, $media = 'all' ) {
		global $wgWellFormedXml;

		if ( $wgWellFormedXml && preg_match( '/[<&]/', $contents ) ) {
			$contents = "/*<![CDATA[*/$contents/*]]>*/";
		}
		return self::rawElement( 'style', array(
			'type' => 'text/css',
			'media' => $media,
		), $contents );
	}

	/**
	 * Build a link element that references a stylesheet.
	 *
	 * @param $url   string Value for the href attribute
	 * @param $media string Value for the media attribute
	 * @return string Raw HTML
	 */
	public static function linkedStyle( $url, $media = 'all' ) {
		return self::element( 'link', array(
			'rel' => 'stylesheet',
			'href' => $url,
			'type' => 'text/css',
			'media' => $media,
		) );
	}

	/**
	 * Build an input element.
	 *
	 * The type, value and name arguments override any entries with the same
	 * keys in $attribs.
	 *
	 * @param $name    string Value for the name attribute
	 * @param $value   string Value for the value attribute
	 * @param $type    string Value for the type attribute
	 * @param $attribs array  Further attributes, in the format accepted by
	 *   expandAttributes().
	 * @return string Raw HTML
	 */
	public static function input( $name, $value = '', $type = 'text', $attribs = array() ) {
		$attribs['type'] = $type;
		$attribs['value'] = $value;
		$attribs['name'] = $name;

		return self::element( 'input', $attribs );
	}

	/**
	 * Build an input element of type "hidden".
	 *
	 * @param $name    string Value for the name attribute
	 * @param $value   string Value for the value attribute
	 * @param $attribs array  Further attributes, in the format accepted by
	 *   expandAttributes().
	 * @return string Raw HTML
	 */
	public static function hidden( $name, $value, $attribs = array() ) {
		return self::input( $name, $value, 'hidden', $attribs );
	}

	/**
	 * Build a textarea element.
	 *
	 * When not outputting HTML5, empty cols and rows attributes are added if
	 * the caller did not supply them.
	 *
	 * @param $name    string Value for the name attribute; overrides any
	 *   'name' entry in $attribs.
	 * @param $value   string Unescaped text for the textarea body
	 * @param $attribs array  Further attributes, in the format accepted by
	 *   expandAttributes().
	 * @return string Raw HTML
	 */
	public static function textarea( $name, $value = '', $attribs = array() ) {
		global $wgHtml5;

		$attribs['name'] = $name;
		if ( !$wgHtml5 ) {
			if ( !isset( $attribs['cols'] ) ) {
				$attribs['cols'] = "";
			}
			if ( !isset( $attribs['rows'] ) ) {
				$attribs['rows'] = "";
			}
		}
		return self::element( 'textarea', $attribs, $value );
	}
}