Latest version of the patch for refactor filters to avoid potential XSS attacks, props sambauers and DD32, see #8767

git-svn-id: http://svn.automattic.com/wordpress/trunk@10298 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
azaozz 2009-01-04 23:37:47 +00:00
parent 19848b9d90
commit ec1ca2236f
2 changed files with 101 additions and 69 deletions

View File

@ -98,11 +98,20 @@ function _mb_strcut( $str, $start, $length=null, $encoding=null ) {
if ( !function_exists( 'htmlspecialchars_decode' ) ) { if ( !function_exists( 'htmlspecialchars_decode' ) ) {
// Added in PHP 5.1.0 // Added in PHP 5.1.0
// from php.net (modified by Sam Bauers to deal with some quirks in HTML_SPECIALCHARS constant) // Error checks from PEAR::PHP_Compat
function htmlspecialchars_decode( $str, $quote_style = ENT_COMPAT ) { function htmlspecialchars_decode( $str, $quote_style = ENT_COMPAT )
$table = array_flip( get_html_translation_table( HTML_SPECIALCHARS, $quote_style ) ); {
$table = array_merge( array( '&#039;' => "'" ), $table, array( '&#38;' => "&", '&#038;' => "&" ) ); if ( !is_scalar( $string ) ) {
return strtr( $str, $table ); trigger_error( 'htmlspecialchars_decode() expects parameter 1 to be string, ' . gettype( $string ) . ' given', E_USER_WARNING );
return;
}
if ( !is_int( $quote_style ) && $quote_style !== null ) {
trigger_error( 'htmlspecialchars_decode() expects parameter 2 to be integer, ' . gettype( $quote_style ) . ' given', E_USER_WARNING );
return;
}
return wp_specialchars_decode( $str, $quote_style );
} }
} }

View File

@ -186,7 +186,7 @@ function seems_utf8($Str) { # by bmorel at ssi dot fr
/** /**
* Converts a number of special characters into their HTML entities. * Converts a number of special characters into their HTML entities.
* *
* Specifically changes: & to &#038;, < to &lt; and > to &gt;. * Specifically deals with: &, <, >, ", and '.
* *
* $quote_style can be set to ENT_COMPAT to encode " to * $quote_style can be set to ENT_COMPAT to encode " to
* &quot;, or ENT_QUOTES to do both. Default is ENT_NOQUOTES where no quotes are encoded. * &quot;, or ENT_QUOTES to do both. Default is ENT_NOQUOTES where no quotes are encoded.
@ -199,7 +199,10 @@ function seems_utf8($Str) { # by bmorel at ssi dot fr
* @param boolean $double_encode Optional. Whether or not to encode existing html entities. Default is false. * @param boolean $double_encode Optional. Whether or not to encode existing html entities. Default is false.
* @return string The encoded text with HTML entities. * @return string The encoded text with HTML entities.
*/ */
function wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = false, $double_encode = false ) { function wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = false, $double_encode = false )
{
$string = (string) $string;
if ( 0 === strlen( $string ) ) { if ( 0 === strlen( $string ) ) {
return ''; return '';
} }
@ -212,50 +215,46 @@ function wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = false
$charset = 'UTF-8'; $charset = 'UTF-8';
} }
// Backwards compatibility
switch ( $quote_style ) { switch ( $quote_style ) {
// Handle expected values first for speed
case ENT_NOQUOTES:
$_quote_style = ENT_NOQUOTES;
break;
case ENT_COMPAT:
$_quote_style = ENT_COMPAT;
break;
case ENT_QUOTES: case ENT_QUOTES:
default:
$quote_style = ENT_QUOTES;
$_quote_style = ENT_QUOTES; $_quote_style = ENT_QUOTES;
break; break;
// Old values case ENT_COMPAT:
case 'double':
$quote_style = ENT_COMPAT;
$_quote_style = ENT_COMPAT;
break;
case 'single':
$quote_style = ENT_NOQUOTES;
$_quote_style = 'single';
break;
case ENT_NOQUOTES:
case false: case false:
case 0: case 0:
case '': case '':
case null: case null:
case 'single': $quote_style = ENT_NOQUOTES;
$_quote_style = ENT_NOQUOTES; $_quote_style = ENT_NOQUOTES;
break; break;
case 'double':
$_quote_style = ENT_COMPAT;
break;
default:
$_quote_style = ENT_QUOTES;
break;
} }
if ( version_compare( PHP_VERSION, '5.2.3', '>=' ) ) { // Handle double encoding ourselves
$string = htmlspecialchars( $string, $_quote_style, $charset, $double_encode ); if ( !$double_encode ) {
} else { $string = wp_specialchars_decode( $string, $_quote_style );
// Handle double encoding for PHP versions that don't support it in htmlspecialchars() $string = preg_replace( '/&(#?x?[0-9]+|[a-z]+);/i', '|wp_entity|$1|/wp_entity|', $string );
if ( !$double_encode ) { }
$string = htmlspecialchars_decode( $string, $_quote_style );
// Backwards compatibility $string = htmlspecialchars( $string, $quote_style, $charset );
if ( 'single' === $quote_style ) {
$string = str_replace( array( '&#039;', '&#39;' ), "'", $string ); // Handle double encoding ourselves
} if ( !$double_encode ) {
} $string = str_replace( array( '|wp_entity|', '|/wp_entity|' ), array( '&', ';' ), $string );
$string = htmlspecialchars( $string, $_quote_style, $charset );
} }
// Backwards compatibility // Backwards compatibility
if ( 'single' === $quote_style ) { if ( 'single' === $_quote_style ) {
$string = str_replace( "'", '&#039;', $string ); $string = str_replace( "'", '&#039;', $string );
} }
@ -263,45 +262,65 @@ function wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = false
} }
/** /**
* Converts all special characters into their HTML entities. * Converts a number of HTML entities into their special characters.
* *
* $quote_style can be set to ENT_COMPAT to encode " to * Specifically deals with: &, <, >, ", and '.
* &quot;, or ENT_QUOTES to do both. Default is ENT_NOQUOTES where no quotes are encoded. *
* $quote_style can be set to ENT_COMPAT to decode " entities,
* or ENT_QUOTES to do both " and '. Default is ENT_NOQUOTES where no quotes are decoded.
* *
* @since 2.8 * @since 2.8
* *
* @param string $string The text which is to be encoded. * @param string $string The text which is to be decoded.
* @param mixed $quote_style Optional. Converts double quotes if set to ENT_COMPAT, both single and double if set to ENT_QUOTES or none if set to ENT_NOQUOTES. Default is ENT_NOQUOTES. * @param mixed $quote_style Optional. Converts double quotes if set to ENT_COMPAT, both single and double if set to ENT_QUOTES or none if set to ENT_NOQUOTES. Also compatible with old wp_specialchars() values; converting single quotes if set to 'single', double if set to 'double' or both if otherwise set. Default is ENT_NOQUOTES.
* @param string $charset Optional. The character encoding of the string. Default is false. * @return string The decoded text without HTML entities.
* @param boolean $double_encode Optional. Whether or not to encode existing html entities. Default is false.
* @return string The encoded text with HTML entities.
*/ */
function wp_entities( $string, $quote_style = ENT_NOQUOTES, $charset = false, $double_encode = false ) { function wp_specialchars_decode( $string, $quote_style = ENT_NOQUOTES )
{
$string = (string) $string;
if ( 0 === strlen( $string ) ) { if ( 0 === strlen( $string ) ) {
return ''; return '';
} }
if ( !$charset ) { // More complete than get_html_translation_table( HTML_SPECIALCHARS )
$charset = get_option( 'blog_charset' ); $single = array( '&#039;' => '\'', '&#x27;' => '\'' );
} $single_preg = array( '/&#0*39;/' => '&#039;', '/&#x0*27;/i' => '&#x27;' );
if ( in_array( $charset, array( 'utf8', 'utf-8', 'UTF8' ) ) ) { $double = array( '&quot;' => '"', '&#034;' => '"', '&#x22;' => '"' );
$charset = 'UTF-8'; $double_preg = array( '/&#0*34;/' => '&#034;', '/&#x0*22;/i' => '&#x22;' );
$others = array( '&lt;' => '<', '&#060;' => '<', '&gt;' => '>', '&#062;' => '>', '&amp;' => '&', '&#038;' => '&', '&#x26;' => '&' );
$others_preg = array( '/&#0*60;/' => '&#060;', '/&#0*62;/' => '&#062;', '/&#0*38;/' => '&#038;', '/&#x0*26;/i' => '&#x26;' );
switch ( $quote_style ) {
case ENT_QUOTES:
default:
$translation = array_merge( $single, $double, $others );
$translation_preg = array_merge( $single_preg, $double_preg, $others_preg );
break;
case ENT_COMPAT:
case 'double':
$translation = array_merge( $double, $others );
$translation_preg = array_merge( $double_preg, $others_preg );
break;
case 'single':
$translation = array_merge( $single, $others );
$translation_preg = array_merge( $single_preg, $others_preg );
break;
case ENT_NOQUOTES:
case false:
case 0:
case '':
case null:
$translation = $others;
$translation_preg = $others_preg;
break;
} }
if ( version_compare( PHP_VERSION, '5.2.3', '>=' ) ) { // Remove zero padding on numeric entities
$string = htmlentities( $string, $quote_style, $charset, $double_encode ); $string = preg_replace( array_keys( $translation_preg ), array_values( $translation_preg ), $string );
} else {
// Handle double encoding for PHP versions that don't support it in htmlentities()
if ( !$double_encode ) {
// Multi-byte charsets are not supported below PHP 5.0.0
// 'cp866', 'cp1251', 'KOI8-R' charsets are not supported below PHP 4.3.2
$string = html_entity_decode( $string, $quote_style, $charset );
}
// 'cp866', 'cp1251', 'KOI8-R' charsets are not supported below PHP 4.3.2
$string = htmlentities( $string, $quote_style, $charset );
}
return $string; // Replace characters according to translation table
return strtr( $string, $translation );
} }
/** /**
@ -313,7 +332,10 @@ function wp_entities( $string, $quote_style = ENT_NOQUOTES, $charset = false, $d
* @param boolean $strip Optional. Whether to attempt to strip out invalid UTF8. Default is false. * @param boolean $strip Optional. Whether to attempt to strip out invalid UTF8. Default is false.
* @return string The checked text. * @return string The checked text.
*/ */
function wp_check_invalid_utf8( $string, $strip = false ) { function wp_check_invalid_utf8( $string, $strip = false )
{
$string = (string) $string;
if ( 0 === strlen( $string ) ) { if ( 0 === strlen( $string ) ) {
return ''; return '';
} }
@ -1860,10 +1882,11 @@ function htmlentities2($myHTML) {
* @return string Escaped text. * @return string Escaped text.
*/ */
function js_escape($text) { function js_escape($text) {
$safe_text = wp_specialchars($text, 'double'); $safe_text = wp_check_invalid_utf8( $text );
$safe_text = preg_replace('/&#(x)?0*(?(1)27|39);?/i', "'", stripslashes($safe_text)); $safe_text = wp_specialchars( $safe_text, ENT_COMPAT );
$safe_text = preg_replace("/\r?\n/", "\\n", addslashes($safe_text)); $safe_text = preg_replace( '/&#(x)?0*(?(1)27|39);?/i', "'", stripslashes( $safe_text ) );
return apply_filters('js_escape', $safe_text, $text); $safe_text = preg_replace( "/\r?\n/", "\\n", addslashes( $safe_text ) );
return apply_filters( 'js_escape', $safe_text, $text );
} }
/** /**
@ -1876,7 +1899,7 @@ function js_escape($text) {
*/ */
function attribute_escape( $text ) { function attribute_escape( $text ) {
$safe_text = wp_check_invalid_utf8( $text ); $safe_text = wp_check_invalid_utf8( $text );
$safe_text = wp_entities( $safe_text, ENT_QUOTES ); $safe_text = wp_specialchars( $safe_text, ENT_QUOTES );
return apply_filters( 'attribute_escape', $safe_text, $text ); return apply_filters( 'attribute_escape', $safe_text, $text );
} }