From ec1ca2236fc693ada675688d7e9ae19bd7d7a49f Mon Sep 17 00:00:00 2001 From: azaozz Date: Sun, 4 Jan 2009 23:37:47 +0000 Subject: [PATCH] Latest version of the patch for refactor filters to avoid potential XSS attacks, props sambauers and DD32, see #8767 git-svn-id: http://svn.automattic.com/wordpress/trunk@10298 1a063a9b-81f0-0310-95a4-ce76da25c4cd --- wp-includes/compat.php | 19 +++-- wp-includes/formatting.php | 151 +++++++++++++++++++++---------------- 2 files changed, 101 insertions(+), 69 deletions(-) diff --git a/wp-includes/compat.php b/wp-includes/compat.php index d4ef4f11e..47e6c8ba7 100644 --- a/wp-includes/compat.php +++ b/wp-includes/compat.php @@ -98,11 +98,20 @@ function _mb_strcut( $str, $start, $length=null, $encoding=null ) { if ( !function_exists( 'htmlspecialchars_decode' ) ) { // Added in PHP 5.1.0 - // from php.net (modified by Sam Bauers to deal with some quirks in HTML_SPECIALCHARS constant) - function htmlspecialchars_decode( $str, $quote_style = ENT_COMPAT ) { - $table = array_flip( get_html_translation_table( HTML_SPECIALCHARS, $quote_style ) ); - $table = array_merge( array( ''' => "'" ), $table, array( '&' => "&", '&' => "&" ) ); - return strtr( $str, $table ); + // Error checks from PEAR::PHP_Compat + function htmlspecialchars_decode( $str, $quote_style = ENT_COMPAT ) + { + if ( !is_scalar( $string ) ) { + trigger_error( 'htmlspecialchars_decode() expects parameter 1 to be string, ' . gettype( $string ) . ' given', E_USER_WARNING ); + return; + } + + if ( !is_int( $quote_style ) && $quote_style !== null ) { + trigger_error( 'htmlspecialchars_decode() expects parameter 2 to be integer, ' . gettype( $quote_style ) . ' given', E_USER_WARNING ); + return; + } + + return wp_specialchars_decode( $str, $quote_style ); } } diff --git a/wp-includes/formatting.php b/wp-includes/formatting.php index a0f7c9108..dacef9227 100644 --- a/wp-includes/formatting.php +++ b/wp-includes/formatting.php @@ -186,7 +186,7 @@ function seems_utf8($Str) { # by bmorel at ssi dot fr /** * Converts a number of special characters into their HTML entities. * - * Specifically changes: & to &, < to < and > to >. + * Specifically deals with: &, <, >, ", and '. * * $quote_style can be set to ENT_COMPAT to encode " to * ", or ENT_QUOTES to do both. Default is ENT_NOQUOTES where no quotes are encoded. @@ -199,7 +199,10 @@ function seems_utf8($Str) { # by bmorel at ssi dot fr * @param boolean $double_encode Optional. Whether or not to encode existing html entities. Default is false. * @return string The encoded text with HTML entities. */ -function wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = false, $double_encode = false ) { +function wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = false, $double_encode = false ) +{ + $string = (string) $string; + if ( 0 === strlen( $string ) ) { return ''; } @@ -212,50 +215,46 @@ function wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = false $charset = 'UTF-8'; } - // Backwards compatibility switch ( $quote_style ) { - // Handle expected values first for speed - case ENT_NOQUOTES: - $_quote_style = ENT_NOQUOTES; - break; - case ENT_COMPAT: - $_quote_style = ENT_COMPAT; - break; case ENT_QUOTES: + default: + $quote_style = ENT_QUOTES; $_quote_style = ENT_QUOTES; break; - // Old values + case ENT_COMPAT: + case 'double': + $quote_style = ENT_COMPAT; + $_quote_style = ENT_COMPAT; + break; + case 'single': + $quote_style = ENT_NOQUOTES; + $_quote_style = 'single'; + break; + case ENT_NOQUOTES: case false: case 0: case '': case null: - case 'single': + $quote_style = ENT_NOQUOTES; $_quote_style = ENT_NOQUOTES; break; - case 'double': - $_quote_style = ENT_COMPAT; - break; - default: - $_quote_style = ENT_QUOTES; - break; } - if ( version_compare( PHP_VERSION, '5.2.3', '>=' ) ) { - $string = htmlspecialchars( $string, $_quote_style, $charset, $double_encode ); - } else { - // Handle double encoding for PHP versions that don't support it in htmlspecialchars() - if ( !$double_encode ) { - $string = htmlspecialchars_decode( $string, $_quote_style ); - // Backwards compatibility - if ( 'single' === $quote_style ) { - $string = str_replace( array( ''', ''' ), "'", $string ); - } - } - $string = htmlspecialchars( $string, $_quote_style, $charset ); + // Handle double encoding ourselves + if ( !$double_encode ) { + $string = wp_specialchars_decode( $string, $_quote_style ); + $string = preg_replace( '/&(#?x?[0-9]+|[a-z]+);/i', '|wp_entity|$1|/wp_entity|', $string ); + } + + $string = htmlspecialchars( $string, $quote_style, $charset ); + + // Handle double encoding ourselves + if ( !$double_encode ) { + $string = str_replace( array( '|wp_entity|', '|/wp_entity|' ), array( '&', ';' ), $string ); } // Backwards compatibility - if ( 'single' === $quote_style ) { + if ( 'single' === $_quote_style ) { $string = str_replace( "'", ''', $string ); } @@ -263,45 +262,65 @@ function wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = false } /** - * Converts all special characters into their HTML entities. + * Converts a number of HTML entities into their special characters. * - * $quote_style can be set to ENT_COMPAT to encode " to - * ", or ENT_QUOTES to do both. Default is ENT_NOQUOTES where no quotes are encoded. + * Specifically deals with: &, <, >, ", and '. + * + * $quote_style can be set to ENT_COMPAT to decode " entities, + * or ENT_QUOTES to do both " and '. Default is ENT_NOQUOTES where no quotes are decoded. * * @since 2.8 * - * @param string $string The text which is to be encoded. - * @param mixed $quote_style Optional. Converts double quotes if set to ENT_COMPAT, both single and double if set to ENT_QUOTES or none if set to ENT_NOQUOTES. Default is ENT_NOQUOTES. - * @param string $charset Optional. The character encoding of the string. Default is false. - * @param boolean $double_encode Optional. Whether or not to encode existing html entities. Default is false. - * @return string The encoded text with HTML entities. + * @param string $string The text which is to be decoded. + * @param mixed $quote_style Optional. Converts double quotes if set to ENT_COMPAT, both single and double if set to ENT_QUOTES or none if set to ENT_NOQUOTES. Also compatible with old wp_specialchars() values; converting single quotes if set to 'single', double if set to 'double' or both if otherwise set. Default is ENT_NOQUOTES. + * @return string The decoded text without HTML entities. */ -function wp_entities( $string, $quote_style = ENT_NOQUOTES, $charset = false, $double_encode = false ) { +function wp_specialchars_decode( $string, $quote_style = ENT_NOQUOTES ) +{ + $string = (string) $string; + if ( 0 === strlen( $string ) ) { return ''; } - if ( !$charset ) { - $charset = get_option( 'blog_charset' ); - } - if ( in_array( $charset, array( 'utf8', 'utf-8', 'UTF8' ) ) ) { - $charset = 'UTF-8'; + // More complete than get_html_translation_table( HTML_SPECIALCHARS ) + $single = array( ''' => '\'', ''' => '\'' ); + $single_preg = array( '/�*39;/' => ''', '/�*27;/i' => ''' ); + $double = array( '"' => '"', '"' => '"', '"' => '"' ); + $double_preg = array( '/�*34;/' => '"', '/�*22;/i' => '"' ); + $others = array( '<' => '<', '<' => '<', '>' => '>', '>' => '>', '&' => '&', '&' => '&', '&' => '&' ); + $others_preg = array( '/�*60;/' => '<', '/�*62;/' => '>', '/�*38;/' => '&', '/�*26;/i' => '&' ); + + switch ( $quote_style ) { + case ENT_QUOTES: + default: + $translation = array_merge( $single, $double, $others ); + $translation_preg = array_merge( $single_preg, $double_preg, $others_preg ); + break; + case ENT_COMPAT: + case 'double': + $translation = array_merge( $double, $others ); + $translation_preg = array_merge( $double_preg, $others_preg ); + break; + case 'single': + $translation = array_merge( $single, $others ); + $translation_preg = array_merge( $single_preg, $others_preg ); + break; + case ENT_NOQUOTES: + case false: + case 0: + case '': + case null: + $translation = $others; + $translation_preg = $others_preg; + break; } - if ( version_compare( PHP_VERSION, '5.2.3', '>=' ) ) { - $string = htmlentities( $string, $quote_style, $charset, $double_encode ); - } else { - // Handle double encoding for PHP versions that don't support it in htmlentities() - if ( !$double_encode ) { - // Multi-byte charsets are not supported below PHP 5.0.0 - // 'cp866', 'cp1251', 'KOI8-R' charsets are not supported below PHP 4.3.2 - $string = html_entity_decode( $string, $quote_style, $charset ); - } - // 'cp866', 'cp1251', 'KOI8-R' charsets are not supported below PHP 4.3.2 - $string = htmlentities( $string, $quote_style, $charset ); - } + // Remove zero padding on numeric entities + $string = preg_replace( array_keys( $translation_preg ), array_values( $translation_preg ), $string ); - return $string; + // Replace characters according to translation table + return strtr( $string, $translation ); } /** @@ -313,7 +332,10 @@ function wp_entities( $string, $quote_style = ENT_NOQUOTES, $charset = false, $d * @param boolean $strip Optional. Whether to attempt to strip out invalid UTF8. Default is false. * @return string The checked text. */ -function wp_check_invalid_utf8( $string, $strip = false ) { +function wp_check_invalid_utf8( $string, $strip = false ) +{ + $string = (string) $string; + if ( 0 === strlen( $string ) ) { return ''; } @@ -1860,10 +1882,11 @@ function htmlentities2($myHTML) { * @return string Escaped text. */ function js_escape($text) { - $safe_text = wp_specialchars($text, 'double'); - $safe_text = preg_replace('/&#(x)?0*(?(1)27|39);?/i', "'", stripslashes($safe_text)); - $safe_text = preg_replace("/\r?\n/", "\\n", addslashes($safe_text)); - return apply_filters('js_escape', $safe_text, $text); + $safe_text = wp_check_invalid_utf8( $text ); + $safe_text = wp_specialchars( $safe_text, ENT_COMPAT ); + $safe_text = preg_replace( '/&#(x)?0*(?(1)27|39);?/i', "'", stripslashes( $safe_text ) ); + $safe_text = preg_replace( "/\r?\n/", "\\n", addslashes( $safe_text ) ); + return apply_filters( 'js_escape', $safe_text, $text ); } /** @@ -1876,7 +1899,7 @@ function js_escape($text) { */ function attribute_escape( $text ) { $safe_text = wp_check_invalid_utf8( $text ); - $safe_text = wp_entities( $safe_text, ENT_QUOTES ); + $safe_text = wp_specialchars( $safe_text, ENT_QUOTES ); return apply_filters( 'attribute_escape', $safe_text, $text ); }