From 6a0af8f3b4ea985c8dca1a1d5fa78e9d3cf1210c Mon Sep 17 00:00:00 2001 From: physikerwelt Date: Fri, 3 Jan 2014 14:29:03 +0000 Subject: [PATCH] Validate TeX input for all renderers, not just texvc The user input specified in the math tag a. la E=m ^2 is verified in PNG rendering mode, but not in plaintext, MathJax or LaTeXML rendering mode. This is a potential security issue. Furthermore, the texvc specific commands such as $\reals$ that is expanded to $\mathbb{R}$ might be rendered differently depended on the rendering mode. Therefore, the security checking and rewriting portion of texvc have been extracted from the texvc source (see I1650e6ec2ccefff6335fbc36bbe8ca8f59db0faa) and are now available as a separate executable (texvccheck). This commit will now enable this enhancement in security and provide even more compatibility among the different rendering modes. Bug: 49169 Change-Id: Ida24b6bf339508753bed40d2e218c4a5b7fe7d0c --- Math.hooks.php | 11 ++- Math.php | 12 +++ MathInputCheck.php | 51 +++++++++++ MathInputCheckTexvc.php | 90 +++++++++++++++++++ MathRenderer.php | 30 ++++++- MathTexvc.php | 22 +---- tests/MathInputCheckTest.php | 33 +++++++ tests/MathInputCheckTexvcTest.php | 141 ++++++++++++++++++++++++++++++ 8 files changed, 369 insertions(+), 21 deletions(-) create mode 100644 MathInputCheck.php create mode 100644 MathInputCheckTexvc.php create mode 100644 tests/MathInputCheckTest.php create mode 100644 tests/MathInputCheckTexvcTest.php diff --git a/Math.hooks.php b/Math.hooks.php index 3c26d1f..3a3147a 100644 --- a/Math.hooks.php +++ b/Math.hooks.php @@ -42,7 +42,7 @@ class MathHooks { * @return string */ static function mathTagHook( $content, $attributes, $parser ) { - global $wgContLang, $wgUseMathJax; + global $wgMathDisableTexFilter, $wgContLang, $wgUseMathJax; if ( trim( $content ) === "" ) { // bug 8372 return ""; } @@ -51,6 +51,15 @@ class MathHooks { $renderer = MathRenderer::getRenderer( $content, $attributes, $mode ); + + if ( !$wgMathDisableTexFilter ) { + $checkResult = $renderer->checkTex(); + if ( $checkResult !== true ){ + // returns the error message + return $renderer->getLastError(); + } + } + $renderedMath = $renderer->render(); if ( $wgUseMathJax && $mode == MW_MATH_MATHJAX ) { $parser->getOutput()->addModules( array( 'ext.math.mathjax.enabler' ) ); diff --git a/Math.php b/Math.php index 57398e5..8a0cc3c 100644 --- a/Math.php +++ b/Math.php @@ -132,6 +132,16 @@ $wgLaTeXMLTimeout = 240; * See http://dlmf.nist.gov/LaTeXML/manual/commands/latexmlpost.xhtml for details. */ $wgDefaultLaTeXMLSetting = 'format=xhtml&whatsin=math&whatsout=math&pmml&cmml&nodefaultresources&preload=LaTeX.pool&preload=article.cls&preload=amsmath.sty&preload=amsthm.sty&preload=amstext.sty&preload=amssymb.sty&preload=eucal.sty&preload=[dvipsnames]xcolor.sty&preload=url.sty&preload=hyperref.sty&preload=[ids]latexml.sty&preload=texvc'; +/** + * The link to the texvc executable + */ +$wgMathTexvcCheckExecutable = dirname( __FILE__ ) . '/texvccheck/texvccheck'; +/** + * Option to disable the tex filter. If set to true any LaTeX espression is parsed + * this can be a potential security risk. If set to false only a subset of the TeX + * commands is allowed. See the wikipedia page Help:Math for details. + */ +$wgMathDisableTexFilter = false; ////////// end of config settings. $wgDefaultUserOptions['math'] = MW_MATH_PNG; @@ -150,6 +160,8 @@ $wgAutoloadClasses['MathRenderer'] = $dir . 'MathRenderer.php'; $wgAutoloadClasses['MathTexvc'] = $dir . 'MathTexvc.php'; $wgAutoloadClasses['MathSource'] = $dir . 'MathSource.php'; $wgAutoloadClasses['MathLaTeXML'] = $dir . 'MathLaTeXML.php'; +$wgAutoloadClasses['MathInputCheck'] = $dir . 'MathInputCheck.php'; +$wgAutoloadClasses['MathInputCheckTexvc'] = $dir . 'MathInputCheckTexvc.php'; $wgExtensionMessagesFiles['Math'] = $dir . 'Math.i18n.php'; $wgParserTestFiles[] = $dir . 'mathParserTests.txt'; diff --git a/MathInputCheck.php b/MathInputCheck.php new file mode 100644 index 0000000..04b3065 --- /dev/null +++ b/MathInputCheck.php @@ -0,0 +1,51 @@ +inputTeX = $tex; + $this->isValid = false; + } + + /** + * Returns true if the TeX input String is valid + * @return boolean + */ + public function isValid() { + return $this->isValid; + } + + /** + * Returns the string of the last error. + * @return string + */ + public function getError() { + return $this->lastError; + } + + /** + * Some TeX checking programs may return + * a modified tex string after having checked it. + * You can get the altered tex string with this method + * @return string A valid Tex string + */ + public function getValidTex() { + return $this->validTeX; + } +} diff --git a/MathInputCheckTexvc.php b/MathInputCheckTexvc.php new file mode 100644 index 0000000..0f426be --- /dev/null +++ b/MathInputCheckTexvc.php @@ -0,0 +1,90 @@ +inputTeX ); + } + + switch ($texvcStatus) { + case 'E': + $errMsg = $errorRenderer->getError( 'math_lexing_error' ); + break; + case 'S': + $errMsg = $errorRenderer->getError( 'math_syntax_error' ); + break; + case 'F': + $errMsg = $errorRenderer->getError( 'math_unknown_function', $errDetails ); + break; + default: + $errMsg = $errorRenderer->getError( 'math_unknown_error' ); + } + + return $errMsg; + } + + /** + * + * @global type $wgTexvc + * @return boolean + */ + public function isValid() { + global $wgMathTexvcCheckExecutable; + if ( !is_executable( $wgMathTexvcCheckExecutable ) ) { + $msg = wfMessage( 'math_notexvc' )->inContentLanguage()->escaped(); + trigger_error( $msg, E_USER_NOTICE ); + wfDebugLog( 'Math', $msg ); + return true; + } + + $cmd = $wgMathTexvcCheckExecutable . ' ' . wfEscapeShellArg( $this->inputTeX ); + + if ( wfIsWindows() ) { + # Invoke it within cygwin sh, because texvc expects sh features in its default shell + $cmd = 'sh -c ' . wfEscapeShellArg($cmd); + } + + wfDebugLog( 'Math', "TeX check command: $cmd\n" ); + $contents = wfShellExec( $cmd ); + wfDebugLog( 'Math', "TeX check result:\n $contents\n---\n" ); + + if ( strlen($contents) === 0 ) { + wfDebugLog( 'Math', "TeX check output was empty. \n" ); + $this->lastError = MathRenderer::getError( 'math_unknown_error' ); + + return false; + } + + $retval = substr( $contents, 0, 1 ); + + if ( $retval !== '+' ) { + $this->lastError = $this->convertTexvcError( $contents ); + wfDebugLog( 'Math', 'checkTex failed:' . $this->lastError ); + + return false; + } else { + $this->validTeX = substr( $contents, 1 ); + $this->isSecure = true; + wfDebugLog( 'Math', 'checkTex successful tex is now: ' . $this->validTeX ); + + return true; + } + } + +} diff --git a/MathRenderer.php b/MathRenderer.php index 1f601d0..2f0b16c 100644 --- a/MathRenderer.php +++ b/MathRenderer.php @@ -27,6 +27,8 @@ abstract class MathRenderer { */ protected $mode = MW_MATH_PNG; protected $tex = ''; + /** @var string the original user input string (which was used to caculate the inputhash) */ + protected $userInputTex = ''; /** * is calculated by texvc. * @var string @@ -36,6 +38,9 @@ abstract class MathRenderer { protected $mathml = ''; protected $conservativeness = 0; protected $params = ''; + //STATE OF THE CLASS INSTANCE + /** @var boolean has variable tex been security-checked */ + protected $texSecure = false; protected $changed = false; /** * @var boolean forces rerendering if set to true @@ -51,6 +56,7 @@ abstract class MathRenderer { * @param array $params (optional) HTML attributes */ public function __construct( $tex = '', $params = array() ) { + $this->userInputTex = $tex; $this->tex = $tex; $this->params = $params; } @@ -114,7 +120,7 @@ abstract class MathRenderer { * @param Varargs $parameters (optional) zero or more message parameters for specific error * @return string HTML error string */ - protected function getError( $msg /*, ... */ ) { + public function getError( $msg /*, ... */ ) { $mf = wfMessage( 'math_failure' )->inContentLanguage()->escaped(); $parameters = func_get_args(); array_shift( $parameters ); @@ -415,5 +421,27 @@ abstract class MathRenderer { function getLastError() { return $this->lastError; } + + /** + * Get if the input tex was marked as secure + * @return boolean + */ + public function isTexSecure() { + return $this->texSecure; + } + + public function checkTex() { + if ( !$this->texSecure ) { + $checker = new MathInputCheckTexvc( $this->userInputTex ); + if ( $checker->isValid() ){ + $this->setTex( $checker->getValidTex() ); + $this->texSecure = true; + return true; + } else { + $this->lastError = $checker->getError(); + return false; + } + } + } } diff --git a/MathTexvc.php b/MathTexvc.php index 23bc40e..1c69d47 100644 --- a/MathTexvc.php +++ b/MathTexvc.php @@ -100,24 +100,8 @@ class MathTexvc extends MathRenderer { * @param string $texvcResult error result returned by texvc */ public function convertTexvcError( $texvcResult ) { - $texvcStatus = substr( $texvcResult, 0, 1 ); - - $errDetails = htmlspecialchars( substr( $texvcResult, 1 ) ); - switch( $texvcStatus ) { - case 'E': - $errMsg = $this->getError( 'math_lexing_error' ); - break; - case 'S': - $errMsg = $this->getError( 'math_syntax_error' ); - break; - case 'F': - $errMsg = $this->getError( 'math_unknown_function', $errDetails ); - break; - default: - $errMsg = $this->getError( 'math_unknown_error' ); - } - - return $errMsg; + $errorConverter = new MathInputCheckTexvc(); + return $errorConverter->convertTexvcError( $texvcResult, $this ); } /** @@ -364,4 +348,4 @@ class MathTexvc extends MathRenderer { return false; } -} \ No newline at end of file +} diff --git a/tests/MathInputCheckTest.php b/tests/MathInputCheckTest.php new file mode 100644 index 0000000..69d3c62 --- /dev/null +++ b/tests/MathInputCheckTest.php @@ -0,0 +1,33 @@ +getMockBuilder( 'MathInputCheck' )->getMock(); + $this->assertEquals( $InputCheck->IsValid() , false ); + } + + /** + * @covers MathInputCheck::getError + * @todo Implement testGetError(). + */ + public function testGetError() { + $InputCheck = $this->getMockBuilder( 'MathInputCheck' )->getMock(); + $this->assertNull( $InputCheck->getError() ); + } + + /** + * @covers MathInputCheck::getValidTex + */ + public function testGetValidTex() { + $InputCheck = $this->getMockBuilder( 'MathInputCheck' ) + ->setConstructorArgs(array('some tex input')) + ->getMock(); + $this->assertNull( $InputCheck->getValidTex() ); + } +} diff --git a/tests/MathInputCheckTexvcTest.php b/tests/MathInputCheckTexvcTest.php new file mode 100644 index 0000000..c72fc9e --- /dev/null +++ b/tests/MathInputCheckTexvcTest.php @@ -0,0 +1,141 @@ +BadObject = new MathInputCheckTexvc( '\newcommand{\text{do evil things}}' ); + $this->GoodObject = new MathInputCheckTexvc( '\sin\left(\frac12x\right)' ); + if ( !is_executable( $wgMathTexvcCheckExecutable ) ) { + $this->markTestSkipped( "No texvc installed on server" ); + } + } + + /** + * This is not a real phpUnit test. + * It's just to discover whether setting default values dependent + * on the existence of executables influences the performance reaonably. + * The test is disabled by default. You can enable it via + * php .../tests/phpunit/phpunit.php --group Utility ... + * @group Utility + */ + public function testPerformanceIsExecutable() { + global $wgMathTexvcCheckExecutable; + /** @var int the number of runs used in that test */ + $numberOfRuns = 10; + /** @var double the maximal average time accetable for a execution of is_executable in seconds*/ + $maxAvgTime = .001; + $tstart = microtime( true ); + + for ($i = 1; $i <= $numberOfRuns; $i++){ + is_executable( $wgMathTexvcCheckExecutable ); + } + + $time = microtime( true ) - $tstart; + $this->assertTrue( $time < $maxAvgTime * $numberOfRuns, 'function is_executable consumes too much time' ); + } + + /** + * Tears down the fixture, for example, closes a network connection. + * This method is called after a test is executed. + */ + protected function tearDown() { + } + + /** + * @covers MathInputCheckTexvc::testGetError + */ + public function testGetError() { + $this->assertNull( $this->GoodObject->getError() ); + $this->assertNull( $this->BadObject->getError() ); + $this->BadObject->isValid(); + $this->GoodObject->isValid(); + $this->assertNull( $this->GoodObject->getError() ); + $expectedMessage = wfMessage( 'math_unknown_function', '\newcommand' )->inContentLanguage()->escaped(); + $this->assertContains( $expectedMessage , $this->BadObject->getError()); + } + + /** + * @covers MathInputCheckTexvc::isValid + */ + public function testIsValid() { + $this->assertFalse( $this->BadObject->isValid() ); + $this->assertTrue( $this->GoodObject->isValid() ); + } + + /** + * @covers MathInputCheckTexvc::getValidTex + */ + public function testGetValidTex() { + $this->assertNull( $this->GoodObject->getValidTex() ); + $this->assertNull( $this->BadObject->getValidTex() ); + $this->BadObject->isValid(); + $this->GoodObject->isValid(); + $this->assertNull( $this->BadObject->getValidTex() ); + + // Be aware of the additional brackets and spaces inserted here + $this->assertEquals( $this->GoodObject->getValidTex() , "\\sin \\left({\\frac 12}x\\right)" ); + } + + /** + * Test corner cases of texvccheck conversion + * @covers MathInputCheckTexvc::getValidTex + */ + public function testGetValidTexCornerCases() { + $Object = new MathInputCheckTexvc( '\reals' ); + $Object->isValid(); + $this->assertEquals( "\\mathbb{R} ", $Object->getValidTex() ); + $Object = new MathInputCheckTexvc( '\lbrack' ); // Bug: 54624 + $Object->isValid(); + $this->assertEquals( '\\lbrack ', $Object->getValidTex() ); + } + + /** + * Tests behavior of convertTexvcError + * The method was moved from MathTexvc to MathInputCheckTexvc + * @covers MathTexvc::convertTexvcError + */ + public function testConvertTexvcError() { + $texvc = $this->getMockBuilder( 'MathInputCheckTexvc' ) + ->setMethods(NULL) + ->disableOriginalConstructor() + ->getMock(); + + $mathFailure = wfMessage( 'math_failure' )->inContentLanguage()->escaped(); + + $actualLexing = $texvc->convertTexvcError( 'E' ); + $expectedLexing = wfMessage( 'math_lexing_error', '' )->inContentLanguage()->escaped(); + $this->assertContains( $mathFailure, $actualLexing, 'Lexing error contains general math failure message' ); + $this->assertContains( $expectedLexing, $actualLexing, 'Lexing error contains detailed error for lexing' ); + + $actualSyntax = $texvc->convertTexvcError( 'S' ); + $expectedSyntax = wfMessage( 'math_syntax_error', '' )->inContentLanguage()->escaped(); + $this->assertContains( $mathFailure, $actualSyntax, 'Syntax error contains general math failure message' ); + $this->assertContains( $expectedSyntax, $actualSyntax, 'Syntax error contains detailed error for syntax' ); + + $unknownFunction = 'figureEightIntegral'; + $actualUnknownFunction = $texvc->convertTexvcError( "F$unknownFunction" ); + $expectedUnknownFunction = wfMessage( 'math_unknown_function', $unknownFunction )->inContentLanguage()->escaped(); + $this->assertContains( $mathFailure, $actualUnknownFunction, 'Unknown function error contains general math failure message' ); + $this->assertContains( $expectedUnknownFunction, $actualUnknownFunction, 'Unknown function error contains detailed error for unknown function' ); + + $actualUnknownError = $texvc->convertTexvcError( 'Q' ); + $expectedUnknownError = wfMessage( 'math_unknown_error', '' )->inContentLanguage()->escaped(); + $this->assertContains( $mathFailure, $actualUnknownError, 'Unknown error contains general math failure message' ); + $this->assertContains( $expectedUnknownError, $actualUnknownError, 'Unknown error contains detailed error for unknownError' ); + } +}