Mathoid CLI interface

Renders formulae via mathoid without running mathoid as a service.
Mathoid 0.7.1 or later must be installed locally and configured to be
accessed directly from the math extension. It has been tested with the
config.dev.yaml of version 0.7.1. If mathoid is installed in
'/srv/mathoid' the following line might be added to LocalSettings.php
$wgMathoidCli = ['/srv/mathoid/cli.js', '-c', '/srv/mathoid/config.dev.yaml'];
i.e., make sure to specify the -c parameter with an absolute path.
In addition, mathoid uses more memory than the default limit allows. With
the config.dev.yaml a value of
$wgMaxShellMemory = 2097152;
has been tested to work well.

Change-Id: I0600f056d21927963267cf979d342e313419e9fa
This commit is contained in:
Moritz Schubotz 2017-08-16 14:50:44 +09:00 committed by Moritz Schubotz (physikerwelt)
parent d4a154ade2
commit d8822169ab
No known key found for this signature in database
GPG Key ID: 73D26C61BAB32E94
14 changed files with 463 additions and 37 deletions

View File

@ -189,7 +189,7 @@ class MathHooks {
*/
static function mathTagHook( $content, $attributes, $parser ) {
static $n = 1;
if ( trim( $content ) === '' ) { // bug 8372
if ( trim( $content ) === '' ) { // bug 8372 https://phabricator.wikimedia.org/rSVN18870
return '';
}
@ -346,6 +346,10 @@ class MathHooks {
if ( in_array( $type, [ 'mysql', 'sqlite', 'postgres' ] ) ) {
$sql = __DIR__ . '/db/mathoid.' . $type . '.sql';
$updater->addExtensionTable( 'mathoid', $sql );
if ( $type == 'mysql' ) {
$sql = __DIR__ . '/db/patches/mathoid.add_png.mysql.sql';
$updater->addExtensionField( 'mathoid', 'math_png', $sql );
}
} else {
throw new Exception( "Math extension does not currently support $type database for Mathoid." );
}
@ -373,17 +377,14 @@ class MathHooks {
* @return bool
*/
public static function onParserAfterTidy( &$parser, &$text ) {
$rbis = [];
foreach ( self::$tags as $key => $tag ) {
/** @var MathRenderer $renderer */
$renderer = $tag[0];
$rbi = new MathRestbaseInterface( $renderer->getTex(), $renderer->getInputType() );
$renderer->setRestbaseInterface( $rbi );
$rbis[] = $rbi;
global $wgMathoidCli;
if ( $wgMathoidCli ) {
MathMathMLCli::batchEvaluate( self::$tags );
} else {
MathMathML::batchEvaluate( self::$tags );
}
MathRestbaseInterface::batchEvaluate( $rbis );
foreach ( self::$tags as $key => $tag ) {
$value = call_user_func_array( [ "MathHooks","mathPostTagHook" ], $tag );
$value = call_user_func_array( [ "MathHooks", "mathPostTagHook" ], $tag );
// Workaround for https://phabricator.wikimedia.org/T103269
$text = preg_replace( '/(<mw:editsection[^>]*>.*?)' . preg_quote( $key ) .
'(.*?)<\/mw:editsection>/',

View File

@ -49,6 +49,18 @@ class MathMathML extends MathRenderer {
}
}
/**
 * Attaches a fresh RESTBase interface to the renderer of every tag and
 * evaluates all of those interfaces in one batch.
 *
 * @param array &$tags math tags; the renderer of each tag is read from index 0
 */
public static function batchEvaluate( &$tags ) {
	$interfaces = [];
	foreach ( $tags as $entry ) {
		/** @var MathRenderer $mathRenderer */
		$mathRenderer = $entry[0];
		$interface = new MathRestbaseInterface(
			$mathRenderer->getTex(),
			$mathRenderer->getInputType()
		);
		$mathRenderer->setRestbaseInterface( $interface );
		$interfaces[] = $interface;
	}
	MathRestbaseInterface::batchEvaluate( $interfaces );
}
/**
* Gets the allowed root elements the rendered math tag might have.
*
@ -257,11 +269,7 @@ class MathMathML extends MathRenderer {
* @return bool
*/
protected function doRender() {
if ( $this->getTex() === '' ) {
LoggerFactory::getInstance( 'Math' )->debug(
'Rendering was requested, but no TeX string is specified.'
);
$this->lastError = $this->getError( 'math_empty_tex' );
if ( $this->isEmpty() ) {
return false;
}
$res = '';
@ -502,12 +510,12 @@ class MathMathML extends MathRenderer {
}
/**
* @param $jsonResult
* @param $host
* @param object $jsonResult json result
* @param string $host name
*
* @return bool
*/
private function processJsonResult( $jsonResult, $host ) {
protected function processJsonResult( $jsonResult, $host ) {
if ( $this->getMode() == 'latexml' || $this->inputType == 'pmml' ||
$this->isValidMathML( $jsonResult->mml )
) {
@ -536,4 +544,17 @@ class MathMathML extends MathRenderer {
return false;
}
}
/**
 * Tells whether the user input is the empty string. In that case a debug
 * message is logged and the 'math_empty_tex' error is stored as last error.
 *
 * @return bool true if no TeX string was specified, false otherwise
 */
protected function isEmpty() {
	if ( $this->userInputTex !== '' ) {
		return false;
	}
	$logger = LoggerFactory::getInstance( 'Math' );
	$logger->debug( 'Rendering was requested, but no TeX string is specified.' );
	$this->lastError = $this->getError( 'math_empty_tex' );
	return true;
}
}

194
MathMathMLCli.php Normal file
View File

@ -0,0 +1,194 @@
<?php
use MediaWiki\Logger\LoggerFactory;
use \MediaWiki\MediaWikiServices;
/**
 * Renders formulae by invoking the Mathoid command line interface directly
 * instead of talking to a Mathoid web service.
 *
 * The command (executable followed by its arguments) is read from the global
 * $wgMathoidCli.
 */
class MathMathMLCli extends MathMathML {

	/**
	 * Renders all tags that are not yet stored in the database with a single
	 * call to the Mathoid CLI.
	 *
	 * @param array &$tags math tags; the renderer of each tag is read from index 0
	 * @return bool always true
	 * @throws MWException if the CLI call fails or does not return JSON
	 */
	public static function batchEvaluate( &$tags ) {
		$req = [];
		/** @var MathMathMLCli[] $pending renderers that still need a CLI result */
		$pending = [];
		foreach ( $tags as $tag ) {
			/** @var MathMathMLCli $renderer */
			$renderer = $tag[0];
			// checking if the rendering is in the database is no security issue since only the md5
			// hash of the user input string will be sent to the database
			if ( !$renderer->isInDatabase() ) {
				$req[] = $renderer->getMathoidCliQuery();
				$pending[] = $renderer;
			}
		}
		if ( count( $req ) === 0 ) {
			return true;
		}
		$res = self::evaluateWithCli( $req );
		foreach ( $pending as $renderer ) {
			$renderer->initializeFromCliResponse( $res );
		}
		return true;
	}

	/**
	 * Copies the CLI result that belongs to this renderer (looked up by the
	 * md5 hash of the input) into the renderer.
	 *
	 * @param object $res decoded CLI response, expected to have one property per input hash
	 * @return bool true if a successful response was applied, false if the
	 *  response is missing, the input is empty, or Mathoid reported an error
	 */
	private function initializeFromCliResponse( $res ) {
		global $wgMathoidCli;
		$hash = $this->getMd5();
		if ( !property_exists( $res, $hash ) ) {
			// The second argument makes var_export() return the dump instead
			// of printing it into the page output.
			$this->lastError =
				$this->getError( 'math_mathoid_error', 'cli',
					var_export( get_object_vars( $res ), true ) );
			return false;
		}
		if ( $this->isEmpty() ) {
			return false;
		}
		$response = $res->{$hash};
		if ( !$response->success ) {
			$this->lastError = $this->renderError( $response );
			return false;
		}
		$this->texSecure = true;
		$this->tex = $response->sanetex;
		// The host name is only relevant for the debugging. So using file:// to indicate that the
		// cli interface seems to be OK.
		$this->processJsonResult( $response, 'file://' . $wgMathoidCli[0] );
		$this->mathStyle = $response->mathoidStyle;
		// png->data is a list of byte values; turn it into a binary string
		$this->png = implode( '', array_map( "chr", $response->png->data ) );
		$this->changed = true;
		return true;
	}

	/**
	 * Builds the error output for a failed CLI response. Depending on the
	 * status reported in the response detail, the position of the problem
	 * is appended to the error text.
	 *
	 * @param object $response failed response from the CLI
	 * @return mixed the result of getError() for 'math_mathoid_error'
	 */
	public function renderError( $response ) {
		$msg = $response->error;
		try {
			switch ( $response->detail->status ) {
				case "F":
					$msg .= "\n Found {$response->detail->details}" .
						$this->appendLocationInfo( $response );
					break;
				case 'S':
				case "C":
					$msg .= $this->appendLocationInfo( $response );
					break;
				case '-':
					// we do not know any cases that triggers this error
			}
		}
		catch ( Exception $e ) {
			// use default error message
		}
		return $this->getError( 'math_mathoid_error', 'cli', $msg );
	}

	/**
	 * Describes this formula as one entry of a Mathoid CLI request.
	 *
	 * @return array
	 */
	public function getMathoidCliQuery() {
		return [
			'query' => [
				'q' => $this->getTex(),
				'type' => $this->getInputType(),
				'hash' => $this->getMd5(),
			],
		];
	}

	/**
	 * Sends the JSON encoded request to the command configured in
	 * $wgMathoidCli via standard input and decodes its standard output.
	 *
	 * @param array $req list of queries, see getMathoidCliQuery()
	 * @param int|null &$exitCode receives the exit code of the command
	 * @return mixed decoded JSON response
	 * @throws MWException if the command exits with a non-zero code or its
	 *  output cannot be decoded
	 */
	public static function evaluateWithCli( $req, &$exitCode = null ) {
		global $wgMathoidCli;
		$json_req = json_encode( $req );
		$cmd = MediaWikiServices::getInstance()->getShellCommandFactory()->create();
		$cmd->params( $wgMathoidCli );
		$cmd->input( $json_req );
		$result = $cmd->execute();
		$exitCode = $result->getExitCode();
		if ( $exitCode != 0 ) {
			LoggerFactory::getInstance( 'Math' )->error(
				'Can not process {req} with config {conf} returns {res}', [
					'req' => $req,
					'conf' => var_export( $wgMathoidCli, true ),
					'res' => var_export( $result, true ),
				] );
			throw new MWException( "Mathoid cli '$wgMathoidCli[0]' is not executable." );
		}
		$stdout = $result->getStdout();
		$res = json_decode( $stdout );
		if ( !$res ) {
			// Report the raw output; the failed decode result carries no information.
			throw new MWException( "Mathoid cli response '$stdout' is no valid JSON file." );
		}
		return $res;
	}

	/**
	 * Does not render anything itself; only reports whether an error was
	 * recorded for this renderer.
	 *
	 * @param bool $forceReRendering unused
	 * @return bool false if a last error is set, true otherwise
	 */
	public function render( $forceReRendering = false ) {
		return !$this->getLastError();
	}

	/**
	 * @return bool the current value of the texSecure flag
	 */
	protected function doCheck() {
		// avoid that restbase is called if check is set to always
		return $this->texSecure;
	}

	/**
	 * Reads the png column in addition to the fields handled by the parent.
	 *
	 * @param object $rpage database row
	 */
	protected function initializeFromDatabaseRow( $rpage ) {
		// NOTE(review): the guard tests math_svg although math_png is read;
		// possibly a copy and paste slip - confirm the intended column.
		if ( !empty( $rpage->math_svg ) ) {
			$this->png = $rpage->math_png;
		}
		parent::initializeFromDatabaseRow( $rpage );
	}

	/**
	 * @return array the parent's output array extended by the 'math_png' value
	 */
	protected function dbOutArray() {
		$out = parent::dbOutArray();
		$out['math_png'] = $this->png;
		return $out;
	}

	/**
	 * @return array the parent's field list extended by 'math_png'
	 */
	protected function dbInArray() {
		$out = parent::dbInArray();
		$out[] = 'math_png';
		return $out;
	}

	/**
	 * Gets the PNG data; if none is present yet, the formula is evaluated
	 * with the CLI first.
	 *
	 * @return string
	 * @throws MWException if the CLI call fails
	 */
	public function getPng() {
		if ( !$this->png ) {
			$this->initializeFromCliResponse( self::evaluateWithCli( [
				$this->getMathoidCliQuery(),
			] ) );
		}
		return parent::getPng();
	}

	/**
	 * @param object $response object from cli
	 * @return string containing the location information
	 */
	private function appendLocationInfo( $response ) {
		return "in {$response->detail->line}:{$response->detail->column}";
	}
}

View File

@ -29,6 +29,8 @@ abstract class MathRenderer {
protected $mathml = '';
/** @var string SVG layout only (no semantics) */
protected $svg = '';
/** @var string PNG image only (no semantics) */
protected $png = '';
/** @var string the original user input string (which was used to calculate the inputhash) */
protected $userInputTex = '';
// FURTHER PROPERTIES OF THE MATHEMATICAL CONTENT
@ -141,7 +143,7 @@ abstract class MathRenderer {
* @return MathRenderer appropriate renderer for mode
*/
public static function getRenderer( $tex, $params = [], $mode = 'png' ) {
global $wgDefaultUserOptions, $wgMathEnableExperimentalInputFormats;
global $wgDefaultUserOptions, $wgMathEnableExperimentalInputFormats, $wgMathoidCli;
if ( isset( $params['forcemathmode'] ) ) {
$mode = $params['forcemathmode'];
@ -173,7 +175,11 @@ abstract class MathRenderer {
break;
case 'mathml':
default:
$renderer = new MathMathML( $tex, $params );
if ( $wgMathoidCli ) {
$renderer = new MathMathMLCli( $tex, $params );
} else {
$renderer = new MathMathML( $tex, $params );
}
}
LoggerFactory::getInstance( 'Math' )->debug( 'Start rendering $' . $renderer->tex .
'$ in mode ' . $mode );
@ -706,4 +712,12 @@ abstract class MathRenderer {
$this->lastError = $checker->getError();
return false;
}
/**
 * Gets the PNG image data currently held by this renderer.
 *
 * @return string value of the png property
 */
public function getPng() {
return $this->png;
}
}

View File

@ -42,7 +42,7 @@ class SpecialMathShowImage extends SpecialPage {
}
function execute( $par ) {
global $wgMathEnableExperimentalInputFormats;
global $wgMathEnableExperimentalInputFormats, $wgMathoidCli;
$request = $this->getRequest();
$hash = $request->getText( 'hash', '' );
$tex = $request->getText( 'tex', '' );
@ -51,7 +51,9 @@ class SpecialMathShowImage extends SpecialPage {
} else {
$asciimath = '';
}
$this->mode = MathHooks::mathModeToString( $request->getText( 'mode' ), 'mathml' );
$mode = $request->getText( 'mode' );
$this->mode = MathHooks::mathModeToString( $mode, 'mathml' );
if ( !in_array( $this->mode, MathRenderer::getValidModes() ) ) {
// Fallback to the default if an invalid mode was specified
$this->mode = 'mathml';
@ -61,22 +63,18 @@ class SpecialMathShowImage extends SpecialPage {
echo $this->printSvgError( 'No Inputhash specified' );
} else {
if ( $tex === '' && $asciimath === '' ) {
switch ( $this->mode ) {
case 'png':
$this->renderer = MathTexvc::newFromMd5( $hash );
break;
case 'latexml':
$this->renderer = MathLaTeXML::newFromMd5( $hash );
break;
default:
$this->renderer = MathMathML::newFromMd5( $hash );
if ( $wgMathoidCli && $this->mode === 'png' ) {
$this->renderer = MathRenderer::getRenderer( '', [], 'mathml' );
} else {
$this->renderer = MathRenderer::getRenderer( '', [], $this->mode );
}
$this->renderer->setMd5( $hash );
$this->noRender = $request->getBool( 'noRender', false );
$isInDatabase = $this->renderer->readFromDatabase();
if ( $isInDatabase || $this->noRender ) {
$success = $isInDatabase;
} else {
if ( $this->mode == 'png' ) {
if ( $this->mode == 'png' && !$wgMathoidCli ) {
// get the texvc input from the mathoid database table
// and render the conventional way
$mmlRenderer = MathMathML::newFromMd5( $hash );

View File

@ -16,5 +16,7 @@ CREATE TABLE /*_*/mathoid (
-- MW_MATHSTYLE_(INLINE_DISPLAYSTYLE|DISPLAY|INLINE)
math_style tinyint,
-- type of the Math input (TeX, MathML, AsciiMath...)
math_input_type tinyint
math_input_type tinyint,
-- png output of mathoid (for higher DPI settings these files might become larger than 64KiB)
math_png mediumblob
) /*$wgDBTableOptions*/;

View File

@ -16,5 +16,7 @@ CREATE TABLE /*_*/mathoid (
-- MW_MATHSTYLE_(INLINE_DISPLAYSTYLE|DISPLAY|INLINE)
math_style SMALLINT,
-- type of the Math input (TeX, MathML, AsciiMath...)
math_input_type SMALLINT
math_input_type SMALLINT,
-- png output of mathoid
math_png BYTEA
) /*$wgDBTableOptions*/;

View File

@ -16,5 +16,7 @@ CREATE TABLE /*_*/mathoid (
-- MW_MATHSTYLE_(INLINE_DISPLAYSTYLE|DISPLAY|INLINE)
math_style tinyint,
-- type of the Math input (TeX, MathML, AsciiMath...)
math_input_type tinyint
math_input_type tinyint,
-- png output of mathoid
math_png blob
) /*$wgDBTableOptions*/;

View File

@ -0,0 +1 @@
-- Adds the math_png column to the mathoid table.
ALTER TABLE /*_*/mathoid ADD math_png mediumblob;

View File

@ -21,6 +21,7 @@
"MathTexvc": "MathTexvc.php",
"MathSource": "MathSource.php",
"MathMathML": "MathMathML.php",
"MathMathMLCli": "MathMathMLCli.php",
"MathLaTeXML": "MathLaTeXML.php",
"MathInputCheck": "MathInputCheck.php",
"MathInputCheckTexvc": "MathInputCheckTexvc.php",
@ -109,6 +110,7 @@
"MathConcurrentReqs": 50,
"MathPath": false,
"MathTexvcCheckExecutable": false,
"MathoidCli": false,
"MathValidModes": [
"png",
"source",

View File

@ -13,6 +13,7 @@
if ( !document.implementation.hasFeature( 'http://www.w3.org/TR/SVG11/feature#Image', '1.1' ) ) {
$( '.mwe-math-fallback-image-inline, .mwe-math-fallback-image-display' ).each( function () {
this.src = this.src.replace( 'media/math/render/svg/', 'media/math/render/png/' );
this.src = this.src.replace( 'mode=mathml', 'mode=mathml-png' );
} );
}
}( jQuery ) );

View File

@ -1,5 +1,6 @@
<?php
/**
* Test the MathML output format.
*
@ -30,6 +31,11 @@ class MathMathMLTest extends MediaWikiTestCase {
self::$timeout = $timeout;
}
/**
 * Runs the parent set up and sets the wgMathoidCli global to false
 * for the tests of this class.
 */
protected function setUp() {
parent::setUp();
// set wgMathoidCli to false for the duration of each test
$this->setMwGlobals( 'wgMathoidCli', false );
}
/**
* Tests behavior of makeRequest() that communicates with the host.
* Testcase: Invalid request.
@ -61,7 +67,9 @@ class MathMathMLTest extends MediaWikiTestCase {
*/
public function testMakeRequestSuccess() {
self::setMockValues( true, true, false );
self::$content = 'test content';
$url = 'http://example.com/valid';
/** @var MathMathML */
$renderer = $this->getMockBuilder( 'MathMathML' )
->setMethods( null )
->disableOriginalConstructor()
@ -69,8 +77,8 @@ class MathMathMLTest extends MediaWikiTestCase {
$requestReturn = $renderer->makeRequest( $url, 'a+b', $res, $error,
'MathMLHttpRequestTester' );
$this->assertEquals( true, $requestReturn, "successful call return" );
$this->assertTrue( $res, 'successfull call' );
$this->assertEquals( $error, '', "successfull call errormessage" );
$this->assertEquals( 'test content', $res, 'successful call' );
$this->assertEquals( $error, '', "successful call error-message" );
}
/**

74
tests/MathoidCliTest.php Normal file
View File

@ -0,0 +1,74 @@
<?php
/**
 * Tests the rendering via the Mathoid command line interface.
 * All tests are skipped unless $wgMathoidCli points to an executable file.
 *
 * @covers MathMathMLCli
 *
 * @group Math
 *
 * @licence GNU GPL v2+
 */
class MathoidCliTest extends MediaWikiTestCase {
	private $goodInput = '\sin\left(\frac12x\right)';
	private $badInput = '\newcommand{\text{do evil things}}';
	/** @var bool whether a usable Mathoid CLI is configured */
	protected static $hasMathoidCli = false;

	/**
	 * Detects once per test class whether the configured CLI is executable.
	 */
	public static function setUpBeforeClass() {
		global $wgMathoidCli;
		self::$hasMathoidCli = is_array( $wgMathoidCli )
			&& isset( $wgMathoidCli[0] )
			&& is_executable( $wgMathoidCli[0] );
	}

	/**
	 * Skips the test if no Mathoid CLI is available.
	 * This method is called before a test is executed.
	 */
	protected function setUp() {
		parent::setUp();
		if ( !self::$hasMathoidCli ) {
			$this->markTestSkipped( "No mathoid cli configured on server" );
		}
	}

	public function testGood() {
		$mml = new MathMathMLCli( $this->goodInput );
		$input = [ 'good' => [ $mml ] ];
		MathMathMLCli::batchEvaluate( $input );
		$this->assertTrue( $mml->render(), 'assert that renders' );
		$this->assertContains( '</mo>', $mml->getMathml() );
	}

	public function testUndefinedFunctionError() {
		$mml = new MathMathMLCli( $this->badInput );
		$input = [ 'bad' => [ $mml ] ];
		MathMathMLCli::batchEvaluate( $input );
		$this->assertFalse( $mml->render(), 'assert that fails' );
		$this->assertContains( 'newcommand', $mml->getLastError() );
	}

	public function testSyntaxError() {
		$mml = new MathMathMLCli( '^' );
		$input = [ 'bad' => [ $mml ] ];
		MathMathMLCli::batchEvaluate( $input );
		$this->assertFalse( $mml->render(), 'assert that fails' );
		$this->assertContains( 'SyntaxError', $mml->getLastError() );
	}

	public function testCeError() {
		$mml = new MathMathMLCli( '\ce{H2O}' );
		$input = [ 'bad' => [ $mml ] ];
		MathMathMLCli::batchEvaluate( $input );
		$this->assertFalse( $mml->render(), 'assert that fails' );
		$this->assertContains( 'SyntaxError', $mml->getLastError() );
	}

	public function testEmpty() {
		$mml = new MathMathMLCli( '' );
		$input = [ 'bad' => [ $mml ] ];
		MathMathMLCli::batchEvaluate( $input );
		// empty input must be reported as an error, not rendered
		$this->assertFalse( $mml->render(), 'assert that fails' );
		$this->assertFalse( $mml->isTexSecure() );
		$this->assertContains( 'empty', $mml->getLastError() );
	}
}

106
tests/wfTest.php Normal file
View File

@ -0,0 +1,106 @@
<?php
/**
* Created by PhpStorm.
* User: Moritz
* Date: 14.08.2017
* Time: 12:09
*/
require_once __DIR__ . '/../../../maintenance/Maintenance.php';
/**
 * Maintenance script that extracts the math tags of a wiki page and feeds
 * them to the Mathoid CLI renderer in one batch.
 */
class DummyTest extends Maintenance {
	const REFERENCE_PAGE = 'mediawikiwiki:Extension:Math/CoverageTest';

	/**
	 * Registers the page argument and the offset, length and user options.
	 */
	public function __construct() {
		parent::__construct();
		$this->requireExtension( 'Math' );
		$this->mDescription = 'Test Mathoid CLI';
		$this->addArg( 'page', "The page used for the testset generation.", false );
		$this->addOption( 'offset', "If set the first n equations on the page are skipped", false,
			true, "o" );
		$this->addOption( 'length', "If set the only n equations were processed", false, true,
			"l" );
		$this->addOption( 'user', "User with rights to view the page", false, true, "u" );
	}

	/**
	 * Extracts the math tags from the wikitext of a page. A page that does
	 * not exist locally is only fetched (via interwiki transclusion) if it
	 * is the reference page.
	 *
	 * @param string $titleString title of the page
	 * @return array|string the extracted math tags, or an error message
	 */
	private static function getMathTagsFromPage( $titleString ) {
		global $wgEnableScaryTranscluding;
		$title = Title::newFromText( $titleString );
		if ( !$title ) {
			// Title::newFromText() yields no object for invalid titles
			return 'Invalid page title';
		}
		if ( $title->exists() ) {
			$article = new Article( $title );
			$wikiText = $article->getPage()->getContent()->getNativeData();
		} else {
			if ( $title == self::REFERENCE_PAGE ) {
				$wgEnableScaryTranscluding = true;
				$parser = new Parser();
				$wikiText = $parser->interwikiTransclude( $title, 'raw' );
			} else {
				return 'Page does not exist';
			}
		}
		$wikiText = Sanitizer::removeHTMLcomments( $wikiText );
		// Non-greedy and with the s modifier, so that each nowiki section is
		// removed on its own, including sections that span several lines.
		$wikiText = preg_replace( '#<nowiki>.*?</nowiki>#s', '', $wikiText );
		$math = [];
		Parser::extractTagsAndParams( [ 'math' ], $wikiText, $math );
		return $math;
	}

	/**
	 * Renders the selected math tags of the page and evaluates them with
	 * the Mathoid CLI in one batch.
	 */
	public function execute() {
		global $wgUser;
		echo "This test accesses the Mathoid CLI.\n";
		$page = $this->getArg( 0, self::REFERENCE_PAGE );
		$offset = (int)$this->getOption( 'offset', 0 );
		$length = (int)$this->getOption( 'length', PHP_INT_MAX );
		if ( $this->hasOption( 'user' ) ) {
			// Assign the global, otherwise the option has no effect at all.
			$user = User::newFromName( $this->getOption( 'user' ) );
			if ( $user ) {
				$wgUser = $user;
			}
		}
		$allEquations = self::getMathTagsFromPage( $page );
		if ( !is_array( $allEquations ) ) {
			echo "Could not get equations from page '$page'\n";
			echo $allEquations . PHP_EOL;
			return;
		} else {
			echo 'got ' . count( $allEquations ) . " math tags. Start processing.";
		}
		$i = 0;
		$rend = [];
		foreach ( array_slice( $allEquations, $offset, $length, true ) as $input ) {
			// the output is not needed here; the call is kept for its side effects
			MathRenderer::renderMath( $input[1], $input[2], 'mathml' );
			$rend[] = [ MathRenderer::getRenderer( $input[1], $input[2], 'mathml' ), $input ];
			$i++;
			echo '.';
		}
		echo "Generated $i tests\n";
		MathMathMLCli::batchEvaluate( $rend );
	}
}
// Name the maintenance class of this file and hand over to the
// maintenance bootstrap.
$maintClass = 'DummyTest';
require_once RUN_MAINTENANCE_IF_MAIN;