00001 <?php
00002
00006 class Preprocessor_DOM implements Preprocessor {
00007 var $parser, $memoryLimit;
00008
00009 const CACHE_VERSION = 1;
00010
/**
 * Bind the preprocessor to a parser and work out the PHP memory limit in bytes.
 *
 * The limit is read from the memory_limit ini setting. A plain number is taken
 * as a byte count; a number followed by K, M or G (any case) is scaled using
 * PHP's shorthand byte notation. An empty value, -1 (unlimited) or anything
 * unparseable leaves $this->memoryLimit at false, which makes memCheck() a no-op.
 *
 * @param $parser The parser object this preprocessor works for; stored in $this->parser
 */
function __construct( $parser ) {
	$this->parser = $parser;
	$this->memoryLimit = false;

	$mem = ini_get( 'memory_limit' );
	if ( strval( $mem ) !== '' && $mem != -1 ) {
		if ( preg_match( '/^\d+$/', $mem ) ) {
			// Already a byte count
			$this->memoryLimit = $mem;
		} elseif ( preg_match( '/^(\d+)([KMG])$/i', $mem, $m ) ) {
			// Shorthand notation: scale by the suffix
			$multipliers = array(
				'K' => 1024,
				'M' => 1048576,
				'G' => 1073741824,
			);
			$this->memoryLimit = $m[1] * $multipliers[ strtoupper( $m[2] ) ];
		}
	}
}
00023
/**
 * Create a fresh top-level frame attached to this preprocessor.
 *
 * @return PPFrame_DOM
 */
function newFrame() {
	$frame = new PPFrame_DOM( $this );
	return $frame;
}
00027
/**
 * Create a custom frame attached to this preprocessor, handing it the given arguments.
 *
 * @param $args Passed through unchanged to the PPCustomFrame_DOM constructor
 * @return PPCustomFrame_DOM
 */
function newCustomFrame( $args ) {
	$customFrame = new PPCustomFrame_DOM( $this, $args );
	return $customFrame;
}
00031
/**
 * Compare current memory usage with the limit detected in the constructor.
 *
 * @return bool|null Nothing (null) when no limit is known; otherwise true while
 *   usage is at or below 80% of the limit and false above that
 * @throws MWException when usage exceeds 90% of the limit
 */
function memCheck() {
	$limitBytes = $this->memoryLimit;
	if ( $limitBytes === false ) {
		// No usable memory_limit was detected, nothing to check against
		return;
	}

	$bytesUsed = memory_get_usage();
	$hardCeiling = $limitBytes * 0.9;
	if ( $bytesUsed > $hardCeiling ) {
		// Report the 90% threshold rounded to the nearest megabyte
		$limit = intval( $hardCeiling / 1048576 + 0.5 );
		throw new MWException( "Preprocessor hit 90% memory limit ($limit MB)" );
	}

	return !( $bytesUsed > $limitBytes * 0.8 );
}
00043
/**
 * Preprocess some wikitext and return the document tree as a PPNode_DOM.
 *
 * The XML produced by preprocessToXml() is cached in $wgMemc for texts longer
 * than $wgPreprocessorCacheThreshold. Cached values are prefixed with an
 * 8-character version field that must match self::CACHE_VERSION to be used.
 *
 * If the XML fails to load, it is passed through UtfNormal::cleanUp() and
 * loaded once more before giving up.
 *
 * @param $text String: the wikitext to preprocess
 * @param $flags Integer: bit field passed on to preprocessToXml() and made
 *   part of the cache key
 * @return PPNode_DOM wrapping the document element of the loaded XML
 * @throws MWException if the generated XML cannot be loaded even after cleanup
 */
function preprocessToObj( $text, $flags = 0 ) {
	wfProfileIn( __METHOD__ );
	global $wgMemc, $wgPreprocessorCacheThreshold;

	$xml = false;
	$cacheable = strlen( $text ) > $wgPreprocessorCacheThreshold;
	if ( $cacheable ) {
		wfProfileIn( __METHOD__.'-cacheable' );

		$cacheKey = wfMemcKey( 'preprocess-xml', md5($text), $flags );
		$cacheValue = $wgMemc->get( $cacheKey );
		if ( $cacheValue ) {
			// First 8 characters are the zero-padded cache version
			$version = substr( $cacheValue, 0, 8 );
			if ( intval( $version ) == self::CACHE_VERSION ) {
				$xml = substr( $cacheValue, 8 );
				// From the cache
				wfDebugLog( "Preprocessor", "Loaded preprocessor XML from memcached (key $cacheKey)" );
			}
		}
	}
	if ( $xml === false ) {
		if ( $cacheable ) {
			wfProfileIn( __METHOD__.'-cache-miss' );
			$xml = $this->preprocessToXml( $text, $flags );
			$cacheValue = sprintf( "%08d", self::CACHE_VERSION ) . $xml;
			$wgMemc->set( $cacheKey, $cacheValue, 86400 );
			wfProfileOut( __METHOD__.'-cache-miss' );
			wfDebugLog( "Preprocessor", "Saved preprocessor XML to memcached (key $cacheKey)" );
		} else {
			$xml = $this->preprocessToXml( $text, $flags );
		}
	}

	wfProfileIn( __METHOD__.'-loadXML' );
	$dom = new DOMDocument;
	wfSuppressWarnings();
	$result = $dom->loadXML( $xml );
	wfRestoreWarnings();
	if ( !$result ) {
		// Try running the XML through UtfNormal to get rid of invalid characters
		$xml = UtfNormal::cleanUp( $xml );
		$result = $dom->loadXML( $xml );
		if ( !$result ) {
			// Close every profiling section opened above before bailing out,
			// so the profiler stack stays balanced (expand() does the same)
			wfProfileOut( __METHOD__.'-loadXML' );
			if ( $cacheable ) {
				wfProfileOut( __METHOD__.'-cacheable' );
			}
			wfProfileOut( __METHOD__ );
			throw new MWException( __METHOD__.' generated invalid XML' );
		}
	}
	$obj = new PPNode_DOM( $dom->documentElement );
	wfProfileOut( __METHOD__.'-loadXML' );
	if ( $cacheable ) {
		wfProfileOut( __METHOD__.'-cacheable' );
	}
	wfProfileOut( __METHOD__ );
	return $obj;
}
00120
/**
 * Scan wikitext in a single pass and serialise its structure as an XML string.
 *
 * The output starts with "<root>" and ends with "</root>". Inside it, literal
 * text is HTML-escaped and the following elements are emitted:
 *   <template>, <tplarg>  double/triple brace constructs, with <title> and <part> children
 *   <h level i>           headings (lines opening and closing with equals signs)
 *   <ext>                 extension-style tags, with <name>, <attr>, optional <inner> and <close>
 *   <comment>             HTML comments
 *   <ignore>              text to be skipped (noinclude/includeonly/onlyinclude handling)
 *
 * Unclosed constructs are tracked on a PPDStack; $accum always references the
 * output buffer of the innermost open construct (or the root buffer).
 *
 * @param $text String: the wikitext to scan
 * @param $flags Integer: bit field; only Parser::PTD_FOR_INCLUSION is examined here.
 *   Note that the variable is later reused to hold the array from PPDStack::getFlags().
 * @return String: the XML document
 */
function preprocessToXml( $text, $flags = 0 ) {
	wfProfileIn( __METHOD__ );
	// Bracket rules, keyed by opening character:
	//   end    the matching closing character
	//   names  element name to emit for a given bracket count (null = no element, keep literal)
	//   min    minimum run of opening characters needed to start a construct
	//   max    maximum number of characters a single construct may consume
	$rules = array(
		'{' => array(
			'end' => '}',
			'names' => array(
				2 => 'template',
				3 => 'tplarg',
			),
			'min' => 2,
			'max' => 3,
		),
		'[' => array(
			'end' => ']',
			'names' => array( 2 => null ),
			'min' => 2,
			'max' => 2,
		)
	);

	$forInclusion = $flags & Parser::PTD_FOR_INCLUSION;

	$xmlishElements = $this->parser->getStripList();
	$enableOnlyinclude = false;
	if ( $forInclusion ) {
		// Inclusion mode: the includeonly tags themselves are ignored (their content is
		// kept), while whole noinclude elements are ignored
		$ignoredTags = array( 'includeonly', '/includeonly' );
		$ignoredElements = array( 'noinclude' );
		$xmlishElements[] = 'noinclude';
		// onlyinclude handling is enabled only if both the opening and closing tag occur
		if ( strpos( $text, '<onlyinclude>' ) !== false && strpos( $text, '</onlyinclude>' ) !== false ) {
			$enableOnlyinclude = true;
		}
	} else {
		// Direct mode: noinclude/onlyinclude tags are ignored (content kept), while
		// whole includeonly elements are ignored
		$ignoredTags = array( 'noinclude', '/noinclude', 'onlyinclude', '/onlyinclude' );
		$ignoredElements = array( 'includeonly' );
		$xmlishElements[] = 'includeonly';
	}
	$xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) );

	// The "A" (anchored) modifier makes the pattern match only at the offset passed
	// to preg_match(); group 1 is a known tag name, group 2 is a comment opener
	$elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA";

	$stack = new PPDStack;

	$searchBase = "[{<\n"; #}
	// Reversed copy of the input, used with strspn() to count characters backwards
	$revText = strrev( $text );

	$i = 0;                     # Input pointer, starts out pointing to a pseudo-newline before the start
	$accum =& $stack->getAccum();   # Current accumulator
	$accum = '<root>';
	$findEquals = false;            # True to find equals signs in arguments
	$findPipe = false;              # True to take notice of pipe characters
	$headingIndex = 1;
	$inHeading = false;        # True if $i is inside a possible heading
	$noMoreGT = false;         # True if there are no more greater-than (>) signs right of $i
	$findOnlyinclude = $enableOnlyinclude; # True to ignore all input up to the next <onlyinclude>
	$fakeLineStart = true;     # Do a line-start run without outputting an LF character

	while ( true ) {

		if ( $findOnlyinclude ) {
			// Ignore all input up to the next <onlyinclude>
			$startPos = strpos( $text, '<onlyinclude>', $i );
			if ( $startPos === false ) {
				// Ignored section runs to the end
				$accum .= '<ignore>' . htmlspecialchars( substr( $text, $i ) ) . '</ignore>';
				break;
			}
			$tagEndPos = $startPos + strlen( '<onlyinclude>' ); // past-the-end
			$accum .= '<ignore>' . htmlspecialchars( substr( $text, $i, $tagEndPos - $i ) ) . '</ignore>';
			$i = $tagEndPos;
			$findOnlyinclude = false;
		}

		if ( $fakeLineStart ) {
			$found = 'line-start';
			$curChar = '';
		} else {
			# Find next opening brace, closing brace or pipe
			$search = $searchBase;
			if ( $stack->top === false ) {
				$currentClosing = '';
			} else {
				$currentClosing = $stack->top->close;
				$search .= $currentClosing;
			}
			if ( $findPipe ) {
				$search .= '|';
			}
			if ( $findEquals ) {
				// First equals sign of the current argument will mark the name/value split
				$search .= '=';
			}
			$rule = null;
			# Output literal section, advance input counter
			$literalLength = strcspn( $text, $search, $i );
			if ( $literalLength > 0 ) {
				$accum .= htmlspecialchars( substr( $text, $i, $literalLength ) );
				$i += $literalLength;
			}
			if ( $i >= strlen( $text ) ) {
				if ( $currentClosing == "\n" ) {
					// End of input while a heading is open: treat as a line end
					$curChar = '';
					$found = 'line-end';
				} else {
					# All done
					break;
				}
			} else {
				$curChar = $text[$i];
				if ( $curChar == '|' ) {
					$found = 'pipe';
				} elseif ( $curChar == '=' ) {
					$found = 'equals';
				} elseif ( $curChar == '<' ) {
					$found = 'angle';
				} elseif ( $curChar == "\n" ) {
					if ( $inHeading ) {
						$found = 'line-end';
					} else {
						$found = 'line-start';
					}
				} elseif ( $curChar == $currentClosing ) {
					$found = 'close';
				} elseif ( isset( $rules[$curChar] ) ) {
					$found = 'open';
					$rule = $rules[$curChar];
				} else {
					# Some versions of PHP have a strcspn which stops on null characters
					# Ignore and continue
					++$i;
					continue;
				}
			}
		}

		if ( $found == 'angle' ) {
			$matches = false;
			// Handle </onlyinclude>: switch back to ignoring until the next <onlyinclude>
			if ( $enableOnlyinclude && substr( $text, $i, strlen( '</onlyinclude>' ) ) == '</onlyinclude>' ) {
				$findOnlyinclude = true;
				continue;
			}

			// Determine element name (match is anchored just after the '<')
			if ( !preg_match( $elementsRegex, $text, $matches, 0, $i + 1 ) ) {
				// Element name missing or not listed: emit an escaped literal '<'
				$accum .= '&lt;';
				++$i;
				continue;
			}
			// Handle comments
			if ( isset( $matches[2] ) && $matches[2] == '!--' ) {
				// To avoid leaving blank lines, when a comment is both preceded
				// and followed by a newline (ignoring spaces), the leading spaces
				// and the trailing spaces plus newline are folded into the comment.

				// Find the end of the comment
				$endPos = strpos( $text, '-->', $i + 4 );
				if ( $endPos === false ) {
					// Unclosed comment in input, runs to end
					$inner = substr( $text, $i );
					$accum .= '<comment>' . htmlspecialchars( $inner ) . '</comment>';
					$i = strlen( $text );
				} else {
					// Search backwards for leading spaces
					$wsStart = $i ? ( $i - strspn( $revText, ' ', strlen( $text ) - $i ) ) : 0;
					// Search forwards for trailing spaces.
					// $wsEnd is the index of the last character consumed: the '>' of
					// '-->' ($endPos + 2) plus any spaces that follow it
					$wsEnd = $endPos + 2 + strspn( $text, ' ', $endPos + 3 );
					// Eat the line if possible: this requires a newline immediately
					// before the leading spaces and immediately after the trailing ones
					if ( $wsStart > 0 && substr( $text, $wsStart - 1, 1 ) == "\n"
						&& substr( $text, $wsEnd + 1, 1 ) == "\n" )
					{
						$startPos = $wsStart;
						$endPos = $wsEnd + 1;
						// Remove leading spaces from the end of the accumulator,
						// but only if they are really there
						$wsLength = $i - $wsStart;
						if ( $wsLength > 0 && substr( $accum, -$wsLength ) === str_repeat( ' ', $wsLength ) ) {
							$accum = substr( $accum, 0, -$wsLength );
						}
						// The trailing newline was consumed, so do a line-start run next
						$fakeLineStart = true;
					} else {
						// No line to eat, just take the comment itself
						$startPos = $i;
						$endPos += 2;
					}

					if ( $stack->top ) {
						// Record comment positions on the current part so that the
						// line-end handler can look past trailing comments
						$part = $stack->top->getCurrentPart();
						if ( isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 ) {
							// Comments abutting, no change in visual end
							$part->commentEnd = $wsEnd;
						} else {
							$part->visualEnd = $wsStart;
							$part->commentEnd = $endPos;
						}
					}
					$i = $endPos + 1;
					$inner = substr( $text, $startPos, $endPos - $startPos + 1 );
					$accum .= '<comment>' . htmlspecialchars( $inner ) . '</comment>';
				}
				continue;
			}
			$name = $matches[1];
			$lowerName = strtolower( $name );
			$attrStart = $i + strlen( $name ) + 1;

			// Find end of tag; skip the search once it is known that no '>' remains
			$tagEndPos = $noMoreGT ? false : strpos( $text, '>', $attrStart );
			if ( $tagEndPos === false ) {
				// Infinite backtrack
				// Disable tag search to prevent worst-case O(N^2) performance
				$noMoreGT = true;
				$accum .= '&lt;';
				++$i;
				continue;
			}

			// Handle ignored tags: the tag itself is wrapped in <ignore>, its content is kept
			if ( in_array( $lowerName, $ignoredTags ) ) {
				$accum .= '<ignore>' . htmlspecialchars( substr( $text, $i, $tagEndPos - $i + 1 ) ) . '</ignore>';
				$i = $tagEndPos + 1;
				continue;
			}

			$tagStartPos = $i;
			if ( $text[$tagEndPos-1] == '/' ) {
				// Self-closing tag: no inner text and no closing tag
				$attrEnd = $tagEndPos - 1;
				$inner = null;
				$i = $tagEndPos + 1;
				$close = '';
			} else {
				$attrEnd = $tagEndPos;
				// Find closing tag
				if ( preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",
						$text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 ) )
				{
					$inner = substr( $text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1 );
					$i = $matches[0][1] + strlen( $matches[0][0] );
					$close = '<close>' . htmlspecialchars( $matches[0][0] ) . '</close>';
				} else {
					// No end tag -- let it run out to the end of the text.
					$inner = substr( $text, $tagEndPos + 1 );
					$i = strlen( $text );
					$close = '';
				}
			}
			// <includeonly> and <noinclude> just become <ignore> tags
			if ( in_array( $lowerName, $ignoredElements ) ) {
				$accum .= '<ignore>' . htmlspecialchars( substr( $text, $tagStartPos, $i - $tagStartPos ) )
					. '</ignore>';
				continue;
			}

			$accum .= '<ext>';
			if ( $attrEnd <= $attrStart ) {
				$attr = '';
			} else {
				$attr = substr( $text, $attrStart, $attrEnd - $attrStart );
			}
			$accum .= '<name>' . htmlspecialchars( $name ) . '</name>' .
				// The <attr> element is always emitted, even when the attribute text is empty
				'<attr>' . htmlspecialchars( $attr ) . '</attr>';
			if ( $inner !== null ) {
				$accum .= '<inner>' . htmlspecialchars( $inner ) . '</inner>';
			}
			$accum .= $close . '</ext>';
		}

		elseif ( $found == 'line-start' ) {
			// Is this the start of a heading?
			// Line break belongs before the heading element in any case
			if ( $fakeLineStart ) {
				$fakeLineStart = false;
			} else {
				$accum .= $curChar;
				$i++;
			}

			// Count up to six equals signs at the start of the line
			$count = strspn( $text, '=', $i, 6 );
			if ( $count == 1 && $findEquals ) {
				// A single '=' while an argument name/value separator is being looked for:
				// do nothing here and leave it to the 'equals' handler
			} elseif ( $count > 0 ) {
				// Open a possible heading; it is closed by the next newline
				$piece = array(
					'open' => "\n",
					'close' => "\n",
					'parts' => array( new PPDPart( str_repeat( '=', $count ) ) ),
					'startPos' => $i,
					'count' => $count );
				$stack->push( $piece );
				$accum =& $stack->getAccum();
				// NOTE: this overwrites the $flags parameter; extract() then refreshes
				// $findPipe, $findEquals and $inHeading from the new stack top
				$flags = $stack->getFlags();
				extract( $flags );
				$i += $count;
			}
		}

		elseif ( $found == 'line-end' ) {
			$piece = $stack->top;
			// A heading must be open, otherwise \n wouldn't have been in the search list
			assert( $piece->open == "\n" );
			$part = $piece->getCurrentPart();
			// Search back through the input to see if it has a proper close
			// Do this using the reversed string since the other solutions (end anchor, etc.) are inefficient
			$wsLength = strspn( $revText, " \t", strlen( $text ) - $i );
			$searchStart = $i - $wsLength;
			if ( isset( $part->commentEnd ) && $searchStart - 1 == $part->commentEnd ) {
				// Comment found at line end
				// Search for equals signs before the comment
				$searchStart = $part->visualEnd;
				$searchStart -= strspn( $revText, " \t", strlen( $text ) - $searchStart );
			}
			$count = $piece->count;
			$equalsLength = strspn( $revText, '=', strlen( $text ) - $searchStart );
			if ( $equalsLength > 0 ) {
				if ( $i - $equalsLength == $piece->startPos ) {
					// The closing run of equals signs reaches back to the heading's start
					// position, i.e. the line is a single string of equals signs.
					// Fewer than three yield no heading; otherwise the level is
					// half of (count - 1), capped at 6
					$count = $equalsLength;
					if ( $count < 3 ) {
						$count = 0;
					} else {
						$count = min( 6, intval( ( $count - 1 ) / 2 ) );
					}
				} else {
					// Level is the smaller of the opening and closing runs
					$count = min( $equalsLength, $count );
				}
				if ( $count > 0 ) {
					// Normal match, output <h>
					$element = "<h level=\"$count\" i=\"$headingIndex\">$accum</h>";
					$headingIndex++;
				} else {
					// Single equals sign on its own line, count=0
					$element = $accum;
				}
			} else {
				// No match, no <h>, just pass down the inner text
				$element = $accum;
			}
			// Unwind the stack
			$stack->pop();
			$accum =& $stack->getAccum();
			$flags = $stack->getFlags();
			extract( $flags );

			// Append the result to the enclosing accumulator
			$accum .= $element;
			// Note that we do NOT increment the input pointer.
			// This is because the closing linebreak could be the opening linebreak of
			// another heading. Infinite loops are avoided because the next iteration MUST
			// hit the heading open case above, which unconditionally increments the
			// input pointer.
		}

		elseif ( $found == 'open' ) {
			# count opening brace characters
			$count = strspn( $text, $curChar, $i );

			# we need to add to stack only if opening brace count is enough for one of the rules
			if ( $count >= $rule['min'] ) {
				# Add it to the stack
				$piece = array(
					'open' => $curChar,
					'close' => $rule['end'],
					'count' => $count,
					'lineStart' => ($i > 0 && $text[$i-1] == "\n"),
				);

				$stack->push( $piece );
				$accum =& $stack->getAccum();
				$flags = $stack->getFlags();
				extract( $flags );
			} else {
				# Add literal brace(s)
				$accum .= htmlspecialchars( str_repeat( $curChar, $count ) );
			}
			$i += $count;
		}

		elseif ( $found == 'close' ) {
			$piece = $stack->top;
			# lets check if there are enough characters for closing brace
			$maxCount = $piece->count;
			$count = strspn( $text, $curChar, $i, $maxCount );

			# check for maximum matching characters (if there are 5 closing
			# characters, we will probably need only 3 - depending on the rules)
			$matchingCount = 0;
			$rule = $rules[$piece->open];
			if ( $count > $rule['max'] ) {
				# The specified maximum exists in the callback array, unless the caller
				# has made an error
				$matchingCount = $rule['max'];
			} else {
				# Count is less than the maximum
				# Skip any gaps in the callback array to find the true largest match
				# Need to use array_key_exists not isset because the callback can be null
				$matchingCount = $count;
				while ( $matchingCount > 0 && !array_key_exists( $matchingCount, $rule['names'] ) ) {
					--$matchingCount;
				}
			}

			if ($matchingCount <= 0) {
				# No matching element found in callback array
				# Output a literal closing brace and continue
				$accum .= htmlspecialchars( str_repeat( $curChar, $count ) );
				$i += $count;
				continue;
			}
			$name = $rule['names'][$matchingCount];
			if ( $name === null ) {
				// No element, just literal text: rebuild the original syntax
				$element = $piece->breakSyntax( $matchingCount ) . str_repeat( $rule['end'], $matchingCount );
			} else {
				# Create XML element
				# Note: $parts is already XML, does not need to be encoded further
				$parts = $piece->parts;
				$title = $parts[0]->out;
				unset( $parts[0] );

				# The invocation is at the start of the line if lineStart is set in
				# the stack, and all opening brackets are used up.
				if ( $maxCount == $matchingCount && !empty( $piece->lineStart ) ) {
					$attr = ' lineStart="1"';
				} else {
					$attr = '';
				}

				$element = "<$name$attr>";
				$element .= "<title>$title</title>";
				$argIndex = 1;
				foreach ( $parts as $partIndex => $part ) {
					if ( isset( $part->eqpos ) ) {
						// Named argument: split the part's output at the recorded '=' offset
						$argName = substr( $part->out, 0, $part->eqpos );
						$argValue = substr( $part->out, $part->eqpos + 1 );
						$element .= "<part><name>$argName</name>=<value>$argValue</value></part>";
					} else {
						// Positional argument: numbered from 1 in order of appearance
						$element .= "<part><name index=\"$argIndex\" /><value>{$part->out}</value></part>";
						$argIndex++;
					}
				}
				$element .= "</$name>";
			}

			# Advance input pointer
			$i += $matchingCount;

			# Unwind the stack
			$stack->pop();
			$accum =& $stack->getAccum();

			# Re-add the old stack element if it still has unmatched opening characters remaining
			if ($matchingCount < $piece->count) {
				$piece->parts = array( new PPDPart );
				$piece->count -= $matchingCount;
				# do we still qualify for any callback with remaining count?
				$names = $rules[$piece->open]['names'];
				$skippedBraces = 0;
				$enclosingAccum =& $accum;
				while ( $piece->count ) {
					if ( array_key_exists( $piece->count, $names ) ) {
						$stack->push( $piece );
						$accum =& $stack->getAccum();
						break;
					}
					--$piece->count;
					$skippedBraces ++;
				}
				// Opening characters that can no longer start a construct become literal
				// text in the accumulator that was current before any re-push
				$enclosingAccum .= str_repeat( $piece->open, $skippedBraces );
			}
			$flags = $stack->getFlags();
			extract( $flags );

			# Add XML element to the enclosing accumulator
			$accum .= $element;
		}

		elseif ( $found == 'pipe' ) {
			$findEquals = true; // shortcut for getFlags()
			$stack->addPart();
			$accum =& $stack->getAccum();
			++$i;
		}

		elseif ( $found == 'equals' ) {
			$findEquals = false; // shortcut for getFlags()
			// Remember where in the part's output the '=' sits, for the name/value split
			$stack->getCurrentPart()->eqpos = strlen( $accum );
			$accum .= '=';
			++$i;
		}
	}

	# Output any remaining unclosed brackets
	foreach ( $stack->stack as $piece ) {
		$stack->rootAccum .= $piece->breakSyntax();
	}
	$stack->rootAccum .= '</root>';
	$xml = $stack->rootAccum;

	wfProfileOut( __METHOD__ );

	return $xml;
}
00636 }
00637
/**
 * Stack of unclosed constructs used while scanning wikitext in
 * Preprocessor_DOM::preprocessToXml().
 *
 * Besides the elements themselves it maintains $accum, a reference to the
 * output buffer that text should currently be appended to: the last part of
 * the top element, or $rootAccum when the stack is empty.
 */
class PPDStack {
	// $stack: array of stack elements; $rootAccum: output buffer used when the
	// stack is empty; $top: the topmost element, or false when the stack is empty
	var $stack, $rootAccum, $top;
	// Not used by this class; kept so existing code touching it keeps working
	var $out;
	// Reference to the current output buffer. Declared explicitly so that it is
	// not created as a dynamic property by the constructor.
	var $accum;
	// Class instantiated by push() when it is given plain array data
	var $elementClass = 'PPDStackElement';

	static $false = false;

	/**
	 * Start with an empty stack whose accumulator is the root buffer.
	 */
	function __construct() {
		$this->stack = array();
		$this->top = false;
		$this->rootAccum = '';
		$this->accum =& $this->rootAccum;
	}

	/**
	 * @return Integer: number of elements on the stack
	 */
	function count() {
		return count( $this->stack );
	}

	/**
	 * @return String: the current accumulator, returned by reference
	 */
	function &getAccum() {
		return $this->accum;
	}

	/**
	 * @return The current part of the top element, or false if the stack is empty
	 */
	function getCurrentPart() {
		if ( $this->top === false ) {
			return false;
		} else {
			return $this->top->getCurrentPart();
		}
	}

	/**
	 * Push an element and make its current part the accumulator.
	 *
	 * @param $data Either an instance of $this->elementClass, or an array of
	 *   property values from which such an instance is constructed
	 */
	function push( $data ) {
		if ( $data instanceof $this->elementClass ) {
			$this->stack[] = $data;
		} else {
			$class = $this->elementClass;
			$this->stack[] = new $class( $data );
		}
		$this->top = $this->stack[ count( $this->stack ) - 1 ];
		$this->accum =& $this->top->getAccum();
	}

	/**
	 * Remove the top element and re-point the accumulator at the new top
	 * (or at the root buffer if the stack became empty).
	 *
	 * @return The element that was removed
	 * @throws MWException if the stack is already empty
	 */
	function pop() {
		if ( !count( $this->stack ) ) {
			throw new MWException( __METHOD__.': no elements remaining' );
		}
		$temp = array_pop( $this->stack );

		if ( count( $this->stack ) ) {
			$this->top = $this->stack[ count( $this->stack ) - 1 ];
			$this->accum =& $this->top->getAccum();
		} else {
			$this->top = self::$false;
			$this->accum =& $this->rootAccum;
		}
		return $temp;
	}

	/**
	 * Start a new part on the top element and make it the accumulator.
	 * The stack must not be empty.
	 *
	 * @param $s String: initial content of the new part
	 */
	function addPart( $s = '' ) {
		$this->top->addPart( $s );
		$this->accum =& $this->top->getAccum();
	}

	/**
	 * @return Array with the boolean keys findEquals, findPipe and inHeading:
	 *   all false for an empty stack, otherwise whatever the top element reports
	 */
	function getFlags() {
		if ( !count( $this->stack ) ) {
			return array(
				'findEquals' => false,
				'findPipe' => false,
				'inHeading' => false,
			);
		} else {
			return $this->top->getFlags();
		}
	}
}
00716
/**
 * One unclosed construct on a PPDStack: a bracket run or a possible heading,
 * together with the parts of output collected for it so far.
 */
class PPDStackElement {
	var $open,              // Opening character (\n for heading)
		$close,             // Matching closing character
		$count,             // Number of opening characters found (number of "=" for heading)
		$parts,             // Array of parts; the last one is the current accumulator
		$lineStart,         // True if the open char appeared at the start of the input line
		$startPos;          // Input offset supplied for heading elements; declared so the
		                    // constructor does not create it as a dynamic property

	// Class instantiated for each part
	var $partClass = 'PPDPart';

	/**
	 * Create the element with one empty part, then copy every entry of $data
	 * onto the property of the same name (so 'parts' in $data replaces the
	 * initial part list).
	 *
	 * @param $data Array of property name => value
	 */
	function __construct( $data = array() ) {
		$class = $this->partClass;
		$this->parts = array( new $class );

		foreach ( $data as $name => $value ) {
			$this->$name = $value;
		}
	}

	/**
	 * @return String: the output buffer of the last part, returned by reference
	 */
	function &getAccum() {
		return $this->parts[count($this->parts) - 1]->out;
	}

	/**
	 * Append a new part, which becomes the current one.
	 *
	 * @param $s String: initial content of the part
	 */
	function addPart( $s = '' ) {
		$class = $this->partClass;
		$this->parts[] = new $class( $s );
	}

	/**
	 * @return The last part
	 */
	function getCurrentPart() {
		return $this->parts[count($this->parts) - 1];
	}

	/**
	 * Work out which delimiters the scanner should look for inside this element.
	 *
	 * @return Array with boolean keys:
	 *   findPipe    true unless this is a heading or a '[' element
	 *   findEquals  true if pipes are searched for, there is more than one part
	 *               and the current part has no equals position recorded yet
	 *   inHeading   true if this is a heading element
	 */
	function getFlags() {
		$partCount = count( $this->parts );
		$findPipe = $this->open != "\n" && $this->open != '[';
		return array(
			'findPipe' => $findPipe,
			'findEquals' => $findPipe && $partCount > 1 && !isset( $this->parts[$partCount - 1]->eqpos ),
			'inHeading' => $this->open == "\n",
		);
	}

	/**
	 * Get the output string that results from "breaking" the syntax, i.e.
	 * treating the opening characters as literal text.
	 *
	 * For a heading this is the output of the first part. Otherwise it is the
	 * opening character repeated, followed by the parts joined with '|'.
	 *
	 * @param $openingCount Number of opening characters to emit, or false to
	 *   use $this->count
	 * @return String
	 */
	function breakSyntax( $openingCount = false ) {
		if ( $this->open == "\n" ) {
			$s = $this->parts[0]->out;
		} else {
			if ( $openingCount === false ) {
				$openingCount = $this->count;
			}
			$s = str_repeat( $this->open, $openingCount );
			$first = true;
			foreach ( $this->parts as $part ) {
				if ( $first ) {
					$first = false;
				} else {
					$s .= '|';
				}
				$s .= $part->out;
			}
		}
		return $s;
	}
}
00785
/**
 * One part of a stack element: an output buffer plus optional bookkeeping
 * positions set by Preprocessor_DOM::preprocessToXml().
 */
class PPDPart {
	var $out; // Output accumulator string

	// Optional positions. They are declared (defaulting to null) so that they
	// are not created as dynamic properties; callers test them with isset(),
	// which still reports false until a value has been assigned.
	var $eqpos;      // Offset of the equals sign within $out
	var $commentEnd; // Input offset recorded for the end of the last comment seen
	var $visualEnd;  // Input offset recorded for where that comment's leading spaces start

	/**
	 * @param $out String: initial content of the output accumulator
	 */
	function __construct( $out = '' ) {
		$this->out = $out;
	}
}
00801
00806 class PPFrame_DOM implements PPFrame {
00807 var $preprocessor, $parser, $title;
00808 var $titleCache;
00809
00814 var $loopCheckHash;
00815
00820 var $depth;
00821
00822
/**
 * Construct a new root frame for the given preprocessor.
 *
 * The parser and title are taken from the preprocessor; the title cache is
 * seeded with the title's prefixed DB key, or false when there is no title.
 *
 * @param $preprocessor The parent preprocessor
 */
function __construct( $preprocessor ) {
	$parser = $preprocessor->parser;
	$title = $parser->mTitle;
	if ( $title ) {
		$rootKey = $title->getPrefixedDBkey();
	} else {
		$rootKey = false;
	}

	$this->preprocessor = $preprocessor;
	$this->parser = $parser;
	$this->title = $title;
	$this->titleCache = array( $rootKey );
	$this->loopCheckHash = array();
	$this->depth = 0;
}
00835
/**
 * Create a new child frame.
 *
 * Each element of $args is expected to be a <part> node. A part whose <name>
 * child carries attributes is stored as a numbered argument under its "index"
 * attribute; any other part is stored as a named argument under its expanded,
 * trimmed name. A later argument replaces an earlier one with the same key,
 * whichever of the two kinds the earlier one was.
 *
 * @param $args Iterable list of part nodes, a PPNode wrapping one, or false for none
 * @param $title Title for the child frame, or false to reuse this frame's title
 * @return PPTemplateFrame_DOM
 */
function newChild( $args = false, $title = false ) {
	$named = array();
	$numbered = array();
	if ( $title === false ) {
		$title = $this->title;
	}

	if ( $args !== false ) {
		if ( $args instanceof PPNode ) {
			$args = $args->node;
		}
		$query = false;
		foreach ( $args as $partNode ) {
			// Build the XPath helper lazily from the first part's document
			if ( !$query ) {
				$query = new DOMXPath( $partNode->ownerDocument );
			}

			$nameNode = $query->query( 'name', $partNode )->item( 0 );
			$valueNode = $query->query( 'value', $partNode )->item( 0 );

			if ( $nameNode->hasAttributes() ) {
				// Numbered parameter
				$key = $nameNode->attributes->getNamedItem( 'index' )->textContent;
				$numbered[$key] = $valueNode;
				unset( $named[$key] );
			} else {
				// Named parameter
				$key = trim( $this->expand( $nameNode, PPFrame::STRIP_COMMENTS ) );
				$named[$key] = $valueNode;
				unset( $numbered[$key] );
			}
		}
	}

	return new PPTemplateFrame_DOM( $this->preprocessor, $this, $numbered, $named, $title );
}
00873
/**
 * Expand a document tree (or part of one) to a string.
 *
 * The tree is walked iteratively with three parallel stacks (output buffers,
 * iterators and indexes) rather than by recursion, except for headings, whose
 * children are expanded by a nested call.
 *
 * @param $root A string (returned unchanged), a PPNode_DOM, a DOMDocument, a
 *   DOMNode, a DOMNodeList, or an array of any of these
 * @param $flags Integer: bit field of the NO_TEMPLATES, NO_ARGS, STRIP_COMMENTS,
 *   RECOVER_COMMENTS and NO_IGNORE constants
 * @return String: the expanded text, or an error <span> if the parser's
 *   node-count or expansion-depth limit is exceeded
 * @throws MWException if a value of an unsupported type is encountered
 */
function expand( $root, $flags = 0 ) {
	// Nesting depth of expand() calls currently in progress, shared by all frames
	static $expansionDepth = 0;
	if ( is_string( $root ) ) {
		return $root;
	}

	if ( ++$this->parser->mPPNodeCount > $this->parser->mOptions->mMaxPPNodeCount )
	{
		return '<span class="error">Node-count limit exceeded</span>';
	}

	if ( $expansionDepth > $this->parser->mOptions->mMaxPPExpandDepth ) {
		return '<span class="error">Expansion depth limit exceeded</span>';
	}
	wfProfileIn( __METHOD__ );
	++$expansionDepth;

	if ( $root instanceof PPNode_DOM ) {
		$root = $root->node;
	}
	if ( $root instanceof DOMDocument ) {
		$root = $root->documentElement;
	}

	// Level 0 is a sentinel that collects the final output; level 1 holds the root
	$outStack = array( '', '' );
	$iteratorStack = array( false, $root );
	$indexStack = array( 0, 0 );

	while ( count( $iteratorStack ) > 1 ) {
		$level = count( $outStack ) - 1;
		$iteratorNode =& $iteratorStack[ $level ];
		$out =& $outStack[$level];
		$index =& $indexStack[$level];

		if ( $iteratorNode instanceof PPNode_DOM ) $iteratorNode = $iteratorNode->node;

		if ( is_array( $iteratorNode ) ) {
			if ( $index >= count( $iteratorNode ) ) {
				// All done with this iterator
				$iteratorStack[$level] = false;
				$contextNode = false;
			} else {
				$contextNode = $iteratorNode[$index];
				$index++;
			}
		} elseif ( $iteratorNode instanceof DOMNodeList ) {
			if ( $index >= $iteratorNode->length ) {
				// All done with this iterator
				$iteratorStack[$level] = false;
				$contextNode = false;
			} else {
				$contextNode = $iteratorNode->item( $index );
				$index++;
			}
		} else {
			// Copy to $contextNode and then delete from iterator stack,
			// because this is not an iterator but we do have to execute it once
			$contextNode = $iteratorStack[$level];
			$iteratorStack[$level] = false;
		}

		if ( $contextNode instanceof PPNode_DOM ) $contextNode = $contextNode->node;

		$newIterator = false;

		if ( $contextNode === false ) {
			// nothing to do
		} elseif ( is_string( $contextNode ) ) {
			$out .= $contextNode;
		} elseif ( is_array( $contextNode ) || $contextNode instanceof DOMNodeList ) {
			$newIterator = $contextNode;
		} elseif ( $contextNode instanceof DOMNode ) {
			if ( $contextNode->nodeType == XML_TEXT_NODE ) {
				$out .= $contextNode->nodeValue;
			} elseif ( $contextNode->nodeName == 'template' ) {
				# Double-brace expansion
				$xpath = new DOMXPath( $contextNode->ownerDocument );
				$titles = $xpath->query( 'title', $contextNode );
				$title = $titles->item( 0 );
				$parts = $xpath->query( 'part', $contextNode );
				if ( $flags & self::NO_TEMPLATES ) {
					$newIterator = $this->virtualBracketedImplode( '{{', '|', '}}', $title, $parts );
				} else {
					$lineStart = $contextNode->getAttribute( 'lineStart' );
					$params = array(
						'title' => new PPNode_DOM( $title ),
						'parts' => new PPNode_DOM( $parts ),
						'lineStart' => $lineStart );
					$ret = $this->parser->braceSubstitution( $params, $this );
					if ( isset( $ret['object'] ) ) {
						$newIterator = $ret['object'];
					} else {
						$out .= $ret['text'];
					}
				}
			} elseif ( $contextNode->nodeName == 'tplarg' ) {
				# Triple-brace expansion
				$xpath = new DOMXPath( $contextNode->ownerDocument );
				$titles = $xpath->query( 'title', $contextNode );
				$title = $titles->item( 0 );
				$parts = $xpath->query( 'part', $contextNode );
				if ( $flags & self::NO_ARGS ) {
					$newIterator = $this->virtualBracketedImplode( '{{{', '|', '}}}', $title, $parts );
				} else {
					$params = array(
						'title' => new PPNode_DOM( $title ),
						'parts' => new PPNode_DOM( $parts ) );
					$ret = $this->parser->argSubstitution( $params, $this );
					if ( isset( $ret['object'] ) ) {
						$newIterator = $ret['object'];
					} else {
						$out .= $ret['text'];
					}
				}
			} elseif ( $contextNode->nodeName == 'comment' ) {
				# HTML-style comment
				# Remove it in HTML, pre+remove and STRIP_COMMENTS modes
				if ( $this->parser->ot['html']
					|| ( $this->parser->ot['pre'] && $this->parser->mOptions->getRemoveComments() )
					|| ( $flags & self::STRIP_COMMENTS ) )
				{
					$out .= '';
				}
				# Add a strip marker in PST mode so that pstPass2() can run some old-fashioned regexes on the result
				# Not in RECOVER_COMMENTS mode (extractSections) though
				elseif ( $this->parser->ot['wiki'] && ! ( $flags & self::RECOVER_COMMENTS ) ) {
					$out .= $this->parser->insertStripItem( $contextNode->textContent );
				}
				# Recover the literal comment in RECOVER_COMMENTS and pre+no-remove
				else {
					$out .= $contextNode->textContent;
				}
			} elseif ( $contextNode->nodeName == 'ignore' ) {
				# Output suppression used by <includeonly> etc.
				# OT_WIKI will only respect <ignore> in substed templates.
				# The other output types respect it unless NO_IGNORE is set.
				# extractSections() sets NO_IGNORE and so never respects it.
				if ( ( !isset( $this->parent ) && $this->parser->ot['wiki'] ) || ( $flags & self::NO_IGNORE ) ) {
					$out .= $contextNode->textContent;
				} else {
					$out .= '';
				}
			} elseif ( $contextNode->nodeName == 'ext' ) {
				# Extension tag
				$xpath = new DOMXPath( $contextNode->ownerDocument );
				$names = $xpath->query( 'name', $contextNode );
				$attrs = $xpath->query( 'attr', $contextNode );
				$inners = $xpath->query( 'inner', $contextNode );
				$closes = $xpath->query( 'close', $contextNode );
				$params = array(
					'name' => new PPNode_DOM( $names->item( 0 ) ),
					'attr' => $attrs->length > 0 ? new PPNode_DOM( $attrs->item( 0 ) ) : null,
					'inner' => $inners->length > 0 ? new PPNode_DOM( $inners->item( 0 ) ) : null,
					'close' => $closes->length > 0 ? new PPNode_DOM( $closes->item( 0 ) ) : null,
				);
				$out .= $this->parser->extensionSubstitution( $params, $this );
			} elseif ( $contextNode->nodeName == 'h' ) {
				# Heading
				$s = $this->expand( $contextNode->childNodes, $flags );

				# Insert a heading marker only for <h> children of <root>
				# This is to stop extractSections from going over multiple tree levels
				if ( $contextNode->parentNode->nodeName == 'root'
					&& $this->parser->ot['html'] )
				{
					# Insert heading index marker
					$headingIndex = $contextNode->getAttribute( 'i' );
					$titleText = $this->title->getPrefixedDBkey();
					$this->parser->mHeadings[] = array( $titleText, $headingIndex );
					$serial = count( $this->parser->mHeadings ) - 1;
					$marker = "{$this->parser->mUniqPrefix}-h-$serial-" . Parser::MARKER_SUFFIX;
					$count = $contextNode->getAttribute( 'level' );
					// The marker goes right after the first "level" characters of the heading text
					$s = substr( $s, 0, $count ) . $marker . substr( $s, $count );
					$this->parser->mStripState->general->setPair( $marker, '' );
				}
				$out .= $s;
			} else {
				# Generic recursive expansion
				$newIterator = $contextNode->childNodes;
			}
		} else {
			# Undo the depth increment made on entry; the counter is static, so
			# leaving it raised would skew the depth limit for every later call
			--$expansionDepth;
			wfProfileOut( __METHOD__ );
			throw new MWException( __METHOD__.': Invalid parameter type' );
		}

		if ( $newIterator !== false ) {
			if ( $newIterator instanceof PPNode_DOM ) {
				$newIterator = $newIterator->node;
			}
			$outStack[] = '';
			$iteratorStack[] = $newIterator;
			$indexStack[] = 0;
		} elseif ( $iteratorStack[$level] === false ) {
			// Return accumulated value to parent
			// With tail recursion
			while ( $iteratorStack[$level] === false && $level > 0 ) {
				$outStack[$level - 1] .= $out;
				array_pop( $outStack );
				array_pop( $iteratorStack );
				array_pop( $indexStack );
				$level--;
			}
		}
	}
	--$expansionDepth;
	wfProfileOut( __METHOD__ );
	return $outStack[0];
}
01082
/**
 * Expand every node found in the extra arguments with the given flags and
 * join the results with a separator.
 *
 * Each argument after $flags may be a single node, an array of nodes, a
 * DOMNodeList or a PPNode_DOM wrapping any of those.
 *
 * @param $sep String: separator placed between expanded nodes
 * @param $flags Integer: flags passed to expand()
 * @return String
 */
function implodeWithFlags( $sep, $flags ) {
	$roots = array_slice( func_get_args(), 2 );

	$joined = '';
	$needSep = false;
	foreach ( $roots as $root ) {
		if ( $root instanceof PPNode_DOM ) {
			$root = $root->node;
		}
		// Treat a lone node as a one-element list
		$nodes = ( is_array( $root ) || $root instanceof DOMNodeList ) ? $root : array( $root );
		foreach ( $nodes as $node ) {
			if ( $needSep ) {
				$joined .= $sep;
			}
			$needSep = true;
			$joined .= $this->expand( $node, $flags );
		}
	}
	return $joined;
}
01104
/**
 * Expand every node found in the extra arguments (with default flags) and
 * join the results with a separator.
 *
 * Each argument after $sep may be a single node, an array of nodes, a
 * DOMNodeList or a PPNode_DOM wrapping any of those.
 *
 * @param $sep String: separator placed between expanded nodes
 * @return String
 */
function implode( $sep ) {
	$roots = array_slice( func_get_args(), 1 );

	$joined = '';
	$needSep = false;
	foreach ( $roots as $root ) {
		if ( $root instanceof PPNode_DOM ) {
			$root = $root->node;
		}
		// Treat a lone node as a one-element list
		$nodes = ( is_array( $root ) || $root instanceof DOMNodeList ) ? $root : array( $root );
		foreach ( $nodes as $node ) {
			if ( $needSep ) {
				$joined .= $sep;
			}
			$needSep = true;
			$joined .= $this->expand( $node );
		}
	}
	return $joined;
}
01130
/**
 * Build an array that, when passed to expand(), yields the same output as
 * implode() would, without expanding anything yet.
 *
 * Each argument after $sep may be a single node, an array of nodes, a
 * DOMNodeList or a PPNode_DOM wrapping any of those.
 *
 * @param $sep String: separator placed between nodes
 * @return Array of nodes interleaved with $sep
 */
function virtualImplode( $sep ) {
	$args = array_slice( func_get_args(), 1 );
	$out = array();
	$first = true;

	foreach ( $args as $root ) {
		// Unwrap each argument here, inside the loop, where $root is defined
		// (matches virtualBracketedImplode)
		if ( $root instanceof PPNode_DOM ) $root = $root->node;
		if ( !is_array( $root ) && !( $root instanceof DOMNodeList ) ) {
			$root = array( $root );
		}
		foreach ( $root as $node ) {
			if ( $first ) {
				$first = false;
			} else {
				$out[] = $sep;
			}
			$out[] = $node;
		}
	}
	return $out;
}
01156
/**
 * Like virtualImplode(), but the resulting array is wrapped in a leading
 * $start element and a trailing $end element.
 *
 * Every argument after $end is a root to process. A root may be a
 * PPNode_DOM (its wrapped ->node is used), an array or DOMNodeList of
 * nodes, or a single node. Nothing is expanded here.
 *
 * @param mixed $start First element of the result
 * @param mixed $sep Separator element inserted between consecutive nodes
 * @param mixed $end Last element of the result
 * @return array
 */
function virtualBracketedImplode( $start, $sep, $end ) {
	$roots = array_slice( func_get_args(), 3 );
	$pieces = array( $start );
	$emitted = 0;

	foreach ( $roots as $root ) {
		if ( $root instanceof PPNode_DOM ) {
			$root = $root->node;
		}
		$isList = is_array( $root ) || $root instanceof DOMNodeList;
		$nodes = $isList ? $root : array( $root );
		foreach ( $nodes as $node ) {
			// The separator goes between nodes, never right after $start
			if ( $emitted++ > 0 ) {
				$pieces[] = $sep;
			}
			$pieces[] = $node;
		}
	}

	$pieces[] = $end;
	return $pieces;
}
01182
/**
 * Debug representation of this frame. This frame type carries no
 * arguments, so the braces are always empty.
 *
 * @return string
 */
function __toString() {
	$repr = 'frame{}';
	return $repr;
}
01186
/**
 * Get a prefixed DB key for this frame or for an entry in the title cache.
 *
 * @param int|bool $level Index into $this->titleCache, or false to use
 *   this frame's own title
 * @return string|bool The prefixed DB key; false if $level has no entry
 *   in the title cache
 */
function getPDBK( $level = false ) {
	if ( $level !== false ) {
		if ( isset( $this->titleCache[$level] ) ) {
			return $this->titleCache[$level];
		}
		return false;
	}
	return $this->title->getPrefixedDBkey();
}
01194
/**
 * Get all arguments of this frame. This frame type has none.
 *
 * @return array Always empty
 */
function getArguments() {
	$arguments = array();
	return $arguments;
}
01198
/**
 * Get the numbered arguments of this frame. This frame type has none.
 *
 * @return array Always empty
 */
function getNumberedArguments() {
	$arguments = array();
	return $arguments;
}
01202
/**
 * Get the named arguments of this frame. This frame type has none.
 *
 * @return array Always empty
 */
function getNamedArguments() {
	$arguments = array();
	return $arguments;
}
01206
/**
 * Whether this frame has no arguments. Always true for this frame type.
 *
 * @return bool
 */
function isEmpty() {
	return true;
}
01213
/**
 * Look up a single argument. This frame type has no arguments, so the
 * lookup always fails.
 *
 * @param string|int $name Argument name or index (ignored here)
 * @return bool Always false
 */
function getArgument( $name ) {
	return false;
}
01217
/**
 * Check a title against the set of prefixed DB keys recorded in
 * $this->loopCheckHash.
 *
 * @param object $title Object providing getPrefixedDBkey()
 * @return bool True if the title's key is not in the hash, false if it
 *   is already recorded there
 */
function loopCheck( $title ) {
	$pdbk = $title->getPrefixedDBkey();
	$alreadySeen = isset( $this->loopCheckHash[$pdbk] );
	return !$alreadySeen;
}
01224
/**
 * Whether this frame is a template frame. Always false for this frame
 * type.
 *
 * @return bool
 */
function isTemplate() {
	return false;
}
01231 }
01232
/**
 * Frame that carries the numbered and named arguments of a template call.
 *
 * Argument nodes are stored unexpanded. They are expanded on demand in the
 * parent frame and the resulting text is cached per argument.
 */
class PPTemplateFrame_DOM extends PPFrame_DOM {
	var $numberedArgs, $namedArgs, $parent;
	var $numberedExpansionCache, $namedExpansionCache;

	/**
	 * @param object $preprocessor Forwarded to the PPFrame_DOM constructor
	 * @param object|bool $parent Frame whose titleCache, loopCheckHash and
	 *   depth are inherited, and in which arguments are expanded
	 * @param array $numberedArgs Unexpanded argument nodes keyed by index
	 * @param array $namedArgs Unexpanded argument nodes keyed by name
	 * @param object|bool $title Object providing getPrefixedDBkey(), or false
	 */
	function __construct( $preprocessor, $parent = false, $numberedArgs = array(), $namedArgs = array(), $title = false ) {
		parent::__construct( $preprocessor );

		$this->parent = $parent;
		$this->numberedArgs = $numberedArgs;
		$this->namedArgs = $namedArgs;
		$this->title = $title;

		$pdbk = $title ? $title->getPrefixedDBkey() : false;

		// Extend the parent's title chain with this frame's key (false if untitled)
		$this->titleCache = $parent->titleCache;
		$this->titleCache[] = $pdbk;

		// Inherit the parent's loop-check set and add this frame's own key to it
		$this->loopCheckHash = $parent->loopCheckHash;
		if ( $pdbk !== false ) {
			$this->loopCheckHash[$pdbk] = true;
		}

		$this->depth = $parent->depth + 1;
		$this->numberedExpansionCache = array();
		$this->namedExpansionCache = array();
	}

	/**
	 * Debug representation listing each argument name with the XML of its
	 * unexpanded node.
	 *
	 * @return string
	 */
	function __toString() {
		$pairs = array();
		// Array union: numbered entries win over named ones on a key collision
		foreach ( $this->numberedArgs + $this->namedArgs as $name => $value ) {
			$xml = $value->ownerDocument->saveXML( $value );
			$pairs[] = "\"$name\":\"" . str_replace( '"', '\\"', $xml ) . '"';
		}
		return 'tplframe{' . implode( ', ', $pairs ) . '}';
	}

	/**
	 * Whether this frame has neither numbered nor named arguments.
	 *
	 * @return bool
	 */
	function isEmpty() {
		return !( count( $this->numberedArgs ) || count( $this->namedArgs ) );
	}

	/**
	 * Get every argument, expanded, keyed by index or name.
	 *
	 * @return array
	 */
	function getArguments() {
		$keys = array_merge(
			array_keys( $this->numberedArgs ),
			array_keys( $this->namedArgs )
		);
		$arguments = array();
		foreach ( $keys as $key ) {
			$arguments[$key] = $this->getArgument( $key );
		}
		return $arguments;
	}

	/**
	 * Get the expanded value for every key present in the numbered
	 * arguments.
	 *
	 * @return array
	 */
	function getNumberedArguments() {
		$arguments = array();
		foreach ( $this->numberedArgs as $key => $unused ) {
			$arguments[$key] = $this->getArgument( $key );
		}
		return $arguments;
	}

	/**
	 * Get the expanded value for every key present in the named arguments.
	 *
	 * @return array
	 */
	function getNamedArguments() {
		$arguments = array();
		foreach ( $this->namedArgs as $key => $unused ) {
			$arguments[$key] = $this->getArgument( $key );
		}
		return $arguments;
	}

	/**
	 * Get one numbered argument, expanding it in the parent frame on first
	 * use and caching the result.
	 *
	 * @param int|string $index
	 * @return string|bool Expanded text, or false if no such argument
	 */
	function getNumberedArgument( $index ) {
		if ( !isset( $this->numberedArgs[$index] ) ) {
			return false;
		}
		if ( isset( $this->numberedExpansionCache[$index] ) ) {
			return $this->numberedExpansionCache[$index];
		}
		# No trimming for unnamed arguments
		$expanded = $this->parent->expand( $this->numberedArgs[$index], self::STRIP_COMMENTS );
		$this->numberedExpansionCache[$index] = $expanded;
		return $expanded;
	}

	/**
	 * Get one named argument, expanding it in the parent frame on first use
	 * and caching the trimmed result.
	 *
	 * @param string $name
	 * @return string|bool Expanded, trimmed text, or false if no such argument
	 */
	function getNamedArgument( $name ) {
		if ( !isset( $this->namedArgs[$name] ) ) {
			return false;
		}
		if ( isset( $this->namedExpansionCache[$name] ) ) {
			return $this->namedExpansionCache[$name];
		}
		# Trim named arguments post-expand, for backwards compatibility
		$expanded = trim( $this->parent->expand( $this->namedArgs[$name], self::STRIP_COMMENTS ) );
		$this->namedExpansionCache[$name] = $expanded;
		return $expanded;
	}

	/**
	 * Get an argument by index or name; numbered arguments are tried first.
	 *
	 * @param int|string $name
	 * @return string|bool Expanded text, or false if no such argument
	 */
	function getArgument( $name ) {
		$text = $this->getNumberedArgument( $name );
		return $text === false ? $this->getNamedArgument( $name ) : $text;
	}

	/**
	 * Whether this frame is a template frame. Always true for this class.
	 *
	 * @return bool
	 */
	function isTemplate() {
		return true;
	}
}
01345
/**
 * Frame whose arguments are supplied directly as a ready-made array
 * instead of being expanded from template argument nodes.
 */
class PPCustomFrame_DOM extends PPFrame_DOM {
	var $args;

	/**
	 * @param object $preprocessor Forwarded to the PPFrame_DOM constructor
	 * @param array $args Argument values keyed by name or index
	 */
	function __construct( $preprocessor, $args ) {
		parent::__construct( $preprocessor );
		$this->args = $args;
	}

	/**
	 * Debug representation listing each argument name with the string
	 * form of its value.
	 *
	 * @return string
	 */
	function __toString() {
		$pairs = array();
		foreach ( $this->args as $name => $value ) {
			$text = $value->__toString();
			$pairs[] = "\"$name\":\"" . str_replace( '"', '\\"', $text ) . '"';
		}
		return 'cstmframe{' . implode( ', ', $pairs ) . '}';
	}

	/**
	 * Whether this frame has no arguments.
	 *
	 * @return bool
	 */
	function isEmpty() {
		$hasArgs = count( $this->args );
		return !$hasArgs;
	}

	/**
	 * Get a single argument as stored.
	 *
	 * @param int|string $index
	 * @return mixed The stored value, or false if it is not set
	 */
	function getArgument( $index ) {
		return isset( $this->args[$index] ) ? $this->args[$index] : false;
	}
}
01385
/**
 * Thin wrapper around a DOM node or a DOMNodeList.
 *
 * The wrapped object is exposed as ->node. A DOMXPath for the node's
 * owner document is created lazily the first time ->xpath is read.
 */
class PPNode_DOM implements PPNode {
	var $node;

	/**
	 * @param DOMNode|DOMNodeList $node Object to wrap
	 * @param mixed $xpath Accepted for signature compatibility; not used
	 */
	function __construct( $node, $xpath = false ) {
		$this->node = $node;
	}

	/**
	 * Lazily create the ->xpath property on first read.
	 *
	 * Only 'xpath' is handled. Any other undefined property yields null;
	 * previously every undefined property name returned the XPath object,
	 * which hid typos such as $this->nodeName.
	 *
	 * @param string $name
	 * @return DOMXPath|null
	 */
	function __get( $name ) {
		if ( $name !== 'xpath' ) {
			return null;
		}
		// Stored as a real property, so later reads bypass __get entirely
		$this->xpath = new DOMXPath( $this->node->ownerDocument );
		return $this->xpath;
	}

	/**
	 * Serialize the wrapped node, or each node of the wrapped list, as XML.
	 *
	 * @return string
	 */
	function __toString() {
		if ( $this->node instanceof DOMNodeList ) {
			$s = '';
			foreach ( $this->node as $node ) {
				$s .= $node->ownerDocument->saveXML( $node );
			}
		} else {
			$s = $this->node->ownerDocument->saveXML( $this->node );
		}
		return $s;
	}

	/**
	 * @return PPNode_DOM|bool Wrapped child node list, or false if the
	 *   node has no childNodes value
	 */
	function getChildren() {
		return $this->node->childNodes ? new self( $this->node->childNodes ) : false;
	}

	/**
	 * @return PPNode_DOM|bool Wrapped first child, or false if there is none
	 */
	function getFirstChild() {
		return $this->node->firstChild ? new self( $this->node->firstChild ) : false;
	}

	/**
	 * @return PPNode_DOM|bool Wrapped next sibling, or false if there is none
	 */
	function getNextSibling() {
		return $this->node->nextSibling ? new self( $this->node->nextSibling ) : false;
	}

	/**
	 * Run an XPath query relative to the wrapped node.
	 *
	 * @param string $type XPath expression, typically a child element name
	 * @return PPNode_DOM Wrapper around the resulting node list
	 */
	function getChildrenOfType( $type ) {
		return new self( $this->xpath->query( $type, $this->node ) );
	}

	/**
	 * @return int|bool Number of nodes if a DOMNodeList is wrapped,
	 *   false otherwise
	 */
	function getLength() {
		if ( $this->node instanceof DOMNodeList ) {
			return $this->node->length;
		} else {
			return false;
		}
	}

	/**
	 * Get one item of the wrapped list.
	 *
	 * @param int $i
	 * @return PPNode_DOM|bool Wrapped item, or false if item() returns nothing
	 */
	function item( $i ) {
		$item = $this->node->item( $i );
		return $item ? new self( $item ) : false;
	}

	/**
	 * @return string '#nodelist' if a DOMNodeList is wrapped, otherwise the
	 *   node's nodeName
	 */
	function getName() {
		if ( $this->node instanceof DOMNodeList ) {
			return '#nodelist';
		} else {
			return $this->node->nodeName;
		}
	}

	/**
	 * Split a node that has <name> and <value> children into its parts.
	 *
	 * @return array With keys:
	 *   - name: PPNode_DOM wrapping the first <name> child
	 *   - index: the 'index' attribute of that <name> element
	 *   - value: PPNode_DOM wrapping the first <value> child
	 * @throws MWException If either child is missing
	 */
	function splitArg() {
		$names = $this->xpath->query( 'name', $this->node );
		$values = $this->xpath->query( 'value', $this->node );
		if ( !$names->length || !$values->length ) {
			throw new MWException( 'Invalid brace node passed to ' . __METHOD__ );
		}
		$name = $names->item( 0 );
		$index = $name->getAttribute( 'index' );
		return array(
			'name' => new self( $name ),
			'index' => $index,
			'value' => new self( $values->item( 0 ) ) );
	}

	/**
	 * Split a node that has <name> and <attr> children, and optionally
	 * <inner> and <close> children, into its parts.
	 *
	 * @return array With keys 'name' and 'attr', plus 'inner' and 'close'
	 *   when those children exist; every value is a PPNode_DOM
	 * @throws MWException If the <name> or <attr> child is missing
	 */
	function splitExt() {
		$names = $this->xpath->query( 'name', $this->node );
		$attrs = $this->xpath->query( 'attr', $this->node );
		$inners = $this->xpath->query( 'inner', $this->node );
		$closes = $this->xpath->query( 'close', $this->node );
		if ( !$names->length || !$attrs->length ) {
			throw new MWException( 'Invalid ext node passed to ' . __METHOD__ );
		}
		$parts = array(
			'name' => new self( $names->item( 0 ) ),
			'attr' => new self( $attrs->item( 0 ) ) );
		if ( $inners->length ) {
			$parts['inner'] = new self( $inners->item( 0 ) );
		}
		if ( $closes->length ) {
			$parts['close'] = new self( $closes->item( 0 ) );
		}
		return $parts;
	}

	/**
	 * Split an <h> node into its attributes and contents.
	 *
	 * @return array With keys:
	 *   - i: the 'i' attribute
	 *   - level: the 'level' attribute
	 *   - contents: PPNode_DOM wrapping the child nodes (see getChildren())
	 * @throws MWException If the wrapped node is not named 'h'
	 */
	function splitHeading() {
		// The old guard "!$this->nodeName == 'h'" negated first and read a
		// property the wrapper does not have, so it could never fire.
		if ( $this->getName() !== 'h' ) {
			throw new MWException( 'Invalid h node passed to ' . __METHOD__ );
		}
		return array(
			'i' => $this->node->getAttribute( 'i' ),
			'level' => $this->node->getAttribute( 'level' ),
			'contents' => $this->getChildren()
		);
	}
}