summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'AbuseFilter/includes/parser/AbuseFilterCachingParser.php')
-rw-r--r--AbuseFilter/includes/parser/AbuseFilterCachingParser.php274
1 files changed, 197 insertions, 77 deletions
diff --git a/AbuseFilter/includes/parser/AbuseFilterCachingParser.php b/AbuseFilter/includes/parser/AbuseFilterCachingParser.php
index ef634fd4..b5bc0cb9 100644
--- a/AbuseFilter/includes/parser/AbuseFilterCachingParser.php
+++ b/AbuseFilter/includes/parser/AbuseFilterCachingParser.php
@@ -6,8 +6,15 @@
*
* It currently inherits AbuseFilterParser in order to avoid code duplication.
* In future, this code will replace current AbuseFilterParser entirely.
+ *
+ * @todo Override checkSyntax and make it only try to build the AST. That would mean faster results,
+ * and no need to mess with DUNDEFINED and the like. However, we must first try to reduce the
+ * amount of runtime-only exceptions, and try to detect them in the AFPTreeParser instead.
+ * Otherwise, people may be able to save a broken filter without the syntax check reporting that.
*/
class AbuseFilterCachingParser extends AbuseFilterParser {
+ private const CACHE_VERSION = 1;
+
/**
* Return the generated version of the parser for cache invalidation
* purposes. Automatically tracks list of all functions and invalidates the
@@ -21,10 +28,11 @@ class AbuseFilterCachingParser extends AbuseFilterParser {
}
$versionKey = [
+ self::CACHE_VERSION,
AFPTreeParser::CACHE_VERSION,
AbuseFilterTokenizer::CACHE_VERSION,
- array_keys( AbuseFilterParser::$mFunctions ),
- array_keys( AbuseFilterParser::$mKeywords ),
+ array_keys( AbuseFilterParser::FUNCTIONS ),
+ array_keys( AbuseFilterParser::KEYWORDS ),
];
$version = hash( 'sha256', serialize( $versionKey ) );
@@ -35,36 +43,64 @@ class AbuseFilterCachingParser extends AbuseFilterParser {
* Resets the state of the parser
*/
public function resetState() {
- $this->mVars = new AbuseFilterVariableHolder;
+ $this->mVariables = new AbuseFilterVariableHolder( $this->keywordsManager );
$this->mCur = new AFPToken();
+ $this->mCondCount = 0;
+ $this->mAllowShort = true;
}
/**
* @param string $code
* @return AFPData
*/
- public function intEval( $code ) {
- static $cache = null;
- if ( !$cache ) {
- $cache = ObjectCache::getLocalServerInstance( 'hash' );
+ public function intEval( $code ) : AFPData {
+ $startTime = microtime( true );
+ $tree = $this->getTree( $code );
+
+ $res = $this->evalTree( $tree );
+
+ if ( $res->getType() === AFPData::DUNDEFINED ) {
+ $res = new AFPData( AFPData::DBOOL, false );
}
+ $this->statsd->timing( 'abusefilter_cachingParser_full', microtime( true ) - $startTime );
+ return $res;
+ }
- $tree = $cache->getWithSetCallback(
- $cache->makeGlobalKey(
+ /**
+ * @param string $code
+ * @return AFPSyntaxTree
+ */
+ private function getTree( $code ) : AFPSyntaxTree {
+ return $this->cache->getWithSetCallback(
+ $this->cache->makeGlobalKey(
__CLASS__,
self::getCacheVersion(),
hash( 'sha256', $code )
),
- $cache::TTL_DAY,
+ BagOStuff::TTL_DAY,
function () use ( $code ) {
- $parser = new AFPTreeParser();
- return $parser->parse( $code ) ?: false;
+ $parser = new AFPTreeParser( $this->cache, $this->logger, $this->statsd, $this->keywordsManager );
+ $parser->setFilter( $this->mFilter );
+ return $parser->parse( $code );
}
);
+ }
+
+ /**
+ * @param AFPSyntaxTree $tree
+ * @return AFPData
+ */
+ private function evalTree( AFPSyntaxTree $tree ) : AFPData {
+ $startTime = microtime( true );
+ $root = $tree->getRoot();
+
+ if ( !$root ) {
+ return new AFPData( AFPData::DNULL );
+ }
- return $tree
- ? $this->evalNode( $tree )
- : new AFPData( AFPData::DNULL, null );
+ $ret = $this->evalNode( $root );
+ $this->statsd->timing( 'abusefilter_cachingParser_eval', microtime( true ) - $startTime );
+ return $ret;
}
/**
@@ -76,7 +112,7 @@ class AbuseFilterCachingParser extends AbuseFilterParser {
* @throws AFPUserVisibleException
* @throws MWException
*/
- public function evalNode( AFPTreeNode $node ) {
+ private function evalNode( AFPTreeNode $node ) {
// A lot of AbuseFilterParser features rely on $this->mCur->pos or
// $this->mPos for error reporting.
// FIXME: this is a hack which needs to be removed when the parsers are merged.
@@ -103,55 +139,56 @@ class AbuseFilterCachingParser extends AbuseFilterParser {
case "false":
return new AFPData( AFPData::DBOOL, false );
case "null":
- return new AFPData();
+ return new AFPData( AFPData::DNULL );
}
// Fallthrough intended
default:
+ // @codeCoverageIgnoreStart
throw new AFPException( "Unknown token provided in the ATOM node" );
+ // @codeCoverageIgnoreEnd
}
case AFPTreeNode::ARRAY_DEFINITION:
- $items = array_map( [ $this, 'evalNode' ], $node->children );
+ $items = [];
+ // Foreach is usually faster than array_map
+ // @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach children is array here
+ foreach ( $node->children as $el ) {
+ $items[] = $this->evalNode( $el );
+ }
return new AFPData( AFPData::DARRAY, $items );
case AFPTreeNode::FUNCTION_CALL:
$functionName = $node->children[0];
$args = array_slice( $node->children, 1 );
- $func = self::$mFunctions[$functionName];
- $dataArgs = array_map( [ $this, 'evalNode' ], $args );
-
- /** @noinspection PhpToStringImplementationInspection */
- $funcHash = md5( $func . serialize( $dataArgs ) );
-
- if ( isset( self::$funcCache[$funcHash] ) &&
- !in_array( $func, self::$ActiveFunctions )
- ) {
- $result = self::$funcCache[$funcHash];
- } else {
- AbuseFilter::triggerLimiter();
- $result = self::$funcCache[$funcHash] = $this->$func( $dataArgs );
- }
-
- if ( count( self::$funcCache ) > 1000 ) {
- self::$funcCache = [];
+ $dataArgs = [];
+ // Foreach is usually faster than array_map
+ foreach ( $args as $arg ) {
+ $dataArgs[] = $this->evalNode( $arg );
}
- return $result;
-
+ return $this->callFunc( $functionName, $dataArgs );
case AFPTreeNode::ARRAY_INDEX:
list( $array, $offset ) = $node->children;
$array = $this->evalNode( $array );
- if ( $array->type != AFPData::DARRAY ) {
- throw new AFPUserVisibleException( 'notarray', $node->position, [] );
+ // Note: we MUST evaluate the offset to ensure it is valid, regardless
+ // of $array!
+ $offset = $this->evalNode( $offset )->toInt();
+
+ if ( $array->getType() === AFPData::DUNDEFINED ) {
+ return new AFPData( AFPData::DUNDEFINED );
}
- $offset = $this->evalNode( $offset )->toInt();
+ if ( $array->getType() !== AFPData::DARRAY ) {
+ throw new AFPUserVisibleException( 'notarray', $node->position, [] );
+ }
$array = $array->toArray();
if ( count( $array ) <= $offset ) {
throw new AFPUserVisibleException( 'outofbounds', $node->position,
[ $offset, count( $array ) ] );
+ } elseif ( $offset < 0 ) {
+ throw new AFPUserVisibleException( 'negativeindex', $node->position, [ $offset ] );
}
return $array[$offset];
@@ -159,38 +196,33 @@ class AbuseFilterCachingParser extends AbuseFilterParser {
case AFPTreeNode::UNARY:
list( $operation, $argument ) = $node->children;
$argument = $this->evalNode( $argument );
- if ( $operation == '-' ) {
- return AFPData::unaryMinus( $argument );
+ if ( $operation === '-' ) {
+ return $argument->unaryMinus();
}
return $argument;
case AFPTreeNode::KEYWORD_OPERATOR:
list( $keyword, $leftOperand, $rightOperand ) = $node->children;
- $func = self::$mKeywords[$keyword];
$leftOperand = $this->evalNode( $leftOperand );
$rightOperand = $this->evalNode( $rightOperand );
- AbuseFilter::triggerLimiter();
- $result = AFPData::$func( $leftOperand, $rightOperand, $node->position );
-
- return $result;
+ return $this->callKeyword( $keyword, $leftOperand, $rightOperand );
case AFPTreeNode::BOOL_INVERT:
list( $argument ) = $node->children;
$argument = $this->evalNode( $argument );
- return AFPData::boolInvert( $argument );
+ return $argument->boolInvert();
case AFPTreeNode::POW:
list( $base, $exponent ) = $node->children;
$base = $this->evalNode( $base );
$exponent = $this->evalNode( $exponent );
- return AFPData::pow( $base, $exponent );
+ return $base->pow( $exponent );
case AFPTreeNode::MUL_REL:
list( $op, $leftOperand, $rightOperand ) = $node->children;
$leftOperand = $this->evalNode( $leftOperand );
$rightOperand = $this->evalNode( $rightOperand );
- // FIXME
- return AFPData::mulRel( $leftOperand, $rightOperand, $op, 0 );
+ return $leftOperand->mulRel( $rightOperand, $op, $node->position );
case AFPTreeNode::SUM_REL:
list( $op, $leftOperand, $rightOperand ) = $node->children;
@@ -198,38 +230,51 @@ class AbuseFilterCachingParser extends AbuseFilterParser {
$rightOperand = $this->evalNode( $rightOperand );
switch ( $op ) {
case '+':
- return AFPData::sum( $leftOperand, $rightOperand );
+ return $leftOperand->sum( $rightOperand );
case '-':
- return AFPData::sub( $leftOperand, $rightOperand );
+ return $leftOperand->sub( $rightOperand );
default:
+ // @codeCoverageIgnoreStart
throw new AFPException( "Unknown sum-related operator: {$op}" );
+ // @codeCoverageIgnoreEnd
}
case AFPTreeNode::COMPARE:
list( $op, $leftOperand, $rightOperand ) = $node->children;
$leftOperand = $this->evalNode( $leftOperand );
$rightOperand = $this->evalNode( $rightOperand );
- AbuseFilter::triggerLimiter();
- return AFPData::compareOp( $leftOperand, $rightOperand, $op );
+ $this->raiseCondCount();
+ return $leftOperand->compareOp( $rightOperand, $op );
case AFPTreeNode::LOGIC:
list( $op, $leftOperand, $rightOperand ) = $node->children;
$leftOperand = $this->evalNode( $leftOperand );
- $value = $leftOperand->toBool();
+ $value = $leftOperand->getType() === AFPData::DUNDEFINED ? false : $leftOperand->toBool();
// Short-circuit.
- if ( ( !$value && $op == '&' ) || ( $value && $op == '|' ) ) {
+ if ( ( !$value && $op === '&' ) || ( $value && $op === '|' ) ) {
+ if ( $rightOperand instanceof AFPTreeNode ) {
+ $this->maybeDiscardNode( $rightOperand );
+ }
return $leftOperand;
}
$rightOperand = $this->evalNode( $rightOperand );
- return AFPData::boolOp( $leftOperand, $rightOperand, $op );
+ return $leftOperand->boolOp( $rightOperand, $op );
case AFPTreeNode::CONDITIONAL:
list( $condition, $valueIfTrue, $valueIfFalse ) = $node->children;
$condition = $this->evalNode( $condition );
- if ( $condition->toBool() ) {
+ $isTrue = $condition->getType() === AFPData::DUNDEFINED ? false : $condition->toBool();
+ if ( $isTrue ) {
+ if ( $valueIfFalse !== null ) {
+ $this->maybeDiscardNode( $valueIfFalse );
+ }
return $this->evalNode( $valueIfTrue );
} else {
- return $this->evalNode( $valueIfFalse );
+ $this->maybeDiscardNode( $valueIfTrue );
+ return $valueIfFalse !== null
+ ? $this->evalNode( $valueIfFalse )
+ // We assume null as default if the else is missing
+ : new AFPData( AFPData::DNULL );
}
case AFPTreeNode::ASSIGNMENT:
@@ -241,45 +286,120 @@ class AbuseFilterCachingParser extends AbuseFilterParser {
case AFPTreeNode::INDEX_ASSIGNMENT:
list( $varName, $offset, $value ) = $node->children;
- $array = $this->mVars->getVar( $varName );
- if ( $array->type != AFPData::DARRAY ) {
- throw new AFPUserVisibleException( 'notarray', $node->position, [] );
+ if ( $this->isReservedIdentifier( $varName ) ) {
+ throw new AFPUserVisibleException( 'overridebuiltin', $node->position, [ $varName ] );
}
+ $array = $this->getVarValue( $varName );
- $offset = $this->evalNode( $offset )->toInt();
-
- $array = $array->toArray();
- if ( count( $array ) <= $offset ) {
- throw new AFPUserVisibleException( 'outofbounds', $node->position,
- [ $offset, count( $array ) ] );
+ $value = $this->evalNode( $value );
+ if ( $array->getType() !== AFPData::DUNDEFINED ) {
+ // If it's a DUNDEFINED, leave it as is
+ if ( $array->getType() !== AFPData::DARRAY ) {
+ throw new AFPUserVisibleException( 'notarray', $node->position, [] );
+ }
+
+ $offset = $this->evalNode( $offset )->toInt();
+
+ $array = $array->toArray();
+ if ( count( $array ) <= $offset ) {
+ throw new AFPUserVisibleException( 'outofbounds', $node->position,
+ [ $offset, count( $array ) ] );
+ } elseif ( $offset < 0 ) {
+ throw new AFPUserVisibleException( 'negativeindex', $node->position, [ $offset ] );
+ }
+
+ $array[$offset] = $value;
+ $this->setUserVariable( $varName, new AFPData( AFPData::DARRAY, $array ) );
}
- $array[$offset] = $this->evalNode( $value );
- $this->setUserVariable( $varName, new AFPData( AFPData::DARRAY, $array ) );
return $value;
case AFPTreeNode::ARRAY_APPEND:
list( $varName, $value ) = $node->children;
- $array = $this->mVars->getVar( $varName );
- if ( $array->type != AFPData::DARRAY ) {
- throw new AFPUserVisibleException( 'notarray', $node->position, [] );
+ if ( $this->isReservedIdentifier( $varName ) ) {
+ throw new AFPUserVisibleException( 'overridebuiltin', $node->position, [ $varName ] );
}
- $array = $array->toArray();
- $array[] = $this->evalNode( $value );
- $this->setUserVariable( $varName, new AFPData( AFPData::DARRAY, $array ) );
+ $array = $this->getVarValue( $varName );
+ $value = $this->evalNode( $value );
+ if ( $array->getType() !== AFPData::DUNDEFINED ) {
+ // If it's a DUNDEFINED, leave it as is
+ if ( $array->getType() !== AFPData::DARRAY ) {
+ throw new AFPUserVisibleException( 'notarray', $node->position, [] );
+ }
+
+ $array = $array->toArray();
+ $array[] = $value;
+ $this->setUserVariable( $varName, new AFPData( AFPData::DARRAY, $array ) );
+ }
return $value;
case AFPTreeNode::SEMICOLON:
$lastValue = null;
+ // @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach children is array here
foreach ( $node->children as $statement ) {
$lastValue = $this->evalNode( $statement );
}
+ // @phan-suppress-next-next-line PhanTypeMismatchReturnNullable Can never be null because
+ // empty statements are discarded in AFPTreeParser
return $lastValue;
default:
+ // @codeCoverageIgnoreStart
throw new AFPException( "Unknown node type passed: {$node->type}" );
+ // @codeCoverageIgnoreEnd
}
}
+
+ /**
+ * Given a node that we don't need to evaluate, decide what to do with it. The nodes passed in
+ * will usually be discarded by short-circuit evaluation. If we allow it, then we just hoist
+ * the variables assigned in any descendant of the node. Otherwise, we fully evaluate the node.
+ *
+ * @param AFPTreeNode $node
+ */
+ private function maybeDiscardNode( AFPTreeNode $node ) {
+ if ( $this->mAllowShort ) {
+ $this->discardWithHoisting( $node );
+ } else {
+ $this->evalNode( $node );
+ }
+ }
+
+ /**
+ * Intended to be used for short-circuit as a solution for T214674.
+ * Given a node, check it and its children; if there are assignments of non-existing variables,
+ * hoist them. In case of index assignment or array append, the old value is always erased and
+ * overwritten with a DUNDEFINED. This is used to allow stuff like:
+ * false & ( var := 'foo' ); var == 2
+ * or
+ * if ( false ) then ( var := 'foo' ) else ( 1 ) end; var == 2
+ * where `false` is something evaluated as false at runtime.
+ *
+ * @note This method doesn't check whether the variable exists in case of index assignments.
+ * Hence, in `false & (nonexistent[] := 2)`, `nonexistent` would be hoisted without errors.
+ * However, that would by caught by checkSyntax, so we can avoid checking here: we'd need
+ * way more context than we currently have.
+ *
+ * @param AFPTreeNode $node
+ */
+ private function discardWithHoisting( AFPTreeNode $node ) {
+ foreach ( $node->getInnerAssignments() as $name ) {
+ if (
+ !$this->mVariables->varIsSet( $name ) ||
+ $this->mVariables->getVar( $name )->getType() === AFPData::DARRAY
+ ) {
+ $this->setUserVariable( $name, new AFPData( AFPData::DUNDEFINED ) );
+ }
+ }
+ }
+
+ /**
+ * @inheritDoc
+ * This parser should not log, because that's handled in AFPTreeParser
+ */
+ protected function logsDeprecatedVars() {
+ return false;
+ }
}