Sunday, June 3, 2012

Filter expressions in Rest urls leveraging QueryDsl Part 2

In my previous post I showed how to use QueryDsl-like BooleanExpressions as the value of a URL query parameter, as in this example: "type.eq(3).and(prize.gt(200))"
The proposed solution leveraged both MVEL and QueryDsl to convert the filter expression at runtime to an executable QueryDsl expression.
As indicated in my post this approach introduced a security vulnerability as we open up our application to code injection via the filter expression. The filter expression is just a string which is interpreted by MVEL as Java code.
The way I defeated this is to restrict the expression string before it is given to MVEL. It is restricted to only accept terms and constructions that lead to a valid QueryDsl Predicate and nothing more. For this purpose I wrote a custom dynamic parser using Parboiled. Parboiled is a so-called PEG (parsing expression grammar) parser.
This is the grammar I came up with to parse the expressions:
FilterExpression : Expression
Expression       : MethodInvocation ['.'MethodInvocation]*
MethodInvocation : Method ArgumentList
Method           : Path
Path             : PathElement ['.'PathElement]*
PathElement      : <one of the allowed path elements>
ArgumentList     : '(' (Argument [','Argument]*)? ')'
Argument         : Literal / Expression / Path / Array
Literal          : FloatLiteral / IntegerLiteral / CharLiteral
                    / StringLiteral / StringLiteral2 / 'true' / 'false' / 'null'
Array            : '[' ((Literal [','Literal]*) / (Expression [','Expression]*)) ']'
The rule definition of the literal values is copied from an example for parsing of Java source code from the Parboiled website.
public class FilterExpressionParser extends BaseParser<Object> {
    private String[] allowedPathElements;

    public FilterExpressionParser(Collection<String> allowedPathElements) {
        this.allowedPathElements = (String[])allowedPathElements.toArray(new String[0]);
        Arrays.sort(this.allowedPathElements, new Comparator<String>() {
            public int compare(java.lang.String o1, java.lang.String o2) {
                return o2.compareTo(o1);

    Rule FilterExpression() {
        return Sequence(Expression(), EOI);

    Rule Expression() {
        return Sequence(MethodInvocation(), ZeroOrMore(".", MethodInvocation()));

    Rule MethodInvocation() {
        return Sequence(Method(), ArgumentList());

    Rule ArgumentList() {
        return Sequence("(", Optional(Argument(), ZeroOrMore(",", Argument())), ")");

    Rule Argument() {
        return FirstOf(Literal(), Expression(), Path(), Array());

    Rule Array() {
        return Sequence("[",
                FirstOf(Sequence(Literal(), ZeroOrMore(",", Literal())), 
                Sequence(Expression(), ZeroOrMore(",", Expression()))),

    Rule Method() {
        return Path();

    Rule Path() {
        return Sequence(FirstOf(allowedPathElements), ZeroOrMore(".", FirstOf(allowedPathElements)));

    Rule LetterOrDigit() {
        // switch to this "reduced" character space version for a ~10% parser performance speedup
        return FirstOf(CharRange('a', 'z'), CharRange('A', 'Z'), CharRange('0', '9'), '_', '$');
        // return FirstOf(Sequence('\\', UnicodeEscape()), new JavaLetterOrDigitMatcher());

    Rule Literal() {
        return FirstOf(Sequence(Optional("-"), FloatLiteral()), Sequence(Optional("-"),
                IntegerLiteral()), CharLiteral(),
                StringLiteral(), StringLiteral2(), Sequence("true", TestNot(LetterOrDigit())),
                Sequence("false", TestNot(LetterOrDigit())), Sequence("null", TestNot(LetterOrDigit()))


    Rule IntegerLiteral() {
        return Sequence(DecimalNumeral(), Optional(AnyOf("lL")));

    Rule DecimalNumeral() {
        return FirstOf('0', Sequence(CharRange('1', '9'), ZeroOrMore(Digit())));

    Rule HexDigit() {
        return FirstOf(CharRange('a', 'f'), CharRange('A', 'F'), CharRange('0', '9'));

    Rule FloatLiteral() {
        return DecimalFloat();

    Rule DecimalFloat() {
        return FirstOf(Sequence(OneOrMore(Digit()), '.', ZeroOrMore(Digit()), 
                Optional(Exponent()), Optional(AnyOf("fFdD"))),
                Sequence('.', OneOrMore(Digit()), Optional(Exponent()), Optional(AnyOf("fFdD"))),
                Sequence(OneOrMore(Digit()), Exponent(), Optional(AnyOf("fFdD"))),
                Sequence(OneOrMore(Digit()), Optional(Exponent()), AnyOf("fFdD")));

    Rule Exponent() {
        return Sequence(AnyOf("eE"), Optional(AnyOf("+-")), OneOrMore(Digit()));

    Rule Digit() {
        return CharRange('0', '9');

    Rule CharLiteral() {
        return Sequence('\'', FirstOf(Escape(), Sequence(TestNot(AnyOf("'\\")), ANY)).suppressSubnodes(), '\'');

    Rule StringLiteral() {
        return Sequence('"', ZeroOrMore(FirstOf(Escape(), 
               Sequence(TestNot(AnyOf("\r\n\"\\")), ANY))).suppressSubnodes(), '"');

    Rule StringLiteral2() {
        return Sequence('\'', ZeroOrMore(FirstOf(Escape(), 
               Sequence(TestNot(AnyOf("\r\n'\\")), ANY))).suppressSubnodes(), '\'');

    Rule Escape() {
        return Sequence('\\', FirstOf(AnyOf("btnfr\"\'\\"), OctalEscape(), UnicodeEscape()));

    Rule OctalEscape() {
        return FirstOf(Sequence(CharRange('0', '3'), CharRange('0', '7'), CharRange('0', '7')),
                Sequence(CharRange('0', '7'), CharRange('0', '7')), CharRange('0', '7'));

    Rule UnicodeEscape() {
        return Sequence(OneOrMore('u'), HexDigit(), HexDigit(), HexDigit(), HexDigit());
You use it like this:
    private final static List queryDslMethods = Arrays.asList("and", "or", "not", "eq", "ne", 
"in", "notIn", "after", "before", "between", "notBetween", "lt", "loe", "gt", "goe",
"equalsIgnoreCase", "like", "matches", "startsWith", "startsWithIgnoreCase"); public Predicate toPredicate() { // create set with all terms that are allowed to appear as 'methods' in a filter expression Set allowedMethods = new HashSet(); set.addAll(queryDslMethods); set.addAll(Arrays.asList("type", "prize", "creationDate")); set.add("_date"); set.add("valueOf"); // needed for ValueFactory calls, i.e. _date.valueOf(...) // create the Parboiled parser with our FilterExpressionParser class FilterExpressionParser parser = Parboiled.createParser(FilterExpressionParser.class, allowedMethods); ReportingParseRunner<Object> parseRunner = new ReportingParseRunner<Object>(parser.FilterExpression()); // run the parser and examine result ParsingResult<Object> parsingResult =; if (!parsingResult.matched) { throw new IllegalArgumentException("filter expression is invalid: " + expression); } return evalExpression(expression); } private Predicate evalExpression(String expression) // create a map with all Objects that need to be available in the MVEL context. Map<String, Object> vars = new HashMap<String, Object>(); QProduct qProduct = QProduct.product; vars.put("type", qProduct.type); vars.put("prize", qProduct.prize); vars.put("creationDate", qProduct.creationDate); vars.put("_date", new DateFactory()); return (Predicate)MVEL.eval(expression, vars); }
If you want to use an entity property in a filter expression whose type is not a primitive type or String, but for instance a java.util.Date object, then you can use an object factory for this. A factory creates an object instance from a string representation.
With the following factory you can construct date objects as part of a filter expression:
import java.util.Date;

import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;

public class DateFactory {

    public String getName(){
        return "_date";

    public Date valueOf(String value) {
        DateTimeFormatter formatter = DateTimeFormat.forPattern("yyyy-mm-dd");
        return formatter.parseDateTime(value).toDate();
You can see in the example code above that the _date and valueOf strings are added to the allowedMethods set for parsing the expression. Furthermore, a DateFactory instance is put into the vars map under the name _date, so it is available when MVEL evaluates the expression. You use a date in a filter expression like this: "type.eq(3).and(creationDate.after(_date.valueOf('2012-05-23')))"
Of course you will need to parameterize the above code to make it reusable for different use cases.

1 comment:

Timo Westkämper said...

Interesting approach, but why do you quote the URL parameter?