AstParser.java :  » Search » byteseek » net » domesdaybook » expression » parser » Java Open Source

Java Open Source » Search » byteseek 
byteseek » net » domesdaybook » expression » parser » AstParser.java
/*
 * Copyright Matt Palmer 2009-2011, All rights reserved.
 *
 */

package net.domesdaybook.expression.parser;

import java.util.ArrayList;
import java.util.List;
import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.CommonToken;
import org.antlr.runtime.CommonTokenStream;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.tree.CommonTree;
import org.antlr.runtime.tree.CommonTreeAdaptor;
import org.antlr.runtime.tree.Tree;

/**
 *
 * @author Matt Palmer palmer
 */
public class AstParser {

    
    public AstParser() {
    }


    /**
     * Returns an (unoptimised) abstract syntax tree from a regular
     * expression string.
     */
    public Tree parseToAST(final String expression) throws ParseException {
        if (expression == null || expression.isEmpty()) {
            throw new IllegalArgumentException("Null or empty expression passed in to AstParser.parseToAST.");
        }
        try {
            return parseToAbstractSyntaxTree(expression);
        } catch (RecognitionException ex) {
            throw new ParseException(ex);
        }
    }

    
    /**
     * Optimises AST tree structures:
     * 
     * 1)  Looks for alternate lists with more than one single byte alternatives.
     *     These can be more efficiently be represented as a set of bytes.
     * 2)  If there are existing alternate sets of bytes, they can also be merged.
     * 3)  If all the alternatives are single bytes, then the entire alternative
     *     can be replaced by the set.
     *
     * @param treeNode The abstract syntax tree root to optimise.
     * @return Tree an AST with the alternatives optimised.
     */
    public Tree optimiseAST(final Tree treeNode) throws ParseException {
        if (treeNode == null) {
            throw new IllegalArgumentException("Null node passed in to AstParser.optimiseAST");
        }
        Tree result = treeNode;
        // Recursively invoke on children of tree node, to walk the tree:
        for (int childIndex = 0; childIndex < treeNode.getChildCount(); childIndex++) {
            Tree childNode = treeNode.getChild(childIndex);

            // If a child is exactly equivalent to its parent, then
            // replace it with its own children, unless it is a repeat node, which can
            // have repeats of repeats
            //TODO: optimise repeats of repeats.
            if (equivalent(treeNode, childNode) && treeNode.getType() != regularExpressionParser.REPEAT) {
                treeNode.replaceChildren(childIndex, childIndex, getChildList(childNode));
                childNode = treeNode.getChild(childIndex);
            }

            Tree resultNode = optimiseAST(childNode);
            if (resultNode != childNode) {
                treeNode.setChild(childIndex, resultNode);
            }
        }

        // If the current node is an alternative node:
        if (treeNode.getType() == regularExpressionParser.ALT) {
            result = optimiseSingleByteAlternatives(treeNode);
        }

        return result;
    }




    private CommonTree parseToAbstractSyntaxTree(final String expression) throws ParseException, RecognitionException {
        CommonTree tree;
        final ANTLRStringStream input = new ANTLRStringStream(expression);
        final regularExpressionLexer lexer = new regularExpressionLexer(input);
        if (lexer.getNumberOfSyntaxErrors() == 0) {

            final CommonTokenStream tokens = new CommonTokenStream(lexer);
            regularExpressionParser parser = new regularExpressionParser(tokens) {
                @Override
                public void emitErrorMessage(String msg) {
                    throw new ParseErrorException(msg);
                }
            };
            try {
                final CommonTreeAdaptor adaptor = new CommonTreeAdaptor();
                parser.setTreeAdaptor(adaptor);
                regularExpressionParser.start_return ret = parser.start();
                tree = (CommonTree) ret.getTree();
            } catch (ParseErrorException e) {
                throw new ParseException(e.getMessage());
            }
        } else {
            throw new ParseException(String.format("Parse error: %d syntax errors in %s", lexer.getNumberOfSyntaxErrors(), expression));
        }
        return tree;
    }


    private Tree optimiseSingleByteAlternatives(Tree treeNode) {

        Tree result = treeNode;

        // Determine which children can be optimised:
        List<Integer> childrenToMerge = new ArrayList<Integer>();
        final int childCount = treeNode.getChildCount();
        for (int childIndex = childCount-1; childIndex >= 0 ; childIndex--) {
            if (isSingleByteNode(treeNode.getChild(childIndex))) {
                childrenToMerge.add(childIndex);
            }
        }

        // If there is more than one candidate child node to merge, then merge them
        // into a set-based byte test, rather than different alternatives:
        if (childrenToMerge.size() > 1) {
            
            CommonTree mergeSet = createNode(regularExpressionParser.SET);
            for (int mergeIndex = 0; mergeIndex < childrenToMerge.size(); mergeIndex++) {
                final int childIndex = childrenToMerge.get(mergeIndex);
                Tree mergeNode = treeNode.getChild(childIndex);

                // If any of the children of the alternative we are changing
                // to a set is also a set, merge its children into the set
                // instead of adding a set child to a set.
                if (mergeNode.getType() == regularExpressionParser.SET) {
                    mergeNode = getChildList(mergeNode);
                }
                mergeSet.addChild(mergeNode);
                treeNode.deleteChild(childIndex);
            }

            // If all the children are merged, the entire alternative
            // is really a single set - change the type of the node:
            if (childrenToMerge.size() == childCount) {
                result = mergeSet;
            } else { // just add the set to the alternative node:
                treeNode.addChild(mergeSet);
            }
        }

        return result;
    }


    private CommonTree createNode(int type) {
        String text = regularExpressionParser.tokenNames[type];
        return new CommonTree(new CommonToken(type, text));
    }

    private boolean equivalent(Tree node1, Tree node2) {
        return node1.getType() == node2.getType() &&
               node1.getText().equals(node2.getText());
    }

    private Tree getChildList(Tree parent) {
        // nodes with no token are "nil" nodes in antlr,
        // which act as lists of children.
        Tree listNode = new CommonTree();
        for (int childIndex = 0; childIndex < parent.getChildCount(); childIndex++) {
            listNode.addChild(parent.getChild(childIndex));
        }
        return listNode;
    }

    
    private boolean isSingleByteNode(Tree node) {
        final int nodeType = node.getType();
        return    nodeType == regularExpressionParser.BYTE
               || nodeType == regularExpressionParser.SET
               || nodeType == regularExpressionParser.ALL_BITMASK
               || nodeType == regularExpressionParser.ANY_BITMASK
               || ((   nodeType == regularExpressionParser.CASE_SENSITIVE_STRING
                    || nodeType == regularExpressionParser.CASE_INSENSITIVE_STRING)
                   && node.getText().length() == 1);
    }

    private class ParseErrorException extends RuntimeException {
        public ParseErrorException(String message) {
            super(message);
        }
    }



}
java2s.com  | Contact Us | Privacy Policy
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.