edu.stanford.nlp.trees.tregex.TregexMatcher.java Source code

Java tutorial

Introduction

Here is the source code for edu.stanford.nlp.trees.tregex.TregexMatcher.java

Source

// TregexMatcher
// Copyright (c) 2004-2007 The Board of Trustees of
// The Leland Stanford Junior University. All Rights Reserved.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
//
//
// For more information, bug reports, fixes, contact:
//    Christopher Manning
//    Dept of Computer Science, Gates 1A
//    Stanford CA 94305-9010
//    USA
//    Support/Questions: parser-user@lists.stanford.edu
//    Licensing: parser-support@lists.stanford.edu
//    http://www-nlp.stanford.edu/software/tregex.shtml

package edu.stanford.nlp.trees.tregex;

import java.util.*;

import edu.stanford.nlp.trees.HasParent;
import edu.stanford.nlp.trees.HeadFinder;
import edu.stanford.nlp.trees.Tree;

/**
 * Matches a {@link TregexPattern} against a {@link edu.stanford.nlp.trees.Tree}.
 * The API is intended to feel like {@link java.util.regex.Matcher}: call
 * {@link #find()} repeatedly to walk through the matches and {@link #reset()}
 * to start over.
 *
 * @author Galen Andrew
 */
public abstract class TregexMatcher {

    final Tree root;
    Tree tree;
    IdentityHashMap<Tree, Tree> nodesToParents;
    final Map<String, Tree> namesToNodes;
    final VariableStrings variableStrings;

    // Search state shared by find() and findAt(); both are cleared by reset().
    private Iterator<Tree> findIterator;
    private Tree findCurrent;

    final HeadFinder headFinder;

    TregexMatcher(Tree root,
            Tree tree,
            IdentityHashMap<Tree, Tree> nodesToParents,
            Map<String, Tree> namesToNodes,
            VariableStrings variableStrings,
            HeadFinder headFinder) {
        this.root = root;
        this.tree = tree;
        this.nodesToParents = nodesToParents;
        this.namesToNodes = namesToNodes;
        this.variableStrings = variableStrings;
        this.headFinder = headFinder;
    }

    /**
     * @return the head finder this matcher was constructed with
     */
    public HeadFinder getHeadFinder() {
        return headFinder;
    }

    /**
     * Starts the search over: forgets the current search position, clears all
     * named nodes and resets the variable strings.
     */
    public void reset() {
        findIterator = null;
        findCurrent = null;
        namesToNodes.clear();
        variableStrings.reset();
    }

    /**
     * Points the matcher at {@code tree} and restarts the search for matching
     * subexpressions from there.
     *
     * @param tree The tree to start searching on
     */
    void resetChildIter(Tree tree) {
        this.tree = tree;
        resetChildIter();
    }

    /**
     * Restarts the search for matching subexpressions on the current tree.
     * The implementation in this class does nothing.
     */
    void resetChildIter() {
    }

    /**
     * Tests whether the pattern matches the tree.  This behaves more like
     * java.util.regex's "lookingAt" than its "matches": the root of the tree
     * must match the root of the pattern, but the rest of the tree need not be
     * "accounted for" -- just as lookingAt requires the beginning of the string
     * to match without requiring the whole string to be consumed.
     *
     * @return whether the tree matches the pattern
     */
    public abstract boolean matches();

    /**
     * Re-targets the matcher at {@code node} and tests whether the pattern
     * matches with {@code node} as the root of the match.
     *
     * @param node The node where the match is checked
     * @return whether the matcher matches at node
     */
    public boolean matchesAt(Tree node) {
        resetChildIter(node);
        return matches();
    }

    /**
     * Returns the most recent match, i.e. the tree node that matched the root
     * node of the pattern.  Returns null if there has not been a match.
     *
     * @return last match
     */
    public abstract Tree getMatch();

    /**
     * Advances to the next match of the pattern anywhere in the tree.
     *
     * @return true if another match was found, false once the tree is exhausted
     */
    public boolean find() {
        if (findIterator == null) {
            findIterator = root.iterator();
        }
        // Before moving on, see whether the node we stopped at last time still
        // has another match to offer.
        boolean matched = findCurrent != null && matches();
        while (!matched && findIterator.hasNext()) {
            findCurrent = findIterator.next();
            resetChildIter(findCurrent);
            matched = matches();
        }
        return matched;
    }

    /**
     * Like {@code find()}, except that only matches rooted at {@code node}
     * are considered; every other match is ignored.  When the root of the
     * desired matches is known in advance this is much faster than iterating
     * over all matches and filtering, and faster than rewriting the tregex so
     * that it can only match the desired node.
     * <br>
     * Repeated calls with the same node yield successive matches, in the same
     * way that repeated calls to find() yield successive matches in a tree.
     * To use the same TregexMatcher on a different node, call reset() first;
     * otherwise an AssertionError is thrown.
     *
     * @param node the node that must be the root of the match
     * @return whether a (further) match rooted at {@code node} exists
     * @throws AssertionError if a different node is already being searched and
     *         reset() has not been called
     */
    public boolean findAt(Tree node) {
        if (findCurrent == null) {
            // Nothing is being searched yet: anchor the search at node.
            findCurrent = node;
            resetChildIter(node);
        } else if (findCurrent != node) {
            throw new AssertionError("Error: must call reset() before changing nodes for a call to findAt");
        }
        return matches();
    }

    /**
     * Advances to the next match whose matching node (the tree node matching
     * the root node of the pattern) is different from the matching node of
     * the previous match.
     *
     * @return true iff another matching node is found.
     */
    public boolean findNextMatchingNode() {
        final Tree previousMatch = getMatch();
        boolean found = find();
        // Skip over further matches that are still rooted at the previous node.
        while (found && getMatch() == previousMatch) {
            found = find();
        }
        return found;
    }

    /**
     * Looks up the tree node bound to {@code name} in the pattern.
     *
     * @param name the name of the node, specified in the pattern.
     * @return node labeled by the name
     */
    public Tree getNode(String name) {
        return namesToNodes.get(name);
    }

    /**
     * @return the key set of the name-to-node map, i.e. the names that
     *         currently have a node bound to them
     */
    public Set<String> getNodeNames() {
        return namesToNodes.keySet();
    }

    /**
     * Returns the parent of {@code node}.  Nodes that implement
     * {@link HasParent} are asked directly; for all other nodes the parent is
     * looked up in a node-to-parent map which is created and filled from
     * {@code root} on demand.
     *
     * @param node the node whose parent is wanted
     * @return the parent reported by the node or stored in the map
     */
    Tree getParent(Tree node) {
        if (!(node instanceof HasParent)) {
            if (nodesToParents == null) {
                nodesToParents = new IdentityHashMap<>();
            }
            if (nodesToParents.isEmpty()) {
                recordParents(root, null);
            }
            return nodesToParents.get(node);
        }
        return node.parent();
    }

    /**
     * Stores {@code parent} as the parent of {@code node} and then does the
     * same, recursively, for every node below {@code node}.
     */
    private void recordParents(Tree node, Tree parent) {
        nodesToParents.put(node, parent);
        for (Tree kid : node.children()) {
            recordParents(kid, node);
        }
    }

    /**
     * @return the root tree this matcher was constructed with
     */
    Tree getRoot() {
        return root;
    }

    /**
     * If there is a current match, and that match involves setting this
     * particular variable string, this returns that string.  Otherwise,
     * it returns null.
     *
     * @param var the name of the variable string
     * @return the string looked up for {@code var} in the variable strings
     */
    public String getVariableString(String var) {
        return variableStrings.getString(var);
    }

}