Example usage for edu.stanford.nlp.ling StringLabel StringLabel

List of usage examples for edu.stanford.nlp.ling StringLabel StringLabel

Introduction

On this page you can find example usages of the edu.stanford.nlp.ling StringLabel constructor.

Prototype

public StringLabel(Label label) 

Source Link

Document

Create a new StringLabel with the value() of another label as its label.

Usage

From source file:qmul.align.TurnConcatSimilarityMeasure.java

License:Open Source License

/**
 * @param t/*from   ww w .  j  a va 2s .  c  o  m*/
 * @return the result of concatenating all sentences linearly for transcription, as daughters of a "TURN" mother for
 *         syntax (unless there's just one, in which case it's just copied without a TURN mother, to prevent false
 *         positive similarity between single-sent turns)
 */
private DialogueSentence concatTurn(DialogueTurn t) {
    DialogueSentence cs = new DialogueSentence(null, 0, t, "");
    System.out.print("Concatenating sentences for turn " + t.getId());
    for (DialogueSentence s : t.getSents()) {
        System.out.print(".");
        if (s.getTranscription() != null) {
            cs.setTranscription((cs.getTranscription() + " " + s.getTranscription()).trim());
        }
        if (s.getTokens() != null) {
            if (cs.getTokens() == null) {
                cs.setTokens(s.getTokens());
            } else {
                cs.getTokens().addAll(s.getTokens());
            }
        }
        if (s.getSyntax() != null) {
            Tree tree;
            if (cs.getSyntax() == null) {
                tree = s.getSyntax();
            } else {
                ArrayList<Tree> dtrs = new ArrayList<Tree>();
                if (cs.getSyntax().label().value().equals("TURN")) {
                    for (Tree child : cs.getSyntax().getChildrenAsList()) {
                        dtrs.add(child);
                    }
                } else {
                    dtrs.add(cs.getSyntax());
                }
                dtrs.add(s.getSyntax());
                tree = new LabeledScoredTreeNode(new StringLabel("TURN"), dtrs);
            }
            cs.setSyntax(tree);
        }
        if (!Double.isNaN(s.getSyntaxProb())) {
            cs.setSyntaxProb(Double.isNaN(cs.getSyntaxProb()) ? s.getSyntaxProb()
                    : (cs.getSyntaxProb() * s.getSyntaxProb()));
        }
    }
    System.out.println(" done.");
    return cs;
}

From source file:qmul.util.parse.CreateTreeFromClarkCurranCCGProlog.java

License:Open Source License

/**
 * @param reader/*  ww  w.  ja v a2 s .  c o  m*/
 *            a {@link BufferedReader}
 * @return the Stanford {@link Tree}
 */
public static Tree makeTree(BufferedReader reader) {
    if (options == null) {
        setDefaultOptions();
    }

    NodeFilter nodeFilter = new NodeFilter();
    String line = null;
    boolean doingTree = false;
    boolean doingWords = false;
    HashMap<Integer, Tree> leaves = new HashMap<Integer, Tree>();
    Tree currentNode = null;
    Tree rootNode = null;
    int treeLevel = 0;
    try {
        while ((line = reader.readLine()) != null) {
            line = line.trim();
            // first we need to get the ccg/2 tree structure
            if (line.startsWith("ccg(")) {
                doingTree = true;
                doingWords = false;
                treeLevel = 1;
                // nothing useful on the actual ccg functor line
                continue;
            }
            // next the w/8 word definitions
            if (line.startsWith("w(")) {
                if (!doingTree && !doingWords) {
                    // if we've hit the word definitions without seeing a tree, stop
                    return null;
                }
                doingTree = false;
                doingWords = true;
            }
            if (doingTree) {
                Matcher m = LEAF_PAT.matcher(line);
                if (m.find()) {
                    // System.out.println("matched leaf " + line);
                    Tree nonTerminal = tf.newTreeNode(getSynLabel(m.group(3)), new ArrayList<Tree>());
                    if (rootNode == null) {
                        rootNode = nonTerminal;
                    } else {
                        currentNode.addChild(nonTerminal);
                    }
                    Tree leaf = tf.newLeaf("DUMMY");
                    nonTerminal.addChild(leaf);
                    leaves.put(Integer.parseInt(m.group(2)), leaf);
                    // adjust currentNode
                    int numOpening = line.replaceAll("[^(]", "").length();
                    int numClosing = line.replaceAll("\\)\\.$", "").replaceAll("[^)]", "").length();
                    int levelChange = numOpening - numClosing;
                    if (levelChange > 0) {
                        throw new RuntimeException("deepening with leaf node!");
                    } else if (levelChange < 0) {
                        do {
                            // System.out.println("cu node " + currentNode.label());
                            currentNode = currentNode.parent(rootNode);
                            // System.out.println("up node " + (currentNode == null ? null : currentNode.label()));
                            treeLevel--;
                            levelChange++;
                        } while (levelChange < 0);
                    }
                    continue;
                }
                m = RULE_PAT.matcher(line);
                if (m.find()) {
                    // System.out.println("matched rule " + line);
                    treeLevel++;
                    Tree node = tf.newTreeNode(getSynLabel(m.group(2)), new ArrayList<Tree>());
                    if (rootNode == null) {
                        rootNode = node;
                    }
                    if (currentNode != null) {
                        currentNode.addChild(node);
                    }
                    currentNode = node;
                    // System.out.println("current node " + node.label());
                    continue;
                }
                m = LEXR_PAT.matcher(line);
                if (m.find()) {
                    // System.out.println("matched lexr " + line);
                    treeLevel++;
                    Tree node = tf.newTreeNode(getSynLabel(m.group(3)), new ArrayList<Tree>());
                    if (rootNode == null) {
                        rootNode = node;
                    }
                    if (currentNode != null) {
                        currentNode.addChild(node);
                    }
                    currentNode = node;
                    // System.out.println("current node " + node.label());
                    continue;
                }
                m = CONJ_PAT.matcher(line);
                if (m.find()) {
                    // System.out.println("matched conj " + line);
                    treeLevel++;
                    Tree node = tf.newTreeNode(getSynLabel(m.group(4)), new ArrayList<Tree>());
                    if (rootNode == null) {
                        rootNode = node;
                    }
                    if (currentNode != null) {
                        currentNode.addChild(node);
                    }
                    currentNode = node;
                    // System.out.println("current node " + node.label());
                    continue;
                }
                throw new RuntimeException("no match for line " + line);
            }
            if (doingWords) {
                Matcher m = WORD_PAT.matcher(line);
                if (m.find()) {
                    Tree leaf = leaves.get(Integer.parseInt(m.group(2)));
                    if (leaf == null) {
                        throw new RuntimeException("Missing leaf " + m.group(2));
                    }
                    leaf.setLabel(new StringLabel(m.group(3)));
                    leaves.remove(Integer.parseInt(m.group(2)));
                } else {
                    if (line.isEmpty()) {
                        doingWords = false;
                        if (!leaves.isEmpty()) {
                            throw new RuntimeException("unmatched leaves " + leaves);
                        }
                        continue;
                    } else {
                        throw new RuntimeException("strange word line " + line);
                    }
                }
                continue;
            }
        }
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
        System.exit(0);
    }
    // prune to (optionally) remove punctuation nodes etc, then flatten to remove their dedicated parents
    if (rootNode != null) {
        //         System.out.println();
        //         System.out.println("raw tree " + rootNode.pennString());
        //         System.out.println("pru tree " + rootNode.prune(nodeFilter).pennString());
        //         System.out.println("fla tree " + rootNode.prune(nodeFilter).flatten().pennString());
        //         rootNode = rootNode.prune(nodeFilter).flatten();
    }
    return rootNode;
}

From source file:qmul.util.parse.CreateTreeFromClarkCurranCCGProlog.java

License:Open Source License

/**
 * Turns a category string into a label.
 * <p>
 * If the REMOVE_SQUARE_BRACKET_SUBCATS option is set, every [subcat] part is deleted first, i.e. S[dcl]/S[b]
 * becomes S/S.
 *
 * @param str
 *            the category string
 * @return a {@link StringLabel} based on the (possibly shortened) string
 */
private static Label getSynLabel(String str) {
    String category = getOption(REMOVE_SQUARE_BRACKET_SUBCATS) ? str.replaceAll("\\[(.*?)\\]", "") : str;
    return new StringLabel(category);
}

From source file:qmul.util.parse.StanfordParser.java

License:Open Source License

/**
 * Convenience method: splits utt into sentences, uses {@link LexicalizedParser}'s parse() to tokenize and parse
 * each sentence/*from w w w .  j av  a2s.co  m*/
 * 
 * @param utt
 * @return a {@link Tree} with ROOT node, with the getBestParse() trees for each sentence as children
 */
public Tree parse(String utt) {
    String[] sentences = utt.split("[.!?]");
    // System.out.println("there are sentences:" + sentences.length);
    // LinkedList<Tree> list=new LinkedList<Tree>();
    Label rootLabel = new StringLabel("ROOT");
    Tree concat = new LabeledScoredTreeNode(rootLabel, new LinkedList<Tree>());

    try {
        for (int i = 0; i < sentences.length; i++) {
            boolean parsed = false;
            if (sentences[i].length() > 0)
                parsed = lp.parse(sentences[i]);
            else
                continue;
            Tree t = lp.getBestParse();
            Tree rootChild;
            if (t.children().length == 1)
                rootChild = t.removeChild(0);
            else
                rootChild = t;
            concat.addChild(rootChild);
        }
        if (concat.children().length > 1)
            return concat;
        else
            return concat.removeChild(0);
    } catch (Throwable t) {
        System.out.println(t.getMessage());
        System.out.println("Reinitializing parser because of trying to parse error " + utt);
        this.lp = null;
        Runtime r = Runtime.getRuntime();
        r.gc();
        lp = new LexicalizedParser(System.getProperty("user.dir") + File.separator + "utils" + File.separator
                + "englishPCFG.ser.gz");
        this.lp.setOptionFlags(new String[] { "-maxLength", "100", "-retainTmpSubcategories" });
        return null;
    }

}

From source file:reck.corpora.DocumentImpl.java

License:Open Source License

/**
 * Marks the head word of a mention in the dependency-parse (DP) tree.
 * <p>
 * The DP node list is narrowed to the nodes matching the character range of the mention head. One of them is
 * chosen as head word: the node directly left of the right-most node with role "prep" in that range if there is
 * one, otherwise the right-most node of the range. Its text (taken from noTaggedContent) and position are stored
 * on the mention, and the node is replaced in its parent by a new node that is labelled with the mention's entity
 * type and has the chosen node as its only child.
 *
 * @param parseTree
 *            the parse tree whose DP tree is modified in place
 * @param mention
 *            the mention whose head word is determined; its headword and hwPosition are set
 * @return the parseTree instance that was passed in
 */
public RECKParseTreeImpl findDPHeadWord(RECKParseTreeImpl parseTree, MentionImpl mention) {

    /*if (mention.getId().equals("54-86") || mention.getId().equals("52-85"))
    System.out.println();*/

    RECKDPTreeNodeImpl DPTreeNode = parseTree.getDPParseTree();
    int start = mention.getHead().getStart().intValue();
    int end = mention.getHead().getEnd().intValue();

    ArrayList leaves = parseTree.getDPTreeList();

    // i and j are list indices scanning inwards from both ends; k later becomes the index of the head word
    int n = leaves.size(), i = 0, j = n - 1, k;

    // leftIndex/rightIndex: character offsets of the current left/right node.
    // leftID/rightID: until they are overwritten with list indices further down, they hold the character
    // offset of the node visited just before the current one.
    int leftIndex = ((RECKDPTreeNodeImpl) leaves.get(i)).getPosition().getStart().intValue();
    int rightIndex = ((RECKDPTreeNodeImpl) leaves.get(j)).getPosition().getEnd().intValue();
    int leftID = leftIndex, rightID = rightIndex;
    RECKDPTreeNodeImpl leftNode = (RECKDPTreeNodeImpl) leaves.get(i),
            rightNode = (RECKDPTreeNodeImpl) leaves.get(j);

    // move right until a node starts at or after the head start (or the last node is reached)
    while (i < n - 1 && leftIndex < start) {
        i++;
        leftID = leftIndex;
        leftNode = (RECKDPTreeNodeImpl) leaves.get(i);
        leftIndex = leftNode.getPosition().getStart().intValue();
    }
    // move left until a node ends at or before the head end (or the first node is reached)
    while (j > 0 && end < rightIndex) {
        j--;
        rightID = rightIndex;
        rightNode = (RECKDPTreeNodeImpl) leaves.get(j);
        rightIndex = rightNode.getPosition().getEnd().intValue();
    }

    // the scan went past the head start, but the previous node started just one character before it:
    // step back to that node
    if ((leftIndex > start) && (leftID == start - 1)) {
        i--;
        leftNode = (RECKDPTreeNodeImpl) leaves.get(i);
        leftIndex = leftNode.getPosition().getStart().intValue();
    }
    // mirror case on the right: the previous node ended just one character after the head end
    if ((end > rightIndex) && (rightID == end + 1)) {
        j++;
        rightNode = (RECKDPTreeNodeImpl) leaves.get(j);
        rightIndex = rightNode.getPosition().getEnd().intValue();
    }

    // from here on leftID/rightID are list indices delimiting the candidate range
    leftID = i;
    rightID = j;

    RECKDPTreeNodeImpl terminal = null;

    if (leftID < rightID) {
        // scan from right to left; without a "prep" node the loop ends with k == leftID - 1
        for (k = rightID; k >= leftID; k--) {
            terminal = (RECKDPTreeNodeImpl) leaves.get(k);

            // re-define the head word of the mention when a preposition exists
            if (terminal.role().equals("prep") && (k > leftID)) {
                k--;
                break;
            }
        }
    } else {
        k = leftID;
    }

    // k >= leftID: a preposition was found (or the range has at most one node) and k is the head word;
    // otherwise the right-most node of the range is used.
    // NOTE(review): the if/else below assigns origin the same value the initializer already produced.
    RECKDPTreeNodeImpl origin = (k >= leftID) ? (RECKDPTreeNodeImpl) leaves.get(k)
            : (RECKDPTreeNodeImpl) leaves.get(rightID);
    if (k >= leftID) {
        origin = (RECKDPTreeNodeImpl) leaves.get(k);
        rightIndex = origin.getPosition().getEnd().intValue();
        rightID = k;
    } else {
        origin = (RECKDPTreeNodeImpl) leaves.get(rightID);
    }

    StringLabel newLabel = new StringLabel(mention.getEntity().getType());
    Charseq newPosition = null;
    RECKDPTreeNodeImpl newNode = null;

    newPosition = origin.getPosition().clone();
    // the head word text is cut out with an exclusive end offset, whereas the stored head word position
    // gets end - 1
    mention.setHeadword(RECKConstants.trimReturn(noTaggedContent
            .substring(origin.getPosition().getStart().intValue(), origin.getPosition().getEnd().intValue())));
    mention.setHwPosition(new Charseq(origin.getPosition().getStart().intValue(),
            origin.getPosition().getEnd().intValue() - 1));

    // put the entity-type node between origin and its parent
    // NOTE(review): upper is dereferenced without a null check - confirm origin can never be the tree root
    RECKDPTreeNodeImpl upper = origin.parent(DPTreeNode);
    Tree[] newChildren = { origin };
    int index = upper.indexOf(origin);
    newNode = new RECKDPTreeNodeImpl(newLabel, newChildren, newPosition);
    upper.setChild(index, newNode);

    // NOTE(review): newNode is only registered when the mention has no entry yet; if a list already exists
    // it is left unchanged - confirm this is intended
    ArrayList nodeList = (ArrayList) parseTree.getDPEntityTrees().get(mention);
    if (nodeList == null) {
        nodeList = new ArrayList();
        nodeList.add(newNode);
        parseTree.getDPEntityTrees().put(mention, nodeList);
    }

    return parseTree;
}

From source file:reck.corpora.DocumentImpl.java

License:Open Source License

/**
 * Marks the head word of a mention in the constituent (CT) tree.
 * <p>
 * The leaves of the CT tree are narrowed to those matching the character range of the mention head. One leaf is
 * chosen as starting point: the leaf directly left of the right-most preposition (a leaf whose parent is labelled
 * "IN" and whose grandparent is labelled "PP") in that range if there is one, otherwise the right-most leaf of the
 * range. From there the method climbs to larger constituents, stores head word text (taken from noTaggedContent)
 * and position on the mention, and inserts a new node labelled with the mention's entity type into the tree.
 *
 * @param parseTree
 *            the parse tree whose CT tree is modified in place
 * @param mention
 *            the mention whose head word is determined; its headword and hwPosition are set
 * @return the parseTree instance that was passed in
 */
public RECKParseTreeImpl findCTHeadWord(RECKParseTreeImpl parseTree, MentionImpl mention) {

    RECKCTTreeNodeImpl CTTreeNode = parseTree.getCTParseTree();
    int start = mention.getHead().getStart().intValue();
    int end = mention.getHead().getEnd().intValue();

    ArrayList leaves = new ArrayList(CTTreeNode.getLeaves());

    // i and j are leaf indices scanning inwards from both ends; k later becomes the index of the head word
    int n = leaves.size(), i = 0, j = n - 1, k;

    // leftIndex/rightIndex: character offsets of the current left/right leaf.
    // leftID/rightID: until they are overwritten with leaf indices further down, they hold the character
    // offset of the leaf visited just before the current one.
    int leftIndex = ((RECKCTTreeNodeImpl) leaves.get(i)).getPosition().getStart().intValue();
    int rightIndex = ((RECKCTTreeNodeImpl) leaves.get(j)).getPosition().getEnd().intValue();
    int leftID = leftIndex, rightID = rightIndex;
    RECKCTTreeNodeImpl leftNode = (RECKCTTreeNodeImpl) leaves.get(i),
            rightNode = (RECKCTTreeNodeImpl) leaves.get(j);

    // move right until a leaf starts at or after the head start (or the last leaf is reached)
    while (i < n - 1 && leftIndex < start) {
        i++;
        leftID = leftIndex;
        leftNode = (RECKCTTreeNodeImpl) leaves.get(i);
        leftIndex = leftNode.getPosition().getStart().intValue();
    }
    // move left until a leaf ends at or before the head end (or the first leaf is reached)
    while (j > 0 && end < rightIndex) {
        j--;
        rightID = rightIndex;
        rightNode = (RECKCTTreeNodeImpl) leaves.get(j);
        rightIndex = rightNode.getPosition().getEnd().intValue();
    }

    // the scan went past the head start, but the previous leaf started just one character before it:
    // step back to that leaf
    if ((leftIndex > start) && (leftID == start - 1)) {
        i--;
        leftNode = (RECKCTTreeNodeImpl) leaves.get(i);
        leftIndex = leftNode.getPosition().getStart().intValue();
    }
    // mirror case on the right: the previous leaf ended just one character after the head end
    if ((end > rightIndex) && (rightID == end + 1)) {
        j++;
        rightNode = (RECKCTTreeNodeImpl) leaves.get(j);
        rightIndex = rightNode.getPosition().getEnd().intValue();
    }

    // from here on leftID/rightID are leaf indices delimiting the candidate range
    leftID = i;
    rightID = j;

    RECKCTTreeNodeImpl terminal = null;
    RECKCTTreeNodeImpl preTerminal = null;
    RECKCTTreeNodeImpl prePreTerminal = null;

    if (leftID < rightID) {
        // scan from right to left; without a preposition the loop ends with k == leftID - 1
        for (k = rightID; k >= leftID; k--) {
            terminal = (RECKCTTreeNodeImpl) leaves.get(k);
            preTerminal = terminal.parent(CTTreeNode);
            prePreTerminal = preTerminal.parent(CTTreeNode);

            // re-define the head word of the mention when a preposition exists
            if ((preTerminal.label().value().equals("IN")) && (prePreTerminal.label().value().equals("PP"))
                    && k > leftID) {
                k--;
                break;
            }
        }
    } else {
        k = leftID;
    }

    // k >= leftID: a preposition was found (or the range has at most one leaf) and k is the head word;
    // otherwise the right-most leaf of the range is used.
    // NOTE(review): the if/else below assigns origin the same value the initializer already produced.
    RECKCTTreeNodeImpl origin = (k >= leftID) ? (RECKCTTreeNodeImpl) leaves.get(k)
            : (RECKCTTreeNodeImpl) leaves.get(rightID);
    if (k >= leftID) {
        origin = (RECKCTTreeNodeImpl) leaves.get(k);
        rightIndex = origin.getPosition().getEnd().intValue();
        rightID = k;
    } else {
        origin = (RECKCTTreeNodeImpl) leaves.get(rightID);
    }
    // climb as long as the parent still starts at or after leftIndex and ends exactly at rightIndex;
    // afterwards origin is the largest such constituent and upper is its parent
    // NOTE(review): upper is dereferenced without a null check - confirm the climb cannot run past the root
    RECKCTTreeNodeImpl upper = origin.parent(CTTreeNode);
    while ((upper.getPosition().getStart().intValue() >= leftIndex)
            && (upper.getPosition().getEnd().intValue() == rightIndex)) {
        origin = upper;
        upper = upper.parent(CTTreeNode);
    }

    StringLabel newLabel = new StringLabel(mention.getEntity().getType());
    Charseq newPosition = null;
    RECKCTTreeNodeImpl newNode = null;

    // The case where upper covers more than the mention head: upper starts before leftIndex while origin
    // starts after it, so the head spans several children of upper.
    if ((upper.getPosition().getStart().intValue() < leftIndex)
            && (origin.getPosition().getStart().intValue() > leftIndex)) {
        RECKCTTreeNodeImpl child = origin;
        // r: position of origin among upper's children; l walks left until a child starts at or before leftIndex
        int r = upper.indexOf(child), l = r;
        while (child.getPosition().getStart().intValue() > leftIndex) {
            l--;
            child = (RECKCTTreeNodeImpl) upper.getChild(l);
        }
        // only when a child starts exactly at leftIndex are the children l..r grouped under the new node
        if (child.getPosition().getStart().intValue() == leftIndex) {
            RECKCTTreeNodeImpl leftChild = (RECKCTTreeNodeImpl) upper.getChild(l);
            RECKCTTreeNodeImpl rightChild = (RECKCTTreeNodeImpl) upper.getChild(r);
            leftIndex = leftChild.getPosition().getStart().intValue();
            rightIndex = rightChild.getPosition().getEnd().intValue();
            newPosition = new Charseq(leftIndex, rightIndex);
            // the head word text is cut out with an exclusive end offset, whereas the stored head word
            // position gets rightIndex - 1
            mention.setHeadword(RECKConstants.trimReturn(noTaggedContent.substring(leftIndex, rightIndex)));
            mention.setHwPosition(new Charseq(leftIndex, rightIndex - 1));

            Tree[] children = new Tree[r - l + 1];

            for (int m = l; m < r + 1; m++)
                children[m - l] = upper.getChild(m);

            newNode = new RECKCTTreeNodeImpl(newLabel, children, newPosition);
            // new child array of upper: children before l, then newNode, then children after r
            Tree[] newChildren = new Tree[upper.numChildren() - newNode.numChildren() + 1];

            for (int m = 0; m < l; m++) {
                newChildren[m] = upper.getChild(m);
            }

            newChildren[l] = newNode;

            for (int m = r + 1; m < upper.numChildren(); m++) {
                newChildren[m - r + l] = upper.getChild(m);
            }
            upper.setChildren(newChildren);

        }
    }

    // default case: the head word is exactly the constituent origin
    if (newNode == null) {
        newPosition = origin.getPosition().clone();
        mention.setHeadword(RECKConstants.trimReturn(noTaggedContent.substring(
                origin.getPosition().getStart().intValue(), origin.getPosition().getEnd().intValue())));
        mention.setHwPosition(new Charseq(origin.getPosition().getStart().intValue(),
                origin.getPosition().getEnd().intValue() - 1));

        if (origin.isPreTerminal()) {
            // pre-terminal: the new node is put above origin
            Tree[] newChildren = { origin };
            int index = upper.indexOf(origin);
            newNode = new RECKCTTreeNodeImpl(newLabel, newChildren, newPosition);
            upper.setChild(index, newNode);
        } else {
            // otherwise the new node is put below origin and takes over all of its children
            newNode = new RECKCTTreeNodeImpl(newLabel, origin.children(), newPosition);
            Tree[] newChildren = { newNode };
            origin.setChildren(newChildren);
        }
    }

    // NOTE(review): newNode is only registered when the mention has no entry yet; if a list already exists
    // it is left unchanged - confirm this is intended
    ArrayList nodeList = (ArrayList) parseTree.getCTEntityTrees().get(mention);
    if (nodeList == null) {
        nodeList = new ArrayList();
        nodeList.add(newNode);
        parseTree.getCTEntityTrees().put(mention, nodeList);
    }

    return parseTree;
}

From source file:reck.parser.lexparser.RECKLexicalizedParser.java

License:Open Source License

/**
 * Aligns the words of a sentence with the document text and records their character positions.
 * <p>
 * Starting at <code>start</code>, every word is searched in <code>content</code> at most
 * maxDistanceBetweenLeaves characters after the end of the previous word. If the word is not found in its
 * original form, several variants are tried in turn: '&amp;' written as "&amp;amp;" or "&amp;AMP;", the result
 * of reConvert(), any bracket for the bracket tokens -LRB-/-LCB-/-RRB-/-RCB-, the newStrings/oldStrings
 * replacements and alternative quote characters. For every word a {@link RECKDPTreeNodeImpl} with its label and
 * position is appended to reckTreeList (which is re-created on every call); finally sentencePosition is set to the
 * range from <code>start</code> to the end of the last word.
 *
 * @param start
 *            offset in content at which the sentence starts
 * @param sentence
 *            the words of the sentence
 * @param content
 *            the document text
 */
public void computePosition(int start, Sentence sentence, String content) {

    int docIndex = start;
    String st = null;

    reckTreeList = new ArrayList();

    for (int i = 0; i < sentence.size(); i++) {
        st = ((Word) sentence.get(i)).toString();
        int index = content.indexOf(st, docIndex);
        if (index == -1 || index - docIndex > maxDistanceBetweenLeaves) {
            // not found close by: the document may contain '&' in escaped form
            String escaped = st;
            if (st.indexOf("&") != -1) {
                escaped = st.replaceAll("&", "&amp;");
                index = content.indexOf(escaped, docIndex);
                if (index == -1 || index - docIndex > maxDistanceBetweenLeaves) {
                    escaped = st.replaceAll("&", "&AMP;");
                    index = content.indexOf(escaped, docIndex);
                }
            }
            if (index != -1 && index - docIndex <= maxDistanceBetweenLeaves) {
                // advance by the length of the text actually matched (the former "+ 4" was only right for
                // tokens with exactly one '&')
                docIndex = index + escaped.length();
            } else {
                st = reConvert(st);
                index = content.indexOf(st, docIndex);
                if (index == -1 || index - docIndex > maxDistanceBetweenLeaves) {
                    if (st.equals("-LRB-") || st.equals("-LCB-")) {
                        // the token stands for whichever opening bracket comes first in the document
                        int bracket = earliestIndexOf(content, docIndex, "(", "[", "{");
                        if (bracket == -1) {
                            // keep docIndex: index + 1 would move it backwards (to 0 when index is -1)
                            System.out.println("Come here !");
                        } else {
                            index = bracket;
                            docIndex = index + 1;
                        }
                    }

                    else if (st.equals("-RRB-") || st.equals("-RCB-")) {
                        // the token stands for whichever closing bracket comes first in the document
                        int bracket = earliestIndexOf(content, docIndex, ")", "]", "}");
                        if (bracket == -1) {
                            // keep docIndex: index + 1 would move it backwards (to 0 when index is -1)
                            System.out.println("Come here !");
                        } else {
                            index = bracket;
                            docIndex = index + 1;
                        }
                    }

                    else {

                        for (int k = 0; k < newStrings.length; k++) {
                            st = st.replace(newStrings[k], oldStrings[k]);
                        }

                        // '' and `` in the token may be a plain double quote in the document
                        String oldSubSt1 = "''";
                        String oldSubSt2 = "``";
                        String newSubSt = "\"";
                        boolean doubleQuoteAhead = content.indexOf(newSubSt, docIndex) != -1;
                        if (st.indexOf(oldSubSt1) != -1 && doubleQuoteAhead)
                            st = st.replace(oldSubSt1, newSubSt);
                        else if (st.indexOf(oldSubSt2) != -1 && doubleQuoteAhead)
                            st = st.replace(oldSubSt2, newSubSt);

                        // apostrophe and backquote may be interchanged; use the one found close by
                        int i39 = content.indexOf('\'', docIndex);
                        int i96 = content.indexOf('`', docIndex);

                        if ((st.indexOf('\'') != -1) && (i96 != -1 && i96 - docIndex <= maxDistanceBetweenLeaves))
                            st = st.replace('\'', '`');
                        else if ((st.indexOf('`') != -1)
                                && (i39 != -1 && i39 - docIndex <= maxDistanceBetweenLeaves))
                            st = st.replace('`', '\'');

                        index = content.indexOf(st, docIndex);
                        if (index == -1 || index - docIndex > maxDistanceBetweenLeaves)
                            System.out.println("Come here !");
                        else
                            docIndex = index + st.length();
                    }
                } else
                    docIndex = index + st.length();
            }
        } else
            docIndex = index + st.length();

        // Test if next node is a sentence splitter, means "."
        if (st.endsWith(".") && i < sentence.size() - 1) {
            String nextLabel = ((Word) sentence.get(i + 1)).toString();
            int nextIndex = content.indexOf(nextLabel, docIndex);

            if (nextLabel.equals(".") && (nextIndex == -1 || nextIndex - docIndex > maxDistanceBetweenLeaves)) {
                // The final '.' of this word is the one the next token refers to: give it back by shortening
                // both the position and the label by exactly one character (the former "- 2" also cut off
                // the last letter and failed for one-character words).
                docIndex--;
                st = st.substring(0, st.length() - 1);
            }
        }
        // ((Word)sentence.get(i)).setWord(st);

        RECKDPTreeNodeImpl reckNode = new RECKDPTreeNodeImpl(new StringLabel(st), new Charseq(index, docIndex));
        reckTreeList.add(reckNode);

    }

    sentencePosition = new Charseq(start, docIndex);

}

/**
 * Returns the smallest index at or after <code>from</code> at which one of the three strings occurs in
 * <code>content</code>, or -1 if none of them occurs.
 */
private static int earliestIndexOf(String content, int from, String a, String b, String c) {
    int best = -1;
    for (String candidate : new String[] { a, b, c }) {
        int pos = content.indexOf(candidate, from);
        if (pos != -1 && (best == -1 || pos < best)) {
            best = pos;
        }
    }
    return best;
}

From source file:reck.parser.lexparser.RECKLexicalizedParser.java

License:Open Source License

/**
 * Rebuilds the parse tree under {@code root} out of {@link RECKCTTreeNodeImpl} nodes, each
 * carrying a {@link Charseq} position inside {@code content}.
 *
 * <p>The work is done in two passes:
 * <ol>
 * <li>Leaves: every leaf's text is searched for in {@code content} from a running cursor
 * ({@code docIndex}, initially {@code startSentence}). A hit only counts when it lies within
 * {@code maxDistanceBetweenLeaves} characters of the cursor; otherwise a series of fallback
 * spellings is tried (ampersand entities, {@code reConvert}, bracket placeholders, quote
 * variants). Each leaf is then replaced in its parent by a positioned node.</li>
 * <li>Upper nodes: working upwards, a node whose children are all
 * {@code RECKCTTreeNodeImpl} instances is replaced by a positioned node spanning from its
 * first child's start to its last child's end.</li>
 * </ol>
 *
 * <p>The tree under {@code root} is modified in place via {@code setChild}. The fields/helpers
 * {@code maxDistanceBetweenLeaves}, {@code reConvert}, {@code newStrings} and
 * {@code oldStrings} are declared outside this method.
 *
 * @param root          root of the parse tree to convert
 * @param startSentence index in {@code content} at which the leaf search starts
 * @param content       text in which the leaf strings are located
 * @return a new node built from {@code root}'s label and children, or {@code null} if the
 *         upward pass ends without reaching the state where only {@code root} is left
 */
public RECKCTTreeNodeImpl convertToRECKTree(Tree root, int startSentence, String content) {

    RECKCTTreeNodeImpl newRoot = null;

    Charseq pos = null;

    List nodeList = root.getLeaves();
    HashSet parentSet = new HashSet();
    // Cursor into content: the position just after the most recently located leaf.
    int docIndex = startSentence;
    String st = null;

    // compute leaves' positions
    for (int i = 0; i < nodeList.size(); i++) {
        Tree oldNode = (Tree) nodeList.get(i);
        st = oldNode.toString();

        // First attempt: the leaf text exactly as printed by the tree.
        int start = content.indexOf(st, docIndex);
        if (start == -1 || start - docIndex > maxDistanceBetweenLeaves) {
            // Not found (or found too far away): try the text with '&' written as an entity.
            if (st.indexOf("&") != -1) {
                String tmp = st.replaceAll("&", "&amp;");
                start = content.indexOf(tmp, docIndex);
                if (start == -1 || start - docIndex > maxDistanceBetweenLeaves) {
                    tmp = st.replaceAll("&", "&AMP;");
                    start = content.indexOf(tmp, docIndex);
                }
            }
            if (start != -1 && start - docIndex <= maxDistanceBetweenLeaves) {
                // "&amp;" / "&AMP;" is 4 characters longer than "&".
                // NOTE(review): this only matches the entity text length when st contains a
                // single '&' - confirm whether tokens with several '&' can occur.
                docIndex = start + st.length() + 4;
            } else {
                // reConvert is defined elsewhere; presumably it undoes an escaping applied
                // to the token text - TODO confirm.
                st = reConvert(st);
                start = content.indexOf(st, docIndex);
                if (start == -1 || start - docIndex > maxDistanceBetweenLeaves) {
                    // Opening-bracket placeholder: use the nearest of "(", "[" or "{".
                    if (st.equals("-LRB-") || st.equals("-LCB-")) {
                        int i1 = content.indexOf("(", docIndex);
                        int i2 = content.indexOf("[", docIndex);
                        int i3 = content.indexOf("{", docIndex);
                        // A missing bracket is ranked last by mapping -1 to content.length().
                        if (i1 == -1)
                            i1 = content.length();
                        if (i2 == -1)
                            i2 = content.length();
                        if (i3 == -1)
                            i3 = content.length();

                        // All three equal means none of the brackets was found.
                        // NOTE(review): in that case start keeps its previous value and
                        // docIndex is still set to start + 1 below - confirm this is intended.
                        if ((i1 == i2) && (i1 == i3))
                            System.out.println("Come here !");
                        else if (i1 < i2) {
                            if (i3 < i1) {
                                // st = "{";
                                start = i3;
                            } else {
                                // st = "(";
                                start = i1;
                            }
                        } else {
                            if (i3 < i2) {
                                // st = "{";
                                start = i3;
                            } else {
                                // st = "[";
                                start = i2;
                            }
                        }
                        // The bracket occupies a single character in content.
                        docIndex = start + 1;
                    }

                    // Closing-bracket placeholder: same selection with ")", "]" and "}".
                    else if (st.equals("-RRB-") || st.equals("-RCB-")) {
                        int i1 = content.indexOf(")", docIndex);
                        int i2 = content.indexOf("]", docIndex);
                        int i3 = content.indexOf("}", docIndex);
                        if (i1 == -1)
                            i1 = content.length();
                        if (i2 == -1)
                            i2 = content.length();
                        if (i3 == -1)
                            i3 = content.length();

                        // Same "nothing found" situation as in the opening-bracket branch.
                        if ((i1 == i2) && (i1 == i3))
                            System.out.println("Come here !");
                        else if (i1 < i2) {
                            if (i3 < i1) {
                                // st = "}";
                                start = i3;
                            } else {
                                // st = ")";
                                start = i1;
                            }
                        } else {
                            if (i3 < i2) {
                                // st = "}";
                                start = i3;
                            } else {
                                // st = "]";
                                start = i2;
                            }
                        }
                        docIndex = start + 1;
                    }

                    else {

                        // Replace every newStrings[k] occurrence by oldStrings[k]
                        // (both arrays are declared outside this method).
                        for (int k = 0; k < newStrings.length; k++) {
                            st = st.replace(newStrings[k], oldStrings[k]);
                        }

                        // Character codes: 39 = apostrophe, 96 = backtick, 34 = double quote.
                        String oldSubSt1 = new String(new char[] { (char) 39, (char) 39 });
                        String oldSubSt2 = new String(new char[] { (char) 96, (char) 96 });
                        String newSubSt = new String(new char[] { (char) 34 });
                        // A doubled apostrophe/backtick in the token becomes a double quote,
                        // provided a double quote occurs somewhere in the remaining content.
                        if (st.indexOf(oldSubSt1) != -1 && content.substring(docIndex).indexOf(newSubSt) != -1)
                            st = st.replace(oldSubSt1, newSubSt);
                        else if (st.indexOf(oldSubSt2) != -1
                                && content.substring(docIndex).indexOf(newSubSt) != -1)
                            st = st.replace(oldSubSt2, newSubSt);

                        int i39 = content.indexOf(39, docIndex);
                        int i96 = content.indexOf(96, docIndex);

                        // Swap apostrophes and backticks in the token when the other kind of
                        // quote is the one that occurs close enough to the cursor in content.
                        if ((st.indexOf(39) != -1) && (i96 != -1 && i96 - docIndex <= maxDistanceBetweenLeaves))
                            st = st.replace((char) 39, (char) 96);
                        else if ((st.indexOf(96) != -1)
                                && (i39 != -1 && i39 - docIndex <= maxDistanceBetweenLeaves))
                            st = st.replace((char) 96, (char) 39);

                        // Last attempt with the rewritten token; on failure only a message is
                        // printed and docIndex is left where it was.
                        start = content.indexOf(st, docIndex);
                        if (start == -1 || start - docIndex > maxDistanceBetweenLeaves)
                            System.out.println("Come here !");
                        else
                            docIndex = start + st.length();
                    }
                } else
                    docIndex = start + st.length();
            }
        } else
            docIndex = start + st.length();

        // Test if next node is a sentence splitter, means "."
        if (st.endsWith(".") && i < nodeList.size() - 1) {
            Tree nextNode = (Tree) nodeList.get(i + 1);
            String nextLabel = nextNode.label().value();
            int nextStart = content.indexOf(nextLabel, docIndex);

            // The next leaf is "." but no "." follows closely in content: give the final
            // character of this token back by moving the cursor one position to the left.
            // NOTE(review): the shortened label is set on oldNode, while newNode below is
            // still built from the unshortened st - confirm which label is intended.
            if (nextLabel.equals(".") && (nextStart == -1 || nextStart - docIndex > maxDistanceBetweenLeaves)) {
                docIndex--;
                oldNode.setLabel(new StringLabel(st.substring(0, st.length() - 1)));
            }
        }

        // Swap the leaf for a positioned node and remember its parent for the upward pass.
        pos = new Charseq(start, docIndex);
        RECKCTTreeNodeImpl newNode = new RECKCTTreeNodeImpl(new StringLabel(st),
                (List) oldNode.getChildrenAsList(), pos);
        Tree parent = oldNode.parent(root);
        parent.setChild(parent.indexOf(oldNode), newNode);
        parentSet.add(parent);
    }

    // nodeList is reused as the work list of nodes still to be converted.
    nodeList.clear();
    nodeList.addAll(parentSet);

    // compute upper nodes' positions
    while (!nodeList.isEmpty()) {
        parentSet = new HashSet();
        for (int i = 0; i < nodeList.size(); i++) {
            Tree oldNode = (Tree) nodeList.get(i);
            // Advance to the first child that is not yet a RECKCTTreeNodeImpl, or to the
            // last child when every child has already been converted.
            Iterator nodeIter = oldNode.getChildrenAsList().iterator();
            Tree node = (Tree) nodeIter.next();
            while (node instanceof RECKCTTreeNodeImpl && nodeIter.hasNext()) {
                node = (Tree) nodeIter.next();
            }
            if (node instanceof RECKCTTreeNodeImpl) {
                // All children are converted: the node spans from the first child's start
                // to the last child's end.
                Long start = ((RECKCTTreeNodeImpl) oldNode.firstChild()).getPosition().getStart();
                Long end = ((RECKCTTreeNodeImpl) oldNode.lastChild()).getPosition().getEnd();
                pos = new Charseq(start, end);
                RECKCTTreeNodeImpl newNode = new RECKCTTreeNodeImpl(oldNode.label(),
                        (List) oldNode.getChildrenAsList(), pos);
                Tree parent = oldNode.parent(root);
                parent.setChild(parent.indexOf(oldNode), newNode);
                parentSet.add(parent);

                // if oldNode is in parentSet, remove it
                if (parentSet.contains(oldNode)) {
                    parentSet.remove(oldNode);
                }
            } else {
                // Some child is still unconverted: revisit this node in the next round.
                parentSet.add(oldNode);
            }
        }

        nodeList.clear();
        if (parentSet.size() == 1 && parentSet.contains(root)) {
            // Only the root is left: build the result node. nodeList stays empty, which
            // ends the loop.
            Long start = ((RECKCTTreeNodeImpl) root.firstChild()).getPosition().getStart();
            Long end = ((RECKCTTreeNodeImpl) root.lastChild()).getPosition().getEnd();
            pos = new Charseq(start, end);
            newRoot = new RECKCTTreeNodeImpl(root.label(), (List) root.getChildrenAsList(), pos);
        } else {
            nodeList.addAll(parentSet);
        }
    }

    return newRoot;

}

From source file:reck.parser.lexparser.RECKLexicalizedParser.java

License:Open Source License

/**
 * Splits the label of {@code node} at every hyphen and returns one pre-terminal per fragment,
 * with a pre-terminal for the hyphen itself between each pair of neighbouring fragments.
 *
 * <p>Each pre-terminal is labelled with the value of {@code parent}'s label and has exactly
 * one leaf child. Positions are assigned consecutively, starting at the start of
 * {@code node}'s position: a fragment covers as many characters as it is long, a hyphen
 * covers one character.
 *
 * <p>The label is split with {@code String.split("-")}, which discards trailing empty
 * fragments. A label that consists only of hyphens therefore yields no fragments, and the
 * array allocation fails with a {@link NegativeArraySizeException}.
 *
 * @param node   leaf-level node whose label is split
 * @param parent node whose label value is reused for every generated pre-terminal
 * @return the generated pre-terminals in text order ({@code 2 * fragments - 1} entries)
 */
public RECKCTTreeNodeImpl[] splitHyphenSt_Constituent(RECKCTTreeNodeImpl node, RECKCTTreeNodeImpl parent) {
    final String[] fragments = node.label().value().split("-");
    final int fragmentCount = fragments.length;
    long cursor = node.getPosition().getStart();
    final RECKCTTreeNodeImpl[] result = new RECKCTTreeNodeImpl[2 * fragmentCount - 1];

    // Next free position in result; fragments and hyphens are appended alternately.
    int slot = 0;
    for (int f = 0; f < fragmentCount; f++) {
        final String fragment = fragments[f];

        final StringLabel fragmentLabel = new StringLabel(fragment);
        final Charseq fragmentSpan = new Charseq(cursor, cursor + fragment.length());
        final RECKCTTreeNodeImpl fragmentLeaf = new RECKCTTreeNodeImpl(fragmentLabel, fragmentSpan);
        result[slot++] = new RECKCTTreeNodeImpl(new StringLabel(parent.label().value()),
                new RECKCTTreeNodeImpl[] { fragmentLeaf }, fragmentSpan);
        cursor += fragment.length();

        // No hyphen follows the last fragment.
        if (f == fragmentCount - 1) {
            break;
        }

        final StringLabel dashLabel = new StringLabel("-");
        final Charseq dashSpan = new Charseq(cursor, cursor + 1);
        final RECKCTTreeNodeImpl dashLeaf = new RECKCTTreeNodeImpl(dashLabel, dashSpan);
        result[slot++] = new RECKCTTreeNodeImpl(new StringLabel(parent.label().value()),
                new RECKCTTreeNodeImpl[] { dashLeaf }, dashSpan);
        cursor++;
    }

    return result;
}

From source file:reck.parser.lexparser.RECKLexicalizedParser.java

License:Open Source License

/**
 * Separates the final character of {@code node}'s label from the rest and returns two
 * pre-terminals: one for the label without its last character, one for a {@code "."} leaf.
 *
 * <p>Both pre-terminals are labelled with the value of {@code parent}'s label. The first
 * spans from the start of {@code node}'s position to one character before its end; the
 * second spans that last character. The second leaf is always labelled {@code "."},
 * whatever the label's last character actually is.
 *
 * @param node   leaf-level node whose label is split
 * @param parent node whose label value is reused for both generated pre-terminals
 * @return a two-element array: the shortened token first, the point second
 */
public RECKCTTreeNodeImpl[] splitPointSt_Constituent(RECKCTTreeNodeImpl node, RECKCTTreeNodeImpl parent) {
    final String text = node.label().value();
    final int spanStart = node.getPosition().getStart().intValue();
    final int spanEnd = node.getPosition().getEnd().intValue();
    final int textLength = text.length();
    final RECKCTTreeNodeImpl[] halves = new RECKCTTreeNodeImpl[2];

    // First half: everything except the last character.
    final StringLabel stemLabel = new StringLabel(text.substring(0, textLength - 1));
    final Charseq stemSpan = new Charseq(spanStart, spanEnd - 1);
    final RECKCTTreeNodeImpl stemLeaf = new RECKCTTreeNodeImpl(stemLabel, stemSpan);
    halves[0] = new RECKCTTreeNodeImpl(new StringLabel(parent.label().value()),
            new RECKCTTreeNodeImpl[] { stemLeaf }, stemSpan);

    // Second half: the point, covering the last character of the original span.
    final StringLabel dotLabel = new StringLabel(".");
    final Charseq dotSpan = new Charseq(spanEnd - 1, spanEnd);
    final RECKCTTreeNodeImpl dotLeaf = new RECKCTTreeNodeImpl(dotLabel, dotSpan);
    halves[1] = new RECKCTTreeNodeImpl(new StringLabel(parent.label().value()),
            new RECKCTTreeNodeImpl[] { dotLeaf }, dotSpan);

    return halves;
}