Example usage for edu.stanford.nlp.trees Tree yield

List of usage examples for edu.stanford.nlp.trees Tree yield

Introduction

On this page you can find example usages of edu.stanford.nlp.trees Tree yield.

Prototype

public ArrayList<Label> yield() 

Source Link

Document

Gets the yield of the tree.

Usage

From source file:edu.cmu.ark.SentenceSimplifier.java

License:Open Source License

/**
 * Extracts finite complement clauses (e.g., "John knew that Mary left." ->
 * "Mary left.") from the input and adds any novel results to {@code extracted}.
 * Only fires when the governing verb implies the truth of its complement.
 */
private void extractComplementClauses(Collection<Question> extracted, Question input) {
    //TODO should also address infinitive complements
    // Match an SBAR whose S child is not a participial phrase, that is not
    // inside an NP or PP, and that follows a verb (directly or via nested SBARs).
    String tregexOpStr = "SBAR " + " < (S=sub !< (VP < VBG)) "
            + " !> NP|PP "
            + " [ $- /^VB.*/=verb | >+(SBAR) (SBAR $- /^VB.*/=verb) ] ";
    TregexPattern matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    TregexMatcher matcher = matchPattern.matcher(input.getIntermediateTree());

    while (matcher.find()) {
        Tree newRoot = factory.newTreeNode("ROOT", new ArrayList<Tree>());
        Tree complementClause = matcher.getNode("sub");
        Tree governingVerb = matcher.getNode("verb");
        String verbLemma = AnalysisUtilities.getInstance().getLemma(governingVerb.yield().toString(),
                governingVerb.label().toString());

        // Skip verbs (e.g., "said") that do not entail their complement.
        if (!verbImpliesComplement(verbLemma)) {
            continue;
        }

        newRoot.addChild(complementClause.deeperCopy());
        AnalysisUtilities.addPeriodIfNeeded(newRoot);
        addQuotationMarksIfNeeded(newRoot);

        Question extractedQuestion = input.deeperCopy();
        extractedQuestion.setIntermediateTree(newRoot);
        if (GlobalProperties.getComputeFeatures()) {
            extractedQuestion.setFeatureValue("extractedFromFiniteClause", 1.0); //old feature name
            extractedQuestion.setFeatureValue("extractedFromComplementClause", 1.0);
        }
        if (GlobalProperties.getDebug()) {
            System.err.println("extractComplementClauses: " + newRoot.toString());
        }
        addIfNovel(extracted, extractedQuestion);
    }
}

From source file:edu.cmu.ark.SentenceSimplifier.java

License:Open Source License

/**
 * Command-line entry point.  Reads plain text (or a single parse tree, with
 * --tree-input) from standard input and prints simplified factual statements
 * on standard output, one per line.
 *
 * @param args command-line flags; run with --help for the full list
 */
public static void main(String[] args) {
    SentenceSimplifier ss = new SentenceSimplifier();
    boolean doMobyDickParse = false;  // demo mode: simplify a built-in parse and exit
    boolean treeInput = false;        // stdin holds a bracketed parse tree, not raw text
    boolean verbose = false;          // also print source yield and feature string per output
    String propertiesFile = "config" + File.separator + "factual-statement-extractor.properties";

    // Parse command-line flags; unknown flags are silently ignored.
    for (int i = 0; i < args.length; i++) {
        if (args[i].equals("--debug")) {
            GlobalProperties.setDebug(true);
        } else if (args[i].equals("--properties")) {
            // NOTE(review): assumes a value follows; no bounds check on i + 1.
            propertiesFile = args[i + 1];
        } else if (args[i].equals("--moby")) {
            doMobyDickParse = true;
        } else if (args[i].equals("--break-nps")) {
            ss.setBreakNPs(true);
        } else if (args[i].equals("--comp")) {
            ss.setExtractFromVerbComplements(true);
        } else if (args[i].equals("--tree-input")) {
            treeInput = true;
        } else if (args[i].equals("--verbose")) {
            verbose = true;
            GlobalProperties.setComputeFeatures(true);
        } else if (args[i].equals("--help") || args[i].equals("-help") || args[i].equals("-h")) {
            String helpStr = "A program for extracting simplified factual statements"
                    + " from complex sentences.\n"
                    + "Written by Michael Heilman (mheilman@cmu.edu), March 2010.\n"
                    + "The program takes plain text on standard input and prints output"
                    + " on standard output.  The following options are available:\n\n"
                    + " --debug\t\tprint debugging output to standard error.\n"
                    + " --properties PATH\tload settings from the properties file at PATH\n"
                    + " --break-nps\t\tsplit conjunctions of noun phrases (disabled by default)\n"
                    + " --comp\t\t\textract from verb complements (disabled by default)\n\n";
            System.err.println(helpStr);
            System.exit(0);
        }
    }

    GlobalProperties.loadProperties(propertiesFile);

    // Demo mode: simplify a hard-coded parse of a Moby-Dick sentence, then exit.
    if (doMobyDickParse) {
        String parseStr = "(ROOT (S (SBAR (IN As) (S (NP (PRP they)) (VP (VBD narrated) (PP (TO to) (NP (DT each) (NN other))) (NP (NP (PRP$ their) (JJ unholy) (NNS adventures)) (, ,) (NP (NP (PRP$ their) (NNS tales)) (PP (IN of) (NP (NN terror))) (VP (VBN told) (PP (IN in) (NP (NNS words) (PP (IN of) (NP (NN mirth))))))))))) (: ;) (SBAR (IN as) (S (NP (PRP$ their) (JJ uncivilized) (NN laughter)) (VP (VBD forked) (ADVP (RB upwards)) (PP (IN out) (IN of) (NP (PRP them))) (, ,) (PP (IN like) (NP (NP (DT the) (NNS flames)) (PP (IN from) (NP (DT the) (NN furnace)))))))) (: ;) (SBAR (IN as) (S (ADVP (TO to) (CC and) (RB fro)) (, ,) (PP (IN in) (NP (PRP$ their) (NN front))) (, ,) (NP (DT the) (NNS harpooneers)) (VP (ADVP (RB wildly)) (VBD gesticulated) (PP (IN with) (NP (PRP$ their) (JJ huge) (NP (JJ pronged) (NNS forks)) (CC and) (NNS dippers)))))) (: ;) (SBAR (IN as) (S (S (NP (DT the) (NN wind)) (VP (VBD howled) (PRT (RP on)))) (, ,) (CC and) (S (NP (DT the) (NN sea)) (VP (VBD leaped))) (, ,) (CC and) (S (NP (DT the) (NN ship)) (VP (VP (VBD groaned) (CC and) (VBD dived)) (, ,) (CC and) (RB yet) (VP (VP (ADVP (RB steadfastly)) (VBD shot) (NP (PRP$ her) (JJ red) (NN hell)) (ADVP (RBR further) (CC and) (RBR further)) (PP (IN into) (NP (NP (DT the) (NN blackness)) (PP (IN of) (NP (NP (DT the) (NN sea)) (CC and) (NP (DT the) (NN night))))))) (, ,) (CC and) (VP (ADVP (RB scornfully)) (VBD champed) (NP (NP (DT the) (JJ white) (NN bone)) (PP (IN in) (PRP$ her) (NP (NN mouth))))) (, ,) (CC and) (VP (ADVP (RB viciously)) (VBD spat) (PP (IN round) (NP (PRP her))) (PP (IN on) (NP (DT all) (NNS sides))))))))) (: ;) (ADVP (RB then)) (NP (NP (DT the) (JJ rushing) (NNP Pequod)) (, ,) (VP (VP (VBN freighted) (PP (IN with) (NP (NN savages)))) (, ,) (CC and) (VP (VBN laden) (PP (IN with) (NP (NN fire)))) (, ,) (CC and) (VP (VBG burning) (NP (DT a) (NN corpse))) (, ,) (CC and) (VP (VBG plunging) (PP (IN into) (NP (NP (DT that) (NN blackness)) (PP (IN of) (NP (NN darkness))))))) (, ,)) 
(VP (VBD seemed) (NP (NP (DT the) (JJ material) (NN counterpart)) (PP (IN of) (NP (NP (PRP$ her) (JJ monomaniac) (NN commander) (POS 's)) (NN soul))))) (. .)))";
        Tree mobyParse = AnalysisUtilities.getInstance().readTreeFromString(parseStr);
        Collection<Question> output = ss.simplify(mobyParse);
        for (Question q : output) {
            System.out.println(AnalysisUtilities.getCleanedUpYield(q.getIntermediateTree()));
            //System.out.println(q.findLogicalWordsAboveIntermediateTree());
        }
        System.exit(0);
    }

    String buf, doc;
    Tree parsed;

    // Prompt for the first piece of input when debugging.
    if (GlobalProperties.getDebug())
        System.err.println("Enter sentence: ");
    try {
        BufferedReader br = new BufferedReader(new InputStreamReader(System.in));

        // Tree-input mode: read all of stdin as one bracketed tree, simplify, exit.
        if (treeInput) {
            doc = "";
            buf = br.readLine();
            doc += buf + " ";
            while (br.ready()) {
                buf = br.readLine();
                if (buf == null)
                    break;
                doc += buf;
            }
            Tree t = AnalysisUtilities.getInstance().readTreeFromString(doc);
            for (Question q : ss.simplify(t)) {
                System.out.println(AnalysisUtilities.getCleanedUpYield(q.getIntermediateTree()));
            }
            System.exit(0);
        }

        // Text mode: each iteration consumes one "document" (a first line plus
        // whatever further lines are immediately available on stdin).
        while (true) {
            doc = "";
            buf = "";

            buf = br.readLine();
            if (buf == null) {
                break;  // end of input stream
            }
            doc += buf;

            // Drain any additional buffered lines into the same document.
            while (br.ready()) {
                buf = br.readLine();
                if (buf == null) {
                    break;
                }
                doc += buf + " ";
            }
            if (doc.length() == 0) {
                break;
            }

            long startTime = System.currentTimeMillis();

            List<String> sentences = AnalysisUtilities.getSentences(doc);

            //iterate over each segmented sentence and generate questions
            List<Question> output = new ArrayList<Question>();

            for (String sentence : sentences) {
                parsed = AnalysisUtilities.getInstance().parseSentence(sentence).parse;
                // NOTE(review): these two debug labels look swapped — "input"
                // prints the parsed tree's yield while "parse" prints the raw
                // sentence string; confirm the intended pairing.
                if (GlobalProperties.getDebug())
                    System.err.println("input: " + parsed.yield().toString());
                if (GlobalProperties.getDebug())
                    System.err.println("parse: " + sentence.toString());
                //if no parse, print the original sentence
                // (presumably a failed parse yields only "."; verify against parseSentence)
                if (parsed.yield().toString().equals(".")) {
                    System.out.print(sentence);
                    if (verbose)
                        System.out.print("\t" + sentence);
                    System.out.println();
                    continue;
                }

                output.clear();
                output.addAll(ss.simplify(parsed));
                for (Question q : output) {
                    System.out.print(AnalysisUtilities.getCleanedUpYield(q.getIntermediateTree()));
                    if (verbose)
                        System.out.print("\t" + AnalysisUtilities.getCleanedUpYield(q.getSourceTree()));
                    if (verbose)
                        System.out.print("\t" + simplificationFeatureString(q));
                    //System.out.println(q.findLogicalWordsAboveIntermediateTree());
                    System.out.println();
                }
            }

            System.err.println("Seconds Elapsed:\t" + ((System.currentTimeMillis() - startTime) / 1000.0));
            //prompt for another piece of input text 
            if (GlobalProperties.getDebug())
                System.err.println("\nInput Text:");
        }

    } catch (Exception e) {
        e.printStackTrace();
    }

}

From source file:elkfed.mmax.importer.ImportOntonotes.java

License:Apache License

/**
 * Imports one OntoNotes document into a MiniDiscourse.  Reads three parallel
 * files sharing the prefix {@code fname}: {@code .coref} (coreference SGML),
 * {@code .name} (named-entity SGML) and {@code .parse} (Penn Treebank trees),
 * advancing them in lockstep so that tokens, entity tags and parse trees stay
 * aligned sentence by sentence.
 *
 * @param fname path prefix of the document files (without extension)
 * @return the assembled MiniDiscourse document
 * @throws RuntimeException if any of the underlying files cannot be read
 */
public MiniDiscourse importFile(String fname) {
    try {
        boolean had_space = true;
        // Workaround for data written by a buggy exporter; enabled via system property.
        boolean need_bugfix = System.getProperty("elkfed.BuggyOntonotes", "no").matches("y|yes|true");
        List<Tag> names_stack = new ArrayList<Tag>();
        // Maps OntoNotes coref-chain IDs to small integers; index 0 is a reserved dummy.
        Alphabet<String> sets = new Alphabet<String>();
        sets.lookupIndex("*DUMMY*");
        int sent_id = 0;
        Tag sentence_tag = null;
        OntonotesReader reader = new OntonotesReader(new File(fname + ".coref"));
        OntonotesReader readerNE = new OntonotesReader(new File(fname + ".name"));
        TreeReader tr = new PennTreeReader(new FileReader(fname + ".parse"), new LabeledScoredTreeFactory(),
                new BobChrisTreeNormalizer());
        Tree tree = null;
        int eventType = reader.getNextEvent();
        boolean in_text = false;
        do {
            if (eventType == OntonotesReader.START_TAG && "COREF".equals(reader.getName())) {
                // Opening coreference mention: push a tag, record its chain id.
                Tag t;
                if (need_bugfix) {
                    t = buggy_push_tag("coref", tag_stack);
                } else {
                    t = push_tag("coref");
                }
                if ("IDENT".equals(reader.getAttribute("TYPE"))) {
                    t.attrs.put("coref_set", "set_" + sets.lookupIndex(reader.getAttribute("ID")));
                }
                had_space = true;
            } else if (eventType == OntonotesReader.END_TAG && "COREF".equals(reader.getName())) {
                // Closing coreference mention: derive its minimal span from the parse.
                Tag t = pop_tag("coref");
                DetermineMinSpan.addMinSpan(sentence_tag.start, tree, t, tokens);
                had_space = true;
            } else if (in_text && eventType == OntonotesReader.TOKEN) {
                if (!reader.isTrace()) {
                    // Advance the names file to its next token so both files stay aligned,
                    // opening/closing any ENAMEX tags encountered on the way.
                    int names_event = readerNE.getNextEvent();
                    while (names_event != OntonotesReader.TOKEN) {
                        if (names_event == OntonotesReader.START_TAG && "ENAMEX".equals(readerNE.getName())) {
                            Tag t = push_tag("enamex", names_stack);
                            t.attrs.put("tag", readerNE.getAttribute("TYPE"));
                        } else if (names_event == OntonotesReader.END_TAG
                                && "ENAMEX".equals(readerNE.getName())) {
                            Tag t = pop_tag("enamex", names_stack);
                        } else {
                            throw new IllegalStateException("Unexpected event:" + names_event);
                        }
                        names_event = readerNE.getNextEvent();
                    }
                    // Both files must agree on the surface token at this position.
                    assert (reader.getToken().equals(readerNE.getToken()));
                    String tok = reader.getToken();
                    // Replace PTB bracket escapes with the literal characters.
                    if (tok.equals("-LRB-"))
                        tok = "(";
                    if (tok.equals("-RRB-"))
                        tok = ")";
                    if (tok.equals("-LSB-"))
                        tok = "[";
                    if (tok.equals("-RSB-"))
                        tok = "]";
                    if (tok.equals("-LCB-"))
                        tok = "{";
                    if (tok.equals("-RCB-"))
                        tok = "}";
                    add_token(tok);
                }
            } else if (in_text && eventType == OntonotesReader.NEWLINE) {
                // Newline = sentence boundary: close the current sentence, attach its
                // parse, sync the names file past the boundary, start a new sentence.
                if (sentence_tag != null) {
                    sentence_tag.end = tokens.size() - 1;
                    if (sentence_tag.end >= sentence_tag.start) {
                        tags.add(sentence_tag);
                        if (tree != null) {
                            Tag parse_tag = new Tag();
                            parse_tag.tag = "parse";
                            parse_tag.start = sentence_tag.start;
                            parse_tag.end = sentence_tag.end;
                            parse_tag.attrs.put("tag", tree.toString());
                            tags.add(parse_tag);
                            // Sentence token span must match the parse's yield length.
                            assert sentence_tag.end - sentence_tag.start + 1 == tree.yield().size() : String
                                    .format("%s / %s", tokens.subList(sentence_tag.start, sentence_tag.end + 1),
                                            tree.yield());
                            addParseInfo(sentence_tag.start, tree);
                        }
                    }
                }
                // process up to end of sentence in names annotation
                int names_event = readerNE.getNextEvent();
                while (names_event != OntonotesReader.NEWLINE) {
                    if (names_event == OntonotesReader.START_TAG && "ENAMEX".equals(readerNE.getName())) {
                        Tag t = push_tag("enamex", names_stack);
                        t.attrs.put("tag", readerNE.getAttribute("TYPE"));
                    } else if (names_event == OntonotesReader.END_TAG && "ENAMEX".equals(readerNE.getName())) {
                        Tag t = pop_tag("enamex", names_stack);
                    } else if (names_event == OntonotesReader.END_TAG && "DOC".equals(readerNE.getName())) {
                        // ignore
                    } else {
                        throw new IllegalStateException(
                                "Unexpected event:" + readerNE.describeEvent(names_event));
                    }
                    names_event = readerNE.getNextEvent();
                }
                // prepare new parse and sentence
                sentence_tag = new Tag();
                sentence_tag.start = tokens.size();
                sentence_tag.tag = "sentence";
                sentence_tag.attrs.put("orderid", "" + sent_id++);
                tree = tr.readTree();
            } else if (eventType == OntonotesReader.END_TAG && "DOCNO".equals(reader.getName())) {
                // End of the document-number header: real text starts here.
                in_text = true;
                // go to the end of the DOCNO part in name doc
                // NOTE(review): the condition tests reader.getName(), not
                // readerNE.getName() — since reader is already at END_TAG DOCNO
                // this loop stops at the first END_TAG in the names file
                // regardless of its name; confirm this is intentional.
                int names_event = readerNE.getNextEvent();
                while (names_event != OntonotesReader.END_TAG || !"DOCNO".equals(reader.getName())) {
                    names_event = readerNE.getNextEvent();
                }
            } else if (eventType == OntonotesReader.START_TAG && "TURN".equals(reader.getName())) {
                // Speaker turn: must occur in both files, followed by a newline in each.
                int names_event = readerNE.getNextEvent();
                if (names_event != OntonotesReader.START_TAG || !"TURN".equals(readerNE.getName())) {
                    throw new UnsupportedOperationException("TURN in coref but not in names");
                }
                // parse level seems to be inconsistent... so don't check here :-|
                System.err.println("TURN parse:" + tree.toString());
                tree = tr.readTree();
                eventType = reader.getNextEvent();
                names_event = readerNE.getNextEvent();
                if (eventType != OntonotesReader.NEWLINE || names_event != OntonotesReader.NEWLINE) {
                    throw new UnsupportedOperationException("No Newline after TURN");
                }
            }
            eventType = reader.getNextEvent();
        } while (eventType != OntonotesReader.END_DOCUMENT);
        return create();
    } catch (IOException ex) {
        throw new RuntimeException("Cannot read file", ex);
    }

}

From source file:englishparser.EnglishParser.java

/**
 * Depth-first walk of the tree: for each child satisfying isNN, prints its
 * yield via printer_NN and stops descending; otherwise recurses into it.
 */
private static void extractNN(Tree t) {
    for (Tree c : t.children()) {
        if (!isNN(c)) {
            extractNN(c);
        } else {
            printer_NN.print(Sentence.listToString(c.yield()));
        }
    }
}

From source file:englishparser.EnglishParser.java

/**
 * Depth-first walk of the tree: for each child satisfying isNNP, prints its
 * yield via printer_NNP and stops descending; otherwise recurses into it.
 */
private static void extractNNP(Tree t) {
    for (Tree c : t.children()) {
        if (!isNNP(c)) {
            extractNNP(c);
        } else {
            printer_NNP.print(Sentence.listToString(c.yield()));
        }
    }
}

From source file:englishparser.EnglishParser.java

/**
 * Depth-first walk of the tree: prints the yield of each child that is an NP
 * and a lowest-level noun phrase (per isLowestNoun), without descending into
 * it; every other child is recursed into.
 */
private static void extractNP(Tree t) {
    for (Tree child : t.children()) {
        // Idiomatic boolean test (was `isLowestNoun(child) == true`); an NP
        // that is not a lowest noun still falls through to the recursion.
        if (isNP(child) && isLowestNoun(child)) {
            printer_NP.print(Sentence.listToString(child.yield()));
            continue;
        }
        extractNP(child);
    }
}

From source file:MedArkRef.AnalysisUtilities.java

License:Open Source License

/**
 * Lower-cases the first word of the sentence tree in place, unless the word
 * is tagged as a proper noun (NNP*) or is the pronoun "I".  No-op for null
 * input or a missing first leaf.
 */
public static void downcaseFirstToken(Tree inputTree) {
    if (inputTree == null) {
        return;
    }
    Tree firstLeaf = inputTree.getLeaves().get(0);
    if (firstLeaf == null) {
        return;
    }
    Tree preterminal = firstLeaf.parent(inputTree);
    String token = firstLeaf.yield().toString();
    // Proper nouns keep their capitalization; so does a missing/unlabeled preterminal.
    boolean keepCase = preterminal == null || preterminal.label() == null
            || preterminal.label().toString().matches("^NNP.*");
    if (token != null && !keepCase && !token.equals("I")) {
        firstLeaf.label().setValue(token.substring(0, 1).toLowerCase() + token.substring(1));
    }
}

From source file:opennlp.tools.parse_thicket.opinion_processor.DefaultSentimentProcessor.java

License:Apache License

/**
 * Reads annotations from {@code filename} according to the requested input
 * format: TEXT tokenizes raw text into one single-sentence Annotation per
 * sentence; TREES loads parse trees (optionally gold-labeled and filtered)
 * and wraps each tree in an Annotation.
 */
public static List<Annotation> getAnnotations(StanfordCoreNLP tokenizer, Input inputFormat, String filename,
        boolean filterUnknown) {
    switch (inputFormat) {
    case TEXT: {
        // Tokenize the whole file once, then split into per-sentence annotations.
        Annotation document = new Annotation(IOUtils.slurpFileNoExceptions(filename));
        tokenizer.annotate(document);
        List<Annotation> result = Generics.newArrayList();
        for (CoreMap sent : document.get(CoreAnnotations.SentencesAnnotation.class)) {
            Annotation single = new Annotation(sent.get(CoreAnnotations.TextAnnotation.class));
            single.set(CoreAnnotations.SentencesAnnotation.class, Collections.singletonList(sent));
            result.add(single);
        }
        return result;
    }
    case TREES: {
        // Load trees either with gold sentiment labels (filtering unlabeled
        // roots) or verbatim from a treebank file.
        List<Tree> parsedTrees;
        if (filterUnknown) {
            parsedTrees = SentimentUtils.readTreesWithGoldLabels(filename);
            parsedTrees = SentimentUtils.filterUnknownRoots(parsedTrees);
        } else {
            parsedTrees = Generics.newArrayList();
            MemoryTreebank treebank = new MemoryTreebank("utf-8");
            treebank.loadPath(filename, null);
            for (Tree t : treebank) {
                parsedTrees.add(t);
            }
        }

        List<Annotation> result = Generics.newArrayList();
        for (Tree t : parsedTrees) {
            // The sentence text is the tree's yield; the tree itself rides along.
            CoreMap sent = new Annotation(listToString(t.yield()));
            sent.set(TreeCoreAnnotations.TreeAnnotation.class, t);
            Annotation wrapper = new Annotation("");
            wrapper.set(CoreAnnotations.SentencesAnnotation.class, Collections.singletonList(sent));
            result.add(wrapper);
        }
        return result;
    }
    default:
        throw new IllegalArgumentException("Unknown format " + inputFormat);
    }
}

From source file:pltag.parser.performance.EvalbImpl.java

License:Open Source License

/**
 * Scores a guess tree against a gold tree.  Returns false without scoring
 * when either tree is null; otherwise delegates to the four-argument
 * evaluate with unit weight and returns true.
 */
public boolean evaluate(Tree guess, Tree gold, String exampleName) {
    if (gold == null || guess == null) {
        return false;
    }
    // NOTE: a warning for differing yield lengths used to be logged here but
    // is currently disabled; mismatched trees are still scored.
    evaluate(guess, gold, null, 1.0);
    return true;
}

From source file:pltag.parser.semantics.discriminative.ExtractFeatures.java

License:Open Source License

/**
 * Extracts incremental discriminative features from the current prefix tree
 * and analysis tree (both given as bracketed strings) and stores them on
 * {@code analysis}: right-branching counts, "heavy" constituent features,
 * left-neighbour category features, coordination (coPar/coLenPar) features,
 * and word-plus-ancestor (L2/L3) and integration-point features.
 *
 * @param inputTree          bracketed string of the full parse prefix
 * @param stringAnalysisTree bracketed string of the current analysis tree
 * @param endOfSent          whether the current word ends the sentence
 * @param analysis           receives the computed feature values
 * @param featureIndexers    maps feature strings/categories/words to integer ids
 * @param train              whether unseen values may be added to the indexers
 */
private void extractStringTreeAnalysisFeatures(String inputTree, String stringAnalysisTree, boolean endOfSent,
        IncrementalAnalysis analysis, DiscriminativeFeatureIndexers featureIndexers, boolean train) {
    //        System.out.println(inputTree);        
    Tree tree = Tree.valueOf(inputTree);
    List<Tree> leaves = tree.getLeaves();
    // The most recently added word is the rightmost leaf.
    Tree currentWord = leaves.get(leaves.size() - 1);
    int currentWordIndex = featureIndexers.getWordIndex(currentWord.nodeString(), train);
    // right branch (2): 1. Count number of nodes from root to rightmost non-terminal, 2. Count rest nodes
    // compute domination path from root to rightmost leaf. Subtract 2 from size to exclude root and pre-terminal
    int pathSize = tree.dominationPath(currentWord.parent(tree)).size();
    analysis.setRightBranchSpine(pathSize > 2 ? pathSize - 2 : 0);
    int rest = tree.size() - analysis.getRightBranchSpine() - leaves.size() * 2 - 1;
    analysis.setRightBranchRest(rest > 0 ? rest : 0); // Subtract the number of terminals, pre-terminals (leaves.size()*2) and root symbol

    // get list of rightmost complete non-terminals. We don't compute the whole list of complete non-terminals, but only the ones that have been just completed,
    // hence lie at the rightmost position of the tree. Since we compute the features piecemeal, by the end of the sentence we will have computed all complete
    // non-terminals, depending on the training strategy. Used for features: heavy, neighbours, and edges
    Tree analysisTree = Tree.valueOf(stringAnalysisTree);
    analysisTree.indexLeaves();
    List<Tree> complete = getListOfRightMostCompleteNonTerminals(analysisTree);
    String[] heavyStr = new String[complete.size()];
    String[] neighboursL1Str = new String[complete.size()];
    String[] neighboursL2Str = new String[complete.size()];
    int i = 0;
    for (Tree subTree : complete) {
        // heavy feature: category + bucketed yield size (capped at "5+") + end-of-sentence flag
        int categoryIndex = featureIndexers.getCategoryIndex(subTree.nodeString(), train);
        List<Label> yield = subTree.yield();
        String yieldSize = yield.size() > 5 ? "5+" : String.valueOf(yield.size());
        heavyStr[i] = String.format("%s %s %s", categoryIndex, yieldSize, endOfSent ? "y" : "n");
        // neighbours l1, l2 features: categories of the pre-terminals of the one
        // and two words preceding this constituent's leftmost leaf (leaf indices
        // are 1-based, hence the -2/-3 offsets); "SoS" marks start of sentence.
        int leftmostLeafId = ((CoreLabel) yield.get(0)).index();
        if (leftmostLeafId > 1) {
            int l1CategoryId = featureIndexers
                    .getCategoryIndex(leaves.get(leftmostLeafId - 2).parent(tree).nodeString(), train);
            if (leftmostLeafId > 2) {
                neighboursL1Str[i] = String.format("%s %s %s", categoryIndex, yieldSize, l1CategoryId);
                int l2CategoryId = featureIndexers
                        .getCategoryIndex(leaves.get(leftmostLeafId - 3).parent(tree).nodeString(), train);
                neighboursL2Str[i] = String.format("%s %s %s %s", categoryIndex, yieldSize, l2CategoryId,
                        l1CategoryId);
            } else {
                neighboursL2Str[i] = String.format("%s %s SoS %s", categoryIndex, yieldSize, l1CategoryId);
            }
        } else // leftmost leaf is at the beginning of the sentence
        {
            neighboursL1Str[i] = String.format("%s %s SoS", categoryIndex, yieldSize);
            neighboursL2Str[i] = String.format("%s %s SoS SoS", categoryIndex, yieldSize);
        }

        // coPar and coLenPar features: detect coordination patterns among this
        // constituent's children and record parallelism features for the conjuncts.
        Tree[] children = subTree.children();
        if (children.length > 2) {
            // found structure: (X (A ...) (CC and/or) (B ...))
            if (children.length == 3 && children[1].nodeString().startsWith("CC")) {
                analysis.setCoPar(getCoParString(children[0], children[2]), featureIndexers, train);
                analysis.setCoLenPar(getCoLenParString(children[0], children[2], endOfSent), featureIndexers,
                        train);
            }
            // found structure ((CC either) (A ...) (CC or) (B...))
            else if (children.length == 4 && children[0].nodeString().startsWith("CC")
                    && children[2].nodeString().startsWith("CC")) {
                analysis.setCoPar(getCoParString(children[1], children[3]), featureIndexers, train);
                analysis.setCoLenPar(getCoLenParString(children[1], children[3], endOfSent), featureIndexers,
                        train);
            }
            // found structure ((A ...) (, ,) (CC but) (B...))
            else if (children.length == 4 && children[1].nodeString().equals(",")
                    && children[2].nodeString().startsWith("CC")) {
                analysis.setCoPar(getCoParString(children[0], children[3]), featureIndexers, train);
                analysis.setCoLenPar(getCoLenParString(children[0], children[3], endOfSent), featureIndexers,
                        train);
            }
        }
        i++;
    }
    analysis.setHeavy(heavyStr, featureIndexers, train);
    analysis.setNeighboursL1(neighboursL1Str, featureIndexers, train);
    analysis.setNeighboursL2(neighboursL2Str, featureIndexers, train);

    // compute word + L=2 ancestor nodes, L=3 ancestor nodes
    Tree preTerminal = currentWord.parent(tree);
    Tree wordL2 = preTerminal.parent(tree);
    if (wordL2 != null) {
        int preTerminalIndex = featureIndexers.getCategoryIndex(preTerminal.nodeString(), train);
        int wordL2Index = featureIndexers.getCategoryIndex(wordL2.nodeString(), train);
        analysis.setWordL2(String.format("%s %s %s", currentWordIndex, preTerminalIndex, wordL2Index),
                featureIndexers, train);
        Tree wordL3 = wordL2.parent(tree);
        if (wordL3 != null) {
            analysis.setWordL3(String.format("%s %s %s %s", currentWordIndex, preTerminalIndex, wordL2Index,
                    featureIndexers.getCategoryIndex(wordL3.nodeString())), featureIndexers, train);
        }
    }

    // get integration point + elem tree (Parent-emulation feature)
    analysis.setIpElemTree(String.format("%s,%s", analysis.getIntegrationPoint(), analysis.getElemTree()),
            featureIndexers, train);
    analysis.setIpElemTreeUnlex(
            String.format("%s,%s", analysis.getIntegrationPoint(), analysis.getElemTreeUnlex()),
            featureIndexers, train);
}