List of usage examples for edu.stanford.nlp.trees Tree preTerminalYield
public List<Label> preTerminalYield()
From source file: com.mycompany.stanlp.ChildSpeech.java
/** * @param args the command line arguments *///www .ja va 2s . com public static void main(String[] args) throws IOException { PrintWriter pw = new PrintWriter(new File("out.csv")); StringBuilder sb = new StringBuilder(); // creates a StanfordCoreNLP object, with POS tagging, lemmatization, NER, parsing, and coreference resolution Properties props = new Properties(); props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref"); StanfordCoreNLP pipeline = new StanfordCoreNLP(props); TreeMap<String, String[]> tm = new TreeMap<String, String[]>(); String csvFile = "C:\\Users\\steve\\Downloads\\verbsome.csv"; BufferedReader br = null; String line = ""; String cvsSplitBy = ","; try { br = new BufferedReader(new FileReader(csvFile)); while ((line = br.readLine()) != null) { System.out.println("reached"); String[] country = line.split(cvsSplitBy); String[] input = new String[2]; input[0] = country[0]; input[1] = country[5]; tm.put(country[4], input); } } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { if (br != null) { try { br.close(); } catch (IOException e) { e.printStackTrace(); } } } for (Map.Entry<String, String[]> entry : tm.entrySet()) { String[] value = entry.getValue(); Annotation document = new Annotation(value[1]); pipeline.annotate(document); List<CoreMap> sentences = document.get(SentencesAnnotation.class); for (CoreMap sentence : sentences) { for (CoreLabel token : sentence.get(TokensAnnotation.class)) { // this is the text of the token String word = token.get(TextAnnotation.class); //ArrayList<CoreLabel> al = new ArrayList(); if (word.equals(value[0])) { Tree tree = sentence.get(TreeAnnotation.class); //TregexPattern patternMW = TregexPattern.compile("VP([ >># VB | >># VBP | >># VBD] <<" + value[0] + // ")"); TregexPattern patternMW = TregexPattern .compile(" VP [ <# VB | <# VBP | <# VBD] & <<" + value[0]); TregexMatcher matcher = patternMW.matcher(tree); while 
(matcher.findNextMatchingNode()) { Tree match = matcher.getMatch(); String tempString = tree.toString(); sb.append(entry.getKey()); sb.append(","); sb.append(value[0]); sb.append(","); sb.append(tempString); sb.append(","); if (match.preTerminalYield().size() == 1) { for (Label l : tree.preTerminalYield()) { sb.append(l.toString()); sb.append("&"); } } else { for (Label l : match.preTerminalYield()) { sb.append(l.toString()); sb.append("&"); } } sb.append(","); sb.append(match.toString()); //sb.append(","); //sb.append(token.get(PartOfSpeechAnnotation.class)); sb.append('\n'); } } // this is the POS tag of the token // this is the NER label of the token //String ne = token.get(NamedEntityTagAnnotation.class); } //SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class); } //Map<Integer, edu.stanford.nlp.dcoref.CorefChain> graph = //document.get(CorefChainAnnotation.class); } pw.write(sb.toString()); pw.close(); }
From source file: elkfed.expletives.ExpletiveInstance.java
License:Apache License
public ExpletiveInstance(Tree root, Tree pronoun, String id) { _root = root;//from w ww . ja va 2 s.c o m _pronoun = pronoun; _id = id; List<Tree> wordsT = root.getLeaves(); List<Label> posT = root.preTerminalYield(); // get words and POS into an array so that // we get an idea of the pronoun's surrounding String[] words = new String[wordsT.size()]; String[] pos = new String[wordsT.size()]; if (!root.dominates(pronoun)) { System.err.format("%s does not dominate %s. WTF?", root, pronoun); } for (int here = 0; here < wordsT.size(); here++) { Tree w1 = wordsT.get(here); Label p1 = posT.get(here); words[here] = w1.toString(); pos[here] = p1.value(); if (w1 == pronoun) { _idx = here; } else if (pronoun.dominates(w1)) { _idx = here; pronoun = w1; } } assert _idx >= 0 : String.format("wanted %s in %s", pronoun, root); assert pos[_idx].equals("PRP") : String.format("wanted PRP got '%s'", pos[_idx]); _words = words; _pos = pos; }
From source file: elkfed.mmax.importer.ImportOntonotes.java
License:Apache License
/**
 * Adds POS and chunk information derived from {@code tree} to this
 * importer's {@code tags} list. {@code start} is the discourse position of
 * the sentence's first word; all emitted tag positions are offsets from it.
 */
private void addParseInfo(int start, Tree tree) {
    /** Retrieve chunk tags from the parse tree and add chunk markables */
    // Scans the bracketed tree string token by token: "(NP" opens a chunk,
    // depth tracks unmatched brackets inside it, ")" runs close it, and
    // tokens like "word)" advance the word counter.
    boolean inNP = false;
    int startNP = -1;
    int wordLoc = 0;
    int depth = 0;
    // ")" gets a trailing space so closing brackets split into their own tokens.
    for (String tok : tree.toString().replaceAll("\\)", ") ").split("\\s+")) {
        if (tok.matches("\\(NP")) {
            // NOTE(review): a nested "(NP" resets startNP/depth, so only the
            // innermost of nested NPs survives — confirm this is intended.
            inNP = true;
            startNP = wordLoc;
            depth = 0;
        }
        if ((inNP) && (tok.matches(".*\\)"))) {
            depth--;
        }
        // The "(NP" token itself matches this too, so depth counts the NP's
        // own bracket and returns to 0 exactly when the NP closes.
        if ((inNP) && (tok.matches("\\(.*"))) {
            depth++;
        }
        if (tok.matches(".+\\)")) {
            wordLoc++; // a leaf token like "word)" consumes one word position
        }
        if ((depth == 0) && (inNP)) {
            // NP just closed: emit a chunk markable spanning its words.
            inNP = false;
            Tag t = new Tag();
            t.tag = DEFAULT_CHUNK_LEVEL;
            t.attrs.put("tag", "np");
            t.start = start + startNP;
            t.end = start + wordLoc - 1;
            tags.add(t);
        }
    }
    /** Retrieve POS tags from the parse tree */
    // One single-word POS tag per pre-terminal, lowercased.
    List<Label> taggedSent = new ArrayList<Label>(tree.preTerminalYield());
    for (int i = 0; i < taggedSent.size(); i++) {
        Tag t = new Tag();
        t.tag = DEFAULT_POS_LEVEL;
        t.start = t.end = start + i;
        String tag = taggedSent.get(i).value();
        t.attrs.put("tag", tag.toLowerCase());
        tags.add(t);
    }
}
From source file: elkfed.mmax.pipeline.Parser.java
License:Apache License
/**
 * Add parser, part of speech, and chunk markables for every sentence of the
 * current document. Reads the parse strings from {@code forest} (one per
 * sentence, parallel to the sentence markables), writes parse markables to
 * {@code currentLevel}, chunk markables to {@code chunkLevel}, and collects
 * POS tags into {@code posTags} before emitting them on {@code posLevel}.
 */
protected void addMarkables() {
    final StringBuffer markableBuffer = new StringBuffer();
    List<Markable> sentences = null;
    try {
        sentences = DiscourseUtils.getSentences(currentDocument);
    } catch (Exception mmax2e) {
        // NOTE(review): if this fires, sentences stays null and the loop
        // below throws NPE — confirm failures here are considered fatal.
        mmax2e.printStackTrace();
    }
    for (int sentence = 0; sentence < sentences.size(); sentence++) {
        /** Add the parse tree markables */
        final Map<String, String> attributes = new HashMap<String, String>(levelAttributes);
        // NOTE(review): replaceAll("&", "&") is a no-op as written — this
        // looks like an entity-escaping call garbled in transit; verify
        // against the original source.
        attributes.put(TAG_ATTRIBUTE, forest.get(sentence).replaceAll("&", "&"));
        markableBuffer.setLength(0);
        Markable sent_m = sentences.get(sentence);
        int start = sent_m.getLeftmostDiscoursePosition();
        int end = sent_m.getRightmostDiscoursePosition();
        currentLevel.addMarkable(start, end, attributes);
        /** Retrieve chunk tags from the parse tree and add chunk markables */
        // Token scan over the bracketed parse string: "(NP" opens a chunk,
        // depth tracks its unmatched brackets, "word)" tokens advance the
        // word counter, and the chunk is emitted when depth returns to 0.
        boolean inNP = false;
        int startNP = -1;
        int wordLoc = 0;
        int depth = 0;
        // ")" gets a trailing space so closing brackets become separate tokens.
        for (String tok : forest.get(sentence).replaceAll("\\)", ") ").split("\\s+")) {
            if (tok.matches("\\(NP")) {
                inNP = true;
                startNP = wordLoc;
                depth = 0;
            }
            if ((inNP) && (tok.matches(".*\\)"))) {
                depth--;
            }
            // "(NP" itself matches here too, so its own bracket is counted.
            if ((inNP) && (tok.matches("\\(.*"))) {
                depth++;
            }
            if (tok.matches(".+\\)")) {
                wordLoc++;
            }
            if ((depth == 0) && (inNP)) {
                inNP = false;
                final Map<String, String> cAttributes = new HashMap<String, String>(chunkAttributes);
                markableBuffer.setLength(0);
                cAttributes.put(TAG_ATTRIBUTE, "np");
                //TODO: check if it's not start+wordLoc-1 ?
                chunkLevel.addMarkable(start + startNP, start + wordLoc - 1, cAttributes);
            }
        }
        /** Create a tree object from the current sentence */
        // NOTE(review): this fresh node is immediately overwritten by
        // Tree.valueOf below — the initial allocation is dead.
        Tree currentTree = new LabeledScoredTreeNode();
        currentTree = (LabeledScoredTreeNode) Tree.valueOf(forest.get(sentence));
        /** Retrieve POS tags from the parse tree */
        List<Label> taggedSent = new ArrayList<Label>(currentTree.preTerminalYield());
        for (int i = 0; i < taggedSent.size(); i++) {
            posTags.add(taggedSent.get(i).value());
        }
    }
    /** Add POS tag markables */
    // posTags accumulated across all sentences, so positions here are
    // document-global word indices.
    for (int pos = 0; pos < posTags.size(); pos++) {
        final HashMap<String, String> attributes = new HashMap<String, String>(posAttributes);
        attributes.put(TAG_ATTRIBUTE, posTags.get(pos).toLowerCase());
        posLevel.addMarkable(pos, pos, attributes);
    }
}