Example usage for edu.stanford.nlp.trees.Tree#preTerminalYield()

List of usage examples for edu.stanford.nlp.trees.Tree#preTerminalYield()

Introduction

On this page you can find example usages of the preTerminalYield() method of edu.stanford.nlp.trees.Tree.

Prototype

public List<Label> preTerminalYield() 

Source Link

Document

Gets the preterminal yield (i.e., tags) of the tree.

Usage

From source file: com.mycompany.stanlp.ChildSpeech.java

/**
 * Reads rows from a CSV file, parses one text column of each row with Stanford CoreNLP and
 * writes every verb phrase that dominates the row's target word to {@code out.csv}.
 *
 * <p>Input columns used: column 0 is the target word, column 4 is the key the row is stored
 * under (a later row with the same key replaces an earlier one), column 5 is the text that is
 * annotated. Rows with fewer than six columns are skipped.
 *
 * <p>Each output line is: key, target word, full sentence tree, the {@code &}-terminated list
 * of preterminal labels, and the matched subtree.
 *
 * @param args the command line arguments (not used)
 * @throws IOException if {@code out.csv} cannot be opened for writing
 */
public static void main(String[] args) throws IOException {
    String csvFile = "C:\\Users\\steve\\Downloads\\verbsome.csv";
    String cvsSplitBy = ",";

    // try-with-resources: out.csv is closed even when annotation or pattern compilation throws.
    try (PrintWriter pw = new PrintWriter(new File("out.csv"))) {
        StringBuilder sb = new StringBuilder();
        // creates a StanfordCoreNLP object, with POS tagging, lemmatization, NER, parsing, and coreference resolution
        Properties props = new Properties();
        props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
        TreeMap<String, String[]> tm = new TreeMap<String, String[]>();

        // A missing or unreadable input file is reported and processing continues with
        // whatever rows were read so far (possibly none), as before.
        try (BufferedReader br = new BufferedReader(new FileReader(csvFile))) {
            String line;
            while ((line = br.readLine()) != null) {
                System.out.println("reached");
                String[] fields = line.split(cvsSplitBy);
                // split() drops trailing empty columns, so a sparse row can be shorter than
                // expected; skip it instead of failing with ArrayIndexOutOfBoundsException.
                if (fields.length < 6) {
                    System.err.println("Skipping row with fewer than 6 columns: " + line);
                    continue;
                }
                String[] input = new String[2];
                input[0] = fields[0];
                input[1] = fields[5];
                tm.put(fields[4], input);
            }
        } catch (IOException e) {
            // Also covers FileNotFoundException and failures while closing the reader.
            e.printStackTrace();
        }

        for (Map.Entry<String, String[]> entry : tm.entrySet()) {
            String[] value = entry.getValue();
            Annotation document = new Annotation(value[1]);

            pipeline.annotate(document);
            List<CoreMap> sentences = document.get(SentencesAnnotation.class);

            for (CoreMap sentence : sentences) {
                for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
                    // this is the text of the token
                    String word = token.get(TextAnnotation.class);
                    if (!word.equals(value[0])) {
                        continue;
                    }
                    Tree tree = sentence.get(TreeAnnotation.class);
                    // VP headed by VB/VBP/VBD that dominates the target word.
                    // NOTE(review): the word is concatenated into the pattern unescaped, so a
                    // word containing Tregex metacharacters will change or break the pattern.
                    TregexPattern patternMW = TregexPattern
                            .compile(" VP  [ <# VB | <# VBP | <# VBD] & <<" + value[0]);
                    TregexMatcher matcher = patternMW.matcher(tree);
                    while (matcher.findNextMatchingNode()) {
                        Tree match = matcher.getMatch();
                        // NOTE(review): fields are written unquoted; a tree containing ','
                        // will produce extra CSV columns.
                        sb.append(entry.getKey());
                        sb.append(",");
                        sb.append(value[0]);
                        sb.append(",");
                        sb.append(tree.toString());
                        sb.append(",");
                        // NOTE(review): when the matched VP has exactly one preterminal, the
                        // labels of the whole sentence tree are written instead of the match's
                        // single label; confirm this fallback is intended.
                        Tree tagSource = match.preTerminalYield().size() == 1 ? tree : match;
                        for (Label l : tagSource.preTerminalYield()) {
                            sb.append(l.toString());
                            sb.append("&");
                        }
                        sb.append(",");
                        sb.append(match.toString());
                        sb.append('\n');
                    }
                }
            }
        }
        pw.write(sb.toString());
    }
}

From source file: elkfed.expletives.ExpletiveInstance.java

License:Apache License

/**
 * Creates an instance for {@code pronoun} within the tree {@code root}.
 *
 * <p>The leaves of {@code root} and their preterminal labels are copied into parallel string
 * arrays, and the index of the pronoun's leaf among those leaves is recorded. If
 * {@code pronoun} is not itself a leaf, the first leaf it dominates is used.
 *
 * @param root    tree whose leaves provide the word and POS context
 * @param pronoun node inside {@code root} identifying the pronoun
 * @param id      identifier stored with this instance
 */
public ExpletiveInstance(Tree root, Tree pronoun, String id) {
    _root = root;
    _pronoun = pronoun;
    _id = id;

    final List<Tree> leaves = root.getLeaves();
    final List<Label> labels = root.preTerminalYield();
    final int count = leaves.size();

    // Parallel arrays: surface form and POS label for every leaf of the tree.
    final String[] surface = new String[count];
    final String[] tags = new String[count];

    if (!root.dominates(pronoun)) {
        System.err.format("%s does not dominate %s. WTF?", root, pronoun);
    }

    // Node still being searched for; narrowed to a leaf once one is found under it.
    Tree target = pronoun;
    for (int i = 0; i < count; i++) {
        final Tree leaf = leaves.get(i);
        final Label label = labels.get(i);
        surface[i] = leaf.toString();
        tags[i] = label.value();
        if (leaf == target || target.dominates(leaf)) {
            _idx = i;
            target = leaf;
        }
    }

    assert _idx >= 0 : String.format("wanted %s in %s", target, root);
    assert tags[_idx].equals("PRP") : String.format("wanted PRP got '%s'", tags[_idx]);
    _words = surface;
    _pos = tags;
}

From source file: elkfed.mmax.importer.ImportOntonotes.java

License:Apache License

/**
 * Adds chunk (NP) and part-of-speech tags for one parsed sentence to {@code tags}.
 *
 * @param start offset added to a word's index within the sentence to obtain the tag position
 * @param tree  parse tree of the sentence
 */
private void addParseInfo(int start, Tree tree) {
    // Retrieve chunk tags from the bracketed parse string and add chunk tags.
    // A space is inserted after every ')' so that each closing bracket ends a token.
    boolean inNP = false;
    int startNP = -1;
    int wordLoc = 0;
    int depth = 0;
    for (String tok : tree.toString().replace(")", ") ").split("\\s+")) {
        // Plain string checks instead of String.matches, which recompiled a regex per token.
        final boolean opens = tok.startsWith("(");
        final boolean closes = tok.endsWith(")");

        if (tok.equals("(NP")) {
            // A nested "(NP" restarts tracking, so the innermost NP is the one emitted.
            inNP = true;
            startNP = wordLoc;
            depth = 0;
        }
        if (inNP) {
            if (closes) {
                depth--;
            }
            if (opens) {
                depth++;
            }
        }
        // "word)" closes a preterminal and therefore counts as one word; a bare ")" does not.
        if (closes && tok.length() > 1) {
            wordLoc++;
        }
        if (inNP && depth == 0) {
            inNP = false;
            Tag t = new Tag();
            t.tag = DEFAULT_CHUNK_LEVEL;
            t.attrs.put("tag", "np");
            t.start = start + startNP;
            t.end = start + wordLoc - 1;
            tags.add(t);
        }
    }

    // Retrieve POS tags from the parse tree: one single-word tag per preterminal.
    int position = start;
    for (Label label : tree.preTerminalYield()) {
        Tag t = new Tag();
        t.tag = DEFAULT_POS_LEVEL;
        t.start = t.end = position++;
        // Locale.ROOT keeps the result independent of the default locale
        // (e.g. "IN" must not become "ın" under a Turkish locale).
        t.attrs.put("tag", label.value().toLowerCase(java.util.Locale.ROOT));
        tags.add(t);
    }
}

From source file: elkfed.mmax.pipeline.Parser.java

License:Apache License

/**
 * Add parser, part of speech, and chunk markables.
 *
 * <p>For every sentence of {@code currentDocument} this adds the bracketed parse from
 * {@code forest} as a markable on {@code currentLevel}, adds NP chunk markables on
 * {@code chunkLevel}, and appends the sentence's POS labels to {@code posTags}. Afterwards one
 * markable per entry of {@code posTags} is added on {@code posLevel}.
 *
 * @throws IllegalStateException if the sentences of the current document cannot be retrieved
 */
protected void addMarkables() {
    final List<Markable> sentences;
    try {
        sentences = DiscourseUtils.getSentences(currentDocument);
    } catch (Exception mmax2e) {
        // Previously the exception was only printed and the loop below then failed with a
        // NullPointerException; fail here instead, keeping the original cause.
        throw new IllegalStateException("could not retrieve sentences of the current document", mmax2e);
    }

    for (int sentence = 0; sentence < sentences.size(); sentence++) {
        final String parse = forest.get(sentence);

        // Add the parse tree markable
        final Map<String, String> attributes = new HashMap<String, String>(levelAttributes);
        attributes.put(TAG_ATTRIBUTE, parse.replaceAll("&", "&amp;"));
        Markable sent_m = sentences.get(sentence);
        int start = sent_m.getLeftmostDiscoursePosition();
        int end = sent_m.getRightmostDiscoursePosition();
        currentLevel.addMarkable(start, end, attributes);

        // Retrieve chunk tags from the bracketed parse string and add chunk markables.
        // A space is inserted after every ')' so that each closing bracket ends a token.
        boolean inNP = false;
        int startNP = -1;
        int wordLoc = 0;
        int depth = 0;
        for (String tok : parse.replace(")", ") ").split("\\s+")) {
            // Plain string checks instead of String.matches, which recompiled a regex per token.
            final boolean opens = tok.startsWith("(");
            final boolean closes = tok.endsWith(")");

            if (tok.equals("(NP")) {
                // A nested "(NP" restarts tracking, so the innermost NP is the one emitted.
                inNP = true;
                startNP = wordLoc;
                depth = 0;
            }
            if (inNP) {
                if (closes) {
                    depth--;
                }
                if (opens) {
                    depth++;
                }
            }
            // "word)" closes a preterminal and therefore counts as one word; a bare ")" does not.
            if (closes && tok.length() > 1) {
                wordLoc++;
            }
            if (inNP && depth == 0) {
                inNP = false;
                final Map<String, String> cAttributes = new HashMap<String, String>(chunkAttributes);
                cAttributes.put(TAG_ATTRIBUTE, "np");
                //TODO: check if it's not start+wordLoc-1 ?
                chunkLevel.addMarkable(start + startNP, start + wordLoc - 1, cAttributes);
            }
        }

        // Create a tree object from the current sentence and collect its POS labels
        final Tree currentTree = Tree.valueOf(parse);
        for (Label label : currentTree.preTerminalYield()) {
            posTags.add(label.value());
        }
    }

    // Add POS tag markables; the index into posTags is used as the discourse position.
    for (int pos = 0; pos < posTags.size(); pos++) {
        final HashMap<String, String> attributes = new HashMap<String, String>(posAttributes);
        // Locale.ROOT keeps the result independent of the default locale
        // (e.g. "IN" must not become "ın" under a Turkish locale).
        attributes.put(TAG_ATTRIBUTE, posTags.get(pos).toLowerCase(java.util.Locale.ROOT));
        posLevel.addMarkable(pos, pos, attributes);
    }
}