Example usage for edu.stanford.nlp.tagger.maxent MaxentTagger tagSentence

Introduction

In this page you can find the example usage for edu.stanford.nlp.tagger.maxent MaxentTagger tagSentence.

Prototype

public List<TaggedWord> tagSentence(List<? extends HasWord> sentence)

Source Link

Document

Returns a new Sentence that is a copy of the given sentence with all the words tagged with their part-of-speech.

Usage

From source file:Dependency.java

public static void main(String[] args) {
    String modelPath = DependencyParser.DEFAULT_MODEL;
    String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";
    Scanner sc = new Scanner(System.in);

    String text = "";
    text = sc.nextLine();/*w w  w  .ja  v  a2 s.  c o m*/
    // while(text!="exit"){

    MaxentTagger tagger = new MaxentTagger(taggerPath);
    DependencyParser parser = DependencyParser.loadFromModelFile(modelPath);

    DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(text));
    for (List<HasWord> sentence : tokenizer) {

        List<TaggedWord> tagged = tagger.tagSentence(sentence);
        Object[] x = tagged.toArray();
        GrammaticalStructure gs = parser.predict(tagged);
        //System.out.println();

        Collection<TypedDependency> s = gs.typedDependenciesCollapsedTree();
        Object[] z = s.toArray();

        System.out.println(tagged.toString());
        String token[] = new String[z.length];
        String pos[] = new String[z.length];
        int k = 0;
        for (Object i : x) {
            String str = i.toString();
            /*String temp0="(.*?)(?=\\/)";
            String temp1="\\/(.*)";
                    
            System.out.println(str);
            Pattern t0 = Pattern.compile("(.*?)(?=\\/)");
            Pattern t1 = Pattern.compile("\\/(.*)");
            Matcher m0 = t0.matcher(str);
            Matcher m1 = t1.matcher(str);*/
            int index = str.lastIndexOf('/');
            token[k] = str.substring(0, index);
            pos[k] = str.substring(index + 1);
            //System.out.println(pos[k]);
            k++;
        }
        String rels[] = new String[z.length];
        String word1[] = new String[z.length];
        String word2[] = new String[z.length];
        int j = 0;
        for (Object i : z) {
            System.out.println(i);
            String temp = i.toString();
            String pattern0 = "(.*)(?=\\()";
            String pattern1 = "(?<=\\()(.*?)(?=-)";
            String pattern2 = "(?<=, )(.*)(?=-)";
            Pattern r0 = Pattern.compile(pattern0);
            Pattern r1 = Pattern.compile(pattern1);
            Pattern r2 = Pattern.compile(pattern2);
            Matcher m0 = r0.matcher(temp);
            Matcher m1 = r1.matcher(temp);
            Matcher m2 = r2.matcher(temp);
            if (m0.find()) {
                rels[j] = m0.group(0);
                //System.out.println(rels[j]);
            }
            if (m1.find()) {
                word1[j] = m1.group(0);
            }
            if (m2.find()) {
                word2[j] = m2.group(0);
            }
            j++;
        }
        //System.out.println(s);
        //Rules for feature extraction.
        //rule1:::::::::::::::::
        //System.out.println("1");
        int[] q = toIntArray(grRecog(rels, "nsubj"));
        //System.out.println("2");
        if (q.length != 0) {
            //System.out.println("3");
            if (posrecog(token, pos, word2[q[0]]).equals("NN")) {
                //System.out.println("4");
                int[] w = toIntArray(grRecog(rels, "compound"));
                //System.out.println("5");
                if (w.length != 0) {
                    System.out.println("6");
                    System.out.println(word1[q[0]] + "," + word2[q[0]] + "," + word2[w[0]]);
                } else {
                    int conj_and_index = compgrRecog(rels, word1, word2, "conj:and", word2[q[0]]);
                    if (conj_and_index != -1) {
                        System.out.println(
                                word1[conj_and_index] + "," + word2[conj_and_index] + "," + word2[q[0]]);
                    } else
                        System.out.println(word1[q[0]] + "," + word2[q[0]]);
                }
            }
            //RULE 2:::::::::::::
            else if (posrecog(token, pos, word1[q[0]]).equals("JJ")) {
                //System.out.println("aaaaa_JJ");
                int a = compgrRecog(rels, word1, word2, "xcomp", word1[q[0]]);
                if (a != -1) {
                    int b = compgrRecog(rels, word1, word2, "dobj", word2[a]);
                    if (b != -1) {
                        int c = compgrRecog(rels, word1, word2, "compound", word2[b]);
                        if (c != -1) {
                            System.out.println(word1[q[0]] + "," + word1[c] + "," + word2[c]);
                        }
                    }
                }
                //RULE 3::::::::::
                else {
                    int b[] = toIntArray(grRecog(rels, "ccomp"));
                    if (b.length != 0) {
                        System.out.println(word1[q[1]] + "," + word2[q[1]] + "," + word1[b[0]]);
                    }

                }
            }
            //RULE 4::::::::::
            else if (posrecog(token, pos, word1[q[0]]).equals("VBZ")) {
                //System.out.println("aaaaa");
                int vbp_dobj_index = compgrRecog(rels, word1, word2, "dobj", word2[q[0]]);
                if (vbp_dobj_index != -1) {
                    System.out.println(word1[vbp_dobj_index] + "," + word2[vbp_dobj_index]);
                } else {
                    int vbp_xcomp_index = compgrRecog(rels, word1, word2, "xcomp", word1[q[0]]);
                    if (vbp_xcomp_index != -1) {

                        System.out.println(word1[vbp_xcomp_index] + "," + word2[vbp_xcomp_index]);
                    } else {
                        int vbp_acomp_index = compgrRecog(rels, word1, word2, "acomp", word1[q[0]]);
                        if (vbp_acomp_index != -1) {

                            System.out.println(
                                    word1[q[0]] + "," + word1[vbp_acomp_index] + "," + word2[vbp_acomp_index]);
                        } else
                            System.out.println(word1[q[0]]);

                    }

                }

            }
            int[] f = toIntArray(grRecog(rels, "amod"));
            if (f.length != 0) {
                for (int i : f) {
                    System.out.println(word1[i] + "," + word2[i]);
                }
                int cj[] = toIntArray(grRecog(rels, "conj:and"));
                if (cj.length != 0) {
                    for (int i : cj) {
                        System.out.println(word1[i] + "," + word2[i]);
                    }
                }
            }
            int[] neg = toIntArray(grRecog(rels, "neg"));
            if (neg.length != 0) {
                for (int i : neg) {
                    System.out.println(word1[i] + "," + word2[i]);
                }

            }

        } else {
            int[] f = toIntArray(grRecog(rels, "amod"));
            if (f.length != 0) {
                for (int i : f) {
                    System.out.print(word1[i] + "," + word2[i]);
                    String qwe = word1[i] + "," + word2[i];
                }
                int cj[] = toIntArray(grRecog(rels, "conj:and"));
                if (cj.length != 0) {
                    for (int i : cj) {
                        System.out.println(word2[i]);

                    }
                }
            }
            int[] neg = toIntArray(grRecog(rels, "neg"));
            if (neg.length != 0) {
                for (int i : neg) {
                    System.out.println(word1[i] + "," + word2[i]);
                }

            }

        }

        //RULE 2:::::::::::::

    }

    //  text=sc.nextLine();
    //}
}

From source file:DependencyParse.java

License:Apache License

public static void main(String[] args) throws Exception {
    Properties props = StringUtils.argsToProperties(args);
    if (!props.containsKey("tokpath") || !props.containsKey("parentpath") || !props.containsKey("relpath")) {
        System.err.println(//  w  ww  . j  av  a 2s  .c o m
                "usage: java DependencyParse -tokenize - -tokpath <tokpath> -parentpath <parentpath> -relpath <relpath>");
        System.exit(1);
    }

    boolean tokenize = false;
    if (props.containsKey("tokenize")) {
        tokenize = true;
    }

    String tokPath = props.getProperty("tokpath");
    String parentPath = props.getProperty("parentpath");
    String relPath = props.getProperty("relpath");

    BufferedWriter tokWriter = new BufferedWriter(new FileWriter(tokPath));
    BufferedWriter parentWriter = new BufferedWriter(new FileWriter(parentPath));
    BufferedWriter relWriter = new BufferedWriter(new FileWriter(relPath));

    MaxentTagger tagger = new MaxentTagger(TAGGER_MODEL);
    DependencyParser parser = DependencyParser.loadFromModelFile(PARSER_MODEL);
    Scanner stdin = new Scanner(System.in);
    int count = 0;
    long start = System.currentTimeMillis();
    while (stdin.hasNextLine()) {
        String line = stdin.nextLine();
        List<HasWord> tokens = new ArrayList<>();
        if (tokenize) {
            PTBTokenizer<Word> tokenizer = new PTBTokenizer(new StringReader(line), new WordTokenFactory(), "");
            for (Word label; tokenizer.hasNext();) {
                tokens.add(tokenizer.next());
            }
        } else {
            for (String word : line.split(" ")) {
                tokens.add(new Word(word));
            }
        }

        List<TaggedWord> tagged = tagger.tagSentence(tokens);

        int len = tagged.size();
        Collection<TypedDependency> tdl = parser.predict(tagged).typedDependencies();
        int[] parents = new int[len];
        for (int i = 0; i < len; i++) {
            // if a node has a parent of -1 at the end of parsing, then the node
            // has no parent.
            parents[i] = -1;
        }

        String[] relns = new String[len];
        for (TypedDependency td : tdl) {
            // let root have index 0
            int child = td.dep().index();
            int parent = td.gov().index();
            relns[child - 1] = td.reln().toString();
            parents[child - 1] = parent;
        }

        // print tokens
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < len - 1; i++) {
            if (tokenize) {
                sb.append(PTBTokenizer.ptbToken2Text(tokens.get(i).word()));
            } else {
                sb.append(tokens.get(i).word());
            }
            sb.append(' ');
        }
        if (tokenize) {
            sb.append(PTBTokenizer.ptbToken2Text(tokens.get(len - 1).word()));
        } else {
            sb.append(tokens.get(len - 1).word());
        }
        sb.append('\n');
        tokWriter.write(sb.toString());

        // print parent pointers
        sb = new StringBuilder();
        for (int i = 0; i < len - 1; i++) {
            sb.append(parents[i]);
            sb.append(' ');
        }
        sb.append(parents[len - 1]);
        sb.append('\n');
        parentWriter.write(sb.toString());

        // print relations
        sb = new StringBuilder();
        for (int i = 0; i < len - 1; i++) {
            sb.append(relns[i]);
            sb.append(' ');
        }
        sb.append(relns[len - 1]);
        sb.append('\n');
        relWriter.write(sb.toString());

        count++;
        if (count % 1000 == 0) {
            double elapsed = (System.currentTimeMillis() - start) / 1000.0;
            System.err.printf("Parsed %d lines (%.2fs)\n", count, elapsed);
        }
    }

    long totalTimeMillis = System.currentTimeMillis() - start;
    System.err.printf("Done: %d lines in %.2fs (%.1fms per line)\n", count, totalTimeMillis / 1000.0,
            totalTimeMillis / (double) count);
    tokWriter.close();
    parentWriter.close();
    relWriter.close();
}

From source file:DependencyParserDemo.java

public static void main(String[] args) {
    String modelPath = DependencyParser.DEFAULT_MODEL;
    String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";

    for (int argIndex = 0; argIndex < args.length;) {
        switch (args[argIndex]) {
        case "-tagger":
            taggerPath = args[argIndex + 1];
            argIndex += 2;/*from  w  w  w .j  a v a  2 s  .c  o  m*/
            break;
        case "-model":
            modelPath = args[argIndex + 1];
            argIndex += 2;
            break;
        default:
            throw new RuntimeException("Unknown argument " + args[argIndex]);
        }
    }

    String text = "I can almost always tell when movies use fake dinosaurs.";

    MaxentTagger tagger = new MaxentTagger(taggerPath);
    DependencyParser parser = DependencyParser.loadFromModelFile(modelPath);

    DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(text));
    for (List<HasWord> sentence : tokenizer) {
        List<TaggedWord> tagged = tagger.tagSentence(sentence);
        GrammaticalStructure gs = parser.predict(tagged);

        // Print typed dependencies
        System.err.println(gs);
    }
}

From source file:Dependency2.java

public static void main(String[] args) {
    String modelPath = DependencyParser.DEFAULT_MODEL;
    String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";
    Scanner sc = new Scanner(System.in);

    readCsv();//  www .  j av  a 2  s . co m
    String text = "";
    text = sc.nextLine();
    if (multifeatures(text)) {
        System.out.println("Multiple features present");
        MaxentTagger tagger = new MaxentTagger(taggerPath);
        DependencyParser parser = DependencyParser.loadFromModelFile(modelPath);

        DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(text));
        for (List<HasWord> sentence : tokenizer) {
            List<TaggedWord> tagged = tagger.tagSentence(sentence);
            GrammaticalStructure gs = parser.predict(tagged);

            Collection<TypedDependency> s = gs.typedDependenciesCollapsedTree();
            Map<Character, Pair<Character, Character>> map = new HashMap<Character, Pair<Character, Character>>();
            Object[] z = s.toArray();
            String rels[] = new String[z.length];
            String word1[] = new String[z.length];
            String word2[] = new String[z.length];
            int j = 0;
            String f, f1, f2;
            for (Object i : z) {
                //System.out.println(i);
                String temp = i.toString();
                System.out.println(temp);
                String pattern0 = "(.*)(?=\\()";
                String pattern1 = "(?<=\\()(.*?)(?=-)";
                String pattern2 = "(?<=,)(.*)(?=-)";
                Pattern r0 = Pattern.compile(pattern0);
                Pattern r1 = Pattern.compile(pattern1);
                Pattern r2 = Pattern.compile(pattern2);
                Matcher m0 = r0.matcher(temp);
                Matcher m1 = r1.matcher(temp);
                Matcher m2 = r2.matcher(temp);
                if (m0.find())
                    rels[j] = m0.group(0);
                if (m1.find())
                    word1[j] = m1.group(0);
                if (m2.find())
                    word2[j] = m2.group(0);
                if (rels[j].equals("amod")) {
                    f1 = getFeature(word1[j]);
                    f2 = getFeature(word2[j]);
                    f = f1 != null ? (f1) : (f2 != null ? f2 : null);
                    if (f != null) {
                        System.out.println("Feature: " + f);

                    }

                }

                j++;
            }
            //System.out.println(Arrays.toString(rels));
        }
    } else {
        //sentence score is feature score
    }

}

From source file:context.core.task.pos.POSTagger.java

License:Open Source License

/**
 *
 * @param sent/*  w ww  . j a  v a  2  s .  c  o  m*/
 * @param language
 * @return
 */
public static List<TaggedWord> tag(List<CoreLabel> sent, String language) {
    MaxentTagger tagger = getTagger(language);
    //        List<HasWord> sent = Sentence.toWordList("This is a sample text");
    List<TaggedWord> taggedSent = tagger.tagSentence(sent);
    //        for (TaggedWord tw : taggedSent) {
    //            System.out.println(tw.word() + "\t" + tw.tag());
    //        }
    return taggedSent;
}

From source file:context.core.task.stemming.LemmaTagger.java

License:Open Source License

/**
 *
 * @param sent//w w w .j av  a 2  s  .  c  o  m
 * @param language
 * @return
 */
public static List<TaggedWord> lemmatize(List<CoreLabel> sent, String language) {
    MaxentTagger tagger = getTagger(language);
    //        List<HasWord> sent = Sentence.toWordList("This is a sample text");
    List<TaggedWord> taggedSent = tagger.tagSentence(sent);
    for (TaggedWord token : taggedSent) {
        String word = token.word();
        String pos = token.tag();
        String lemma = morphology.lemmatize(new WordTag(word, pos)).lemma();
        token.setTag(lemma);
    }
    //        final List<WordLemmaTag> tagged = (List<WordLemmaTag>) tagger.tagCoreLabelsOrHasWords(sent, morphology, true);
    //        for (TaggedWord tw : taggedSent) {
    //            System.out.println(tw.word() + "\t" + tw.tag());
    //        }
    return taggedSent;
}

From source file:corpusProcessor.TokenCorpusXmlBuilder.java

License:Open Source License

public void characters(char[] buf, int offset, int len) throws SAXException {
    posTokenBuffer = new StringBuffer();
    String s = new String(buf, offset, len);

    if (textBuffer == null) {
        textBuffer = new StringBuffer(s);
    } else {/*w  w w . j a  va2s.c o  m*/
        textBuffer.append(s);
    }
    String model = "/project/nlp/dingcheng/nlplab/models/bidirectional-wsj-0-18.tagger";
    try {
        MaxentTagger tagger = new MaxentTagger(model);
        List<Sentence<? extends HasWord>> sentences = MaxentTagger
                .tokenizeText(new StringReader(textBuffer.toString()));
        for (Sentence<? extends HasWord> sentence : sentences) {
            Sentence<TaggedWord> tSentence = MaxentTagger.tagSentence(sentence);
            //out.append(tSentence.toString(false));
            String[] tokenPosArray = tSentence.toString(false).split(" ");
            //for(int i=0;i<tokenPosArray.length;i++)
            for (String tokenPosStr : tokenPosArray) {
                String[] tokenPosPair = tokenPosStr.split("/");

                posTokenBuffer.append("<TOKEN id=\"" + tokenCount + "\" pos=\"" + tokenPosPair[1] + "\">"
                        + tokenPosPair[0] + "</TOKEN>");
                posTokenBuffer.append("\n");
                //emit("<TOKEN id=\""+tokenCount);
                tokenCount++;
            }

            //posTokenBuffer.append(tSentence.toString(false));
            //System.out.println(tSentence.toString(false));
        }
    } catch (Exception e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }

}

From source file:final_dissertation.POStag.java

public static void TextToXml(MaxentTagger mt, String input, String output, HashMap<Integer, String> sentences)
        throws IOException {
    String str;//from  w w w  .j  a v a  2  s . c  om
    File fc = new File("E:\\Disertation\\Reviews\\Texts\\" + input);
    FileWriter file;
    file = new FileWriter("E:\\Disertation\\Reviews\\XMLs\\" + output);
    try (FileReader fl = new FileReader(fc)) {
        BufferedReader bf = new BufferedReader(fl);
        StringReader reader;
        int sentenceNum = 0;
        file.write("<");
        file.write(XMLUtils.escapeElementXML("Text"));
        file.write(">");

        while ((str = bf.readLine()) != null) {
            sentences.put(sentenceNum, str);
            reader = new StringReader(str);
            for (List sentence : MaxentTagger.tokenizeText(reader)) {

                ArrayList<TaggedWord> taggedSentence = mt.tagSentence(sentence);
                file.write(getXMLWords(taggedSentence, sentenceNum));
                sentenceNum++;
            }

        }
        file.write(System.lineSeparator());
        file.write("<");
        file.write(XMLUtils.escapeElementXML("/Text"));
        file.write(">");
        file.close();
    }
}

From source file:flight_ranker.TaggerDemo.java

public static void main(String[] args) throws Exception {
    //    if (args.length != 2) {
    //      System.err.println("usage: java TaggerDemo modelFile fileToTag");
    //      return;
    //    }/*from  w w  w.  j  a v  a  2  s  .c  o m*/
    MaxentTagger tagger = new MaxentTagger("taggers\\english-left3words-distsim.tagger");
    List<List<HasWord>> sentences = MaxentTagger.tokenizeText(new BufferedReader(new FileReader("G:\\t.txt")));
    for (List<HasWord> sentence : sentences) {
        List<TaggedWord> tSentence = tagger.tagSentence(sentence);
        System.out.println(Sentence.listToString(tSentence, false));
    }
}

From source file:flight_ranker.TaggerDemo2.java

public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("usage: java TaggerDemo2 modelFile fileToTag");
        return;/*w ww.j  a va 2  s  .co m*/
    }
    MaxentTagger tagger = new MaxentTagger(args[0]);
    TokenizerFactory<CoreLabel> ptbTokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(),
            "untokenizable=noneKeep");
    BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(args[1]), "utf-8"));
    PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, "utf-8"));
    DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(r);
    documentPreprocessor.setTokenizerFactory(ptbTokenizerFactory);
    for (List<HasWord> sentence : documentPreprocessor) {
        List<TaggedWord> tSentence = tagger.tagSentence(sentence);
        pw.println(Sentence.listToString(tSentence, false));
    }

    // print the adjectives in one more sentence. This shows how to get at words and tags in a tagged sentence.
    List<HasWord> sent = Sentence.toWordList("The", "slimy", "slug", "crawled", "over", "the", "long", ",",
            "green", "grass", ".");
    List<TaggedWord> taggedSent = tagger.tagSentence(sent);
    for (TaggedWord tw : taggedSent) {
        if (tw.tag().startsWith("JJ")) {
            pw.println(tw.word());
        }
    }

    pw.close();
}