Example usage for edu.stanford.nlp.tagger.maxent MaxentTagger tagSentence

List of usage examples for edu.stanford.nlp.tagger.maxent MaxentTagger tagSentence

Introduction

On this page you can find example usages of edu.stanford.nlp.tagger.maxent MaxentTagger.tagSentence.

Prototype

public List<TaggedWord> tagSentence(List<? extends HasWord> sentence) 

Document

Returns a new Sentence that is a copy of the given sentence with all the words tagged with their part-of-speech.
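
Before the real-world examples below, here is a minimal self-contained sketch of calling tagSentence. The model path is an assumption; substitute whichever packaged tagger model you ship.

import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.process.DocumentPreprocessor;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;

import java.io.StringReader;
import java.util.List;

public class TagSentenceMinimal {
    public static void main(String[] args) {
        // Model path is an assumption; any English tagger model on the classpath works.
        MaxentTagger tagger = new MaxentTagger(
                "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger");
        DocumentPreprocessor sentences = new DocumentPreprocessor(new StringReader("The quick brown fox jumps."));
        for (List<HasWord> sentence : sentences) {
            // tagSentence returns a tagged copy of the input sentence
            List<TaggedWord> tagged = tagger.tagSentence(sentence);
            for (TaggedWord tw : tagged) {
                System.out.println(tw.word() + "/" + tw.tag());
            }
        }
    }
}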

Usage

From source file:Dependency.java

public static void main(String[] args) {
    String modelPath = DependencyParser.DEFAULT_MODEL;
    String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";
    Scanner sc = new Scanner(System.in);

    String text = "";
    text = sc.nextLine();

    MaxentTagger tagger = new MaxentTagger(taggerPath);
    DependencyParser parser = DependencyParser.loadFromModelFile(modelPath);

    DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(text));
    for (List<HasWord> sentence : tokenizer) {

        List<TaggedWord> tagged = tagger.tagSentence(sentence);
        Object[] x = tagged.toArray();
        GrammaticalStructure gs = parser.predict(tagged);
        //System.out.println();

        Collection<TypedDependency> s = gs.typedDependenciesCollapsedTree();
        Object[] z = s.toArray();

        System.out.println(tagged.toString());
        String token[] = new String[z.length];
        String pos[] = new String[z.length];
        int k = 0;
        for (Object i : x) {
            String str = i.toString();
            int index = str.lastIndexOf('/');
            token[k] = str.substring(0, index);
            pos[k] = str.substring(index + 1);
            //System.out.println(pos[k]);
            k++;
        }
        String rels[] = new String[z.length];
        String word1[] = new String[z.length];
        String word2[] = new String[z.length];
        int j = 0;
        for (Object i : z) {
            System.out.println(i);
            String temp = i.toString();
            String pattern0 = "(.*)(?=\\()";
            String pattern1 = "(?<=\\()(.*?)(?=-)";
            String pattern2 = "(?<=, )(.*)(?=-)";
            Pattern r0 = Pattern.compile(pattern0);
            Pattern r1 = Pattern.compile(pattern1);
            Pattern r2 = Pattern.compile(pattern2);
            Matcher m0 = r0.matcher(temp);
            Matcher m1 = r1.matcher(temp);
            Matcher m2 = r2.matcher(temp);
            if (m0.find()) {
                rels[j] = m0.group(0);
                //System.out.println(rels[j]);
            }
            if (m1.find()) {
                word1[j] = m1.group(0);
            }
            if (m2.find()) {
                word2[j] = m2.group(0);
            }
            j++;
        }
        //System.out.println(s);
        //Rules for feature extraction.
        //rule1:::::::::::::::::
        //System.out.println("1");
        int[] q = toIntArray(grRecog(rels, "nsubj"));
        //System.out.println("2");
        if (q.length != 0) {
            //System.out.println("3");
            if (posrecog(token, pos, word2[q[0]]).equals("NN")) {
                //System.out.println("4");
                int[] w = toIntArray(grRecog(rels, "compound"));
                //System.out.println("5");
                if (w.length != 0) {
                    System.out.println("6");
                    System.out.println(word1[q[0]] + "," + word2[q[0]] + "," + word2[w[0]]);
                } else {
                    int conj_and_index = compgrRecog(rels, word1, word2, "conj:and", word2[q[0]]);
                    if (conj_and_index != -1) {
                        System.out.println(
                                word1[conj_and_index] + "," + word2[conj_and_index] + "," + word2[q[0]]);
                    } else
                        System.out.println(word1[q[0]] + "," + word2[q[0]]);
                }
            }
            //RULE 2:::::::::::::
            else if (posrecog(token, pos, word1[q[0]]).equals("JJ")) {
                //System.out.println("aaaaa_JJ");
                int a = compgrRecog(rels, word1, word2, "xcomp", word1[q[0]]);
                if (a != -1) {
                    int b = compgrRecog(rels, word1, word2, "dobj", word2[a]);
                    if (b != -1) {
                        int c = compgrRecog(rels, word1, word2, "compound", word2[b]);
                        if (c != -1) {
                            System.out.println(word1[q[0]] + "," + word1[c] + "," + word2[c]);
                        }
                    }
                }
                //RULE 3::::::::::
                else {
                    int b[] = toIntArray(grRecog(rels, "ccomp"));
                    if (b.length != 0) {
                        System.out.println(word1[q[1]] + "," + word2[q[1]] + "," + word1[b[0]]);
                    }

                }
            }
            //RULE 4::::::::::
            else if (posrecog(token, pos, word1[q[0]]).equals("VBZ")) {
                //System.out.println("aaaaa");
                int vbp_dobj_index = compgrRecog(rels, word1, word2, "dobj", word2[q[0]]);
                if (vbp_dobj_index != -1) {
                    System.out.println(word1[vbp_dobj_index] + "," + word2[vbp_dobj_index]);
                } else {
                    int vbp_xcomp_index = compgrRecog(rels, word1, word2, "xcomp", word1[q[0]]);
                    if (vbp_xcomp_index != -1) {

                        System.out.println(word1[vbp_xcomp_index] + "," + word2[vbp_xcomp_index]);
                    } else {
                        int vbp_acomp_index = compgrRecog(rels, word1, word2, "acomp", word1[q[0]]);
                        if (vbp_acomp_index != -1) {

                            System.out.println(
                                    word1[q[0]] + "," + word1[vbp_acomp_index] + "," + word2[vbp_acomp_index]);
                        } else
                            System.out.println(word1[q[0]]);

                    }

                }

            }
            int[] f = toIntArray(grRecog(rels, "amod"));
            if (f.length != 0) {
                for (int i : f) {
                    System.out.println(word1[i] + "," + word2[i]);
                }
                int cj[] = toIntArray(grRecog(rels, "conj:and"));
                if (cj.length != 0) {
                    for (int i : cj) {
                        System.out.println(word1[i] + "," + word2[i]);
                    }
                }
            }
            int[] neg = toIntArray(grRecog(rels, "neg"));
            if (neg.length != 0) {
                for (int i : neg) {
                    System.out.println(word1[i] + "," + word2[i]);
                }

            }

        } else {
            int[] f = toIntArray(grRecog(rels, "amod"));
            if (f.length != 0) {
                for (int i : f) {
                    System.out.print(word1[i] + "," + word2[i]);
                }
                int cj[] = toIntArray(grRecog(rels, "conj:and"));
                if (cj.length != 0) {
                    for (int i : cj) {
                        System.out.println(word2[i]);

                    }
                }
            }
            int[] neg = toIntArray(grRecog(rels, "neg"));
            if (neg.length != 0) {
                for (int i : neg) {
                    System.out.println(word1[i] + "," + word2[i]);
                }

            }

        }

        //RULE 2:::::::::::::

    }

}
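
A note on the token/POS extraction above: splitting each TaggedWord's toString() on the last '/' re-derives information that tagSentence already returns in structured form, and the token and pos arrays are sized by the number of dependencies rather than the number of tagged words, which need not match. A sketch using the TaggedWord accessors directly:

String[] token = new String[tagged.size()];
String[] pos = new String[tagged.size()];
int k = 0;
for (TaggedWord tw : tagged) {
    token[k] = tw.word();   // surface form
    pos[k] = tw.tag();      // part-of-speech tag
    k++;
}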

From source file:DependencyParse.java

License:Apache License

public static void main(String[] args) throws Exception {
    Properties props = StringUtils.argsToProperties(args);
    if (!props.containsKey("tokpath") || !props.containsKey("parentpath") || !props.containsKey("relpath")) {
        System.err.println(
                "usage: java DependencyParse -tokenize - -tokpath <tokpath> -parentpath <parentpath> -relpath <relpath>");
        System.exit(1);
    }

    boolean tokenize = false;
    if (props.containsKey("tokenize")) {
        tokenize = true;
    }

    String tokPath = props.getProperty("tokpath");
    String parentPath = props.getProperty("parentpath");
    String relPath = props.getProperty("relpath");

    BufferedWriter tokWriter = new BufferedWriter(new FileWriter(tokPath));
    BufferedWriter parentWriter = new BufferedWriter(new FileWriter(parentPath));
    BufferedWriter relWriter = new BufferedWriter(new FileWriter(relPath));

    MaxentTagger tagger = new MaxentTagger(TAGGER_MODEL);
    DependencyParser parser = DependencyParser.loadFromModelFile(PARSER_MODEL);
    Scanner stdin = new Scanner(System.in);
    int count = 0;
    long start = System.currentTimeMillis();
    while (stdin.hasNextLine()) {
        String line = stdin.nextLine();
        List<HasWord> tokens = new ArrayList<>();
        if (tokenize) {
            PTBTokenizer<Word> tokenizer = new PTBTokenizer<>(new StringReader(line), new WordTokenFactory(), "");
            while (tokenizer.hasNext()) {
                tokens.add(tokenizer.next());
            }
        } else {
            for (String word : line.split(" ")) {
                tokens.add(new Word(word));
            }
        }

        List<TaggedWord> tagged = tagger.tagSentence(tokens);

        int len = tagged.size();
        Collection<TypedDependency> tdl = parser.predict(tagged).typedDependencies();
        int[] parents = new int[len];
        for (int i = 0; i < len; i++) {
            // if a node has a parent of -1 at the end of parsing, then the node
            // has no parent.
            parents[i] = -1;
        }

        String[] relns = new String[len];
        for (TypedDependency td : tdl) {
            // let root have index 0
            int child = td.dep().index();
            int parent = td.gov().index();
            relns[child - 1] = td.reln().toString();
            parents[child - 1] = parent;
        }

        // print tokens
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < len - 1; i++) {
            if (tokenize) {
                sb.append(PTBTokenizer.ptbToken2Text(tokens.get(i).word()));
            } else {
                sb.append(tokens.get(i).word());
            }
            sb.append(' ');
        }
        if (tokenize) {
            sb.append(PTBTokenizer.ptbToken2Text(tokens.get(len - 1).word()));
        } else {
            sb.append(tokens.get(len - 1).word());
        }
        sb.append('\n');
        tokWriter.write(sb.toString());

        // print parent pointers
        sb = new StringBuilder();
        for (int i = 0; i < len - 1; i++) {
            sb.append(parents[i]);
            sb.append(' ');
        }
        sb.append(parents[len - 1]);
        sb.append('\n');
        parentWriter.write(sb.toString());

        // print relations
        sb = new StringBuilder();
        for (int i = 0; i < len - 1; i++) {
            sb.append(relns[i]);
            sb.append(' ');
        }
        sb.append(relns[len - 1]);
        sb.append('\n');
        relWriter.write(sb.toString());

        count++;
        if (count % 1000 == 0) {
            double elapsed = (System.currentTimeMillis() - start) / 1000.0;
            System.err.printf("Parsed %d lines (%.2fs)\n", count, elapsed);
        }
    }

    long totalTimeMillis = System.currentTimeMillis() - start;
    System.err.printf("Done: %d lines in %.2fs (%.1fms per line)\n", count, totalTimeMillis / 1000.0,
            totalTimeMillis / (double) count);
    tokWriter.close();
    parentWriter.close();
    relWriter.close();
}
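
One robustness note on this example: the three writers are only closed on the success path, so an exception from the tagger or parser leaks them. A sketch of the same setup with try-with-resources:

try (BufferedWriter tokWriter = new BufferedWriter(new FileWriter(tokPath));
        BufferedWriter parentWriter = new BufferedWriter(new FileWriter(parentPath));
        BufferedWriter relWriter = new BufferedWriter(new FileWriter(relPath))) {
    // ... tagging, parsing, and writing loop as above ...
}   // all three writers are flushed and closed here, even if an exception is thrown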

From source file:DependencyParserDemo.java

public static void main(String[] args) {
    String modelPath = DependencyParser.DEFAULT_MODEL;
    String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";

    for (int argIndex = 0; argIndex < args.length;) {
        switch (args[argIndex]) {
        case "-tagger":
            taggerPath = args[argIndex + 1];
            argIndex += 2;
            break;
        case "-model":
            modelPath = args[argIndex + 1];
            argIndex += 2;
            break;
        default:
            throw new RuntimeException("Unknown argument " + args[argIndex]);
        }
    }

    String text = "I can almost always tell when movies use fake dinosaurs.";

    MaxentTagger tagger = new MaxentTagger(taggerPath);
    DependencyParser parser = DependencyParser.loadFromModelFile(modelPath);

    DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(text));
    for (List<HasWord> sentence : tokenizer) {
        List<TaggedWord> tagged = tagger.tagSentence(sentence);
        GrammaticalStructure gs = parser.predict(tagged);

        // Print typed dependencies
        System.err.println(gs);
    }
}
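
System.err.println(gs) relies on GrammaticalStructure's toString(). To print one relation per line instead, a small sketch using the TypedDependency accessors:

for (TypedDependency td : gs.typedDependencies()) {
    // e.g. nsubj(tell-5, I-1)
    System.out.println(td.reln() + "(" + td.gov().word() + "-" + td.gov().index()
            + ", " + td.dep().word() + "-" + td.dep().index() + ")");
}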

From source file:Dependency2.java

public static void main(String[] args) {
    String modelPath = DependencyParser.DEFAULT_MODEL;
    String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";
    Scanner sc = new Scanner(System.in);

    readCsv();
    String text = "";
    text = sc.nextLine();
    if (multifeatures(text)) {
        System.out.println("Multiple features present");
        MaxentTagger tagger = new MaxentTagger(taggerPath);
        DependencyParser parser = DependencyParser.loadFromModelFile(modelPath);

        DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(text));
        for (List<HasWord> sentence : tokenizer) {
            List<TaggedWord> tagged = tagger.tagSentence(sentence);
            GrammaticalStructure gs = parser.predict(tagged);

            Collection<TypedDependency> s = gs.typedDependenciesCollapsedTree();
            Object[] z = s.toArray();
            String rels[] = new String[z.length];
            String word1[] = new String[z.length];
            String word2[] = new String[z.length];
            int j = 0;
            String f, f1, f2;
            for (Object i : z) {
                //System.out.println(i);
                String temp = i.toString();
                System.out.println(temp);
                String pattern0 = "(.*)(?=\\()";
                String pattern1 = "(?<=\\()(.*?)(?=-)";
                String pattern2 = "(?<=,)(.*)(?=-)";
                Pattern r0 = Pattern.compile(pattern0);
                Pattern r1 = Pattern.compile(pattern1);
                Pattern r2 = Pattern.compile(pattern2);
                Matcher m0 = r0.matcher(temp);
                Matcher m1 = r1.matcher(temp);
                Matcher m2 = r2.matcher(temp);
                if (m0.find())
                    rels[j] = m0.group(0);
                if (m1.find())
                    word1[j] = m1.group(0);
                if (m2.find())
                    word2[j] = m2.group(0);
                if ("amod".equals(rels[j])) {
                    f1 = getFeature(word1[j]);
                    f2 = getFeature(word2[j]);
                    f = (f1 != null) ? f1 : f2;
                    if (f != null) {
                        System.out.println("Feature: " + f);
                    }
                }

                j++;
            }
            //System.out.println(Arrays.toString(rels));
        }
    } else {
        //sentence score is feature score
    }

}
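
As in the first example, parsing TypedDependency.toString() with regexes is fragile (it breaks on hyphenated words, for instance). The same rels/word1/word2 arrays can be filled directly from the accessors; a sketch:

int j = 0;
for (TypedDependency td : s) {
    rels[j] = td.reln().toString();   // e.g. "amod"
    word1[j] = td.gov().word();       // governor word
    word2[j] = td.dep().word();       // dependent word
    j++;
}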

From source file:context.core.task.pos.POSTagger.java

License:Open Source License

/**
 * Tags a tokenized sentence with part-of-speech tags.
 *
 * @param sent the tokenized sentence, as a list of CoreLabels
 * @param language the language whose tagger model to use
 * @return the tagged sentence
 */
public static List<TaggedWord> tag(List<CoreLabel> sent, String language) {
    MaxentTagger tagger = getTagger(language);
    //        List<HasWord> sent = Sentence.toWordList("This is a sample text");
    List<TaggedWord> taggedSent = tagger.tagSentence(sent);
    //        for (TaggedWord tw : taggedSent) {
    //            System.out.println(tw.word() + "\t" + tw.tag());
    //        }
    return taggedSent;
}
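
A hypothetical caller (getTagger(...) is this project's own helper, and the "english" key here is an assumption):

// Hypothetical usage; assumes getTagger("english") resolves to an English model.
List<CoreLabel> sent = new ArrayList<>();
for (String w : new String[] { "This", "is", "a", "sample", "text" }) {
    CoreLabel cl = new CoreLabel();
    cl.setWord(w);   // tagSentence reads the word via the HasWord interface
    sent.add(cl);
}
for (TaggedWord tw : POSTagger.tag(sent, "english")) {
    System.out.println(tw.word() + "\t" + tw.tag());
}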

From source file:context.core.task.stemming.LemmaTagger.java

License:Open Source License

/**
 * Tags a tokenized sentence, then replaces each token's POS tag with its lemma.
 *
 * @param sent the tokenized sentence, as a list of CoreLabels
 * @param language the language whose tagger model to use
 * @return the tagged sentence, with each tag overwritten by the token's lemma
 */
public static List<TaggedWord> lemmatize(List<CoreLabel> sent, String language) {
    MaxentTagger tagger = getTagger(language);
    //        List<HasWord> sent = Sentence.toWordList("This is a sample text");
    List<TaggedWord> taggedSent = tagger.tagSentence(sent);
    for (TaggedWord token : taggedSent) {
        String word = token.word();
        String pos = token.tag();
        String lemma = morphology.lemmatize(new WordTag(word, pos)).lemma();
        token.setTag(lemma);
    }
    //        final List<WordLemmaTag> tagged = (List<WordLemmaTag>) tagger.tagCoreLabelsOrHasWords(sent, morphology, true);
    //        for (TaggedWord tw : taggedSent) {
    //            System.out.println(tw.word() + "\t" + tw.tag());
    //        }
    return taggedSent;
}
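
Note that token.setTag(lemma) overwrites the POS tag, so callers lose the tag that drove lemmatization. A variant sketch that keeps the tags intact and returns the lemmas separately (assumes the same getTagger(...) and morphology members as above):

public static List<String> lemmas(List<CoreLabel> sent, String language) {
    MaxentTagger tagger = getTagger(language);
    List<TaggedWord> taggedSent = tagger.tagSentence(sent);
    List<String> lemmas = new ArrayList<>();
    for (TaggedWord token : taggedSent) {
        // lemmatize from the word plus its POS tag, but leave the tag untouched
        lemmas.add(morphology.lemmatize(new WordTag(token.word(), token.tag())).lemma());
    }
    return lemmas;
}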

From source file:corpusProcessor.TokenCorpusXmlBuilder.java

License:Open Source License

public void characters(char[] buf, int offset, int len) throws SAXException {
    posTokenBuffer = new StringBuffer();
    String s = new String(buf, offset, len);

    if (textBuffer == null) {
        textBuffer = new StringBuffer(s);
    } else {
        textBuffer.append(s);
    }
    String model = "/project/nlp/dingcheng/nlplab/models/bidirectional-wsj-0-18.tagger";
    try {
        MaxentTagger tagger = new MaxentTagger(model);
        List<Sentence<? extends HasWord>> sentences = MaxentTagger
                .tokenizeText(new StringReader(textBuffer.toString()));
        for (Sentence<? extends HasWord> sentence : sentences) {
            Sentence<TaggedWord> tSentence = MaxentTagger.tagSentence(sentence);
            //out.append(tSentence.toString(false));
            String[] tokenPosArray = tSentence.toString(false).split(" ");
            //for(int i=0;i<tokenPosArray.length;i++)
            for (String tokenPosStr : tokenPosArray) {
                String[] tokenPosPair = tokenPosStr.split("/");

                posTokenBuffer.append("<TOKEN id=\"" + tokenCount + "\" pos=\"" + tokenPosPair[1] + "\">"
                        + tokenPosPair[0] + "</TOKEN>");
                posTokenBuffer.append("\n");
                //emit("<TOKEN id=\""+tokenCount);
                tokenCount++;
            }

            //posTokenBuffer.append(tSentence.toString(false));
            //System.out.println(tSentence.toString(false));
        }
    } catch (Exception e) {
        e.printStackTrace();
    }

}
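
This example targets an old Stanford release in which tagSentence was a static method returning a Sentence<TaggedWord>. Against releases matching the prototype documented above, the tagging loop would look roughly like this sketch (XML building elided):

MaxentTagger tagger = new MaxentTagger(model);
List<List<HasWord>> sentences = MaxentTagger.tokenizeText(new StringReader(textBuffer.toString()));
for (List<HasWord> sentence : sentences) {
    List<TaggedWord> tSentence = tagger.tagSentence(sentence);
    // Sentence.listToString(tagged, false) renders "word/TAG word/TAG ..."
    String[] tokenPosArray = Sentence.listToString(tSentence, false).split(" ");
    // ... emit <TOKEN> elements as above ...
}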

From source file:final_dissertation.POStag.java

public static void TextToXml(MaxentTagger mt, String input, String output, HashMap<Integer, String> sentences)
        throws IOException {
    String str;
    File fc = new File("E:\\Disertation\\Reviews\\Texts\\" + input);
    FileWriter file;
    file = new FileWriter("E:\\Disertation\\Reviews\\XMLs\\" + output);
    try (FileReader fl = new FileReader(fc)) {
        BufferedReader bf = new BufferedReader(fl);
        StringReader reader;
        int sentenceNum = 0;
        file.write("<");
        file.write(XMLUtils.escapeElementXML("Text"));
        file.write(">");

        while ((str = bf.readLine()) != null) {
            sentences.put(sentenceNum, str);
            reader = new StringReader(str);
            for (List<HasWord> sentence : MaxentTagger.tokenizeText(reader)) {

                List<TaggedWord> taggedSentence = mt.tagSentence(sentence);
                file.write(getXMLWords(taggedSentence, sentenceNum));
                sentenceNum++;
            }

        }
        file.write(System.lineSeparator());
        file.write("<");
        file.write(XMLUtils.escapeElementXML("/Text"));
        file.write(">");
        file.close();
    }
}

From source file:flight_ranker.TaggerDemo.java

public static void main(String[] args) throws Exception {
    //    if (args.length != 2) {
    //      System.err.println("usage: java TaggerDemo modelFile fileToTag");
    //      return;
    //    }
    MaxentTagger tagger = new MaxentTagger("taggers\\english-left3words-distsim.tagger");
    List<List<HasWord>> sentences = MaxentTagger.tokenizeText(new BufferedReader(new FileReader("G:\\t.txt")));
    for (List<HasWord> sentence : sentences) {
        List<TaggedWord> tSentence = tagger.tagSentence(sentence);
        System.out.println(Sentence.listToString(tSentence, false));
    }
}

From source file:flight_ranker.TaggerDemo2.java

public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("usage: java TaggerDemo2 modelFile fileToTag");
        return;
    }
    MaxentTagger tagger = new MaxentTagger(args[0]);
    TokenizerFactory<CoreLabel> ptbTokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(),
            "untokenizable=noneKeep");
    BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(args[1]), "utf-8"));
    PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, "utf-8"));
    DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(r);
    documentPreprocessor.setTokenizerFactory(ptbTokenizerFactory);
    for (List<HasWord> sentence : documentPreprocessor) {
        List<TaggedWord> tSentence = tagger.tagSentence(sentence);
        pw.println(Sentence.listToString(tSentence, false));
    }

    // print the adjectives in one more sentence. This shows how to get at words and tags in a tagged sentence.
    List<HasWord> sent = Sentence.toWordList("The", "slimy", "slug", "crawled", "over", "the", "long", ",",
            "green", "grass", ".");
    List<TaggedWord> taggedSent = tagger.tagSentence(sent);
    for (TaggedWord tw : taggedSent) {
        if (tw.tag().startsWith("JJ")) {
            pw.println(tw.word());
        }
    }

    pw.close();
}