Example usage for edu.stanford.nlp.parser.lexparser LexicalizedParser DEFAULT_PARSER_LOC

List of usage examples for edu.stanford.nlp.parser.lexparser LexicalizedParser DEFAULT_PARSER_LOC

Introduction

On this page you can find example usage for edu.stanford.nlp.parser.lexparser LexicalizedParser DEFAULT_PARSER_LOC.

Prototype

String DEFAULT_PARSER_LOC

To view the source code for edu.stanford.nlp.parser.lexparser LexicalizedParser DEFAULT_PARSER_LOC, click the Source Link.

Usage

From source file: ie.pars.bnc.preprocess.MainBNCProcess.java

License: Open Source License

/**
 * Converts the matching XML entries of the tar archive at {@code pathInput} into
 * {@code .vert} files under {@code pathOutput}.
 *
 * <p>Each non-directory entry is read completely into memory. Its id is the last path segment
 * of the entry name, cut at the first occurrence of the literal text {@code ".xml"}. An entry is
 * processed only when its id is not contained in {@code filesProcesed} and starts with
 * {@code letter.toUpperCase()}; every other entry is reported as bypassed. A processed entry is
 * passed to {@code ProcessNLP.parseBNCXML} and the result is written, UTF-8 encoded and wrapped
 * in a {@code <text id="...">} element, to {@code pathOutput/<id>.vert}.
 *
 * <p>The archive stream and every output file are closed even when processing fails.
 *
 * @throws IOException if the archive cannot be read, an entry is truncated or too large to
 *     buffer in memory, or an output file cannot be written
 * @throws ArchiveException if the tar archive stream cannot be created
 * @throws Exception kept from the original signature; NOTE(review): presumably covers failures
 *     raised by model loading or {@code ProcessNLP.parseBNCXML} - confirm against those APIs
 */
private static void getZippedFile() throws IOException, ArchiveException, Exception {
    String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";
    String parseModel = LexicalizedParser.DEFAULT_PARSER_LOC;

    int countfiles = 0;
    try (InputStream is = new FileInputStream(pathInput);
            TarArchiveInputStream tarStream = (TarArchiveInputStream) new ArchiveStreamFactory()
                    .createArchiveInputStream("tar", is)) {
        TarArchiveEntry entry;
        while ((entry = (TarArchiveEntry) tarStream.getNextEntry()) != null) {
            if (entry.isDirectory()) {
                continue;
            }
            String entryName = entry.getName();

            long entrySize = entry.getSize();
            if (entrySize > Integer.MAX_VALUE) {
                throw new IOException("Entry " + entryName + " is too large to buffer in memory: "
                        + entrySize + " bytes");
            }
            byte[] content = new byte[(int) entrySize];
            // A single read() may return fewer bytes than requested, so keep reading
            // until the whole entry has been buffered.
            int offset = 0;
            while (offset < content.length) {
                int read = tarStream.read(content, offset, content.length - offset);
                if (read < 0) {
                    throw new IOException("Unexpected end of entry " + entryName + " after "
                            + offset + " of " + content.length + " bytes");
                }
                offset += read;
            }

            // Base name of the entry, cut at the first literal ".xml" (not a regex match).
            String fileName = entryName.substring(entryName.lastIndexOf('/') + 1);
            int extensionStart = fileName.indexOf(".xml");
            String id = extensionStart >= 0 ? fileName.substring(0, extensionStart) : fileName;

            if (!filesProcesed.contains(id) && id.startsWith(letter.toUpperCase())) {
                // The models are re-created on every 10th processed file.
                // NOTE(review): presumably done to bound memory growth - confirm the intent.
                if (countfiles++ % 10 == 0) {
                    tagger = new MaxentTagger(taggerPath);
                    m = new Morphology();
                    parser = ParserGrammar.loadModel(parseModel);
                    parser.loadTagger();
                }
                System.out.print("Entry " + entryName);

                // Parse before opening the output file so a failed parse leaves no empty file.
                StringBuilder parseBNCXML;
                try (InputStream bis = new ByteArrayInputStream(content)) {
                    parseBNCXML = ProcessNLP.parseBNCXML(bis, m, tagger, parser);
                }

                try (OutputStream out = new FileOutputStream(pathOutput + File.separatorChar + id + ".vert");
                        Writer writer = new OutputStreamWriter(out, "UTF-8")) {
                    writer.write("<text id=\"" + id + "\">\n");
                    writer.write(parseBNCXML.toString());
                    writer.write("</text>\n");
                }
            } else {
                System.out.println(">> Bypass Entry " + entryName);
            }
        }
    }
    System.out.println("There are " + countfiles);
}