List of usage examples for edu.stanford.nlp.parser.lexparser LexicalizedParser DEFAULT_PARSER_LOC
String DEFAULT_PARSER_LOC
To view the source code for edu.stanford.nlp.parser.lexparser LexicalizedParser DEFAULT_PARSER_LOC, click the Source Link.
From source file:ie.pars.bnc.preprocess.MainBNCProcess.java
License:Open Source License
private static void getZippedFile() throws IOException, ArchiveException, Exception { String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger"; String parseModel = LexicalizedParser.DEFAULT_PARSER_LOC; InputStream is = new FileInputStream(pathInput); TarArchiveInputStream tarStream = (TarArchiveInputStream) new ArchiveStreamFactory() .createArchiveInputStream("tar", is); TarArchiveEntry entry = null;/* w w w. j ava 2 s .co m*/ int countfiles = 0; while ((entry = (TarArchiveEntry) tarStream.getNextEntry()) != null) { // for(File lf: listFiles){ if (!entry.isDirectory()) { byte[] content = new byte[(int) entry.getSize()]; int offset = 0; tarStream.read(content, offset, content.length - offset); String id = entry.getName().split("/")[entry.getName().split("/").length - 1].split(".xml")[0]; if (!filesProcesed.contains(id) && id.startsWith(letter.toUpperCase())) { if (countfiles++ % 10 == 0) { tagger = new MaxentTagger(taggerPath); m = new Morphology(); parser = ParserGrammar.loadModel(parseModel); parser.loadTagger(); } System.out.print("Entry " + entry.getName()); InputStream bis = new ByteArrayInputStream(content); StringBuilder parseBNCXML = ProcessNLP.parseBNCXML(bis, m, tagger, parser); bis.close(); OutputStream out = new FileOutputStream(pathOutput + File.separatorChar + id + ".vert"); Writer writer = new OutputStreamWriter(out, "UTF-8"); writer.write("<text id=\"" + id + "\">\n"); writer.write(parseBNCXML.toString()); writer.write("</text>\n"); writer.close(); out.close(); } else { System.out.println(">> Bypass Entry " + entry.getName()); } //break; } } is.close(); System.out.println("There are " + countfiles); // tarStream.close(); }