List of usage examples for org.apache.lucene.index IndexWriter getDirectory
import org.apache.lucene.search.ScoreDoc;
public Directory getDirectory()
From source file:retriever.TermStats.java
Directory buildTemporalIndex(int refDocId) throws Exception { Directory ramdir = new RAMDirectory(); IndexWriterConfig iwcfg = new IndexWriterConfig(new StandardAnalyzer()); iwcfg.setOpenMode(IndexWriterConfig.OpenMode.CREATE); IndexWriter writer = new IndexWriter(ramdir, iwcfg); // Get all documents from the current month of the year getSubsetToSearch(refDocId, writer); writer.commit();/*from w w w.j av a 2s . co m*/ writer.close(); return writer.getDirectory(); }
From source file:ro.uaic.info.nlptools.corpus.IndexedLuceneCorpus.java
License:Apache License
/**
 * Builds the token, sentence and annotation indexes of a corpus from XML input.
 *
 * <p>The {@code tokens}, {@code sentences} and {@code annotations} folders under
 * {@code indexedCorpusFolder}, and their {@code *Temp} counterparts, are removed and recreated.
 * The XML is parsed into the three temporary indexes, {@code UpdateInterIndexReferences} is then
 * run over them, and finally the temporary folders are removed.
 *
 * @param indexedCorpusFolder folder under which the index sub-folders are created
 * @param xmlData a single XML file, or a folder whose direct child files are each parsed
 * @param sentElem passed through to {@code InputTextParserHandler}
 * @return a new {@code IndexedLuceneCorpus} over {@code indexedCorpusFolder}
 * @throws FileNotFoundException if {@code xmlData} does not exist
 * @throws IOException if the children of {@code xmlData} cannot be listed
 * @throws Exception if parsing or indexing fails
 */
public static IndexedLuceneCorpus CreateIndexFromXmlFiles(File indexedCorpusFolder, File xmlData, String sentElem)
        throws Exception {
    if (!xmlData.exists())
        throw new FileNotFoundException(xmlData.toString());

    Analyzer analyzer = new KeywordAnalyzer();
    String root = indexedCorpusFolder.toString();

    // Start from a clean slate: drop every index folder, then recreate them all empty.
    String[] indexFolders = { "tokensTemp", "sentencesTemp", "annotationsTemp", "tokens", "sentences",
            "annotations" };
    for (String folder : indexFolders)
        rmdir(Paths.get(root, folder).toFile());
    for (String folder : indexFolders)
        Paths.get(root, folder).toFile().mkdirs();

    try (FSDirectory tempTokensIndex = FSDirectory.open(Paths.get(root, "tokensTemp"));
            FSDirectory tempSentencesIndex = FSDirectory.open(Paths.get(root, "sentencesTemp"));
            FSDirectory tempAnnotationsIndex = FSDirectory.open(Paths.get(root, "annotationsTemp"))) {

        // NOTE(review): the parser uses the factory defaults. If xmlData can come from an
        // untrusted source, consider disabling DTDs / external entities on the factory.
        SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
        long time = System.currentTimeMillis();

        // try-with-resources closes the writers even when parsing fails. Resources close in
        // reverse declaration order: annotations, tokens, sentences.
        try (IndexWriter sentencesWriter = new IndexWriter(tempSentencesIndex, new IndexWriterConfig(analyzer));
                IndexWriter tokensWriter = new IndexWriter(tempTokensIndex, new IndexWriterConfig(analyzer));
                IndexWriter annotationsWriter = new IndexWriter(tempAnnotationsIndex,
                        new IndexWriterConfig(analyzer))) {
            InputTextParserHandler handler = new InputTextParserHandler(tokensWriter, sentencesWriter,
                    annotationsWriter, sentElem);
            if (xmlData.isFile()) {
                parser.parse(xmlData, handler);
            } else {
                File[] children = xmlData.listFiles();
                // listFiles() returns null when the path is not a directory or cannot be read.
                if (children == null)
                    throw new IOException("Cannot list files in " + xmlData);
                for (File child : children)
                    if (child.isFile())
                        parser.parse(child, handler);
            }
        }

        // The readers are closed here as well as by UpdateInterIndexReferences, so none of them
        // can stay open on the temporary folders that are removed below. Closing an
        // already-closed reader is harmless.
        try (DirectoryReader annotationsReader = DirectoryReader.open(tempAnnotationsIndex);
                DirectoryReader tokensReader = DirectoryReader.open(tempTokensIndex);
                DirectoryReader sentencesReader = DirectoryReader.open(tempSentencesIndex)) {
            UpdateInterIndexReferences(indexedCorpusFolder, new IndexSearcher(annotationsReader),
                    new IndexSearcher(tokensReader), new IndexSearcher(sentencesReader), analyzer);
        }

        System.out.println("Indexed xml files corpus:\n" + (System.currentTimeMillis() - time));
    }

    rmdir(Paths.get(root, "tokensTemp").toFile());
    rmdir(Paths.get(root, "sentencesTemp").toFile());
    rmdir(Paths.get(root, "annotationsTemp").toFile());
    return new IndexedLuceneCorpus(indexedCorpusFolder);
}
From source file:ro.uaic.info.nlptools.corpus.IndexedLuceneCorpus.java
License:Apache License
private static void UpdateInterIndexReferences(File indexFolder, IndexSearcher tempAnnotationSearcher, IndexSearcher tempTokenSearcher, IndexSearcher tempSentenceSearcher, Analyzer analyzer) throws IOException { List<Integer> annotations; IndexWriter annotationWriter = new IndexWriter( FSDirectory.open(Paths.get(indexFolder.toString(), "annotations")), new IndexWriterConfig(analyzer)); for (int i = 0; i < tempAnnotationSearcher.getIndexReader().numDocs(); i++) { Document doc = tempAnnotationSearcher.doc(i); Document newDoc = new Document(); for (IndexableField f : doc.getFields()) { if (f.name().equals("GGS:StartTokenRefId")) newDoc.add(new IntField("GGS:StartTokenIndex", tempTokenSearcher .search(new TermQuery(new Term("GGS:RefId", f.stringValue())), 1).scoreDocs[0].doc, Field.Store.YES)); else if (f.name().equals("GGS:EndTokenRefId")) newDoc.add(new IntField("GGS:EndTokenIndex", tempTokenSearcher .search(new TermQuery(new Term("GGS:RefId", f.stringValue())), 1).scoreDocs[0].doc, Field.Store.YES)); else//ww w .j ava 2 s .com newDoc.add(f); } annotationWriter.addDocument(newDoc); } annotationWriter.close(); tempAnnotationSearcher = new IndexSearcher(DirectoryReader.open(annotationWriter.getDirectory())); Map<Integer, List<Integer>> toksAnnotations = new HashMap<>(); Map<Integer, List<Integer>> sentsAnnotations = new HashMap<>(); for (int i = 0; i < tempAnnotationSearcher.getIndexReader().numDocs(); i++) { Document doc = tempAnnotationSearcher.doc(i); int start = doc.getField("GGS:StartTokenIndex").numericValue().intValue(); int end = doc.getField("GGS:EndTokenIndex").numericValue().intValue(); for (int j = start; j <= end; j++) { annotations = toksAnnotations.get(j); if (annotations == null) { annotations = new ArrayList<>(); toksAnnotations.put(j, annotations); } annotations.add(i); } int sentIndex = tempTokenSearcher.doc(start).getField("GGS:Sentence").numericValue().intValue(); annotations = sentsAnnotations.get(sentIndex); if (annotations == null) { annotations 
= new ArrayList<>(); sentsAnnotations.put(sentIndex, annotations); } annotations.add(i); } IndexWriter tokenWriter = new IndexWriter(FSDirectory.open(Paths.get(indexFolder.toString(), "tokens")), new IndexWriterConfig(analyzer)); for (int i = 0; i < tempTokenSearcher.getIndexReader().numDocs(); i++) { Document doc = tempTokenSearcher.doc(i); Document newDoc = new Document(); for (IndexableField f : doc.getFields()) { newDoc.add(f); } annotations = toksAnnotations.get(i); if (annotations != null) { for (int k : annotations) newDoc.add(new IntField("GGS:SpanAnnotation", k, Field.Store.YES)); } tokenWriter.addDocument(newDoc); } tokenWriter.close(); IndexWriter sentenceWriter = new IndexWriter( FSDirectory.open(Paths.get(indexFolder.toString(), "sentences")), new IndexWriterConfig(analyzer)); for (int i = 0; i < tempSentenceSearcher.getIndexReader().numDocs(); i++) { Document doc = tempSentenceSearcher.doc(i); Document newDoc = new Document(); for (IndexableField f : doc.getFields()) { newDoc.add(f); } annotations = sentsAnnotations.get(i); if (annotations != null) { for (int k : annotations) newDoc.add(new IntField("GGS:SpanAnnotation", k, Field.Store.YES)); } sentenceWriter.addDocument(newDoc); } sentenceWriter.close(); tempTokenSearcher.getIndexReader().close(); tempAnnotationSearcher.getIndexReader().close(); tempSentenceSearcher.getIndexReader().close(); }
From source file:util.IndexTools.java
License:Open Source License
public IndexTools(IndexWriter indexWriter) { try {/*from w w w. j a v a2 s.c o m*/ ir = indexWriter.getReader(); is = new IndexSearcher(ir); } catch (IOException e) { AppLogger.error.log(Level.SEVERE, "Could not access index at location " + indexWriter.getDirectory() + " for reading"); throw new RuntimeException("Exiting application", e); } }