Example usage for org.apache.lucene.index IndexWriter getDirectory

Introduction

In this page you can find the example usage for org.apache.lucene.index IndexWriter getDirectory.

Prototype

public Directory getDirectory()

Source Link

Document

Returns the Directory used by this index.

Usage

From source file:retriever.TermStats.java

Directory buildTemporalIndex(int refDocId) throws Exception {
    Directory ramdir = new RAMDirectory();
    IndexWriterConfig iwcfg = new IndexWriterConfig(new StandardAnalyzer());
    iwcfg.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(ramdir, iwcfg);

    // Get all documents from the current month of the year
    getSubsetToSearch(refDocId, writer);

    writer.commit();/*from   w w  w.j av a  2s  .  co  m*/
    writer.close();
    return writer.getDirectory();
}

From source file:ro.uaic.info.nlptools.corpus.IndexedLuceneCorpus.java

License:Apache License

public static IndexedLuceneCorpus CreateIndexFromXmlFiles(File indexedCorpusFolder, File xmlData,
        String sentElem) throws Exception {
    if (!xmlData.exists())
        throw new FileNotFoundException();
    Analyzer analyzer = new KeywordAnalyzer();

    rmdir(Paths.get(indexedCorpusFolder.toString(), "tokensTemp").toFile());
    rmdir(Paths.get(indexedCorpusFolder.toString(), "sentencesTemp").toFile());
    rmdir(Paths.get(indexedCorpusFolder.toString(), "annotationsTemp").toFile());
    rmdir(Paths.get(indexedCorpusFolder.toString(), "tokens").toFile());
    rmdir(Paths.get(indexedCorpusFolder.toString(), "sentences").toFile());
    rmdir(Paths.get(indexedCorpusFolder.toString(), "annotations").toFile());

    Paths.get(indexedCorpusFolder.toString(), "tokensTemp").toFile().mkdirs();
    Paths.get(indexedCorpusFolder.toString(), "sentencesTemp").toFile().mkdirs();
    Paths.get(indexedCorpusFolder.toString(), "annotationsTemp").toFile().mkdirs();
    Paths.get(indexedCorpusFolder.toString(), "tokens").toFile().mkdirs();
    Paths.get(indexedCorpusFolder.toString(), "sentences").toFile().mkdirs();
    Paths.get(indexedCorpusFolder.toString(), "annotations").toFile().mkdirs();

    FSDirectory tempTokensIndex = FSDirectory.open(Paths.get(indexedCorpusFolder.toString(), "tokensTemp"));
    IndexWriter tokensWriter = new IndexWriter(tempTokensIndex, new IndexWriterConfig(analyzer));
    FSDirectory tempSentencesIndex = FSDirectory
            .open(Paths.get(indexedCorpusFolder.toString(), "sentencesTemp"));
    IndexWriter sentencesWriter = new IndexWriter(tempSentencesIndex, new IndexWriterConfig(analyzer));
    FSDirectory tempAnnotationsIndex = FSDirectory
            .open(Paths.get(indexedCorpusFolder.toString(), "annotationsTemp"));
    IndexWriter annotationsWriter = new IndexWriter(tempAnnotationsIndex, new IndexWriterConfig(analyzer));

    SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
    SAXParser parser = saxParserFactory.newSAXParser();
    long time = System.currentTimeMillis();
    InputTextParserHandler handler = new InputTextParserHandler(tokensWriter, sentencesWriter,
            annotationsWriter, sentElem);
    if (xmlData.isFile())
        parser.parse(xmlData, handler);/*from  www  .ja  va 2  s  . c  o m*/
    else
        for (File child : xmlData.listFiles())
            if (child.isFile())
                parser.parse(child, handler);
    annotationsWriter.close();
    tokensWriter.close();
    sentencesWriter.close();
    UpdateInterIndexReferences(indexedCorpusFolder,
            new IndexSearcher(DirectoryReader.open(annotationsWriter.getDirectory())),
            new IndexSearcher(DirectoryReader.open(tokensWriter.getDirectory())),
            new IndexSearcher(DirectoryReader.open(sentencesWriter.getDirectory())), analyzer);

    System.out.println("Indexed xml files corpus:\n" + (System.currentTimeMillis() - time));

    rmdir(Paths.get(indexedCorpusFolder.toString(), "tokensTemp").toFile());
    rmdir(Paths.get(indexedCorpusFolder.toString(), "sentencesTemp").toFile());
    rmdir(Paths.get(indexedCorpusFolder.toString(), "annotationsTemp").toFile());

    return new IndexedLuceneCorpus(indexedCorpusFolder);
}

From source file:ro.uaic.info.nlptools.corpus.IndexedLuceneCorpus.java

License:Apache License

private static void UpdateInterIndexReferences(File indexFolder, IndexSearcher tempAnnotationSearcher,
        IndexSearcher tempTokenSearcher, IndexSearcher tempSentenceSearcher, Analyzer analyzer)
        throws IOException {
    List<Integer> annotations;

    IndexWriter annotationWriter = new IndexWriter(
            FSDirectory.open(Paths.get(indexFolder.toString(), "annotations")),
            new IndexWriterConfig(analyzer));
    for (int i = 0; i < tempAnnotationSearcher.getIndexReader().numDocs(); i++) {
        Document doc = tempAnnotationSearcher.doc(i);
        Document newDoc = new Document();
        for (IndexableField f : doc.getFields()) {
            if (f.name().equals("GGS:StartTokenRefId"))
                newDoc.add(new IntField("GGS:StartTokenIndex", tempTokenSearcher
                        .search(new TermQuery(new Term("GGS:RefId", f.stringValue())), 1).scoreDocs[0].doc,
                        Field.Store.YES));
            else if (f.name().equals("GGS:EndTokenRefId"))
                newDoc.add(new IntField("GGS:EndTokenIndex", tempTokenSearcher
                        .search(new TermQuery(new Term("GGS:RefId", f.stringValue())), 1).scoreDocs[0].doc,
                        Field.Store.YES));
            else//ww w  .j  ava  2 s  .com
                newDoc.add(f);
        }
        annotationWriter.addDocument(newDoc);
    }
    annotationWriter.close();
    tempAnnotationSearcher = new IndexSearcher(DirectoryReader.open(annotationWriter.getDirectory()));

    Map<Integer, List<Integer>> toksAnnotations = new HashMap<>();
    Map<Integer, List<Integer>> sentsAnnotations = new HashMap<>();

    for (int i = 0; i < tempAnnotationSearcher.getIndexReader().numDocs(); i++) {
        Document doc = tempAnnotationSearcher.doc(i);
        int start = doc.getField("GGS:StartTokenIndex").numericValue().intValue();
        int end = doc.getField("GGS:EndTokenIndex").numericValue().intValue();
        for (int j = start; j <= end; j++) {
            annotations = toksAnnotations.get(j);
            if (annotations == null) {
                annotations = new ArrayList<>();
                toksAnnotations.put(j, annotations);
            }
            annotations.add(i);
        }

        int sentIndex = tempTokenSearcher.doc(start).getField("GGS:Sentence").numericValue().intValue();
        annotations = sentsAnnotations.get(sentIndex);
        if (annotations == null) {
            annotations = new ArrayList<>();
            sentsAnnotations.put(sentIndex, annotations);
        }
        annotations.add(i);
    }

    IndexWriter tokenWriter = new IndexWriter(FSDirectory.open(Paths.get(indexFolder.toString(), "tokens")),
            new IndexWriterConfig(analyzer));

    for (int i = 0; i < tempTokenSearcher.getIndexReader().numDocs(); i++) {
        Document doc = tempTokenSearcher.doc(i);
        Document newDoc = new Document();
        for (IndexableField f : doc.getFields()) {
            newDoc.add(f);
        }

        annotations = toksAnnotations.get(i);
        if (annotations != null) {
            for (int k : annotations)
                newDoc.add(new IntField("GGS:SpanAnnotation", k, Field.Store.YES));
        }
        tokenWriter.addDocument(newDoc);
    }
    tokenWriter.close();

    IndexWriter sentenceWriter = new IndexWriter(
            FSDirectory.open(Paths.get(indexFolder.toString(), "sentences")), new IndexWriterConfig(analyzer));
    for (int i = 0; i < tempSentenceSearcher.getIndexReader().numDocs(); i++) {
        Document doc = tempSentenceSearcher.doc(i);
        Document newDoc = new Document();
        for (IndexableField f : doc.getFields()) {
            newDoc.add(f);
        }

        annotations = sentsAnnotations.get(i);
        if (annotations != null) {
            for (int k : annotations)
                newDoc.add(new IntField("GGS:SpanAnnotation", k, Field.Store.YES));
        }
        sentenceWriter.addDocument(newDoc);
    }
    sentenceWriter.close();

    tempTokenSearcher.getIndexReader().close();
    tempAnnotationSearcher.getIndexReader().close();
    tempSentenceSearcher.getIndexReader().close();
}

From source file:util.IndexTools.java

License:Open Source License

public IndexTools(IndexWriter indexWriter) {
    try {/*from  w w w.  j  a v  a2  s.c o m*/
        ir = indexWriter.getReader();
        is = new IndexSearcher(ir);
    } catch (IOException e) {
        AppLogger.error.log(Level.SEVERE,
                "Could not access index at location " + indexWriter.getDirectory() + " for reading");
        throw new RuntimeException("Exiting application", e);
    }
}