List of usage examples for org.apache.lucene.index.DirectoryReader.open
public static DirectoryReader open(final IndexCommit commit) throws IOException
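DirectoryReader.open is overloaded: besides the IndexCommit variant shown above, there are variants that take a Directory (used by most of the examples below) and an IndexWriter (for near-real-time readers). As a minimal sketch of the IndexCommit variant, assuming Lucene 5+ and a hypothetical index path; note that listCommits can only return older commits if the index was written with a deletion policy that retains them (e.g. NoDeletionPolicy):

import java.nio.file.Paths;
import java.util.List;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class OpenCommitExample {
    public static void main(String[] args) throws Exception {
        // Hypothetical index location; point this at a real index.
        try (Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"))) {
            // Commits are sorted by age; index 0 is the oldest surviving commit.
            List<IndexCommit> commits = DirectoryReader.listCommits(dir);
            IndexCommit oldest = commits.get(0);
            try (DirectoryReader reader = DirectoryReader.open(oldest)) {
                System.out.println("Docs in oldest commit: " + reader.numDocs());
            }
        }
    }
}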
From source file: dk.statsbiblioteket.netark.dvenabler.DVReaderTest.java
License: Apache License
public void testLargerDVEnableIndex() throws IOException {
    final int DOCS = 1000;
    log.info("testLargerDVEnableIndex started");
    final File INDEX_SRC = generateIndex(DOCS);
    final File INDEX_DEST = new File("target/testindex.deletefreely.dest");
    try {
        IndexUtils.convert(INDEX_SRC, INDEX_DEST, createDVFieldDescriptions(INDEX_SRC));
        IndexReader readerSrc = DirectoryReader.open(MMapDirectory.open(INDEX_SRC));
        IndexReader readerDest = DirectoryReader.open(MMapDirectory.open(INDEX_DEST));
        long multiCount = 0;
        long singleCount = 0;
        long longCount = 0;
        long doubleCount = 0;
        for (int docID = 0; docID < DOCS; docID++) {
            { // multi-valued String field: stored values vs. SortedSet doc values
                String[] multisSrc = readerSrc.document(docID).getValues(MULTI);
                if (multisSrc != null) {
                    List<String> dvs = getSortedSetDocValues(readerDest, docID, MULTI);
                    Arrays.sort(multisSrc);
                    Collections.sort(dvs);
                    assertEquals("There should be as many DVs as stored values for field " + MULTI,
                            multisSrc.length, dvs.size());
                    for (int i = 0; i < multisSrc.length; i++) {
                        assertEquals("Value " + i + " for field " + MULTI + " should be equal",
                                multisSrc[i], dvs.get(i));
                        multiCount++;
                    }
                }
            }
            { // single-valued String field: stored value vs. Sorted doc value
                String singleSrc = readerSrc.document(docID).get(SINGLE);
                if (singleSrc != null) {
                    String dv = getSortedDocValue(readerDest, docID, SINGLE);
                    assertEquals("The DV for field " + SINGLE + " should match the stored value",
                            singleSrc, dv);
                    singleCount++;
                }
            }
            { // long field: stored value vs. numeric doc value
                IndexableField fieldSrc = readerSrc.document(docID).getField(LONG);
                if (fieldSrc != null) {
                    long longSrc = fieldSrc.numericValue().longValue();
                    long dv = getLongDocValue(readerDest, docID, LONG);
                    assertEquals("The DV for field " + LONG + " should match the stored value",
                            longSrc, dv);
                    longCount++;
                }
            }
            { // double field: stored value vs. numeric doc value
                IndexableField fieldSrc = readerSrc.document(docID).getField(DOUBLE);
                if (fieldSrc != null) {
                    double doubleSrc = fieldSrc.numericValue().doubleValue();
                    double dv = getDoubleDocValue(readerDest, docID, DOUBLE);
                    assertEquals("The DV for field " + DOUBLE + " should match the stored value",
                            doubleSrc, dv);
                    doubleCount++;
                }
            }
        }
        assertTrue("There should be at least 1 value for field " + MULTI + " in a document", multiCount > 0);
        assertTrue("There should be at least 1 value for field " + SINGLE + " in a document", singleCount > 0);
        assertTrue("There should be at least 1 value for field " + LONG + " in a document", longCount > 0);
        assertTrue("There should be at least 1 value for field " + DOUBLE + " in a document", doubleCount > 0);
        readerSrc.close();
        readerDest.close();
    } finally {
        delete(INDEX_SRC);
        delete(INDEX_DEST);
    }
}
From source file: dk.statsbiblioteket.netark.dvenabler.DVReaderTest.java
License: Apache License
public static void assertIndexValues(File index, boolean dvExpected) throws IOException, ParseException {
    IndexReader reader = DirectoryReader.open(MMapDirectory.open(index));
    IndexSearcher searcher = new IndexSearcher(reader);
    try {
        assertIndexValues(reader, searcher, dvExpected);
    } finally {
        reader.close();
    }
}
From source file: Dl4j.TermInfo.java
public LuceneDocFetcher(Directory dir, ArrayList<String> docIds) throws Exception {
    globalTermId = 0;
    termSeen = new HashMap<>();
    IndexReader reader = DirectoryReader.open(dir);
    totalExamples = docIds.size(); // one example per requested doc id, not reader.numDocs()
    docWordMaps = new ArrayList<>(totalExamples);

    // The searcher, similarity and query parser are loop-invariant, so build them once.
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setSimilarity(new DefaultSimilarity());
    QueryParser queryParser = new QueryParser("id", new KeywordAnalyzer());

    // Build the per-doc word maps by resolving each external id to a Lucene doc id.
    for (int i = 0; i < totalExamples; i++) {
        Query query = queryParser.parse(docIds.get(i));
        TopDocs topDocs = searcher.search(query, 3);
        ScoreDoc[] hits = topDocs.scoreDocs;
        docWordMaps.add(buildTerms(reader, hits[0].doc));
    }

    // Iterate through the word maps and build the one-hot vectors.
    List<DataSet> allDocVecs = new ArrayList<>(totalExamples);
    for (Map<String, TermInfo> docwordMap : docWordMaps) {
        allDocVecs.add(constructTermVector(docwordMap));
    }

    // Merge all doc vectors into one dataset.
    this.dataSet = DataSet.merge(allDocVecs);
    reader.close();
}
From source file: Dl4j.TermInfo.java
public LuceneDocFetcher(Directory dir, ArrayList<String> docIds, ArrayList<String> labels) throws Exception {
    globalTermId = 0;
    termSeen = new HashMap<>();
    IndexReader reader = DirectoryReader.open(dir);
    totalExamples = docIds.size(); // one example per requested doc id, not reader.numDocs()
    docWordMaps = new ArrayList<>(totalExamples);

    // The searcher, similarity and query parser are loop-invariant, so build them once.
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setSimilarity(new DefaultSimilarity());
    QueryParser queryParser = new QueryParser("id", new KeywordAnalyzer());

    // Build the per-doc word maps by resolving each external id to a Lucene doc id.
    for (int i = 0; i < totalExamples; i++) {
        Query query = queryParser.parse(docIds.get(i));
        TopDocs topDocs = searcher.search(query, 3);
        ScoreDoc[] hits = topDocs.scoreDocs;
        docWordMaps.add(buildTerms(reader, hits[0].doc));
    }

    // Iterate through the word maps and build the labeled one-hot vectors.
    List<DataSet> allDocVecs = new ArrayList<>(totalExamples);
    for (Map<String, TermInfo> docwordMap : docWordMaps) {
        allDocVecs.add(constructTermVector(docwordMap, labels));
    }

    // Merge all doc vectors into one dataset.
    this.dataSet = DataSet.merge(allDocVecs);
    reader.close();
}
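Both LuceneDocFetcher constructors above locate a document by running its external id through a QueryParser over the "id" field. Since the lookup is an exact match on a keyword field, a TermQuery does the same job without query-parser escaping concerns. A minimal sketch of that alternative; the helper name and the assumption that "id" is indexed as a single un-analyzed token are mine, not from the source:

import java.io.IOException;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;

class DocIdLookup {
    /**
     * Hypothetical helper: resolves the internal Lucene doc id for a document
     * whose "id" field holds the given external id as one un-analyzed token.
     * Returns -1 if no document matches.
     */
    static int findDocByExternalId(IndexSearcher searcher, String externalId) throws IOException {
        TopDocs hits = searcher.search(new TermQuery(new Term("id", externalId)), 1);
        return hits.scoreDocs.length > 0 ? hits.scoreDocs[0].doc : -1;
    }
}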
From source file: doc2vec.LuceneDocIterator.java
public LuceneDocIterator(File indexDir, String stopFile, boolean labelsStoredWithWords) throws Exception {
    reader = DirectoryReader.open(FSDirectory.open(indexDir.toPath()));
    docId = 0;
    analyzer = AMIIndexer.constructAnalyzer(stopFile);
    numDocs = reader.numDocs();
    this.labelsStoredWithWords = labelsStoredWithWords;
}
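All the examples above open a reader from a Directory. When an IndexWriter is still open, the DirectoryReader.open(IndexWriter) overload instead returns a near-real-time reader that also sees uncommitted, buffered changes. A minimal sketch, assuming Lucene 5+ and a hypothetical index path and field name:

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class NrtOpenExample {
    public static void main(String[] args) throws Exception {
        try (Directory dir = FSDirectory.open(Paths.get("/tmp/nrt-index"));
             IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
            Document doc = new Document();
            doc.add(new TextField("body", "hello lucene", Field.Store.YES));
            writer.addDocument(doc);
            // No commit yet: the NRT reader still sees the buffered document.
            try (DirectoryReader reader = DirectoryReader.open(writer)) {
                System.out.println("numDocs = " + reader.numDocs()); // prints 1
            }
            writer.commit();
        }
    }
}

A plain DirectoryReader.open(dir) reader opened at the same point would report 0 documents until writer.commit() runs.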