Example usage for org.apache.lucene.index DirectoryReader open

List of usage examples for org.apache.lucene.index DirectoryReader open

Introduction

On this page you can find example usages of org.apache.lucene.index.DirectoryReader.open.

Prototype

public static DirectoryReader open(final IndexCommit commit, Map<String, String> readerAttributes)
        throws IOException 

Source Link

Document

Expert: returns an IndexReader reading the index in the given IndexCommit.

Usage

From source file:at.ac.univie.mminf.luceneSKOS.test.SKOSURIFilterTest.java

License:Apache License

@Test
public void singleUriExpansionWithUnstoredField() throws IOException {
    // Index one document whose "subject" value is added with TYPE_NOT_STORED.
    Document unstoredDoc = new Document();
    unstoredDoc.add(new Field("subject", "http://example.com/concept/1", TextField.TYPE_NOT_STORED));
    writer.addDocument(unstoredDoc);

    // Open a reader directly on the writer and search it for the term "jumps".
    DirectoryReader nrtReader = DirectoryReader.open(writer, false);
    searcher = new IndexSearcher(nrtReader);
    TopDocs hits = searcher.search(new TermQuery(new Term("subject", "jumps")), 10);
    assertEquals(1, hits.totalHits);

    // The single hit is expected to carry no retrievable "subject" values.
    int hitDocId = hits.scoreDocs[0].doc;
    String[] subjectValues = searcher.doc(hitDocId).getValues("subject");
    assertEquals(0, subjectValues.length);
}

From source file:at.ac.univie.mminf.luceneSKOS.test.SKOSURIFilterTest.java

License:Apache License

@Test
public void multipleURIExpansion() throws IOException {
    // One document carrying two stored "subject" URIs (concept 1 and concept 2).
    Document twoConceptDoc = new Document();
    for (String conceptUri : new String[] { "http://example.com/concept/1", "http://example.com/concept/2" }) {
        twoConceptDoc.add(new Field("subject", conceptUri, TextField.TYPE_STORED));
    }
    writer.addDocument(twoConceptDoc);
    searcher = new IndexSearcher(DirectoryReader.open(writer, false));

    // "hops" is queried as an alternative term of concept 1, "speedy" as one of
    // concept 2. Each query must hit the same single document, and that document
    // must return both stored subject values.
    for (String alternativeTerm : new String[] { "hops", "speedy" }) {
        TopDocs hits = searcher.search(new TermQuery(new Term("subject", alternativeTerm)), 10);
        assertEquals(1, hits.totalHits);

        String[] storedSubjects = searcher.doc(hits.scoreDocs[0].doc).getValues("subject");
        assertEquals(2, storedSubjects.length);
    }
}

From source file:at.ac.univie.mminf.luceneSKOS.test.termexpansion.AbstractTermExpansionTest.java

License:Apache License

/**
 * Indexes a sample metadata record (a Lucene document) with a "title",
 * "description" and "subject" field holding plain subject terms, analyzed
 * with a {@code SimpleAnalyzer} only.
 * <p>
 * A search for "arms" is expected to return nothing, because the term "arms"
 * is not explicitly contained in any field of the record.
 *
 * @throws IOException if indexing or searching fails
 */
@Test
public void noExpansion() throws IOException {
    final String description = "Roman iron spearhead. The spearhead was attached to one end of a wooden shaft..."
            + "The spear was mainly a thrusting weapon, but could also be thrown. "
            + "It was the principal weapon of the auxiliary soldier... "
            + "(second - fourth century, Arbeia Roman Fort).";

    /* the record to be indexed; only the title is added as a stored field */
    Document spearheadDoc = new Document();
    spearheadDoc.add(new Field("title", "Spearhead", TextField.TYPE_STORED));
    spearheadDoc.add(new Field("description", description, TextField.TYPE_NOT_STORED));
    spearheadDoc.add(new Field("subject", "weapons", TextField.TYPE_NOT_STORED));

    /* index it through a writer on a RAMDirectory, using a SimpleAnalyzer */
    writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(new SimpleAnalyzer()));
    writer.addDocument(spearheadDoc);

    /* one optional (SHOULD) clause per field, each looking for "arms" */
    BooleanQuery.Builder anyField = new BooleanQuery.Builder();
    for (String fieldName : new String[] { "title", "description", "subject" }) {
        anyField.add(new TermQuery(new Term(fieldName, "arms")), BooleanClause.Occur.SHOULD);
    }

    /* search through a reader opened directly on the writer */
    searcher = new IndexSearcher(DirectoryReader.open(writer, false));
    TopDocs hits = searcher.search(anyField.build(), 10);

    /* no results are returned since there is no term match */
    assertEquals(0, hits.totalHits);
}

From source file:at.ac.univie.mminf.luceneSKOS.test.termexpansion.LabelbasedTermExpansionTest.java

License:Apache License

/**
 * This test indexes a sample metadata record (=lucene document) having a
 * "title", "description", and "subject" field.
 * <p/>/*from   w  w w.  j a va2  s  . com*/
 * A search for "arms" returns that record as a result because "arms" is
 * defined as an alternative label for "weapons", the term which is
 * contained in the subject field.
 *
 * @throws IOException
 */
@Test
public void labelBasedTermExpansion() throws IOException {

    /* defining the document to be indexed */
    Document doc = new Document();
    doc.add(new Field("title", "Spearhead", TextField.TYPE_STORED));
    doc.add(new Field("description",
            "Roman iron spearhead. The spearhead was attached to one end of a wooden shaft..."
                    + "The spear was mainly a thrusting weapon, but could also be thrown. "
                    + "It was the principal weapon of the auxiliary soldier... "
                    + "(second - fourth century, Arbeia Roman Fort).",
            TextField.TYPE_NOT_STORED));
    doc.add(new Field("subject", "weapons", TextField.TYPE_NOT_STORED));

    /* setting up the SKOS analyzer */
    String skosFile = "src/test/resources/skos_samples/ukat_examples.n3";
    String indexPath = "build/";

    /* ExpansionType.URI->the field to be analyzed (expanded) contains URIs */
    Analyzer skosAnalyzer = new SKOSAnalyzer(indexPath, skosFile, ExpansionType.LABEL);

    /* Define different analyzers for different fields */
    Map<String, Analyzer> analyzerPerField = new HashMap<>();
    analyzerPerField.put("subject", skosAnalyzer);
    PerFieldAnalyzerWrapper indexAnalyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer(), analyzerPerField);

    /* setting up a writer with a default (simple) analyzer */
    writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(indexAnalyzer));

    /* adding the document to the index */
    writer.addDocument(doc);

    /* defining a query that searches over all fields */
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(new TermQuery(new Term("title", "arms")), BooleanClause.Occur.SHOULD)
            .add(new TermQuery(new Term("description", "arms")), BooleanClause.Occur.SHOULD)
            .add(new TermQuery(new Term("subject", "arms")), BooleanClause.Occur.SHOULD);

    /* creating a new searcher */
    searcher = new IndexSearcher(DirectoryReader.open(writer, false));

    TopDocs results = searcher.search(builder.build(), 10);

    /* the document matches because "arms" is among the expanded terms */
    assertEquals(1, results.totalHits);

    /* defining a query that searches for a broader concept */
    Query query = new TermQuery(new Term("subject", "military equipment"));

    results = searcher.search(query, 10);

    /* ... also returns the document as result */
    assertEquals(1, results.totalHits);
}

From source file:at.ac.univie.mminf.luceneSKOS.test.termexpansion.URIbasedTermExpansionTest.java

License:Apache License

/**
 * Indexes a sample metadata record (a Lucene document) with a "title",
 * "description" and "subject" field, where the subject is semantically
 * enriched by a URI pointing to the SKOS concept "weapons".
 * <p>
 * A search for "arms" is expected to return the record because "arms" is
 * defined as an alternative label (altLabel) for the concept "weapons".
 *
 * @throws IOException if indexing or searching fails
 */
@Test
public void uriBasedTermExpansion() throws IOException {
    final String description = "Roman iron spearhead. The spearhead was attached to one end of a wooden shaft..."
            + "The spear was mainly a thrusting weapon, but could also be thrown. "
            + "It was the principal weapon of the auxiliary soldier... "
            + "(second - fourth century, Arbeia Roman Fort).";

    /* the record to be indexed; its subject is a concept URI rather than a plain term */
    Document spearheadDoc = new Document();
    spearheadDoc.add(new Field("title", "Spearhead", TextField.TYPE_STORED));
    spearheadDoc.add(new Field("description", description, TextField.TYPE_NOT_STORED));
    spearheadDoc.add(
            new Field("subject", "http://www.ukat.org.uk/thesaurus/concept/859", TextField.TYPE_NOT_STORED));

    /* ExpansionType.URI->the field to be analyzed (expanded) contains URIs */
    Analyzer uriExpander = new SKOSAnalyzer("build/", "src/test/resources/skos_samples/ukat_examples.n3",
            ExpansionType.URI);

    /* "subject" is analyzed by the SKOS analyzer, every other field by a SimpleAnalyzer */
    Map<String, Analyzer> perFieldAnalyzers = new HashMap<>();
    perFieldAnalyzers.put("subject", uriExpander);
    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new SimpleAnalyzer(), perFieldAnalyzers);

    /* index the record through a writer that uses the per-field wrapper */
    writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(wrapper));
    writer.addDocument(spearheadDoc);

    /* one optional (SHOULD) clause per field, each looking for "arms" */
    BooleanQuery.Builder anyField = new BooleanQuery.Builder();
    for (String fieldName : new String[] { "title", "description", "subject" }) {
        anyField.add(new TermQuery(new Term(fieldName, "arms")), BooleanClause.Occur.SHOULD);
    }

    /* search through a reader opened directly on the writer */
    searcher = new IndexSearcher(DirectoryReader.open(writer, false));

    /* the document matches because "arms" is among the expanded terms */
    assertEquals(1, searcher.search(anyField.build(), 10).totalHits);

    /* a query for a broader concept also returns the document */
    Query broaderConcept = new TermQuery(new Term("subject", "military equipment"));
    assertEquals(1, searcher.search(broaderConcept, 10).totalHits);
}

From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.newsFetch.storm.bolts.LuceneIndexBolt.java

License:Apache License

/**
 * Handles one incoming tuple: extracts top terms for the news item it carries,
 * adds the item to the Lucene index, commits, and emits each of the item's
 * terms on the term stream.
 * <p>
 * An {@code IOException} raised along the way is logged, not rethrown; the
 * tuple is acknowledged in either case.
 *
 * @param input tuple whose {@code StreamIDs.NEWSARTICLEWITHCONTENT} field holds the {@code NewsItem}
 */
@Override
public void execute(Tuple input) {
    try {
        logger.info("New item to add to lucene index");

        // Pull the news item out of the tuple and pick the analyzer for its locale.
        NewsItem newsItem = (NewsItem) input.getValueByField(StreamIDs.NEWSARTICLEWITHCONTENT);
        termExtract.setAnalyzer(LanguageAnalyzerHelper.getInstance().getAnalyzer(newsItem.getLocale()));

        // Term extraction runs against a reader opened on the writer before the
        // new item itself is added; the reader is closed right afterwards.
        try (DirectoryReader nrtReader = DirectoryReader.open(writer, true)) {
            termExtract.addTopTerms(newsItem, nrtReader);
        }

        // Convert to a Lucene document, index it and commit.
        writer.addDocument(NewsItemLuceneDocConverter.newsItemToDocument(newsItem));
        writer.commit();

        logger.info("emitting " + newsItem.getTerms().size() + " terms");
        for (String extractedTerm : newsItem.getTerms().keySet()) {
            collector.emit(StreamIDs.TERMSTREAM, new Values(extractedTerm));
        }

        logger.info("New item in Lucene index");
    } catch (IOException ioFailure) {
        logger.error(ioFailure);
    }

    // Acknowledge regardless of whether indexing succeeded.
    collector.ack(input);
}

From source file:cn.hbu.cs.esearch.store.LuceneStore.java

License:Apache License

/**
 * Opens a reader on {@code indexWriter} and installs it, wrapped in a new
 * {@code ReaderData}, as {@code currentReaderData}.
 *
 * @throws IOException if opening the reader or closing the retired reader data fails
 */
private void updateReader() throws IOException {
    // Reader currently installed, or null when no reader data exists yet.
    final IndexReader previousReader = (currentReaderData == null) ? null : currentReaderData.reader;

    final IndexReader freshReader = DirectoryReader.open(indexWriter, true);

    // If the reader did not change, no updates were applied and no refresh is needed.
    // NOTE(review): DirectoryReader.open presumably hands back a new instance on every
    // call, which would make this identity check never true; confirm whether
    // DirectoryReader.openIfChanged was intended.
    if (freshReader == previousReader) {
        return;
    }

    final ReaderData freshData = new ReaderData(freshReader);
    currentReaderData = freshData;
    if (oldReaderData != null) {
        // Rotate: the data just installed becomes "old", and whatever was held there is closed.
        // NOTE(review): oldReaderData is only reassigned when it is already non-null, and it
        // receives the new data rather than the previous current one; verify this is intended.
        final ReaderData retired = oldReaderData;
        oldReaderData = currentReaderData;
        retired.close();
    }
    currentReaderData = freshData;
}

From source file:collene.Freedb.java

License:Apache License

/**
 * Builds a Lucene index from a freedb dump into the given directory.
 * <p>
 * Entries are read from a hard-coded archive path, converted to documents and
 * added to the writer in batches of {@code BATCH_SIZE}. Indexing stops when the
 * reader runs out of entries, after {@code maxDocuments} documents, or once
 * {@code MAX_ENTRIES} is reached. Every 100000 documents, and once more after a
 * final partial batch, a sample search is run and its timing printed. The
 * writer and the directory are both closed before returning.
 *
 * @param directory target directory for the index; when {@code null} a usage hint is
 *                  printed and the JVM exits with status -1
 * @throws Exception if reading entries, indexing or searching fails
 */
public static void BuildIndex(Directory directory) throws Exception {
    String freedbPath = "/Users/gdusbabek/Downloads/freedb-complete-20140701.tar.bz2";

    if (directory == null) {
        System.out.println("Need to specify: { memory | file | cassandra }. Did you misspell something?");
        System.exit(-1);
    }

    FreeDbReader reader = new FreeDbReader(new File(freedbPath), 50000);
    reader.start();

    long indexStart = System.currentTimeMillis();
    Collection<Document> documents = new ArrayList<Document>(BATCH_SIZE);
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(directory, config);

    // stop after this many documents.
    final int maxDocuments = 400000; //Integer.MAX_VALUE;

    FreeDbEntry entry = reader.next();
    int count = 0;
    while (entry != null && count < maxDocuments) {
        Document doc = new Document();
        String any = entry.toString();
        doc.add(new Field("any", any, TextField.TYPE_STORED));
        doc.add(new Field("artist", entry.getArtist(), TextField.TYPE_NOT_STORED));
        doc.add(new Field("album", entry.getAlbum(), TextField.TYPE_NOT_STORED));
        doc.add(new Field("title", entry.getTitle(), TextField.TYPE_NOT_STORED));
        doc.add(new Field("genre", entry.getGenre(), TextField.TYPE_NOT_STORED));
        doc.add(new Field("year", entry.getYear(), TextField.TYPE_NOT_STORED));
        for (int i = 0; i < entry.getTrackCount(); i++) {
            doc.add(new Field("track", entry.getTrack(i), TextField.TYPE_STORED));
        }
        documents.add(doc);
        if (VERBOSE) {
            out.println(any);
        }

        // flush a full batch to the writer
        if (documents.size() == BATCH_SIZE) {
            writer.addDocuments(documents);
            documents.clear();
        }

        count += 1;
        if (count >= MAX_ENTRIES) {
            // done indexing.
            break;
        }
        entry = reader.next();

        if (count % 100000 == 0) {
            out.println(String.format("Indexed %d documents", count));

            // do a quick morrissey search for fun.
            searchForMorrissey(writer, analyzer, directory);
        }
    }

    // flush whatever is left over from the last, partial batch
    if (documents.size() > 0) {
        out.println(String.format("Adding batch at count %d", count));
        writer.addDocuments(documents);
        out.println("done");
        documents.clear();

        // do a quick morrissey search for fun.
        searchForMorrissey(writer, analyzer, directory);
    }

    long indexTime = System.currentTimeMillis() - indexStart;
    out.println(String.format("Indexed %d things in %d ms (%s)", count, indexTime, directory.toString()));

    out.println("I think these are the files:");
    for (String s : directory.listAll()) {
        out.println(s);
    }

    writer.close(true);
    directory.close();
}

/**
 * Runs the sample "morrissey" query against a reader opened on the writer and
 * prints the hit count, the elapsed time and the top hits.
 * <p>
 * The reader is closed before returning so that repeated sample searches do not
 * accumulate open readers while indexing continues.
 */
private static void searchForMorrissey(IndexWriter writer, Analyzer analyzer, Directory directory)
        throws Exception {
    try (DirectoryReader nrtReader = DirectoryReader.open(writer, false)) {
        IndexSearcher searcher = new IndexSearcher(nrtReader);
        QueryParser parser = new QueryParser(Version.LUCENE_4_9, "any", analyzer);
        long searchStart = System.currentTimeMillis();
        Query query = parser.parse("morrissey");
        TopDocs docs = searcher.search(query, 10);
        long searchEnd = System.currentTimeMillis();
        out.println(String.format("%s %d total hits in %d", directory.getClass().getSimpleName(),
                docs.totalHits, searchEnd - searchStart));
        for (ScoreDoc d : docs.scoreDocs) {
            out.println(String.format("%d %.2f %d", d.doc, d.score, d.shardIndex));
        }
    }
}

From source file:collene.TestIndexing.java

License:Apache License

/**
 * Round-trips an index through {@code directory}: writes 100 single-document
 * batches (committing and force-merging after each), searches them through a
 * reader opened on the writer, deletes one hit, closes the writer, optionally
 * checks the directory's file listing, and finally verifies through a fresh
 * reader that the deleted document can no longer be found.
 *
 * @throws IOException    if an index operation fails
 * @throws ParseException if a query string cannot be parsed
 */
@Test
public void test() throws IOException, ParseException {
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);

    // write it out.
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    IndexWriter writer = new IndexWriter(directory, config);

    for (int i = 0; i < 100; i++) {
        Collection<Document> documents = new ArrayList<Document>();
        Document doc = new Document();
        doc.add(new Field("key", "aaa_" + i, TextField.TYPE_STORED));
        doc.add(new Field("not", "notaaa", TextField.TYPE_NOT_STORED));
        doc.add(new Field("meta", "aaa_meta_aaa_" + i, TextField.TYPE_STORED));
        documents.add(doc);

        writer.addDocuments(documents);

        writer.commit();
        writer.forceMerge(1);
        writer.forceMergeDeletes(true);
    }

    // now read it back. The reader is closed as soon as the searches are done.
    QueryParser parser = new QueryParser(Version.LUCENE_4_9, "key", analyzer);
    int idToDelete;
    try (DirectoryReader searchReader = DirectoryReader.open(writer, false)) {
        IndexSearcher searcher = new IndexSearcher(searchReader);

        TopDocs docs = searcher.search(parser.parse("aaa_4"), 1);
        // assert before indexing into scoreDocs so that a miss fails as an assertion
        Assert.assertTrue(docs.totalHits > 0);
        idToDelete = docs.scoreDocs[0].doc;

        docs = searcher.search(parser.parse("fersoius"), 1);
        Assert.assertFalse(docs.totalHits > 0);
    }

    // delete that document.
    try (DirectoryReader deleteReader = DirectoryReader.open(writer, true)) {
        writer.tryDeleteDocument(deleteReader, idToDelete);
    }
    writer.close();

    // list files
    Set<String> files = new HashSet<String>();
    System.out.println("Listing files for " + directory.toString());
    for (String file : directory.listAll()) {
        files.add(file);
        System.out.println(" " + file);
    }

    if (strictFileChecking) {
        System.out.println("String file checking...");
        // every expected file must be present in the listing
        Sets.SetView<String> difference = Sets.difference(expectedFiles, files);
        Assert.assertEquals(Joiner.on(",").join(difference), 0, difference.size());
    }

    // the deleted document must not be found through a fresh reader on the directory
    TopDocs afterDelete;
    try (DirectoryReader verifyReader = DirectoryReader.open(directory)) {
        afterDelete = new IndexSearcher(verifyReader).search(parser.parse("aaa_4"), 1);
    }
    Assert.assertFalse(afterDelete.totalHits > 0);

    directory.close();
}

From source file:collene.TestLuceneAssumptions.java

License:Apache License

/**
 * Verifies that a reader opened on the writer can see documents that were
 * added but not explicitly committed.
 * <p>
 * The directory, writer and reader are all closed even when an assertion
 * fails, so no files in the temp directory registered for deletion stay open.
 *
 * @throws Exception if creating the temp directory, indexing or searching fails
 */
@Test
public void testCanSeeUpdatesAfterAdd() throws Exception {
    File fdir = TestUtil.getRandomTempDir();
    pleaseDelete.add(fdir);

    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

    // Resources close in reverse order: reader first, then writer, then directory.
    try (Directory dir = FSDirectory.open(fdir);
            IndexWriter writer = new IndexWriter(dir, config)) {

        Document doc0 = new Document();
        Document doc1 = new Document();
        doc0.add(new Field("f0", "aaa", TextField.TYPE_STORED));
        doc1.add(new Field("f0", "bbb", TextField.TYPE_STORED));
        List<Document> docs = Lists.newArrayList(doc0, doc1);
        writer.addDocuments(docs, analyzer);

        try (DirectoryReader reader = DirectoryReader.open(writer, false)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            QueryParser parser = new QueryParser(Version.LUCENE_4_9, "f0",
                    new StandardAnalyzer(Version.LUCENE_4_9));

            Query query = parser.parse("bbb");
            TopDocs topDocs = searcher.search(query, 10);

            // exactly the one document containing "bbb" must be visible
            Assert.assertEquals(1, topDocs.totalHits);
            Assert.assertEquals(1, topDocs.scoreDocs.length);
        }
    }
}