Example usage for org.apache.lucene.index DirectoryReader open

Introduction

On this page you can find example usages of org.apache.lucene.index.DirectoryReader.open.

Prototype

public static DirectoryReader open(final IndexCommit commit, Map<String, String> readerAttributes)
        throws IOException

Source Link

Document

Expert: returns an IndexReader reading the index in the given IndexCommit.

Usage

From source file:at.ac.univie.mminf.luceneSKOS.test.SKOSURIFilterTest.java

License:Apache License

/**
 * Indexes a single document whose "subject" field is added with
 * {@code TextField.TYPE_NOT_STORED}, then checks that the term "jumps" finds it
 * and that no "subject" values can be read back from the hit.
 *
 * @throws IOException if indexing or searching fails
 */
@Test
public void singleUriExpansionWithUnstoredField() throws IOException {
    Document record = new Document();
    Field subjectUri = new Field("subject", "http://example.com/concept/1", TextField.TYPE_NOT_STORED);
    record.add(subjectUri);
    writer.addDocument(record);

    // Search through a reader opened directly on the writer.
    DirectoryReader reader = DirectoryReader.open(writer, false);
    searcher = new IndexSearcher(reader);

    TopDocs hits = searcher.search(new TermQuery(new Term("subject", "jumps")), 10);
    assertEquals(1, hits.totalHits);

    // The field type is TYPE_NOT_STORED, so the retrieved document carries no values for it.
    Document hitDocument = searcher.doc(hits.scoreDocs[0].doc);
    assertEquals(0, hitDocument.getValues("subject").length);
}

From source file:at.ac.univie.mminf.luceneSKOS.test.SKOSURIFilterTest.java

License:Apache License

/**
 * Indexes one document carrying two stored "subject" URIs and checks that a
 * term associated with either concept ("hops", "speedy") finds that document,
 * with both stored "subject" values returned each time.
 *
 * @throws IOException if indexing or searching fails
 */
@Test
public void multipleURIExpansion() throws IOException {
    Document record = new Document();
    for (String conceptUri : new String[] { "http://example.com/concept/1", "http://example.com/concept/2" }) {
        record.add(new Field("subject", conceptUri, TextField.TYPE_STORED));
    }
    writer.addDocument(record);

    DirectoryReader reader = DirectoryReader.open(writer, false);
    searcher = new IndexSearcher(reader);

    // alternative term of concept 1
    TopDocs hits = searcher.search(new TermQuery(new Term("subject", "hops")), 10);
    assertEquals(1, hits.totalHits);
    Document hitDocument = searcher.doc(hits.scoreDocs[0].doc);
    assertEquals(2, hitDocument.getValues("subject").length);

    // alternative term of concept 2
    hits = searcher.search(new TermQuery(new Term("subject", "speedy")), 10);
    assertEquals(1, hits.totalHits);
    hitDocument = searcher.doc(hits.scoreDocs[0].doc);
    assertEquals(2, hitDocument.getValues("subject").length);
}

From source file:at.ac.univie.mminf.luceneSKOS.test.termexpansion.AbstractTermExpansionTest.java

License:Apache License

/**
 * Indexes a sample metadata record (a Lucene document) with "title",
 * "description" and "subject" fields, where "subject" holds the plain term
 * "weapons".
 * <p/>
 * Searching for "arms" must not return the record, because "arms" does not
 * occur literally in any of its fields and no term expansion is configured.
 *
 * @throws IOException if indexing or searching fails
 * @throws LockObtainFailedException
 * @throws CorruptIndexException
 */
@Test
public void noExpansion() throws IOException {

    /* the record to index; only "title" uses TYPE_STORED */
    String descriptionText = "Roman iron spearhead. The spearhead was attached to one end of a wooden shaft..."
            + "The spear was mainly a thrusting weapon, but could also be thrown. "
            + "It was the principal weapon of the auxiliary soldier... "
            + "(second - fourth century, Arbeia Roman Fort).";
    Document record = new Document();
    record.add(new Field("title", "Spearhead", TextField.TYPE_STORED));
    record.add(new Field("description", descriptionText, TextField.TYPE_NOT_STORED));
    record.add(new Field("subject", "weapons", TextField.TYPE_NOT_STORED));

    /* in-memory index written with a plain SimpleAnalyzer */
    writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(new SimpleAnalyzer()));
    writer.addDocument(record);

    /* one optional "arms" clause per field */
    BooleanQuery.Builder armsInAnyField = new BooleanQuery.Builder();
    for (String fieldName : new String[] { "title", "description", "subject" }) {
        armsInAnyField.add(new TermQuery(new Term(fieldName, "arms")), BooleanClause.Occur.SHOULD);
    }

    /* search through a reader opened directly on the writer */
    searcher = new IndexSearcher(DirectoryReader.open(writer, false));
    TopDocs hits = searcher.search(armsInAnyField.build(), 10);

    /* nothing matches because no field contains the term */
    assertEquals(0, hits.totalHits);
}

From source file:at.ac.univie.mminf.luceneSKOS.test.termexpansion.LabelbasedTermExpansionTest.java

License:Apache License

/**
 * This test indexes a sample metadata record (=lucene document) having a
 * "title", "description", and "subject" field.
 * <p/>/*from   w  w w.  j a va2  s  . com*/
 * A search for "arms" returns that record as a result because "arms" is
 * defined as an alternative label for "weapons", the term which is
 * contained in the subject field.
 *
 * @throws IOException
 */
@Test
public void labelBasedTermExpansion() throws IOException {

    /* defining the document to be indexed */
    Document doc = new Document();
    doc.add(new Field("title", "Spearhead", TextField.TYPE_STORED));
    doc.add(new Field("description",
            "Roman iron spearhead. The spearhead was attached to one end of a wooden shaft..."
                    + "The spear was mainly a thrusting weapon, but could also be thrown. "
                    + "It was the principal weapon of the auxiliary soldier... "
                    + "(second - fourth century, Arbeia Roman Fort).",
            TextField.TYPE_NOT_STORED));
    doc.add(new Field("subject", "weapons", TextField.TYPE_NOT_STORED));

    /* setting up the SKOS analyzer */
    String skosFile = "src/test/resources/skos_samples/ukat_examples.n3";
    String indexPath = "build/";

    /* ExpansionType.URI->the field to be analyzed (expanded) contains URIs */
    Analyzer skosAnalyzer = new SKOSAnalyzer(indexPath, skosFile, ExpansionType.LABEL);

    /* Define different analyzers for different fields */
    Map<String, Analyzer> analyzerPerField = new HashMap<>();
    analyzerPerField.put("subject", skosAnalyzer);
    PerFieldAnalyzerWrapper indexAnalyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer(), analyzerPerField);

    /* setting up a writer with a default (simple) analyzer */
    writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(indexAnalyzer));

    /* adding the document to the index */
    writer.addDocument(doc);

    /* defining a query that searches over all fields */
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(new TermQuery(new Term("title", "arms")), BooleanClause.Occur.SHOULD)
            .add(new TermQuery(new Term("description", "arms")), BooleanClause.Occur.SHOULD)
            .add(new TermQuery(new Term("subject", "arms")), BooleanClause.Occur.SHOULD);

    /* creating a new searcher */
    searcher = new IndexSearcher(DirectoryReader.open(writer, false));

    TopDocs results = searcher.search(builder.build(), 10);

    /* the document matches because "arms" is among the expanded terms */
    assertEquals(1, results.totalHits);

    /* defining a query that searches for a broader concept */
    Query query = new TermQuery(new Term("subject", "military equipment"));

    results = searcher.search(query, 10);

    /* ... also returns the document as result */
    assertEquals(1, results.totalHits);
}

From source file:at.ac.univie.mminf.luceneSKOS.test.termexpansion.URIbasedTermExpansionTest.java

License:Apache License

/**
 * Indexes a sample metadata record (a Lucene document) with "title",
 * "description" and "subject" fields, where "subject" is semantically
 * enriched by a URI pointing to the SKOS concept "weapons".
 * <p/>
 * A search for "arms" returns the record because "arms" is defined as an
 * alternative label (altLabel) of that concept; a search for the broader
 * concept "military equipment" returns it as well.
 *
 * @throws IOException if indexing or searching fails
 */
@Test
public void uriBasedTermExpansion() throws IOException {

    /* the record to index; its subject is a concept URI */
    String descriptionText = "Roman iron spearhead. The spearhead was attached to one end of a wooden shaft..."
            + "The spear was mainly a thrusting weapon, but could also be thrown. "
            + "It was the principal weapon of the auxiliary soldier... "
            + "(second - fourth century, Arbeia Roman Fort).";
    Document record = new Document();
    record.add(new Field("title", "Spearhead", TextField.TYPE_STORED));
    record.add(new Field("description", descriptionText, TextField.TYPE_NOT_STORED));
    record.add(new Field("subject", "http://www.ukat.org.uk/thesaurus/concept/859", TextField.TYPE_NOT_STORED));

    /* SKOS analyzer in ExpansionType.URI mode: the analyzed field contains URIs */
    String skosFile = "src/test/resources/skos_samples/ukat_examples.n3";
    String indexPath = "build/";
    Analyzer skosAnalyzer = new SKOSAnalyzer(indexPath, skosFile, ExpansionType.URI);

    /* "subject" goes through the SKOS analyzer; every other field uses SimpleAnalyzer */
    Map<String, Analyzer> analyzerPerField = new HashMap<>();
    analyzerPerField.put("subject", skosAnalyzer);
    PerFieldAnalyzerWrapper indexAnalyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer(), analyzerPerField);

    /* in-memory index written with the per-field analyzer */
    writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(indexAnalyzer));
    writer.addDocument(record);

    /* one optional "arms" clause per field */
    BooleanQuery.Builder armsInAnyField = new BooleanQuery.Builder();
    for (String fieldName : new String[] { "title", "description", "subject" }) {
        armsInAnyField.add(new TermQuery(new Term(fieldName, "arms")), BooleanClause.Occur.SHOULD);
    }

    /* search through a reader opened directly on the writer */
    searcher = new IndexSearcher(DirectoryReader.open(writer, false));

    /* the document matches because "arms" is among the expanded terms */
    TopDocs hits = searcher.search(armsInAnyField.build(), 10);
    assertEquals(1, hits.totalHits);

    /* a query for the broader concept also returns the document */
    hits = searcher.search(new TermQuery(new Term("subject", "military equipment")), 10);
    assertEquals(1, hits.totalHits);

}

From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.newsFetch.storm.bolts.LuceneIndexBolt.java

License:Apache License

/**
 * Handles one incoming tuple: extracts top terms for the news item against the
 * current index, adds the item to the Lucene index and commits, then emits each
 * of the item's terms on the term stream.
 * <p>
 * An {@link IOException} is logged rather than propagated, and the tuple is
 * acked afterwards in either case.
 *
 * @param input tuple carrying the news item under
 *              {@code StreamIDs.NEWSARTICLEWITHCONTENT}
 */
@Override
public void execute(Tuple input) {
    try {
        logger.info("New item to add to lucene index");

        // the news item delivered by the tuple
        NewsItem newsItem = (NewsItem) input.getValueByField(StreamIDs.NEWSARTICLEWITHCONTENT);

        // extract terms with the analyzer matching the item's locale, reading from
        // a reader opened on the writer; the reader is closed right after use
        termExtract.setAnalyzer(LanguageAnalyzerHelper.getInstance().getAnalyzer(newsItem.getLocale()));
        try (DirectoryReader indexReader = DirectoryReader.open(writer, true)) {
            termExtract.addTopTerms(newsItem, indexReader);
        }

        // convert to a Lucene document, index it and commit
        Document luceneDoc = NewsItemLuceneDocConverter.newsItemToDocument(newsItem);
        writer.addDocument(luceneDoc);
        writer.commit();

        logger.info("emitting " + newsItem.getTerms().size() + " terms");
        for (String emittedTerm : newsItem.getTerms().keySet()) {
            collector.emit(StreamIDs.TERMSTREAM, new Values(emittedTerm));
        }

        logger.info("New item in Lucene index");

    } catch (IOException ioFailure) {
        logger.error(ioFailure);
    }
    // NOTE(review): the tuple is acked even when indexing failed above - confirm this is intended
    collector.ack(input);

}

From source file:cn.hbu.cs.esearch.store.LuceneStore.java

License:Apache License

/**
 * Opens a reader on {@code indexWriter} and installs it as the current reader
 * data, closing whatever {@code oldReaderData} held before.
 *
 * @throws IOException if the reader cannot be opened or the old data cannot be closed
 */
private void updateReader() throws IOException {

    final IndexReader previousReader = (currentReaderData == null) ? null : currentReaderData.reader;

    final IndexReader refreshedReader = DirectoryReader.open(indexWriter, true);

    // same reader instance means nothing changed, so there is nothing to swap
    if (refreshedReader == previousReader) {
        return;
    }

    final ReaderData freshData = new ReaderData(refreshedReader);
    currentReaderData = freshData;

    // NOTE(review): oldReaderData is replaced with the *new* data (currentReaderData was
    // already overwritten above), and nothing is rotated while oldReaderData is null -
    // confirm against the code that initialises oldReaderData that this is the intended
    // lifecycle and that the previous reader is not leaked.
    final ReaderData retired = oldReaderData;
    if (retired != null) {
        oldReaderData = currentReaderData;
        retired.close();
    }
    currentReaderData = freshData;
}

From source file:collene.Freedb.java

License:Apache License

/**
 * Indexes freedb entries into the given directory in batches of
 * {@code BATCH_SIZE}, printing progress and running a sample "morrissey"
 * search every 100000 documents and once more after a final partial batch.
 * The writer and the directory are closed before returning.
 * <p>
 * Indexing stops when the reader runs out of entries, after 400000 documents,
 * or once {@code MAX_ENTRIES} is reached, whichever comes first.
 *
 * @param directory index destination; when {@code null} a usage hint is
 *                  printed and the JVM exits with status -1
 * @throws Exception if reading entries, indexing, or the sample search fails
 */
public static void BuildIndex(Directory directory) throws Exception {
    String freedbPath = "/Users/gdusbabek/Downloads/freedb-complete-20140701.tar.bz2";

    if (directory == null) {
        System.out.println("Need to specify: { memory | file | cassandra }. Did you misspell something?");
        System.exit(-1);
    }

    FreeDbReader reader = new FreeDbReader(new File(freedbPath), 50000);
    reader.start();

    long indexStart = System.currentTimeMillis();
    Collection<Document> documents = new ArrayList<Document>(BATCH_SIZE);
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(directory, config);

    // stop after this many documents.
    final int maxDocuments = 400000; //Integer.MAX_VALUE;

    FreeDbEntry entry = reader.next();
    int count = 0;
    while (entry != null && count < maxDocuments) {
        Document doc = new Document();
        String any = entry.toString();
        doc.add(new Field("any", any, TextField.TYPE_STORED));
        doc.add(new Field("artist", entry.getArtist(), TextField.TYPE_NOT_STORED));
        doc.add(new Field("album", entry.getAlbum(), TextField.TYPE_NOT_STORED));
        doc.add(new Field("title", entry.getTitle(), TextField.TYPE_NOT_STORED));
        doc.add(new Field("genre", entry.getGenre(), TextField.TYPE_NOT_STORED));
        doc.add(new Field("year", entry.getYear(), TextField.TYPE_NOT_STORED));
        for (int i = 0; i < entry.getTrackCount(); i++) {
            doc.add(new Field("track", entry.getTrack(i), TextField.TYPE_STORED));
        }
        documents.add(doc);
        if (VERBOSE) {
            out.println(any);
        }

        // flush a full batch to the writer
        if (documents.size() == BATCH_SIZE) {
            writer.addDocuments(documents);
            documents.clear();
        }

        count += 1;
        if (count >= MAX_ENTRIES) {
            // done indexing.
            break;
        }
        entry = reader.next();

        if (count % 100000 == 0) {
            out.println(String.format("Indexed %d documents", count));

            // do a quick morrissey search for fun.
            printSampleSearch(writer, analyzer, directory);
        }
    }

    // index whatever is left over from the last, partial batch
    if (!documents.isEmpty()) {
        out.println(String.format("Adding batch at count %d", count));
        writer.addDocuments(documents);
        out.println("done");
        documents.clear();

        // do a quick morrissey search for fun.
        printSampleSearch(writer, analyzer, directory);
    }

    long indexTime = System.currentTimeMillis() - indexStart;
    out.println(String.format("Indexed %d things in %d ms (%s)", count, indexTime, directory.toString()));

    out.println("I think these are the files:");
    for (String s : directory.listAll()) {
        out.println(s);
    }

    writer.close(true);
    directory.close();
}

/**
 * Runs a "morrissey" query on the "any" field against a reader opened on the
 * writer, prints the hit count, timing and top hits, and closes the reader
 * afterwards so repeated sample searches do not accumulate open readers.
 */
private static void printSampleSearch(IndexWriter writer, Analyzer analyzer, Directory directory)
        throws Exception {
    DirectoryReader sampleReader = DirectoryReader.open(writer, false);
    try {
        IndexSearcher searcher = new IndexSearcher(sampleReader);
        QueryParser parser = new QueryParser(Version.LUCENE_4_9, "any", analyzer);
        long searchStart = System.currentTimeMillis();
        Query query = parser.parse("morrissey");
        TopDocs docs = searcher.search(query, 10);
        long searchEnd = System.currentTimeMillis();
        out.println(String.format("%s %d total hits in %d", directory.getClass().getSimpleName(),
                docs.totalHits, searchEnd - searchStart));
        for (ScoreDoc d : docs.scoreDocs) {
            out.println(String.format("%d %.2f %d", d.doc, d.score, d.shardIndex));
        }
    } finally {
        sampleReader.close();
    }
}

From source file:collene.TestIndexing.java

License:Apache License

/**
 * Writes 100 single-document batches (committing and force-merging after each),
 * verifies that "aaa_4" is searchable and a nonsense term is not, deletes the
 * "aaa_4" hit through the writer, lists the directory's files (optionally
 * checking them against {@code expectedFiles}), and finally verifies through a
 * fresh reader on the directory that "aaa_4" is no longer found.
 *
 * @throws IOException    if indexing, searching or directory access fails
 * @throws ParseException if a query string cannot be parsed
 */
@Test
public void test() throws IOException, ParseException {
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);

    // write it out.
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    IndexWriter writer = new IndexWriter(directory, config);

    for (int i = 0; i < 100; i++) {
        Collection<Document> documents = new ArrayList<Document>();
        Document doc = new Document();
        doc.add(new Field("key", "aaa_" + i, TextField.TYPE_STORED));
        doc.add(new Field("not", "notaaa", TextField.TYPE_NOT_STORED));
        doc.add(new Field("meta", "aaa_meta_aaa_" + i, TextField.TYPE_STORED));
        documents.add(doc);

        writer.addDocuments(documents);

        writer.commit();
        writer.forceMerge(1);
        writer.forceMergeDeletes(true);
    }

    // now read it back.
    QueryParser parser = new QueryParser(Version.LUCENE_4_9, "key", analyzer);
    int idToDelete;
    DirectoryReader searchReader = DirectoryReader.open(writer, false);
    try {
        IndexSearcher searcher = new IndexSearcher(searchReader);

        Query query = parser.parse("aaa_4");
        TopDocs docs = searcher.search(query, 1);
        // assert before indexing into scoreDocs so a miss fails as an assertion,
        // not as an ArrayIndexOutOfBoundsException
        Assert.assertTrue(docs.totalHits > 0);
        idToDelete = docs.scoreDocs[0].doc;

        query = parser.parse("fersoius");
        docs = searcher.search(query, 1);
        Assert.assertFalse(docs.totalHits > 0);
    } finally {
        // this reader was previously never closed
        searchReader.close();
    }

    // delete that document.
    DirectoryReader reader = DirectoryReader.open(writer, true);
    writer.tryDeleteDocument(reader, idToDelete);

    reader.close();
    writer.close();

    // list files
    Set<String> files = new HashSet<String>();
    System.out.println("Listing files for " + directory.toString());
    for (String file : directory.listAll()) {
        files.add(file);
        System.out.println(" " + file);
    }

    if (strictFileChecking) {
        System.out.println("String file checking...");
        Sets.SetView<String> difference = Sets.difference(expectedFiles, files);
        Assert.assertEquals(Joiner.on(",").join(difference), 0, difference.size());
    }

    // the deleted document must be gone when reading the committed index
    reader = DirectoryReader.open(directory);
    IndexSearcher committedSearcher = new IndexSearcher(reader);
    Query deletedQuery = parser.parse("aaa_4");
    TopDocs afterDelete = committedSearcher.search(deletedQuery, 1);
    reader.close();
    Assert.assertFalse(afterDelete.totalHits > 0);

    directory.close();
}

From source file:collene.TestLuceneAssumptions.java

License:Apache License

/**
 * Verifies that a reader opened on the writer sees documents that were added
 * but not yet committed: after adding two documents, a search for "bbb" in
 * field "f0" returns exactly one hit.
 * <p>
 * The reader, writer and directory are closed even when an assertion fails, so
 * no handles stay open on the temporary directory queued in {@code pleaseDelete}.
 *
 * @throws Exception if the index cannot be written, read or queried
 */
@Test
public void testCanSeeUpdatesAfterAdd() throws Exception {
    // this verifies that any reader can see updates after documents are added.
    File fdir = TestUtil.getRandomTempDir();
    pleaseDelete.add(fdir);

    Directory dir = FSDirectory.open(fdir);
    try {
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        IndexWriter writer = new IndexWriter(dir, config);
        try {
            Document doc0 = new Document();
            Document doc1 = new Document();
            doc0.add(new Field("f0", "aaa", TextField.TYPE_STORED));
            doc1.add(new Field("f0", "bbb", TextField.TYPE_STORED));
            List<Document> docs = Lists.newArrayList(doc0, doc1);
            writer.addDocuments(docs, analyzer);

            // reader opened on the writer itself, without a commit in between
            DirectoryReader reader = DirectoryReader.open(writer, false);
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                QueryParser parser = new QueryParser(Version.LUCENE_4_9, "f0",
                        new StandardAnalyzer(Version.LUCENE_4_9));

                Query query = parser.parse("bbb");
                TopDocs topDocs = searcher.search(query, 10);

                Assert.assertEquals(1, topDocs.totalHits);
                Assert.assertEquals(1, topDocs.scoreDocs.length);
            } finally {
                // this reader was previously never closed
                reader.close();
            }
        } finally {
            writer.close();
        }
    } finally {
        dir.close();
    }
}