Example usage for org.apache.lucene.index IndexReader document

List of usage examples for org.apache.lucene.index IndexReader document

Introduction

On this page you can find example usages of the org.apache.lucene.index.IndexReader document method.

Prototype




public final Document document(int docID) throws IOException 

Source Link

Document

Returns the stored fields of the nth Document in this index.

Usage

From source file:org.openrdf.sail.lucene.LuceneIndex.java

License:BSD License

/**
 * Looks up the single Document stored under the given ID term (the ID is
 * the combination of resource and context).
 *
 * @param idTerm the term identifying the document
 * @return the matching Document, or null when no such Document exists yet
 * @throws IOException propagated from the underlying index access
 * @throws RuntimeException if more than one Document matches the term
 */
private Document getDocument(Term idTerm) throws IOException {
    IndexReader indexReader = getIndexReader();
    TermDocs matches = indexReader.termDocs(idTerm);

    try {
        if (!matches.next()) {
            // nothing is indexed under this ID
            return null;
        }

        int docId = matches.doc();

        // the ID is expected to be unique: a second hit is an error
        if (matches.next()) {
            throw new RuntimeException("Multiple Documents for resource " + idTerm.text());
        }

        return indexReader.document(docId);
    } finally {
        matches.close();
    }
}

From source file:org.openrdf.sail.lucene.LuceneIndex.java

License:BSD License

/**
 * Returns a list of Documents representing the specified Resource (empty
 * when no such Document exists yet). Each document represent a set of
 * statements with the specified Resource as a subject, which are stored in a
 * specific context/*from www  .  j  av  a2 s.c om*/
 */
private List<Document> getDocuments(Term uriTerm) throws IOException {

    List<Document> result = new LinkedList<Document>();

    IndexReader reader = getIndexReader();
    TermDocs termDocs = reader.termDocs(uriTerm);

    try {
        while (termDocs.next()) {
            int docNr = termDocs.doc();
            result.add(reader.document(docNr));
        }
    } finally {
        termDocs.close();
    }

    return result;
}

From source file:org.openrdf.sail.lucene.LuceneIndex.java

License:BSD License

/**
 * Logs summary statistics about the index at info level: the document
 * counts reported by the reader, the number of non-deleted documents
 * visited, the total and average number of fields, and the number of
 * distinct "id" values. An IOException is logged as a warning instead of
 * being propagated.
 */
private void logIndexStats() {
    try {
        try {
            IndexReader indexReader = getIndexReader();

            int fieldTotal = 0;
            int visited = 0;
            Set<String> distinctIds = new HashSet<String>();

            for (int docNr = 0; docNr < indexReader.maxDoc(); docNr++) {
                if (!indexReader.isDeleted(docNr)) {
                    Document current = indexReader.document(docNr);
                    fieldTotal += current.getFields().size();
                    visited++;
                    for (String id : current.getValues("id")) {
                        distinctIds.add(id);
                    }
                }
            }

            String summary = "Total documents in the index: " + indexReader.numDocs()
                    + ", number of deletable documents in the index: " + indexReader.numDeletedDocs()
                    + ", valid documents: " + visited + ", total fields in all documents: " + fieldTotal
                    + ", average number of fields per document: "
                    + ((double) fieldTotal) / indexReader.numDocs();
            logger.info(summary);
            logger.info("Distinct ids in the index: " + distinctIds.size());

        } finally {
            // hand the current monitor back regardless of the outcome above
            if (currentMonitor != null) {
                currentMonitor.closeWhenPossible();
                currentMonitor = null;
            }
        }
    } catch (IOException e) {
        logger.warn(e.getMessage(), e);
    }

}

From source file:org.openrdf.sail.lucene.LuceneIndexTest.java

License:BSD License

/**
 * Adds two statements about the same subject and then removes them again,
 * checking after every step that the index holds the expected number of
 * documents and that the subject's document carries the expected property
 * values.
 */
public void testAddStatement() throws IOException, ParseException {
    // step 1: index the first statement
    index.addStatement(statement11);

    // exactly one document must exist and it must hold the first property
    IndexReader indexReader = IndexReader.open(directory);
    assertEquals(1, indexReader.numDocs());

    Term subjectTerm = new Term(LuceneIndex.URI_FIELD_NAME, subject.toString());
    TermDocs hits = indexReader.termDocs(subjectTerm);
    assertTrue(hits.next());

    Document stored = indexReader.document(hits.doc());
    assertEquals(subject.toString(), stored.get(LuceneIndex.URI_FIELD_NAME));
    assertEquals(object1.getLabel(), stored.get(predicate1.toString()));

    assertFalse(hits.next());
    hits.close();
    indexReader.close();

    // step 2: index a second statement about the same subject
    index.addStatement(statement12);

    // a fresh IndexReader is required to observe the update; the document
    // count must stay at one
    indexReader = IndexReader.open(directory);
    assertEquals(1, indexReader.numDocs());

    hits = indexReader.termDocs(subjectTerm);
    assertTrue(hits.next());

    stored = indexReader.document(hits.doc());
    assertEquals(subject.toString(), stored.get(LuceneIndex.URI_FIELD_NAME));
    assertEquals(object1.getLabel(), stored.get(predicate1.toString()));
    assertEquals(object2.getLabel(), stored.get(predicate2.toString()));

    assertFalse(hits.next());
    hits.close();

    // both literals must be findable through a text query
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    QueryParser queryParser = new QueryParser(Version.LUCENE_35, LuceneIndex.TEXT_FIELD_NAME, analyzer);

    Query textQuery = queryParser.parse(object1.getLabel());
    System.out.println("query=" + textQuery);
    TotalHitCountCollector hitCounter = new TotalHitCountCollector();
    indexSearcher.search(textQuery, hitCounter);
    assertEquals(1, hitCounter.getTotalHits());

    textQuery = queryParser.parse(object2.getLabel());
    hitCounter = new TotalHitCountCollector();
    indexSearcher.search(textQuery, hitCounter);
    assertEquals(1, hitCounter.getTotalHits());

    indexSearcher.close();
    indexReader.close();

    // step 3: remove the first statement again
    index.removeStatement(statement11);

    // the document must survive, now holding only the second property
    indexReader = IndexReader.open(directory);
    assertEquals(1, indexReader.numDocs());

    hits = indexReader.termDocs(subjectTerm);
    assertTrue(hits.next());

    stored = indexReader.document(hits.doc());
    assertEquals(subject.toString(), stored.get(LuceneIndex.URI_FIELD_NAME));
    assertNull(stored.get(predicate1.toString()));
    assertEquals(object2.getLabel(), stored.get(predicate2.toString()));

    assertFalse(hits.next());
    hits.close();

    indexReader.close();

    // step 4: remove the remaining statement
    index.removeStatement(statement12);

    // the whole Document must be gone, not merely emptied of its last triple
    indexReader = IndexReader.open(directory);
    assertEquals(0, indexReader.numDocs());
    indexReader.close();
}

From source file:org.openrdf.sail.lucene3.LuceneIndex.java

License:BSD License

/**
 * Looks up the single Document stored under the given ID term (the ID is
 * the combination of resource and context).
 *
 * @param idTerm the term identifying the document
 * @return the matching Document, or null when no such Document exists yet
 * @throws IOException if more than one Document matches the term, or
 *         propagated from the underlying index access
 */
private Document getDocument(Term idTerm) throws IOException {
    IndexReader indexReader = getIndexReader();
    TermDocs matches = indexReader.termDocs(idTerm);

    try {
        if (!matches.next()) {
            // nothing is indexed under this ID
            return null;
        }

        int docId = matches.doc();

        // the ID is expected to be unique: a second hit is an error
        if (matches.next()) {
            throw new IOException("Multiple Documents for resource " + idTerm.text());
        }

        return indexReader.document(docId);
    } finally {
        matches.close();
    }
}

From source file:org.openrdf.sail.lucene3.LuceneIndex.java

License:BSD License

/**
 * Collects every Document indexed for the given Resource term. Each
 * Document represents the set of statements that have the Resource as
 * their subject and are stored in one specific context.
 *
 * @param uriTerm the term identifying the Resource
 * @return the matching Documents; an empty list when none exist yet
 * @throws IOException propagated from the underlying index access
 */
private List<Document> getDocuments(Term uriTerm) throws IOException {
    List<Document> found = new ArrayList<Document>();

    IndexReader indexReader = getIndexReader();
    TermDocs matches = indexReader.termDocs(uriTerm);

    try {
        while (matches.next()) {
            found.add(indexReader.document(matches.doc()));
        }
    } finally {
        // release the term enumeration whether or not reading succeeded
        matches.close();
    }

    return found;
}

From source file:org.openrdf.sail.lucene3.LuceneIndex.java

License:BSD License

/**
 * Reads the document with the given number from the reader.
 *
 * @param reader the reader to load the document from
 * @param docId the number of the document to load
 * @param fieldsToLoad when null, the document is loaded without a field
 *        selector; otherwise it is loaded through a DocumentFieldSelector
 *        built from this set
 * @return the loaded Document
 * @throws IOException propagated from the reader
 */
private static Document readDocument(IndexReader reader, int docId, Set<String> fieldsToLoad)
        throws IOException {
    if (fieldsToLoad != null) {
        return reader.document(docId, new DocumentFieldSelector(fieldsToLoad));
    }
    return reader.document(docId);
}

From source file:org.openrdf.sail.lucene3.LuceneIndexTest.java

License:BSD License

/**
 * Adds two statements about the same subject and then removes them again,
 * each inside its own begin/commit pair, checking after every step that the
 * index holds the expected number of documents and that the subject's
 * document carries the expected property values.
 */
@Test
public void testAddStatement() throws IOException, ParseException {
    // step 1: index the first statement
    index.begin();
    index.addStatement(statement11);
    index.commit();

    // exactly one document must exist and it must hold the first property
    IndexReader indexReader = IndexReader.open(directory);
    assertEquals(1, indexReader.numDocs());

    Term subjectTerm = new Term(SearchFields.URI_FIELD_NAME, subject.toString());
    TermDocs hits = indexReader.termDocs(subjectTerm);
    assertTrue(hits.next());

    Document stored = indexReader.document(hits.doc());
    assertEquals(subject.toString(), stored.get(SearchFields.URI_FIELD_NAME));
    assertEquals(object1.getLabel(), stored.get(predicate1.toString()));

    assertFalse(hits.next());
    hits.close();
    indexReader.close();

    // step 2: index a second statement about the same subject
    index.begin();
    index.addStatement(statement12);
    index.commit();

    // a fresh IndexReader is required to observe the update; the document
    // count must stay at one
    indexReader = IndexReader.open(directory);
    assertEquals(1, indexReader.numDocs());

    hits = indexReader.termDocs(subjectTerm);
    assertTrue(hits.next());

    stored = indexReader.document(hits.doc());
    assertEquals(subject.toString(), stored.get(SearchFields.URI_FIELD_NAME));
    assertEquals(object1.getLabel(), stored.get(predicate1.toString()));
    assertEquals(object2.getLabel(), stored.get(predicate2.toString()));

    assertFalse(hits.next());
    hits.close();

    // both literals must be findable through a text query
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    QueryParser queryParser = new QueryParser(Version.LUCENE_35, SearchFields.TEXT_FIELD_NAME, analyzer);

    Query textQuery = queryParser.parse(object1.getLabel());
    System.out.println("query=" + textQuery);
    TotalHitCountCollector hitCounter = new TotalHitCountCollector();
    indexSearcher.search(textQuery, hitCounter);
    assertEquals(1, hitCounter.getTotalHits());

    textQuery = queryParser.parse(object2.getLabel());
    hitCounter = new TotalHitCountCollector();
    indexSearcher.search(textQuery, hitCounter);
    assertEquals(1, hitCounter.getTotalHits());

    indexSearcher.close();
    indexReader.close();

    // step 3: remove the first statement again
    index.begin();
    index.removeStatement(statement11);
    index.commit();

    // the document must survive, now holding only the second property
    indexReader = IndexReader.open(directory);
    assertEquals(1, indexReader.numDocs());

    hits = indexReader.termDocs(subjectTerm);
    assertTrue(hits.next());

    stored = indexReader.document(hits.doc());
    assertEquals(subject.toString(), stored.get(SearchFields.URI_FIELD_NAME));
    assertNull(stored.get(predicate1.toString()));
    assertEquals(object2.getLabel(), stored.get(predicate2.toString()));

    assertFalse(hits.next());
    hits.close();

    indexReader.close();

    // step 4: remove the remaining statement
    index.begin();
    index.removeStatement(statement12);
    index.commit();

    // the whole Document must be gone, not merely emptied of its last triple
    indexReader = IndexReader.open(directory);
    assertEquals(0, indexReader.numDocs());
    indexReader.close();
}

From source file:org.opentravel.schemacompiler.index.QueryTask.java

License:Apache License

/**
 * Prints the identity and a fixed set of fields of every document number
 * below the reader's maxDoc() to standard output. Any Throwable raised
 * while reading is printed to standard output instead of being propagated,
 * and the acquired searcher is always released afterwards.
 */
protected void displayIndex() {
    IndexSearcher acquired = null;
    try {
        acquired = searchManager.acquire();
        IndexReader indexReader = acquired.getIndexReader();

        // fields printed below each document's identity line, in this order
        String[] detailFields = { BASE_NAMESPACE_FIELD, FILENAME_FIELD, STATUS_FIELD, VERSION_FIELD,
                VERSION_TYPE_FIELD };

        for (int docNr = 0; docNr < indexReader.maxDoc(); docNr++) {
            Document current = indexReader.document(docNr);

            System.out.println("DOCUMENT: " + current.get(IDENTITY_FIELD));
            for (String fieldName : detailFields) {
                System.out.println("  " + fieldName + " : " + current.get(fieldName));
            }
        }
    } catch (Throwable t) {
        t.printStackTrace(System.out);

    } finally {
        try {
            if (acquired != null) {
                searchManager.release(acquired);
            }

        } catch (IOException e) {
            // Ignore error and continue
        }
    }
}

From source file:org.roosster.store.EntryStore.java

License:Open Source License

/**
 * Returns all entries stored in the index under the hash of the given URL.
 *
 * @param url the URL to look up; must not be null
 * @param reader the reader to search with, or null to have this method
 *        obtain one via getReader() and close it before returning
 * @return the matching entries; an empty array when there are none
 * @throws IOException propagated from the index access or from closing the
 *         resources
 * @throws IllegalStateException if the store has not been initialized
 * @throws IllegalArgumentException if url is null
 */
private Entry[] getEntries(URL url, IndexReader reader) throws IOException {
    if (!isInitialized())
        throw new IllegalStateException("Database must be initialized before use!");

    if (url == null)
        throw new IllegalArgumentException("Parameter 'url' is not allowed to be null");

    boolean closeReader = false;
    TermDocs docs = null;
    try {
        LOG.debug("Getting Entry with URL: " + url);

        // only a reader opened here is closed here; a caller-supplied reader
        // stays open for the caller
        if (reader == null) {
            reader = getReader();
            closeReader = true;
        }

        Term term = new Term(URLHASH, computeHash(url));
        docs = reader.termDocs(term);

        List entries = new ArrayList();
        while (docs.next()) {
            entries.add(new Entry(reader.document(docs.doc()), 0));
        }

        LOG.debug("Found " + entries.size() + " entries for URL " + url);

        return (Entry[]) entries.toArray(new Entry[0]);

    } finally {
        // Close the term enumeration before the reader it was obtained from,
        // and nest the reader close in its own finally so that a failure while
        // closing one resource cannot leave the other one open.
        try {
            if (docs != null)
                docs.close();
        } finally {
            if (closeReader && reader != null)
                reader.close();
        }
    }
}