List of usage examples for org.apache.lucene.index IndexReader document
public final Document document(int docID) throws IOException
Returns the stored fields of the nth Document in this index.
From source file:org.openrdf.sail.lucene.LuceneIndex.java
License:BSD License
/** * Returns a Document representing the specified document ID (combination of * resource and context), or null when no such Document exists yet. *//*ww w . ja v a 2 s.c om*/ private Document getDocument(Term idTerm) throws IOException { IndexReader reader = getIndexReader(); TermDocs termDocs = reader.termDocs(idTerm); try { if (termDocs.next()) { // return the Document and make sure there are no others int docNr = termDocs.doc(); if (termDocs.next()) { throw new RuntimeException("Multiple Documents for resource " + idTerm.text()); } return reader.document(docNr); } else { // no such Document return null; } } finally { termDocs.close(); } }
From source file:org.openrdf.sail.lucene.LuceneIndex.java
License:BSD License
/** * Returns a list of Documents representing the specified Resource (empty * when no such Document exists yet). Each document represent a set of * statements with the specified Resource as a subject, which are stored in a * specific context/*from www . j av a2 s.c om*/ */ private List<Document> getDocuments(Term uriTerm) throws IOException { List<Document> result = new LinkedList<Document>(); IndexReader reader = getIndexReader(); TermDocs termDocs = reader.termDocs(uriTerm); try { while (termDocs.next()) { int docNr = termDocs.doc(); result.add(reader.document(docNr)); } } finally { termDocs.close(); } return result; }
From source file:org.openrdf.sail.lucene.LuceneIndex.java
License:BSD License
private void logIndexStats() { try {//www.j ava 2 s. c o m IndexReader reader = null; try { reader = getIndexReader(); Document doc; int totalFields = 0; Set<String> ids = new HashSet<String>(); String[] idArray; int count = 0; for (int i = 0; i < reader.maxDoc(); i++) { if (reader.isDeleted(i)) continue; doc = reader.document(i); totalFields += doc.getFields().size(); count++; idArray = doc.getValues("id"); for (String id : idArray) ids.add(id); } logger.info("Total documents in the index: " + reader.numDocs() + ", number of deletable documents in the index: " + reader.numDeletedDocs() + ", valid documents: " + count + ", total fields in all documents: " + totalFields + ", average number of fields per document: " + ((double) totalFields) / reader.numDocs()); logger.info("Distinct ids in the index: " + ids.size()); } finally { if (currentMonitor != null) { currentMonitor.closeWhenPossible(); currentMonitor = null; } } } catch (IOException e) { logger.warn(e.getMessage(), e); } }
From source file:org.openrdf.sail.lucene.LuceneIndexTest.java
License:BSD License
public void testAddStatement() throws IOException, ParseException { // add a statement to an index index.addStatement(statement11);/* www . ja va2s .co m*/ // check that it arrived properly IndexReader reader = IndexReader.open(directory); assertEquals(1, reader.numDocs()); Term term = new Term(LuceneIndex.URI_FIELD_NAME, subject.toString()); TermDocs docs = reader.termDocs(term); assertTrue(docs.next()); int documentNr = docs.doc(); Document document = reader.document(documentNr); assertEquals(subject.toString(), document.get(LuceneIndex.URI_FIELD_NAME)); assertEquals(object1.getLabel(), document.get(predicate1.toString())); assertFalse(docs.next()); docs.close(); reader.close(); // add another statement index.addStatement(statement12); // See if everything remains consistent. We must create a new IndexReader // in order to be able to see the updates reader = IndexReader.open(directory); assertEquals(1, reader.numDocs()); // #docs should *not* have increased docs = reader.termDocs(term); assertTrue(docs.next()); documentNr = docs.doc(); document = reader.document(documentNr); assertEquals(subject.toString(), document.get(LuceneIndex.URI_FIELD_NAME)); assertEquals(object1.getLabel(), document.get(predicate1.toString())); assertEquals(object2.getLabel(), document.get(predicate2.toString())); assertFalse(docs.next()); docs.close(); // see if we can query for these literals IndexSearcher searcher = new IndexSearcher(reader); QueryParser parser = new QueryParser(Version.LUCENE_35, LuceneIndex.TEXT_FIELD_NAME, analyzer); Query query = parser.parse(object1.getLabel()); System.out.println("query=" + query); TotalHitCountCollector results = new TotalHitCountCollector(); searcher.search(query, results); assertEquals(1, results.getTotalHits()); query = parser.parse(object2.getLabel()); results = new TotalHitCountCollector(); searcher.search(query, results); assertEquals(1, results.getTotalHits()); searcher.close(); reader.close(); // remove the first statement 
index.removeStatement(statement11); // check that that statement is actually removed and that the other still // exists reader = IndexReader.open(directory); assertEquals(1, reader.numDocs()); docs = reader.termDocs(term); assertTrue(docs.next()); documentNr = docs.doc(); document = reader.document(documentNr); assertEquals(subject.toString(), document.get(LuceneIndex.URI_FIELD_NAME)); assertNull(document.get(predicate1.toString())); assertEquals(object2.getLabel(), document.get(predicate2.toString())); assertFalse(docs.next()); docs.close(); reader.close(); // remove the other statement index.removeStatement(statement12); // check that there are no documents left (i.e. the last Document was // removed completely, rather than its remaining triple removed) reader = IndexReader.open(directory); assertEquals(0, reader.numDocs()); reader.close(); }
From source file:org.openrdf.sail.lucene3.LuceneIndex.java
License:BSD License
/** * Returns a Document representing the specified document ID (combination of * resource and context), or null when no such Document exists yet. *//* w ww . j a va 2 s. c o m*/ private Document getDocument(Term idTerm) throws IOException { IndexReader reader = getIndexReader(); TermDocs termDocs = reader.termDocs(idTerm); try { if (termDocs.next()) { // return the Document and make sure there are no others int docNr = termDocs.doc(); if (termDocs.next()) { throw new IOException("Multiple Documents for resource " + idTerm.text()); } return reader.document(docNr); } else { // no such Document return null; } } finally { termDocs.close(); } }
From source file:org.openrdf.sail.lucene3.LuceneIndex.java
License:BSD License
/**
 * Collects every Document indexed for the specified Resource. Each Document
 * represents a set of statements with that Resource as subject, stored in a
 * specific context.
 *
 * @param uriTerm the term identifying the Resource
 * @return the matching Documents; empty when no such Document exists yet
 * @throws IOException propagated from the underlying index reader calls
 */
private List<Document> getDocuments(Term uriTerm) throws IOException {
    List<Document> documents = new ArrayList<Document>();

    IndexReader indexReader = getIndexReader();
    TermDocs hits = indexReader.termDocs(uriTerm);
    try {
        while (hits.next()) {
            documents.add(indexReader.document(hits.doc()));
        }
    } finally {
        hits.close();
    }

    return documents;
}
From source file:org.openrdf.sail.lucene3.LuceneIndex.java
License:BSD License
/**
 * Reads one document from the reader, optionally restricted to a set of
 * fields.
 *
 * @param reader       the reader to load the document from
 * @param docId        the document number to load
 * @param fieldsToLoad the fields to load, or null to load the document
 *                     without a field selector
 * @return the loaded Document
 * @throws IOException propagated from the reader
 */
private static Document readDocument(IndexReader reader, int docId, Set<String> fieldsToLoad)
        throws IOException {
    if (fieldsToLoad != null) {
        return reader.document(docId, new DocumentFieldSelector(fieldsToLoad));
    }
    return reader.document(docId);
}
From source file:org.openrdf.sail.lucene3.LuceneIndexTest.java
License:BSD License
@Test public void testAddStatement() throws IOException, ParseException { // add a statement to an index index.begin();/*from w w w . j a v a 2s . co m*/ index.addStatement(statement11); index.commit(); // check that it arrived properly IndexReader reader = IndexReader.open(directory); assertEquals(1, reader.numDocs()); Term term = new Term(SearchFields.URI_FIELD_NAME, subject.toString()); TermDocs docs = reader.termDocs(term); assertTrue(docs.next()); int documentNr = docs.doc(); Document document = reader.document(documentNr); assertEquals(subject.toString(), document.get(SearchFields.URI_FIELD_NAME)); assertEquals(object1.getLabel(), document.get(predicate1.toString())); assertFalse(docs.next()); docs.close(); reader.close(); // add another statement index.begin(); index.addStatement(statement12); index.commit(); // See if everything remains consistent. We must create a new IndexReader // in order to be able to see the updates reader = IndexReader.open(directory); assertEquals(1, reader.numDocs()); // #docs should *not* have increased docs = reader.termDocs(term); assertTrue(docs.next()); documentNr = docs.doc(); document = reader.document(documentNr); assertEquals(subject.toString(), document.get(SearchFields.URI_FIELD_NAME)); assertEquals(object1.getLabel(), document.get(predicate1.toString())); assertEquals(object2.getLabel(), document.get(predicate2.toString())); assertFalse(docs.next()); docs.close(); // see if we can query for these literals IndexSearcher searcher = new IndexSearcher(reader); QueryParser parser = new QueryParser(Version.LUCENE_35, SearchFields.TEXT_FIELD_NAME, analyzer); Query query = parser.parse(object1.getLabel()); System.out.println("query=" + query); TotalHitCountCollector results = new TotalHitCountCollector(); searcher.search(query, results); assertEquals(1, results.getTotalHits()); query = parser.parse(object2.getLabel()); results = new TotalHitCountCollector(); searcher.search(query, results); assertEquals(1, 
results.getTotalHits()); searcher.close(); reader.close(); // remove the first statement index.begin(); index.removeStatement(statement11); index.commit(); // check that that statement is actually removed and that the other still // exists reader = IndexReader.open(directory); assertEquals(1, reader.numDocs()); docs = reader.termDocs(term); assertTrue(docs.next()); documentNr = docs.doc(); document = reader.document(documentNr); assertEquals(subject.toString(), document.get(SearchFields.URI_FIELD_NAME)); assertNull(document.get(predicate1.toString())); assertEquals(object2.getLabel(), document.get(predicate2.toString())); assertFalse(docs.next()); docs.close(); reader.close(); // remove the other statement index.begin(); index.removeStatement(statement12); index.commit(); // check that there are no documents left (i.e. the last Document was // removed completely, rather than its remaining triple removed) reader = IndexReader.open(directory); assertEquals(0, reader.numDocs()); reader.close(); }
From source file:org.opentravel.schemacompiler.index.QueryTask.java
License:Apache License
protected void displayIndex() { IndexSearcher searcher = null;//w ww . ja v a2 s . c o m try { searcher = searchManager.acquire(); IndexReader reader = searcher.getIndexReader(); for (int i = 0; i < reader.maxDoc(); i++) { Document doc = reader.document(i); System.out.println("DOCUMENT: " + doc.get(IDENTITY_FIELD)); System.out.println(" " + BASE_NAMESPACE_FIELD + " : " + doc.get(BASE_NAMESPACE_FIELD)); System.out.println(" " + FILENAME_FIELD + " : " + doc.get(FILENAME_FIELD)); System.out.println(" " + STATUS_FIELD + " : " + doc.get(STATUS_FIELD)); System.out.println(" " + VERSION_FIELD + " : " + doc.get(VERSION_FIELD)); System.out.println(" " + VERSION_TYPE_FIELD + " : " + doc.get(VERSION_TYPE_FIELD)); } } catch (Throwable t) { t.printStackTrace(System.out); } finally { try { if (searcher != null) searchManager.release(searcher); } catch (IOException e) { // Ignore error and continue } } }
From source file:org.roosster.store.EntryStore.java
License:Open Source License
/**
 * Returns all entries stored for the given URL, looked up through the hash
 * of the URL.
 *
 * @param url    the URL to look up; must not be null
 * @param reader the reader to search with, or null to obtain one from
 *               getReader(); a reader obtained here is closed before
 *               returning, a reader passed in by the caller is left open
 * @return the matching entries; an empty array when nothing matches
 * @throws IllegalStateException    if the store is not initialized
 * @throws IllegalArgumentException if url is null
 * @throws IOException              propagated from the index reader calls
 */
private Entry[] getEntries(URL url, IndexReader reader) throws IOException {
    if (!isInitialized()) {
        throw new IllegalStateException("Database must be initialized before use!");
    }
    if (url == null) {
        throw new IllegalArgumentException("Parameter 'url' is not allowed to be null");
    }

    boolean closeReader = false;
    TermDocs docs = null;

    try {
        LOG.debug("Getting Entry with URL: " + url);

        if (reader == null) {
            reader = getReader();
            closeReader = true;
        }

        Term term = new Term(URLHASH, computeHash(url));
        docs = reader.termDocs(term);

        List entries = new ArrayList();
        while (docs.next()) {
            entries.add(new Entry(reader.document(docs.doc()), 0));
        }

        LOG.debug("Found " + entries.size() + " entries for URL " + url);

        return (Entry[]) entries.toArray(new Entry[0]);
    } finally {
        // close the term enumeration before the reader it came from, and
        // still close the reader if closing the enumeration fails
        try {
            if (docs != null) {
                docs.close();
            }
        } finally {
            if (closeReader && reader != null) {
                reader.close();
            }
        }
    }
}