Usage examples for org.apache.lucene.index.IndexReader.leaves()
public final List<LeafReaderContext> leaves()
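leaves() returns one LeafReaderContext per index segment, in docBase order; all per-segment state (terms, doc values, live docs) must be fetched through each leaf's reader. A minimal sketch of the iteration pattern (the directory variable and the printed fields are illustrative, not taken from the examples below):

    // Open a composite reader and visit every segment it is built from.
    IndexReader reader = DirectoryReader.open(directory);
    try {
        for (LeafReaderContext leaf : reader.leaves()) {
            LeafReader segmentReader = leaf.reader();
            // docBase translates this segment's local doc IDs into the
            // composite reader's top-level doc ID space.
            System.out.println("segment at docBase=" + leaf.docBase + " holds "
                    + segmentReader.maxDoc() + " docs ("
                    + segmentReader.numDeletedDocs() + " deleted)");
        }
    } finally {
        reader.close();
    }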
From source file: org.exist.indexing.lucene.XMLToQuery.java
License: Open Source License
private Term[] expandTerms(String field, String queryStr) throws XPathException {
    List<Term> termList = new ArrayList<>(8);
    Automaton automaton = WildcardQuery.toAutomaton(new Term(field, queryStr));
    CompiledAutomaton compiled = new CompiledAutomaton(automaton);
    IndexReader reader = null;
    try {
        reader = index.getReader();
        for (AtomicReaderContext atomic : reader.leaves()) {
            Terms terms = atomic.reader().terms(field);
            if (terms != null) {
                // Walk only the terms of this segment that the wildcard automaton accepts.
                TermsEnum termsEnum = compiled.getTermsEnum(terms);
                BytesRef data = termsEnum.next();
                while (data != null) {
                    String term = data.utf8ToString();
                    termList.add(new Term(field, term));
                    data = termsEnum.next();
                }
            }
        }
    } catch (IOException e) {
        throw new XPathException("Lucene index error while creating query: " + e.getMessage(), e);
    } finally {
        index.releaseReader(reader);
    }
    Term[] matchingTerms = new Term[termList.size()];
    return termList.toArray(matchingTerms);
}
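Note that the expansion runs per segment and termList is not deduplicated, so a term present in several segments is added more than once; whether that matters depends on how the caller builds the query from the returned array.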
From source file: org.exist.indexing.range.RangeIndexWorker.java
License: Open Source License
private void scan(DocumentSet docs, NodeSet nodes, String start, String end, long max,
        TreeMap<String, Occurrences> map, IndexReader reader, String field) throws IOException {
    List<AtomicReaderContext> leaves = reader.leaves();
    for (AtomicReaderContext context : leaves) {
        // Doc values and live docs are per segment, so fetch them from each leaf reader.
        NumericDocValues docIdValues = context.reader().getNumericDocValues(FIELD_DOC_ID);
        BinaryDocValues nodeIdValues = context.reader().getBinaryDocValues(FIELD_NODE_ID);
        Bits liveDocs = context.reader().getLiveDocs();
        Terms terms = context.reader().terms(field);
        if (terms == null)
            continue;
        TermsEnum termsIter = terms.iterator(null);
        if (termsIter.next() == null) {
            continue;
        }
        do {
            if (map.size() >= max) {
                break;
            }
            BytesRef ref = termsIter.term();
            String term = ref.utf8ToString();
            boolean include = true;
            if (end != null) {
                if (term.compareTo(end) > 0)
                    include = false;
            } else if (start != null && !term.startsWith(start))
                include = false;
            if (include) {
                DocsEnum docsEnum = termsIter.docs(null, null);
                while (docsEnum.nextDoc() != DocsEnum.NO_MORE_DOCS) {
                    if (liveDocs != null && !liveDocs.get(docsEnum.docID())) {
                        continue; // skip deleted documents
                    }
                    int docId = (int) docIdValues.get(docsEnum.docID());
                    DocumentImpl storedDocument = docs.getDoc(docId);
                    if (storedDocument == null)
                        continue;
                    NodeId nodeId = null;
                    if (nodes != null) {
                        BytesRef nodeIdRef = new BytesRef(buf);
                        nodeIdValues.get(docsEnum.docID(), nodeIdRef);
                        int units = ByteConversion.byteToShort(nodeIdRef.bytes, nodeIdRef.offset);
                        nodeId = index.getBrokerPool().getNodeFactory().createFromData(units,
                                nodeIdRef.bytes, nodeIdRef.offset + 2);
                    }
                    if (nodeId == null || nodes.get(storedDocument, nodeId) != null) {
                        Occurrences oc = map.get(term);
                        if (oc == null) {
                            oc = new Occurrences(term);
                            map.put(term, oc);
                        }
                        oc.addDocument(storedDocument);
                        oc.addOccurrences(docsEnum.freq());
                    }
                }
            }
        } while (termsIter.next() != null);
    }
}
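Everything obtained from context.reader() here is segment-scoped: the doc values, the live docs, and the doc IDs returned by the DocsEnum are all local to that leaf, which is why the code resolves documents through the stored FIELD_DOC_ID doc value rather than through Lucene doc IDs.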
From source file: org.hibernate.search.test.directoryProvider.FSDirectoryTest.java
License: LGPL
/**
 * Project a field as a String from a Lucene Document matching the provided term.
 * The method asserts that one match is found, and no more.
 */
private String projectSingleField(IndexReader reader, String fieldName, Term term) throws IOException {
    String projection = null;
    for (LeafReaderContext leaf : reader.leaves()) {
        final LeafReader atomicReader = leaf.reader();
        final DocsEnum termDocsEnum = atomicReader.termDocsEnum(term);
        if (termDocsEnum == null) {
            continue; // the term does not occur in this segment
        }
        while (termDocsEnum.nextDoc() != DocsEnum.NO_MORE_DOCS) {
            // termDocsEnum yields segment-local doc IDs; add docBase to read
            // the document through the top-level reader.
            final int docID = leaf.docBase + termDocsEnum.docID();
            org.apache.lucene.document.Document document = reader.document(docID);
            String value = document.get(fieldName);
            Assert.assertNull("duplicate matches found! This method assumes a single document will match the Term.",
                    projection);
            projection = value;
        }
    }
    Assert.assertNotNull(projection);
    return projection;
}
From source file: org.meresco.lucene.search.SuperIndexSearcher.java
License: Open Source License
public SuperIndexSearcher(IndexReader reader, ExecutorService executor, int tasks) {
    super(reader);
    this.executor = executor;
    // Partition the index segments into groups so each task can search one group in parallel.
    this.grouped_leaves = this.group_leaves(reader.leaves(), tasks);
}
From source file: org.meresco.lucene.suggestion.SuggestionNGramIndex.java
License: Open Source License
public void createSuggestions(IndexReader reader, String suggestionFieldname, String keyFieldname,
        IndexingState indexingState) throws IOException {
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    List<AtomicReaderContext> leaves = reader.leaves();
    Terms terms = MultiFields.getTerms(reader, suggestionFieldname);
    if (terms == null)
        return;
    TermsEnum termsEnum = terms.iterator(null);
    BytesRef term;
    while ((term = termsEnum.next()) != null) {
        List<Long> keys = new ArrayList<>();
        DocsEnum docsEnum = termsEnum.docs(liveDocs, null, DocsEnum.FLAG_NONE);
        while (true) {
            int docId = docsEnum.nextDoc();
            if (docId == DocsEnum.NO_MORE_DOCS) {
                break;
            }
            keys.add(keyForDoc(docId, leaves, keyFieldname));
        }
        if (keys.size() > 0) {
            String[] values = term.utf8ToString().split(SuggestionIndex.CONCAT_MARKER.replace("$", "\\$"));
            indexNGram(values[0], values[1], values[2], keys);
            indexingState.count++;
        }
    }
    this.commit();
}
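This example mixes both views: MultiFields.getTerms and MultiFields.getLiveDocs present a merged, top-level slice of the index, so the DocsEnum yields top-level doc IDs, while the leaves list is passed to keyForDoc, which presumably uses each leaf's docBase to locate the segment holding the document's key value.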
From source file: org.neo4j.kernel.api.impl.schema.sampler.NonUniqueLuceneIndexSampler.java
License: Open Source License
@Override
protected IndexSample performSampling() throws IndexNotFoundKernelException {
    NonUniqueIndexSampler sampler = new NonUniqueIndexSampler(indexSamplingConfig.sampleSizeLimit());
    IndexReader indexReader = indexSearcher.getIndexReader();
    for (LeafReaderContext readerContext : indexReader.leaves()) {
        try {
            Set<String> fieldNames = getFieldNamesToSample(readerContext);
            for (String fieldName : fieldNames) {
                Terms terms = readerContext.reader().terms(fieldName);
                if (terms != null) {
                    TermsEnum termsEnum = LuceneDocumentStructure.originalTerms(terms, fieldName);
                    BytesRef termsRef;
                    while ((termsRef = termsEnum.next()) != null) {
                        sampler.include(termsRef.utf8ToString(), termsEnum.docFreq());
                        checkCancellation();
                    }
                }
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
    return sampler.result(indexReader.numDocs());
}
From source file: org.openrdf.sail.lucene4.LuceneIndex.java
License: BSD License
/**
 * Returns a Document representing the specified document ID (combination of
 * resource and context), or null when no such Document exists yet.
 */
private Document getDocument(Term idTerm) throws IOException {
    IndexReader reader = getIndexReader();
    List<AtomicReaderContext> leaves = reader.leaves();
    int size = leaves.size();
    for (int i = 0; i < size; i++) {
        AtomicReader lreader = leaves.get(i).reader();
        Document document = getDocument(lreader, idTerm);
        if (document != null) {
            return document;
        }
    }
    // no such Document
    return null;
}
From source file: org.openrdf.sail.lucene4.LuceneIndex.java
License: BSD License
/**
 * Returns a list of Documents representing the specified Resource (empty
 * when no such Document exists yet). Each document represents a set of
 * statements with the specified Resource as a subject, which are stored in a
 * specific context.
 */
private List<Document> getDocuments(Term uriTerm) throws IOException {
    List<Document> result = new ArrayList<Document>();
    IndexReader reader = getIndexReader();
    List<AtomicReaderContext> leaves = reader.leaves();
    int size = leaves.size();
    for (int i = 0; i < size; i++) {
        AtomicReader lreader = leaves.get(i).reader();
        addDocuments(lreader, uriTerm, result);
    }
    return result;
}
From source file: org.openrdf.sail.lucene4.LuceneIndex.java
License: BSD License
private static boolean isDeleted(IndexReader reader, int docId) {
    if (!reader.hasDeletions()) {
        return false;
    }
    for (AtomicReaderContext leaf : reader.leaves()) {
        // Translate the top-level doc ID into this segment's doc ID space.
        int segmentDocId = docId - leaf.docBase;
        if (segmentDocId < 0 || segmentDocId >= leaf.reader().maxDoc()) {
            continue; // docId does not fall inside this segment
        }
        // getLiveDocs() returns null when the segment has no deletions.
        Bits liveDocs = leaf.reader().getLiveDocs();
        return liveDocs != null && !liveDocs.get(segmentDocId);
    }
    return false;
}
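getLiveDocs() is segment-scoped in two ways: it returns null when the segment has no deletions at all, and the Bits it returns is indexed by segment-local doc IDs, so a top-level doc ID must be shifted by the leaf's docBase before it can be tested.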
From source file: org.tallison.lucene.search.concordance.TestSimpleAnalyzerUtil.java
License: Apache License
private void executeNeedleTests(Analyzer analyzer) throws Exception {
    String needle = getNeedle(analyzer);
    int numFieldValues = 23;
    Directory directory = buildNeedleIndex(needle, analyzer, numFieldValues);
    IndexReader reader = DirectoryReader.open(directory);
    LeafReaderContext ctx = reader.leaves().get(0);
    LeafReader r = ctx.reader();
    PostingsEnum dpe = r.postings(new Term(FIELD, needle), PostingsEnum.ALL);
    int numTests = 0;
    try {
        while (dpe.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            int frq = dpe.freq();
            int advanced = 0;
            String[] fieldValues = r.document(dpe.docID()).getValues(FIELD);
            while (++advanced < frq) {
                dpe.nextPosition();
                String rebuilt = SimpleAnalyzerUtil.substringFromMultiValuedFields(dpe.startOffset(),
                        dpe.endOffset(), fieldValues, analyzer.getOffsetGap(FIELD), " | ");
                assertEquals(needle, rebuilt);
                numTests++;
            }
        }
    } finally {
        reader.close();
        directory.close();
    }
    assertEquals("number of tests", numFieldValues - 1, numTests);
}
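The test reads only leaves().get(0), i.e. it assumes the freshly built index consists of a single segment; an index written across several segments would need the full loop shown in the earlier examples.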