Example usage for org.apache.lucene.index IndexReader leaves

Introduction

This page collects example usages of org.apache.lucene.index.IndexReader.leaves().

Prototype

public final List<LeafReaderContext> leaves() 

Document

Returns the reader's leaves, or itself if this reader is atomic.
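
Each returned LeafReaderContext wraps a single segment reader and records its docBase offset into the composite reader's document id space. In Lucene 4.x the leaf types were named AtomicReaderContext and AtomicReader; Lucene 5 renamed them to LeafReaderContext and LeafReader, which is why both spellings appear in the examples below. A minimal sketch of the usual iteration pattern (assuming an already-open reader, a hypothetical field name "content", and an enclosing method that may throw IOException):

for (LeafReaderContext leafCtx : reader.leaves()) {
    LeafReader leaf = leafCtx.reader();   // one segment of the index
    Terms terms = leaf.terms("content");  // per-segment view; may be null
    if (terms != null) {
        // enumerate this segment's terms, postings, doc values, live docs, ...
    }
}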

Usage

From source file: org.exist.indexing.lucene.XMLToQuery.java

License: Open Source License
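
This example expands a wildcard query into the concrete terms it matches by running a compiled automaton against the terms of each leaf: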

private Term[] expandTerms(String field, String queryStr) throws XPathException {
    List<Term> termList = new ArrayList<>(8);
    Automaton automaton = WildcardQuery.toAutomaton(new Term(field, queryStr));
    CompiledAutomaton compiled = new CompiledAutomaton(automaton);
    IndexReader reader = null;
    try {
        reader = index.getReader();

        for (AtomicReaderContext atomic : reader.leaves()) {
            Terms terms = atomic.reader().terms(field);
            if (terms != null) {
                TermsEnum termsEnum = compiled.getTermsEnum(terms);
                BytesRef data = termsEnum.next();
                while (data != null) {
                    String term = data.utf8ToString();
                    termList.add(new Term(field, term));
                    data = termsEnum.next();
                }
            }
        }
    } catch (IOException e) {
        throw new XPathException("Lucene index error while creating query: " + e.getMessage(), e);
    } finally {
        index.releaseReader(reader);
    }
    Term[] matchingTerms = new Term[termList.size()];
    return termList.toArray(matchingTerms);
}

From source file: org.exist.indexing.range.RangeIndexWorker.java

License: Open Source License
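
Here a range index worker scans each leaf's terms between optional start and end bounds, consulting live docs and doc values to collect per-document occurrence counts: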

private void scan(DocumentSet docs, NodeSet nodes, String start, String end, long max,
        TreeMap<String, Occurrences> map, IndexReader reader, String field) throws IOException {
    List<AtomicReaderContext> leaves = reader.leaves();
    for (AtomicReaderContext context : leaves) {
        NumericDocValues docIdValues = context.reader().getNumericDocValues(FIELD_DOC_ID);
        BinaryDocValues nodeIdValues = context.reader().getBinaryDocValues(FIELD_NODE_ID);
        Bits liveDocs = context.reader().getLiveDocs();
        Terms terms = context.reader().terms(field);
        if (terms == null)
            continue;
        TermsEnum termsIter = terms.iterator(null);
        if (termsIter.next() == null) {
            continue;
        }
        do {
            if (map.size() >= max) {
                break;
            }
            BytesRef ref = termsIter.term();
            String term = ref.utf8ToString();
            boolean include = true;
            if (end != null) {
                if (term.compareTo(end) > 0)
                    include = false;
            } else if (start != null && !term.startsWith(start))
                include = false;
            if (include) {
                DocsEnum docsEnum = termsIter.docs(null, null);
                while (docsEnum.nextDoc() != DocsEnum.NO_MORE_DOCS) {
                    if (liveDocs != null && !liveDocs.get(docsEnum.docID())) {
                        continue;
                    }
                    int docId = (int) docIdValues.get(docsEnum.docID());
                    DocumentImpl storedDocument = docs.getDoc(docId);
                    if (storedDocument == null)
                        continue;
                    NodeId nodeId = null;
                    if (nodes != null) {
                        BytesRef nodeIdRef = new BytesRef(buf); // buf is a reusable byte[] buffer field declared elsewhere in this class
                        nodeIdValues.get(docsEnum.docID(), nodeIdRef);
                        int units = ByteConversion.byteToShort(nodeIdRef.bytes, nodeIdRef.offset);
                        nodeId = index.getBrokerPool().getNodeFactory().createFromData(units, nodeIdRef.bytes,
                                nodeIdRef.offset + 2);
                    }
                    if (nodeId == null || nodes.get(storedDocument, nodeId) != null) {
                        Occurrences oc = map.get(term);
                        if (oc == null) {
                            oc = new Occurrences(term);
                            map.put(term, oc);
                        }
                        oc.addDocument(storedDocument);
                        oc.addOccurrences(docsEnum.freq());
                    }
                }
            }
        } while (termsIter.next() != null);
    }
}

From source file: org.hibernate.search.test.directoryProvider.FSDirectoryTest.java

License: LGPL

/**
 * Project a field as a String from a Lucene Document matching the provided term.
 * The method asserts that one match is found, and no more.
 */
private String projectSingleField(IndexReader reader, String fieldName, Term term) throws IOException {
    String projection = null;
    for (LeafReaderContext leaf : reader.leaves()) {
        final LeafReader atomicReader = leaf.reader();
        final DocsEnum termDocsEnum = atomicReader.termDocsEnum(term);
        if (termDocsEnum == null) {
            // the term does not occur in this leaf; skip it
            continue;
        }
        while (termDocsEnum.nextDoc() != DocsEnum.NO_MORE_DOCS) {
            final int docID = termDocsEnum.docID();
            org.apache.lucene.document.Document document = reader.document(docID);
            String value = document.get(fieldName);
            Assert.assertNull(
                    "duplicate matches found! This method assumes a single document will match the Term.",
                    projection);
            projection = value;
        }
    }
    Assert.assertNotNull(projection);
    return projection;
}

From source file: org.meresco.lucene.search.SuperIndexSearcher.java

License: Open Source License
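
This constructor partitions the reader's leaves into groups so that search work can later be spread over an executor: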

public SuperIndexSearcher(IndexReader reader, ExecutorService executor, int tasks) {
    super(reader);
    this.executor = executor;
    this.grouped_leaves = this.group_leaves(reader.leaves(), tasks);
    //        for (List<AtomicReaderContext> l : this.grouped_leaves) {
    //            int t = 0;
    //            for (AtomicReaderContext ctx : l)
    //                t += ctx.reader().numDocs();
    //             System.out.print(" " + t + " ");
    //        }
    //        System.out.println();
}

From source file: org.meresco.lucene.suggestion.SuggestionNGramIndex.java

License: Open Source License
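
This method enumerates all suggestion terms and uses the leaves list to resolve each matching document to a stored key before indexing n-grams: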

public void createSuggestions(IndexReader reader, String suggestionFieldname, String keyFieldname,
        IndexingState indexingState) throws IOException {
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    List<AtomicReaderContext> leaves = reader.leaves();
    Terms terms = MultiFields.getTerms(reader, suggestionFieldname);
    if (terms == null)
        return;
    TermsEnum termsEnum = terms.iterator(null);
    BytesRef term;
    while ((term = termsEnum.next()) != null) {
        List<Long> keys = new ArrayList<>();
        DocsEnum docsEnum = termsEnum.docs(liveDocs, null, DocsEnum.FLAG_NONE);
        while (true) {
            int docId = docsEnum.nextDoc();
            if (docId == DocsEnum.NO_MORE_DOCS) {
                break;
            }
            keys.add(keyForDoc(docId, leaves, keyFieldname));
        }
        if (keys.size() > 0) {
            String[] values = term.utf8ToString().split(SuggestionIndex.CONCAT_MARKER.replace("$", "\\$"));
            indexNGram(values[0], values[1], values[2], keys);
            indexingState.count++;
        }
    }
    this.commit();
}

From source file: org.neo4j.kernel.api.impl.schema.sampler.NonUniqueLuceneIndexSampler.java

License: Open Source License
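
Neo4j's index sampler walks every leaf and feeds each term and its document frequency into a statistical sampler: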

@Override
protected IndexSample performSampling() throws IndexNotFoundKernelException {
    NonUniqueIndexSampler sampler = new NonUniqueIndexSampler(indexSamplingConfig.sampleSizeLimit());
    IndexReader indexReader = indexSearcher.getIndexReader();
    for (LeafReaderContext readerContext : indexReader.leaves()) {
        try {
            Set<String> fieldNames = getFieldNamesToSample(readerContext);
            for (String fieldName : fieldNames) {
                Terms terms = readerContext.reader().terms(fieldName);
                if (terms != null) {
                    TermsEnum termsEnum = LuceneDocumentStructure.originalTerms(terms, fieldName);
                    BytesRef termsRef;
                    while ((termsRef = termsEnum.next()) != null) {
                        sampler.include(termsRef.utf8ToString(), termsEnum.docFreq());
                        checkCancellation();
                    }
                }
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    return sampler.result(indexReader.numDocs());
}

From source file: org.openrdf.sail.lucene4.LuceneIndex.java

License: BSD License

/**
 * Returns a Document representing the specified document ID (combination of
 * resource and context), or null when no such Document exists yet.
 */
private Document getDocument(Term idTerm) throws IOException {
    IndexReader reader = getIndexReader();
    List<AtomicReaderContext> leaves = reader.leaves();
    int size = leaves.size();
    for (int i = 0; i < size; i++) {
        AtomicReader lreader = leaves.get(i).reader();
        Document document = getDocument(lreader, idTerm);
        if (document != null) {
            return document;
        }
    }
    // no such Document
    return null;
}

From source file: org.openrdf.sail.lucene4.LuceneIndex.java

License: BSD License

/**
 * Returns a list of Documents representing the specified Resource (empty
 * when no such Document exists yet). Each document represents a set of
 * statements with the specified Resource as a subject, which are stored in a
 * specific context.
 */
private List<Document> getDocuments(Term uriTerm) throws IOException {
    List<Document> result = new ArrayList<Document>();

    IndexReader reader = getIndexReader();
    List<AtomicReaderContext> leaves = reader.leaves();
    int size = leaves.size();
    for (int i = 0; i < size; i++) {
        AtomicReader lreader = leaves.get(i).reader();
        addDocuments(lreader, uriTerm, result);
    }

    return result;
}

From source file: org.openrdf.sail.lucene4.LuceneIndex.java

License: BSD License
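
This helper checks whether a document has been deleted by inspecting the live-docs bits of the reader's leaves: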

private static boolean isDeleted(IndexReader reader, int docId) {
    if (reader.hasDeletions()) {
        List<AtomicReaderContext> leaves = reader.leaves();
        int size = leaves.size();
        for (int i = 0; i < size; i++) {
            AtomicReaderContext context = leaves.get(i);
            Bits liveDocs = context.reader().getLiveDocs();
            // leaves without deletions expose no live-docs bits
            if (liveDocs == null) {
                continue;
            }
            // translate the composite reader's docId into this leaf's id space
            int localDocId = docId - context.docBase;
            if (localDocId >= 0 && localDocId < liveDocs.length() && !liveDocs.get(localDocId)) {
                return true;
            }
        }
        return false;
    } else {
        return false;
    }
}

From source file: org.tallison.lucene.search.concordance.TestSimpleAnalyzerUtil.java

License: Apache License
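
This test takes the first (and only) leaf of a freshly opened reader and walks postings with offsets to reassemble substrings that span multi-valued fields: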

private void executeNeedleTests(Analyzer analyzer) throws Exception {

    String needle = getNeedle(analyzer);
    int numFieldValues = 23;

    Directory directory = buildNeedleIndex(needle, analyzer, numFieldValues);

    IndexReader reader = DirectoryReader.open(directory);

    LeafReaderContext ctx = reader.leaves().get(0);
    LeafReader r = ctx.reader();

    PostingsEnum dpe = r.postings(new Term(FIELD, needle), PostingsEnum.ALL);
    int numTests = 0;
    try {
        while (dpe.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            int frq = dpe.freq();
            int advanced = 0;

            String[] fieldValues = r.document(dpe.docID()).getValues(FIELD);
            while (++advanced < frq) {
                dpe.nextPosition();
                String rebuilt = SimpleAnalyzerUtil.substringFromMultiValuedFields(dpe.startOffset(),
                        dpe.endOffset(), fieldValues, analyzer.getOffsetGap(FIELD), " | ");
                assertEquals(needle, rebuilt);
                numTests++;
            }
        }
    } finally {
        reader.close();
        directory.close();
    }
    assertEquals("number of tests", numFieldValues - 1, numTests);
}