Example usage for org.apache.lucene.index IndexReader maxDoc

Introduction

On this page you can find example usages of the maxDoc() method of org.apache.lucene.index.IndexReader.

Prototype

public abstract int maxDoc();

Source Link

Documentation

Returns one greater than the largest possible document number.

Usage

From source file:com.cis.MultiFilter.java

License:Open Source License

/**
 * Computes the set of documents accepted by every filter in {@code filterList}.
 *
 * <p>The bit sets produced by the individual filters are intersected (logical AND), so a
 * document passes only if all filters allow it. When no filters are defined, an empty bit
 * set is returned, which filters out all documents.
 *
 * @param reader the index reader handed to each underlying filter
 * @return a bit set with one bit set per accepted document
 * @throws IOException if one of the underlying filters fails while reading the index
 */
public BitSet bits(IndexReader reader) throws IOException {
    int filterListSize = filterList.size();
    if (filterListSize == 0) {
        // No filters defined: an empty BitSet filters out all documents.
        return new BitSet(reader.maxDoc());
    }

    // and() mutates its receiver, so intersect into a copy. The first filter may hand
    // out a BitSet it still holds on to (for example a cached one), and that instance
    // must not be modified here.
    BitSet bits = (BitSet) ((Filter) filterList.get(0)).bits(reader).clone();
    for (int i = 1; i < filterListSize; i++) {
        bits.and(((Filter) filterList.get(i)).bits(reader));
    }
    return bits;
}

From source file:com.datasalt.pangool.solr.TestSolrOutputFormat.java

License:Apache License

/**
 * Runs {@code TupleSolrOutputFormatExample} from INPUT to OUTPUT and validates the indexes
 * found under the output root and under its "FR" and "ES" subdirectories.
 */
@Test
public void test() throws Exception {
    trash(OUTPUT);

    TupleSolrOutputFormatExample example = new TupleSolrOutputFormatExample();
    example.run(INPUT, OUTPUT, getConf());

    // Assert that indexes have been created
    assertTrue(new File(OUTPUT + "/part-00000/data/index").exists());
    assertTrue(new File(OUTPUT + "/FR/part-00000/data/index").exists());
    assertTrue(new File(OUTPUT + "/ES/part-00000/data/index").exists());

    // Validate data inside index. Every reader is closed in a finally block so that a
    // failing assertion does not leave index files open.
    IndexReader r = IndexReader.open(FSDirectory.open(new File(OUTPUT + "/part-00000/data/index")));
    try {
        assertEquals(2, r.maxDoc());

        int contentAssertions = 0;
        Set<String> distinctMessages = new HashSet<String>();

        for (int i = 0; i < 2; i++) {
            String document = r.document(i).toString();
            distinctMessages.add(document);
            if (document.contains("user_id:user1")) {
                assertTrue(document.contains("Hi, this is a message from user1."));
                contentAssertions++;
            } else if (document.contains("user_id:user4")) {
                assertTrue(document.contains("Hi, this is another message from user4."));
                contentAssertions++;
            }
        }

        // Both documents must be different and each must have matched one of the users.
        assertEquals(2, distinctMessages.size());
        assertEquals(2, contentAssertions);
    } finally {
        r.close();
    }

    r = IndexReader.open(FSDirectory.open(new File(OUTPUT + "/FR/part-00000/data/index")));
    try {
        assertEquals(1, r.maxDoc());

        String document = r.document(0).toString();
        assertTrue(document.contains("user_id:user3"));
        // NOTE(review): this literal looks like it lost a non-ASCII character somewhere;
        // confirm it against the input data.
        assertTrue(document.contains("Oh la l!"));
    } finally {
        r.close();
    }

    r = IndexReader.open(FSDirectory.open(new File(OUTPUT + "/ES/part-00000/data/index")));
    try {
        assertEquals(1, r.maxDoc());

        String document = r.document(0).toString();
        assertTrue(document.contains("user_id:user2"));
        // NOTE(review): same concern as above; the literal may be missing an accented letter.
        assertTrue(document.contains("Yo no hablo ingls."));
    } finally {
        r.close();
    }

    trash(OUTPUT);
}

From source file:com.doculibre.constellio.lucene.BaseLuceneIndexHelper.java

License:Open Source License

/**
 * Looks up the Lucene document number of the index entry that matches {@code object}.
 *
 * <p>The lookup parses a {@code field:value} query built from the unique index field
 * name and the object's unique index field value, then searches the index in
 * {@code indexDir}.
 *
 * @param object the object whose unique index field value is searched for
 * @return the document number of the top hit, or {@code -1} when the object has no
 *         unique index field value or when nothing in the index matches
 * @throws RuntimeException wrapping the {@code ParseException} or {@code IOException}
 *         raised while parsing the query or reading the index
 */
protected synchronized int getDocNum(T object) {
    String uniqueIndexFieldName = getUniqueIndexFieldName();
    String uniqueIndexFieldValue = getUniqueIndexFieldValue(object);
    if (uniqueIndexFieldValue == null) {
        return -1;
    }

    // NOTE(review): the value is concatenated into query syntax unescaped; values that
    // contain query parser metacharacters may fail to parse - confirm what values can occur.
    String query = uniqueIndexFieldName + ":" + uniqueIndexFieldValue;
    try {
        Analyzer analyzer = analyzerProvider.getAnalyzer(Locale.FRENCH);
        QueryParser multiFielsQP = new QueryParser(Version.LUCENE_44, uniqueIndexFieldName, analyzer);
        Query luceneQuery = multiFielsQP.parse(query);

        // Nested try/finally: the reader and the directory are released even when
        // opening the reader or running the search throws.
        Directory directory = FSDirectory.open(indexDir);
        try {
            IndexReader reader = DirectoryReader.open(directory);
            try {
                IndexSearcher indexSearcher = new IndexSearcher(reader);
                // Only the top hit is read, so one result is enough. Asking for
                // reader.maxDoc() results would request 0 hits on an empty index,
                // which the searcher rejects with an IllegalArgumentException.
                TopDocs topDocs = indexSearcher.search(luceneQuery, 1);
                if (topDocs.totalHits > 0) {
                    return topDocs.scoreDocs[0].doc;
                }
                return -1;
            } finally {
                reader.close();
            }
        } finally {
            directory.close();
        }
    } catch (ParseException e) {
        throw new RuntimeException(e);
    } catch (IOException e) {
        // Also covers CorruptIndexException, which is an IOException.
        throw new RuntimeException(e);
    }
}

From source file:com.downtree.tourbus.search.LocationFilter.java

/**
 * Marks every document of type {@code m_type} that lies within {@code m_radius} of the
 * configured center point.
 *
 * <p>Candidate documents are those containing the term {@code ferret_class:m_type}.
 * Their stored "latitude" and "longitude" fields are parsed as degrees and the
 * great-circle distance to the center is computed with the spherical law of cosines.
 * Documents without a latitude are skipped. Documents whose coordinates cannot be
 * processed are logged and skipped.
 *
 * @param reader the index reader to scan
 * @return a bit set with one bit set per document inside the radius
 * @throws IOException if reading the index fails
 */
@Override
public BitSet bits(IndexReader reader) throws IOException {
    BitSet bits = new BitSet(reader.maxDoc());

    TermDocs docs = reader.termDocs(new Term("ferret_class", m_type));
    try {
        while (docs.next()) {
            Document doc = reader.document(docs.doc());
            String value = doc.get("latitude");
            if (value == null) {
                continue;
            }

            try {
                double latitude = Double.parseDouble(value) * DEG2RAD;
                double longitude = Double.parseDouble(doc.get("longitude")) * DEG2RAD;

                double x = (Math.sin(latitude) * Math.sin(m_centerLat))
                        + (Math.cos(latitude) * Math.cos(m_centerLat) * Math.cos(longitude - m_centerLong));

                // Rounding can push the cosine slightly outside [-1, 1], where acos is
                // undefined. Clamp instead of defaulting to 0: a value at or below -1
                // means the point is antipodal (maximum distance), not at the center.
                double clamped = Math.max(-1.0, Math.min(1.0, x));
                double distance = Math.acos(clamped) * EARTH_RADIUS;

                if (distance <= m_radius) {
                    bits.set(docs.doc());
                }
            } catch (Exception e) {
                // Best effort: a document with bad coordinates is reported and skipped.
                SolrException.logOnce(SolrCore.log, "Error in location filter", e);
            }
        }
    } finally {
        // Release the term enumeration even when reading a document fails.
        docs.close();
    }

    return bits;
}

From source file:com.duroty.lucene.filter.ChainedFilter.java

License:Apache License

/**
 * Delegates to each filter in the chain, combining their bit sets with a single
 * logical operation.
 *
 * @param reader IndexReader
 * @param logic Logical operation applied between all filters of the chain
 * @return BitSet holding the combined result; empty when the chain has no filters
 * @throws IOException if a filter in the chain fails while reading the index
 */
private BitSet bits(IndexReader reader, int logic) throws IOException {
    // An empty chain yields an empty result for every logic. Without this guard the
    // AND branch below would index chain[0] and throw ArrayIndexOutOfBoundsException.
    if (chain.length == 0) {
        return new BitSet(reader.maxDoc());
    }

    BitSet result;
    int i = 0;

    /**
     * First AND operation takes place against a completely false
     * bitset and will always return zero results. Thanks to
     * Daniel Armbrust for pointing this out and suggesting workaround.
     */
    if (logic == AND) {
        // Seed with a copy of the first filter's bits so that filter's own BitSet
        // is not modified by the operations that follow.
        result = (BitSet) chain[i].bits(reader).clone();
        ++i;
    } else {
        result = new BitSet(reader.maxDoc());
    }

    for (; i < chain.length; i++) {
        doChain(result, reader, logic, chain[i]);
    }

    return result;
}

From source file:com.duroty.lucene.filter.ChainedFilter.java

License:Apache License

/**
 * Delegates to each filter in the chain, combining their bit sets with a per-filter
 * logical operation.
 *
 * @param reader IndexReader
 * @param logic Logical operations, one per filter; {@code logic[i]} is applied when
 *        folding in {@code chain[i]}
 * @return BitSet holding the combined result; empty when the chain has no filters
 * @throws IllegalArgumentException if {@code logic} and the chain differ in length
 * @throws IOException if a filter in the chain fails while reading the index
 */
private BitSet bits(IndexReader reader, int[] logic) throws IOException {
    if (logic.length != chain.length) {
        throw new IllegalArgumentException("Invalid number of elements in logic array");
    }

    // Both arrays are empty here: return an empty result instead of failing on
    // logic[0] / chain[0] with ArrayIndexOutOfBoundsException.
    if (chain.length == 0) {
        return new BitSet(reader.maxDoc());
    }

    BitSet result;
    int i = 0;

    /**
     * First AND operation takes place against a completely false
     * bitset and will always return zero results. Thanks to
     * Daniel Armbrust for pointing this out and suggesting workaround.
     */
    if (logic[0] == AND) {
        // Seed with a copy of the first filter's bits so that filter's own BitSet
        // is not modified by the operations that follow.
        result = (BitSet) chain[i].bits(reader).clone();
        ++i;
    } else {
        result = new BitSet(reader.maxDoc());
    }

    for (; i < chain.length; i++) {
        doChain(result, reader, logic[i], chain[i]);
    }

    return result;
}

From source file:com.esri.gpt.catalog.lucene.AclFilter.java

License:Apache License

/**
 * Queries for documents that have no values associated with the field.
 *
 * <p>A {@code null} or blank field name produces an empty bit set.
 *
 * @param reader the index reader
 * @param field the name of the field to inspect
 * @return the OpenBitSet (documents with no values set to true, deleted documents excluded)
 * @throws IOException if an exception is encountered while reading the index
 */
private OpenBitSet queryNulls(IndexReader reader, String field) throws IOException {
    int nBits = reader.maxDoc();
    OpenBitSet bitSet = new OpenBitSet(nBits);
    if ((field == null) || (field.trim().length() == 0)) {
        return bitSet;
    }

    TermEnum termEnum = null;
    TermDocs termDocs = null;
    try {

        // find all documents that have a term for the field, then flip the bit set
        termEnum = reader.terms(new Term(field));
        termDocs = reader.termDocs();
        do {
            Term term = termEnum.term();
            // The enumeration is ordered by field and then by text, so the first term
            // of another field (or the end of the enumeration) means this field is
            // exhausted; stop instead of walking every remaining term in the index.
            if ((term == null) || !term.field().equals(field)) {
                break;
            }
            termDocs.seek(term);
            while (termDocs.next()) {
                bitSet.fastSet(termDocs.doc());
            }
        } while (termEnum.next());

        bitSet.flip(0, nBits);
        if (reader.hasDeletions()) {
            // The flip also turned on the bits of deleted documents; turn them off again.
            for (int i = 0; i < nBits; i++) {
                if (bitSet.get(i) && reader.isDeleted(i)) {
                    bitSet.fastFlip(i);
                }
            }
        }

    } finally {
        try {
            if (termEnum != null)
                termEnum.close();
        } catch (Exception ignored) {
            // best-effort close; a failure here must not hide the outcome of the query
        }
        try {
            if (termDocs != null)
                termDocs.close();
        } catch (Exception ignored) {
            // best-effort close; a failure here must not hide the outcome of the query
        }
    }
    return bitSet;
}

From source file:com.esri.gpt.catalog.lucene.AclFilter.java

License:Apache License

/**
 * Collects the documents whose field holds at least one of the supplied values.
 *
 * <p>Each value is trimmed and lower-cased before it is looked up. A {@code null} or
 * empty value array produces an empty bit set.
 *
 * @param reader the index reader
 * @param field the name of the field to match against
 * @param values the candidate values
 * @return the OpenBitSet (documents with matches are set to true)
 * @throws IOException if an exception is encountered while reading the index
 */
private OpenBitSet queryValues(IndexReader reader, String field, String[] values) throws IOException {
    OpenBitSet matches = new OpenBitSet(reader.maxDoc());
    if ((values == null) || (values.length == 0)) {
        return matches;
    }

    TermDocs postings = null;
    try {
        Term template = new Term(field);
        postings = reader.termDocs();
        for (int i = 0; i < values.length; i++) {
            String normalized = values[i].trim().toLowerCase();
            postings.seek(template.createTerm(normalized));
            while (postings.next()) {
                matches.set(postings.doc());
            }
        }
    } finally {
        if (postings != null) {
            try {
                postings.close();
            } catch (Exception ignored) {
                // closing is best-effort
            }
        }
    }
    return matches;
}

From source file:com.esri.gpt.catalog.lucene.SchemaFilter.java

License:Apache License

/**
 * Collects the documents whose field holds exactly the supplied value.
 *
 * <p>A {@code null} or empty value produces an empty bit set.
 *
 * @param reader the index reader
 * @param field the name of the field to match against
 * @param value the value to look up
 * @return the OpenBitSet (documents with matches are set to true)
 * @throws IOException if an exception is encountered while reading the index
 */
private OpenBitSet queryValue(IndexReader reader, String field, String value) throws IOException {
    OpenBitSet matches = new OpenBitSet(reader.maxDoc());
    if ((value == null) || (value.length() == 0)) {
        return matches;
    }

    TermDocs postings = null;
    try {
        Term target = new Term(field, value);
        postings = reader.termDocs();
        postings.seek(target);
        while (postings.next()) {
            matches.set(postings.doc());
        }
    } finally {
        if (postings != null) {
            try {
                postings.close();
            } catch (Exception ignored) {
                // closing is best-effort
            }
        }
    }
    return matches;
}

From source file:com.esri.gpt.catalog.lucene.stats.GlobalFieldStats.java

License:Apache License

/**
 * Executes the collection of statistics.
 * @param request the active statistics request
 * @param reader the index reader/*from  w w w.j a  v a  2s  . c  o  m*/
 * @throws IOException if an error occurs while communicating with the index
 */
public void collectStats(StatsRequest request, IndexReader reader) throws IOException {
    long t1 = System.currentTimeMillis();
    TermEnum termEnum = null;
    TermDocs termDocs = null;
    try {
        OpenBitSet documentFilterBitSet = request.getDocumentFilterBitSet();
        boolean isUnfiltered = (documentFilterBitSet == null);

        // return if there are no stats to collect
        String[] fieldNames = request.getCollectableFieldNames(reader);
        if (this.determineNumberOfDocsConsidered(reader, documentFilterBitSet) <= 0) {
            return;
        } else if ((fieldNames == null) || (fieldNames.length == 0)) {
            return;
        }

        // accumulate field frequencies per document
        termDocs = reader.termDocs();
        for (String fieldName : fieldNames) {
            termEnum = reader.terms(new Term(fieldName));
            OpenBitSet docsWithFieldBitSet = new OpenBitSet(reader.maxDoc());
            do {
                Term term = termEnum.term();
                if (term != null && term.field().equals(fieldName)) {

                    termDocs.seek(term);
                    while (termDocs.next()) {
                        int docId = termDocs.doc();
                        boolean bSet = isUnfiltered || documentFilterBitSet.fastGet(docId);
                        if (bSet) {
                            docsWithFieldBitSet.fastSet(docId);
                        }
                    }

                } else {
                    break;
                }
            } while (termEnum.next());
            termEnum.close();
            termEnum = null;

            if (docsWithFieldBitSet.cardinality() > 0) {
                this.fieldAccumulator.add(fieldName, docsWithFieldBitSet.cardinality());
            }
        }

        // sort
        if (this.getSortByFrequency()) {
            this.fieldAccumulator.sortByFrequency();
        } else {
            this.fieldAccumulator.sortByName();
        }

    } finally {
        try {
            if (termEnum != null)
                termEnum.close();
        } catch (Exception ef) {
        }
        try {
            if (termDocs != null)
                termDocs.close();
        } catch (Exception ef) {
        }
        this.setTimeMillis(System.currentTimeMillis() - t1);
    }

    // print
    if (request.getResponseWriter() != null) {
        this.print(request);
    }
}