Example usage for org.apache.lucene.index IndexReader maxDoc

List of usage examples for org.apache.lucene.index.IndexReader#maxDoc()

Introduction

On this page you can find example usages of the maxDoc() method of org.apache.lucene.index.IndexReader.

Prototype

public abstract int maxDoc();

Source Link

Document

Returns one greater than the largest possible document number.

Usage

From source file:com.cis.MultiFilter.java

License:Open Source License

/**
 * Computes the documents accepted by every filter in {@code filterList}.
 *
 * <p>The bit sets produced by the individual filters are combined with a
 * logical AND, so a document is allowed only if each filter allows it. When
 * no filters are configured an empty bit set sized to
 * {@code reader.maxDoc()} is returned, i.e. every document is filtered out.
 *
 * @param reader the index reader handed to each filter
 * @return a bit set with a bit set for each document allowed by all filters
 * @throws IOException if one of the filters fails while reading the index
 */
public BitSet bits(IndexReader reader) throws IOException {
    int filterListSize = filterList.size();
    if (filterListSize == 0) {
        // No filters defined: return an all-false BitSet so that nothing
        // passes. Allocating an empty BitSet is also cheap.
        return new BitSet(reader.maxDoc());
    }

    // AND into a private copy. The BitSet returned by the first filter is
    // owned by that filter (it may be cached or handed out again), so
    // combining in place would silently corrupt its later results.
    BitSet bits = (BitSet) ((Filter) filterList.get(0)).bits(reader).clone();
    for (int i = 1; i < filterListSize; i++) {
        bits.and(((Filter) filterList.get(i)).bits(reader));
    }
    return bits;
}

From source file:com.datasalt.pangool.solr.TestSolrOutputFormat.java

License:Apache License

/**
 * Runs {@code TupleSolrOutputFormatExample} and checks that the default,
 * "FR" and "ES" indexes were written with the expected documents.
 *
 * <p>Every index reader is closed in a {@code finally} block so no file
 * handle is still open when the output folder is deleted at the end.
 */
@Test
public void test() throws Exception {
    trash(OUTPUT);

    TupleSolrOutputFormatExample example = new TupleSolrOutputFormatExample();
    example.run(INPUT, OUTPUT, getConf());

    // Assert that indexes have been created
    assertTrue(new File(OUTPUT + "/part-00000/data/index").exists());
    assertTrue(new File(OUTPUT + "/FR/part-00000/data/index").exists());
    assertTrue(new File(OUTPUT + "/ES/part-00000/data/index").exists());

    // Validate data inside the default index
    IndexReader r = IndexReader.open(FSDirectory.open(new File(OUTPUT + "/part-00000/data/index")));
    try {
        assertEquals(2, r.maxDoc());

        int contentAssertions = 0;
        Set<String> distinctMessages = new HashSet<String>();

        // maxDoc() was just asserted to be 2, so this visits both documents.
        for (int i = 0; i < r.maxDoc(); i++) {
            String document = r.document(i).toString();
            distinctMessages.add(document);
            if (document.contains("user_id:user1")) {
                assertTrue(document.contains("Hi, this is a message from user1."));
                contentAssertions++;
            } else if (document.contains("user_id:user4")) {
                assertTrue(document.contains("Hi, this is another message from user4."));
                contentAssertions++;
            }
        }

        assertEquals(2, distinctMessages.size());
        assertEquals(2, contentAssertions);
    } finally {
        r.close();
    }

    // NOTE(review): the two message literals below look as if accented
    // characters were lost somewhere; confirm them against the input fixture.
    r = IndexReader.open(FSDirectory.open(new File(OUTPUT + "/FR/part-00000/data/index")));
    try {
        assertEquals(1, r.maxDoc());

        String document = r.document(0).toString();
        assertTrue(document.contains("user_id:user3"));
        assertTrue(document.contains("Oh la l!"));
    } finally {
        r.close();
    }

    r = IndexReader.open(FSDirectory.open(new File(OUTPUT + "/ES/part-00000/data/index")));
    try {
        assertEquals(1, r.maxDoc());

        String document = r.document(0).toString();
        assertTrue(document.contains("user_id:user2"));
        assertTrue(document.contains("Yo no hablo ingls."));
    } finally {
        r.close();
    }

    trash(OUTPUT);
}

From source file:com.doculibre.constellio.lucene.BaseLuceneIndexHelper.java

License:Open Source License

/**
 * Looks up the Lucene document number of the given object by querying the
 * index on the object's unique field.
 *
 * <p>The index directory and reader are opened for the duration of the call
 * and are always closed again, including when parsing or searching fails.
 *
 * @param object the object whose indexed document is wanted
 * @return the document number of the first hit, or {@code -1} when the
 *         object has no unique field value or no document matches
 * @throws RuntimeException wrapping the {@code ParseException} or
 *         {@code IOException} raised while parsing the query or reading
 *         the index
 */
protected synchronized int getDocNum(T object) {
    String uniqueIndexFieldName = getUniqueIndexFieldName();
    String uniqueIndexFieldValue = getUniqueIndexFieldValue(object);
    if (uniqueIndexFieldValue == null) {
        return -1;
    }

    // NOTE(review): the value is concatenated into the query unescaped;
    // query-syntax characters in it would change or break the parse.
    String query = uniqueIndexFieldName + ":" + uniqueIndexFieldValue;
    try {
        Analyzer analyzer = analyzerProvider.getAnalyzer(Locale.FRENCH);
        QueryParser multiFielsQP = new QueryParser(Version.LUCENE_44, uniqueIndexFieldName, analyzer);
        Query luceneQuery = multiFielsQP.parse(query);

        Directory directory = FSDirectory.open(indexDir);
        try {
            IndexReader reader = DirectoryReader.open(directory);
            try {
                IndexSearcher indexSearcher = new IndexSearcher(reader);
                // An empty index reports maxDoc() == 0, and the searcher
                // rejects a non-positive hit count, so ask for at least one.
                int maxHits = Math.max(1, reader.maxDoc());
                TopDocs topDocs = indexSearcher.search(luceneQuery, maxHits);
                return (topDocs.totalHits > 0) ? topDocs.scoreDocs[0].doc : -1;
            } finally {
                reader.close();
            }
        } finally {
            directory.close();
        }
    } catch (ParseException e) {
        throw new RuntimeException(e);
    } catch (IOException e) {
        // Also covers CorruptIndexException, which is an IOException.
        throw new RuntimeException(e);
    }
}

From source file:com.downtree.tourbus.search.LocationFilter.java

/**
 * Selects the documents of type {@code m_type} whose stored
 * latitude/longitude lie within {@code m_radius} of the configured center.
 *
 * <p>Documents without a "latitude" value are skipped. A document whose
 * coordinates cannot be processed is logged and left out of the result.
 *
 * @param reader the index reader to scan
 * @return a bit set with a bit set for each document inside the radius
 * @throws IOException if the index cannot be read
 */
@Override
public BitSet bits(IndexReader reader) throws IOException {
    BitSet bits = new BitSet(reader.maxDoc());

    TermDocs docs = reader.termDocs(new Term("ferret_class", m_type));
    try {
        while (docs.next()) {
            int docId = docs.doc();
            Document doc = reader.document(docId);
            String value = doc.get("latitude");
            if (value == null) {
                continue;
            }

            try {
                double latitude = Double.parseDouble(value) * DEG2RAD;
                double longitude = Double.parseDouble(doc.get("longitude")) * DEG2RAD;

                // Cosine of the central angle between the document and the center.
                double x = (Math.sin(latitude) * Math.sin(m_centerLat))
                        + (Math.cos(latitude) * Math.cos(m_centerLat) * Math.cos(longitude - m_centerLong));

                // Rounding can push x marginally outside [-1, 1], where acos
                // is undefined, so clamp it. Treating every out-of-range
                // value as distance 0 would accept antipodal points
                // (x <= -1) and NaN coordinates as matches.
                double clamped = Math.max(-1.0, Math.min(1.0, x));
                double distance = Math.acos(clamped) * EARTH_RADIUS;

                if (distance <= m_radius) {
                    bits.set(docId);
                }
            } catch (Exception e) {
                // Best effort: one bad document must not fail the whole filter.
                SolrException.logOnce(SolrCore.log, "Error in location filter", e);
            }
        }
    } finally {
        docs.close();
    }

    return bits;
}

From source file:com.duroty.lucene.filter.ChainedFilter.java

License:Apache License

/**
 * Runs the whole filter chain using one logical operation for every link.
 *
 * @param reader the index reader passed on to each filter
 * @param logic the logical operation applied between successive filters
 * @return the combined BitSet
 * @throws IOException if a filter fails while reading the index
 */
private BitSet bits(IndexReader reader, int logic) throws IOException {
    final BitSet accumulated;
    final int firstToChain;

    if (logic != AND) {
        // Any other operation can start from an all-false set.
        accumulated = new BitSet(reader.maxDoc());
        firstToChain = 0;
    } else {
        // AND-ing against an all-false set would always give zero results,
        // so seed the accumulator with a copy of the first filter's bits.
        // (Workaround suggested by Daniel Armbrust.)
        accumulated = (BitSet) chain[0].bits(reader).clone();
        firstToChain = 1;
    }

    for (int idx = firstToChain; idx < chain.length; idx++) {
        doChain(accumulated, reader, logic, chain[idx]);
    }
    return accumulated;
}

From source file:com.duroty.lucene.filter.ChainedFilter.java

License:Apache License

/**
 * Runs the whole filter chain, taking the logical operation for each link
 * from the matching position of {@code logic}.
 *
 * @param reader the index reader passed on to each filter
 * @param logic one logical operation per filter in the chain
 * @return the combined BitSet
 * @throws IOException if a filter fails while reading the index
 * @throws IllegalArgumentException if {@code logic} and the chain differ in length
 */
private BitSet bits(IndexReader reader, int[] logic) throws IOException {
    if (logic.length != chain.length) {
        throw new IllegalArgumentException("Invalid number of elements in logic array");
    }

    final BitSet accumulated;
    final int firstToChain;

    if (logic[0] != AND) {
        // Any other leading operation can start from an all-false set.
        accumulated = new BitSet(reader.maxDoc());
        firstToChain = 0;
    } else {
        // A leading AND against an all-false set would always give zero
        // results, so seed the accumulator with a copy of the first
        // filter's bits. (Workaround suggested by Daniel Armbrust.)
        accumulated = (BitSet) chain[0].bits(reader).clone();
        firstToChain = 1;
    }

    for (int idx = firstToChain; idx < chain.length; idx++) {
        doChain(accumulated, reader, logic[idx], chain[idx]);
    }
    return accumulated;
}

From source file:com.esri.gpt.catalog.lucene.AclFilter.java

License:Apache License

/**
 * Queries for documents that have no values associated with the field.
 *
 * <p>Every document carrying at least one term for the field is marked, the
 * bit set is then inverted, and finally deleted documents are cleared from
 * the result. When {@code field} is null or blank the returned bit set is
 * empty.
 *
 * @param reader the index reader
 * @param field the name of the field to test for missing values
 * @return the OpenBitSet (documents with no values set to true)
 * @throws IOException if an exception is encountered while reading the index
 */
private OpenBitSet queryNulls(IndexReader reader, String field) throws IOException {
    int nBits = reader.maxDoc();
    OpenBitSet bitSet = new OpenBitSet(nBits);
    if ((field == null) || (field.trim().length() == 0)) {
        return bitSet;
    }

    TermEnum termEnum = null;
    TermDocs termDocs = null;
    try {

        // find all documents that have a term for the field, then flip the bit set
        termEnum = reader.terms(new Term(field));
        termDocs = reader.termDocs();
        do {
            Term term = termEnum.term();
            if ((term == null) || !term.field().equals(field)) {
                // The enumeration is ordered by field, then by text, so the
                // first term of another field means this field is exhausted.
                // Stopping here avoids walking the rest of the index's terms.
                break;
            }
            termDocs.seek(term);
            while (termDocs.next()) {
                bitSet.fastSet(termDocs.doc());
            }
        } while (termEnum.next());

        bitSet.flip(0, nBits);

        // The flip also turned on the bits of deleted documents; turn them off again.
        if (reader.hasDeletions()) {
            for (int i = 0; i < nBits; i++) {
                if (bitSet.get(i) && reader.isDeleted(i)) {
                    bitSet.fastFlip(i);
                }
            }
        }

    } finally {
        try {
            if (termEnum != null)
                termEnum.close();
        } catch (Exception ignored) {
            // best-effort cleanup; a failure to close must not mask the result
        }
        try {
            if (termDocs != null)
                termDocs.close();
        } catch (Exception ignored) {
            // best-effort cleanup; a failure to close must not mask the result
        }
    }
    return bitSet;
}

From source file:com.esri.gpt.catalog.lucene.AclFilter.java

License:Apache License

/**
 * Collects the documents containing at least one of the supplied values in
 * the given field. Each value is trimmed and lower-cased before lookup.
 *
 * @param reader the index reader
 * @param field the name of the field to match against
 * @param values the candidate values; null or empty yields an empty result
 * @return the OpenBitSet (documents with matches are set to true)
 * @throws IOException if an exception is encountered while reading the index
 */
private OpenBitSet queryValues(IndexReader reader, String field, String[] values) throws IOException {
    final OpenBitSet matches = new OpenBitSet(reader.maxDoc());
    if ((values == null) || (values.length == 0)) {
        return matches;
    }

    TermDocs postings = null;
    try {
        final Term template = new Term(field);
        postings = reader.termDocs();
        for (int v = 0; v < values.length; v++) {
            final String normalized = values[v].trim().toLowerCase();
            postings.seek(template.createTerm(normalized));
            for (boolean more = postings.next(); more; more = postings.next()) {
                matches.set(postings.doc());
            }
        }
    } finally {
        if (postings != null) {
            try {
                postings.close();
            } catch (Exception ignored) {
                // closing is best effort
            }
        }
    }
    return matches;
}

From source file:com.esri.gpt.catalog.lucene.SchemaFilter.java

License:Apache License

/**
 * Collects the documents that contain the supplied value in the given field.
 *
 * @param reader the index reader
 * @param field the name of the field to match against
 * @param value the exact term text; null or empty yields an empty result
 * @return the OpenBitSet (documents with matches are set to true)
 * @throws IOException if an exception is encountered while reading the index
 */
private OpenBitSet queryValue(IndexReader reader, String field, String value) throws IOException {
    final OpenBitSet matches = new OpenBitSet(reader.maxDoc());
    if ((value == null) || (value.length() == 0)) {
        return matches;
    }

    TermDocs postings = null;
    try {
        final Term target = new Term(field, value);
        postings = reader.termDocs();
        postings.seek(target);
        for (boolean more = postings.next(); more; more = postings.next()) {
            matches.set(postings.doc());
        }
    } finally {
        if (postings != null) {
            try {
                postings.close();
            } catch (Exception ignored) {
                // closing is best effort
            }
        }
    }
    return matches;
}

From source file:com.esri.gpt.catalog.lucene.stats.GlobalFieldStats.java

License:Apache License

/**
 * Executes the collection of statistics.
 * @param request the active statistics request
 * @param reader the index reader/*from  w w w.j a  v a  2s  . c  o  m*/
 * @throws IOException if an error occurs while communicating with the index
 */
public void collectStats(StatsRequest request, IndexReader reader) throws IOException {
    long t1 = System.currentTimeMillis();
    TermEnum termEnum = null;
    TermDocs termDocs = null;
    try {
        OpenBitSet documentFilterBitSet = request.getDocumentFilterBitSet();
        boolean isUnfiltered = (documentFilterBitSet == null);

        // return if there are no stats to collect
        String[] fieldNames = request.getCollectableFieldNames(reader);
        if (this.determineNumberOfDocsConsidered(reader, documentFilterBitSet) <= 0) {
            return;
        } else if ((fieldNames == null) || (fieldNames.length == 0)) {
            return;
        }

        // accumulate field frequencies per document
        termDocs = reader.termDocs();
        for (String fieldName : fieldNames) {
            termEnum = reader.terms(new Term(fieldName));
            OpenBitSet docsWithFieldBitSet = new OpenBitSet(reader.maxDoc());
            do {
                Term term = termEnum.term();
                if (term != null && term.field().equals(fieldName)) {

                    termDocs.seek(term);
                    while (termDocs.next()) {
                        int docId = termDocs.doc();
                        boolean bSet = isUnfiltered || documentFilterBitSet.fastGet(docId);
                        if (bSet) {
                            docsWithFieldBitSet.fastSet(docId);
                        }
                    }

                } else {
                    break;
                }
            } while (termEnum.next());
            termEnum.close();
            termEnum = null;

            if (docsWithFieldBitSet.cardinality() > 0) {
                this.fieldAccumulator.add(fieldName, docsWithFieldBitSet.cardinality());
            }
        }

        // sort
        if (this.getSortByFrequency()) {
            this.fieldAccumulator.sortByFrequency();
        } else {
            this.fieldAccumulator.sortByName();
        }

    } finally {
        try {
            if (termEnum != null)
                termEnum.close();
        } catch (Exception ef) {
        }
        try {
            if (termDocs != null)
                termDocs.close();
        } catch (Exception ef) {
        }
        this.setTimeMillis(System.currentTimeMillis() - t1);
    }

    // print
    if (request.getResponseWriter() != null) {
        this.print(request);
    }
}