Example usage for org.apache.lucene.index IndexReader maxDoc

List of usage examples for org.apache.lucene.index IndexReader maxDoc

Introduction

On this page you can find example usages of org.apache.lucene.index.IndexReader.maxDoc().

Prototype

public abstract int maxDoc();

Source Link

Documentation

Returns one greater than the largest possible document number.

Usage

From source file:org.codelibs.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.java

License:Apache License

/**
 * Creates a candidate generator over the terms of a single field.
 *
 * @param spellchecker       spell checker whose threshold frequency determines the frequency plateau
 * @param field              name of the field the {@code terms} belong to (used in the error message)
 * @param suggestMode        suggest mode stored for later use
 * @param reader             index reader; its {@code maxDoc()} is the fallback dictionary size
 * @param nonErrorLikelihood likelihood value stored for later use
 * @param numCandidates      number of candidates stored for later use
 * @param preFilter          analyzer stored as the pre-filter
 * @param postFilter         analyzer stored as the post-filter
 * @param terms              terms of {@code field}; must not be {@code null}
 * @throws IllegalArgumentException if {@code terms} is {@code null}
 * @throws IOException              if reading the term statistics or opening the terms iterator fails
 */
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode,
        IndexReader reader, double nonErrorLikelihood, int numCandidates, Analyzer preFilter,
        Analyzer postFilter, Terms terms) throws IOException {
    if (terms == null) {
        throw new IllegalArgumentException("generator field [" + field + "] doesn't exist");
    }
    this.spellchecker = spellchecker;
    this.field = field;
    this.numCandidates = numCandidates;
    this.suggestMode = suggestMode;
    this.reader = reader;
    final long sumTotalTermFreq = terms.getSumTotalTermFreq();
    // -1 is treated as "total term frequency not available" for this field.
    this.useTotalTermFrequency = sumTotalTermFreq != -1;
    this.dictSize = sumTotalTermFreq == -1 ? reader.maxDoc() : sumTotalTermFreq;
    this.preFilter = preFilter;
    this.postFilter = postFilter;
    this.nonErrorLikelihood = nonErrorLikelihood;
    float thresholdFrequency = spellchecker.getThresholdFrequency();
    // A threshold >= 1 is taken as an absolute count; a smaller one as a fraction of the dictionary
    // size. Use the resolved size (this.dictSize) so the -1 sentinel never enters the arithmetic.
    this.frequencyPlateau = thresholdFrequency >= 1.0f ? (int) thresholdFrequency
            : (int) (this.dictSize * thresholdFrequency);
    termsEnum = terms.iterator();
}

From source file:org.codelibs.elasticsearch.search.suggest.phrase.WordScorer.java

License:Apache License

/**
 * Builds a word scorer over the terms of one field.
 *
 * @param reader             index reader; its {@code maxDoc()} stands in for the vocabulary size
 *                           when the summed total term frequency is reported as -1
 * @param terms              terms of {@code field}; must not be {@code null}
 * @param field              field name
 * @param realWordLikelyHood likelihood value stored for later use
 * @param separator          separator bytes stored for later use
 * @throws IllegalArgumentException if {@code terms} is {@code null}
 * @throws IOException              if reading the term statistics fails
 */
public WordScorer(IndexReader reader, Terms terms, String field, double realWordLikelyHood, BytesRef separator)
        throws IOException {
    this.field = field;
    if (terms == null) {
        throw new IllegalArgumentException("Field: [" + field + "] does not exist");
    }
    this.terms = terms;
    this.reader = reader;
    this.realWordLikelyhood = realWordLikelyHood;
    this.separator = separator;

    final long sumTotalTermFreq = terms.getSumTotalTermFreq();
    final boolean totalTermFreqAvailable = sumTotalTermFreq != -1;
    this.vocabluarySize = totalTermFreqAvailable ? sumTotalTermFreq : reader.maxDoc();
    this.useTotalTermFreq = totalTermFreqAvailable;
    this.numTerms = terms.size();
    // non recycling for now
    this.termsEnum = new FreqTermsEnum(reader, field, !totalTermFreqAvailable, totalTermFreqAvailable, null,
            BigArrays.NON_RECYCLING_INSTANCE);
}

From source file:org.compass.core.lucene.engine.transaction.readcommitted.BitSetByAliasFilter.java

License:Apache License

/**
 * Looks up the doc id set for the given reader.
 *
 * <p>An entry in {@code deletedBitSets} takes precedence. Otherwise the entry from
 * {@code allBitSets} is returned, being created (sized by {@code reader.maxDoc()}) and
 * stored first if it does not exist yet.
 *
 * @param reader the reader to look up
 * @return the doc id set registered for {@code reader}
 */
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    final DocIdSet deleted = deletedBitSets.get(reader);
    if (deleted != null) {
        return deleted;
    }
    DocIdSet all = allBitSets.get(reader);
    if (all != null) {
        return all;
    }
    all = new AllSetBitSet(reader.maxDoc());
    allBitSets.put(reader, all);
    return all;
}

From source file:org.compass.core.lucene.support.ChainedFilter.java

License:Apache License

/**
 * Creates the starting bit set for a chain evaluation.
 *
 * <p>For {@code AND} and {@code ANDNOT} the set is seeded from the filter at
 * {@code chain[index[0]]} and {@code index[0]} is advanced past it; for {@code ANDNOT} the
 * seeded bits are additionally flipped. For any other operation an empty set is returned and
 * {@code index[0]} is left untouched.
 *
 * @param reader IndexReader
 * @param logic  Logical operation
 * @param index  position holder; {@code index[0]} is the next chain position to consume
 * @return the initial bit set
 */
private OpenBitSetDISI initialResult(IndexReader reader, int logic, int[] index) throws IOException {
    if (logic != AND && logic != ANDNOT) {
        return new OpenBitSetDISI(reader.maxDoc());
    }
    /*
     * A first AND (or ANDNOT) against a completely false bitset would always
     * return zero results, so start from the first filter instead.
     */
    OpenBitSetDISI seeded = new OpenBitSetDISI(getDISI(chain[index[0]], reader), reader.maxDoc());
    if (logic == ANDNOT) {
        seeded.flip(0, reader.maxDoc()); // NOTE: may set bits for deleted docs.
    }
    ++index[0];
    return seeded;
}

From source file:org.compass.core.lucene.support.ChainedFilter.java

License:Apache License

/**
 * Delegates to each filter in the chain.
 *
 * @param reader IndexReader/*from  w  ww . j  a v a2s.c o  m*/
 * @param logic  Logical operation
 * @return DocIdSet
 */
private DocIdSet getDocIdSet(IndexReader reader, int logic, int[] index) throws IOException {
    OpenBitSetDISI result = initialResult(reader, logic, index);
    for (; index[0] < chain.length; index[0]++) {
        doChain(result, logic, chain[index[0]].getDocIdSet(reader));
    }
    return finalResult(result, reader.maxDoc());
}

From source file:org.compass.core.lucene.support.ChainedFilter.java

License:Apache License

/**
 * Delegates to each filter in the chain.
 *
 * @param reader IndexReader/*from w w  w  .  j a  v  a 2s  .c  o  m*/
 * @param logic  Logical operation
 * @return DocIdSet
 */
private DocIdSet getDocIdSet(IndexReader reader, int[] logic, int[] index) throws IOException {
    if (logic.length != chain.length)
        throw new IllegalArgumentException("Invalid number of elements in logic array");

    OpenBitSetDISI result = initialResult(reader, logic[0], index);
    for (; index[0] < chain.length; index[0]++) {
        doChain(result, logic[index[0]], chain[index[0]].getDocIdSet(reader));
    }
    return finalResult(result, reader.maxDoc());
}

From source file:org.compass.core.lucene.util.ChainedFilter.java

License:Apache License

/**
 * Delegates to each filter in the chain.
 *
 * @param reader IndexReader/* www.  j a v  a 2  s  . c  o m*/
 * @param logic  Logical operation
 * @return BitSet
 */
private BitSet bits(IndexReader reader, ChainedFilterType logic) throws IOException {
    BitSet result;
    int i = 0;

    /**
     * First AND operation takes place against a completely false
     * bitset and will always return zero results. Thanks to
     * Daniel Armbrust for pointing this out and suggesting workaround.
     */
    if (logic == ChainedFilterType.AND) {
        result = (BitSet) chain[i].bits(reader).clone();
        ++i;
    } else {
        result = new BitSet(reader.maxDoc());
    }

    for (; i < chain.length; i++) {
        doChain(result, reader, logic, chain[i]);
    }
    return result;
}

From source file:org.compass.core.lucene.util.ChainedFilter.java

License:Apache License

/**
 * Delegates to each filter in the chain.
 *
 * @param reader IndexReader/*from  w ww  . j  a  v a2s.c o m*/
 * @param logic  Logical operation
 * @return BitSet
 */
private BitSet bits(IndexReader reader, ChainedFilterType[] logic) throws IOException {
    if (logic.length != chain.length)
        throw new IllegalArgumentException("Invalid number of elements in logic array");
    BitSet result;
    int i = 0;

    /**
     * First AND operation takes place against a completely false
     * bitset and will always return zero results. Thanks to
     * Daniel Armbrust for pointing this out and suggesting workaround.
     */
    if (logic[0] == ChainedFilterType.AND) {
        result = (BitSet) chain[i].bits(reader).clone();
        ++i;
    } else {
        result = new BitSet(reader.maxDoc());
    }

    for (; i < chain.length; i++) {
        doChain(result, reader, logic[i], chain[i]);
    }
    return result;
}

From source file:org.creativecommons.nutch.CCDeleteUnlicensedTool.java

License:Apache License

/**
 * Delete pages without CC licenses.
 *
 * <p>Walks every document slot of every reader and deletes each live document whose
 * {@code CCIndexingFilter.FIELD} value is absent.
 *
 * @return the number of documents deleted by this call
 */
public int deleteUnlicensed() throws IOException {
    int removed = 0;
    for (int r = 0; r < readers.length; r++) {
        final IndexReader current = readers[r];
        final int limit = current.maxDoc();
        for (int docId = 0; docId < limit; docId++) {
            if (current.isDeleted(docId)) {
                continue; // already gone
            }
            if (current.document(docId).get(CCIndexingFilter.FIELD) != null) {
                continue; // has CC fields, keep it
            }
            current.deleteDocument(docId);
            removed++;
        }
    }
    return removed;
}

From source file:org.creativecommons.nutch.CCDeleteUnlicensedTool.java

License:Apache License

/**
 * Command-line entry point: opens the index of every finished segment under the named
 * directory and runs {@link #deleteUnlicensed()} over them.
 *
 * <p>Expects exactly one argument, the segments directory. A sub-directory is processed only
 * if it contains a regular file named {@code Indexer.DONE_NAME}; its index is read from the
 * {@code index} sub-directory.
 *
 * @param args command-line arguments; {@code args[0]} is the segments directory
 * @throws Exception if opening an index or deleting documents fails
 */
public static void main(String[] args) throws Exception {
    String usage = "CCDeleteUnlicensedTool <segmentsDir>";

    if (args.length != 1) {
        System.err.println("Usage: " + usage);
        return;
    }

    String segmentsDir = args[0];

    // listFiles() returns null when the path is not a directory or cannot be read.
    File[] directories = new File(segmentsDir).listFiles();
    if (directories == null) {
        System.err.println("Not a readable directory: " + segmentsDir);
        System.err.println("Usage: " + usage);
        return;
    }

    Vector vReaders = new Vector();
    int maxDoc = 0;
    for (int i = 0; i < directories.length; i++) {
        File indexDone = new File(directories[i], Indexer.DONE_NAME);
        if (indexDone.exists() && indexDone.isFile()) {
            File indexDir = new File(directories[i], "index");
            IndexReader reader = IndexReader.open(indexDir);
            maxDoc += reader.maxDoc();
            vReaders.add(reader);
        }
    }

    IndexReader[] readers = (IndexReader[]) vReaders.toArray(new IndexReader[vReaders.size()]);

    CCDeleteUnlicensedTool dd = new CCDeleteUnlicensedTool(readers);
    try {
        int count = dd.deleteUnlicensed();
        if (LOG.isInfoEnabled()) {
            LOG.info("CC: deleted " + count + " out of " + maxDoc);
        }
    } finally {
        // Always close the tool, even when deletion fails part-way through.
        dd.close();
    }
}