List of usage examples for org.apache.lucene.index IndexReader maxDoc
/**
 * Abstract accessor returning an {@code int}; no implementation is visible in this listing.
 *
 * NOTE(review): the callers shown below use the value to size bit sets and as the exclusive
 * upper bound when looping over document ids (while separately checking for deleted
 * documents), so it presumably is one past the largest document id, deleted documents
 * included -- confirm against the Lucene Javadoc.
 */
public abstract int maxDoc();
From source file:org.codelibs.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.java
License:Apache License
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates, Analyzer preFilter, Analyzer postFilter, Terms terms) throws IOException { if (terms == null) { throw new IllegalArgumentException("generator field [" + field + "] doesn't exist"); }//from w w w . j a v a2 s. c o m this.spellchecker = spellchecker; this.field = field; this.numCandidates = numCandidates; this.suggestMode = suggestMode; this.reader = reader; final long dictSize = terms.getSumTotalTermFreq(); this.useTotalTermFrequency = dictSize != -1; this.dictSize = dictSize == -1 ? reader.maxDoc() : dictSize; this.preFilter = preFilter; this.postFilter = postFilter; this.nonErrorLikelihood = nonErrorLikelihood; float thresholdFrequency = spellchecker.getThresholdFrequency(); this.frequencyPlateau = thresholdFrequency >= 1.0f ? (int) thresholdFrequency : (int) (dictSize * thresholdFrequency); termsEnum = terms.iterator(); }
From source file:org.codelibs.elasticsearch.search.suggest.phrase.WordScorer.java
License:Apache License
/**
 * Builds a word scorer over the terms of {@code field}.
 *
 * <p>The vocabulary size is the sum of total term frequencies reported by {@code terms};
 * when that statistic is unavailable (reported as {@code -1}) {@code reader.maxDoc()} is
 * used instead, and the terms enum is configured accordingly.
 *
 * @param reader             index reader, stored and handed to the frequency terms enum
 * @param terms              terms of {@code field}; must not be {@code null}
 * @param field              field name
 * @param realWordLikelyHood stored as given
 * @param separator          stored as given
 * @throws IllegalArgumentException if {@code terms} is {@code null}
 * @throws IOException if reading term statistics fails
 */
public WordScorer(IndexReader reader, Terms terms, String field, double realWordLikelyHood, BytesRef separator)
        throws IOException {
    this.field = field;
    if (terms == null) {
        throw new IllegalArgumentException("Field: [" + field + "] does not exist");
    }

    // Plain pass-through state.
    this.terms = terms;
    this.reader = reader;
    this.realWordLikelyhood = realWordLikelyHood;
    this.separator = separator;

    // Statistics-derived state.
    final long totalTermFreq = terms.getSumTotalTermFreq();
    final boolean totalTermFreqKnown = totalTermFreq != -1;
    if (totalTermFreqKnown) {
        this.vocabluarySize = totalTermFreq;
    } else {
        this.vocabluarySize = reader.maxDoc();
    }
    this.useTotalTermFreq = totalTermFreqKnown;
    this.numTerms = terms.size();

    // non recycling for now
    this.termsEnum = new FreqTermsEnum(reader, field, !totalTermFreqKnown, totalTermFreqKnown, null,
            BigArrays.NON_RECYCLING_INSTANCE);
}
From source file:org.compass.core.lucene.engine.transaction.readcommitted.BitSetByAliasFilter.java
License:Apache License
public DocIdSet getDocIdSet(IndexReader reader) throws IOException { DocIdSet bitSet = deletedBitSets.get(reader); if (bitSet != null) { return bitSet; }//from w w w. j av a 2s. c o m bitSet = allBitSets.get(reader); if (bitSet == null) { bitSet = new AllSetBitSet(reader.maxDoc()); allBitSets.put(reader, bitSet); } return bitSet; }
From source file:org.compass.core.lucene.support.ChainedFilter.java
License:Apache License
private OpenBitSetDISI initialResult(IndexReader reader, int logic, int[] index) throws IOException { OpenBitSetDISI result;/* w w w.java 2s . com*/ /** * First AND operation takes place against a completely false * bitset and will always return zero results. */ if (logic == AND) { result = new OpenBitSetDISI(getDISI(chain[index[0]], reader), reader.maxDoc()); ++index[0]; } else if (logic == ANDNOT) { result = new OpenBitSetDISI(getDISI(chain[index[0]], reader), reader.maxDoc()); result.flip(0, reader.maxDoc()); // NOTE: may set bits for deleted docs. ++index[0]; } else { result = new OpenBitSetDISI(reader.maxDoc()); } return result; }
From source file:org.compass.core.lucene.support.ChainedFilter.java
License:Apache License
/** * Delegates to each filter in the chain. * * @param reader IndexReader/*from w ww . j a v a2s.c o m*/ * @param logic Logical operation * @return DocIdSet */ private DocIdSet getDocIdSet(IndexReader reader, int logic, int[] index) throws IOException { OpenBitSetDISI result = initialResult(reader, logic, index); for (; index[0] < chain.length; index[0]++) { doChain(result, logic, chain[index[0]].getDocIdSet(reader)); } return finalResult(result, reader.maxDoc()); }
From source file:org.compass.core.lucene.support.ChainedFilter.java
License:Apache License
/** * Delegates to each filter in the chain. * * @param reader IndexReader/*from w w w . j a v a 2s .c o m*/ * @param logic Logical operation * @return DocIdSet */ private DocIdSet getDocIdSet(IndexReader reader, int[] logic, int[] index) throws IOException { if (logic.length != chain.length) throw new IllegalArgumentException("Invalid number of elements in logic array"); OpenBitSetDISI result = initialResult(reader, logic[0], index); for (; index[0] < chain.length; index[0]++) { doChain(result, logic[index[0]], chain[index[0]].getDocIdSet(reader)); } return finalResult(result, reader.maxDoc()); }
From source file:org.compass.core.lucene.util.ChainedFilter.java
License:Apache License
/** * Delegates to each filter in the chain. * * @param reader IndexReader/* www. j a v a 2 s . c o m*/ * @param logic Logical operation * @return BitSet */ private BitSet bits(IndexReader reader, ChainedFilterType logic) throws IOException { BitSet result; int i = 0; /** * First AND operation takes place against a completely false * bitset and will always return zero results. Thanks to * Daniel Armbrust for pointing this out and suggesting workaround. */ if (logic == ChainedFilterType.AND) { result = (BitSet) chain[i].bits(reader).clone(); ++i; } else { result = new BitSet(reader.maxDoc()); } for (; i < chain.length; i++) { doChain(result, reader, logic, chain[i]); } return result; }
From source file:org.compass.core.lucene.util.ChainedFilter.java
License:Apache License
/** * Delegates to each filter in the chain. * * @param reader IndexReader/*from w ww . j a v a2s.c o m*/ * @param logic Logical operation * @return BitSet */ private BitSet bits(IndexReader reader, ChainedFilterType[] logic) throws IOException { if (logic.length != chain.length) throw new IllegalArgumentException("Invalid number of elements in logic array"); BitSet result; int i = 0; /** * First AND operation takes place against a completely false * bitset and will always return zero results. Thanks to * Daniel Armbrust for pointing this out and suggesting workaround. */ if (logic[0] == ChainedFilterType.AND) { result = (BitSet) chain[i].bits(reader).clone(); ++i; } else { result = new BitSet(reader.maxDoc()); } for (; i < chain.length; i++) { doChain(result, reader, logic[i], chain[i]); } return result; }
From source file:org.creativecommons.nutch.CCDeleteUnlicensedTool.java
License:Apache License
/** Delete pages without CC licenes. */ public int deleteUnlicensed() throws IOException { int deleteCount = 0; for (int index = 0; index < readers.length; index++) { IndexReader reader = readers[index]; int readerMax = reader.maxDoc(); for (int doc = 0; doc < readerMax; doc++) { if (!reader.isDeleted(doc)) { Document document = reader.document(doc); if (document.get(CCIndexingFilter.FIELD) == null) { // no CC fields reader.deleteDocument(doc); // delete it deleteCount++;/*from www . j a v a 2 s . c om*/ } } } } return deleteCount; }
From source file:org.creativecommons.nutch.CCDeleteUnlicensedTool.java
License:Apache License
/**
 * Deletes pages without CC license fields from the indexes under the named segments
 * directory.
 *
 * <p>Every child directory that contains a regular file named {@code Indexer.DONE_NAME}
 * has its {@code index} sub-directory opened; all opened readers are then handed to a
 * {@code CCDeleteUnlicensedTool}, which performs the deletion and is closed afterwards.
 *
 * @param args exactly one element: the segments directory
 * @throws Exception if opening an index, deleting documents, or closing the tool fails
 */
public static void main(String[] args) throws Exception {
    String usage = "CCDeleteUnlicensedTool <segmentsDir>";

    if (args.length != 1) {
        System.err.println("Usage: " + usage);
        return;
    }

    String segmentsDir = args[0];
    File[] directories = new File(segmentsDir).listFiles();
    if (directories == null) {
        // listFiles() returns null when the path is not a directory or cannot be read;
        // report it like a usage error rather than failing with a NullPointerException.
        System.err.println("Cannot list segments directory: " + segmentsDir);
        System.err.println("Usage: " + usage);
        return;
    }

    Vector vReaders = new Vector();
    int maxDoc = 0;
    for (int i = 0; i < directories.length; i++) {
        File indexDone = new File(directories[i], Indexer.DONE_NAME);
        if (indexDone.exists() && indexDone.isFile()) {
            File indexDir = new File(directories[i], "index");
            IndexReader reader = IndexReader.open(indexDir);
            maxDoc += reader.maxDoc();
            vReaders.add(reader);
        }
    }

    // Copy in one step instead of repeatedly removing the head element.
    IndexReader[] readers = (IndexReader[]) vReaders.toArray(new IndexReader[vReaders.size()]);

    CCDeleteUnlicensedTool dd = new CCDeleteUnlicensedTool(readers);
    int count = dd.deleteUnlicensed();

    if (LOG.isInfoEnabled()) {
        LOG.info("CC: deleted " + count + " out of " + maxDoc);
    }

    dd.close();
}