List of usage examples for org.apache.lucene.index IndexReader maxDoc
public abstract int maxDoc();
From source file:com.cis.MultiFilter.java
License:Open Source License
public BitSet bits(IndexReader reader) throws IOException { //Iterate through list of filters and apply the boolean AND operation //on each bitSet. The AND operator has the affect that only documents //that are allowed by every single filter in the filter list will be //allowed by this MultiFilter. int filterListSize = filterList.size(); if (filterListSize > 0) { BitSet bits = ((Filter) filterList.get(0)).bits(reader); for (int i = 1; i < filterListSize; i++) { bits.and(((Filter) filterList.get(i)).bits(reader)); }//from w ww.j a v a 2s.c o m return bits; } //There are no filters defined. In this case, we return a new //BitSet that will filter out all documents. This is probably the most //consistent behavior with the Lucene API. It's also a lot more //efficient considering the BitSet implementation. else { return new BitSet(reader.maxDoc()); } }
From source file:com.datasalt.pangool.solr.TestSolrOutputFormat.java
License:Apache License
@Test public void test() throws Exception { trash(OUTPUT);/*from w ww . j a v a 2 s . com*/ TupleSolrOutputFormatExample example = new TupleSolrOutputFormatExample(); example.run(INPUT, OUTPUT, getConf()); // Assert that indexes have been created assertTrue(new File(OUTPUT + "/part-00000/data/index").exists()); assertTrue(new File(OUTPUT + "/FR/part-00000/data/index").exists()); assertTrue(new File(OUTPUT + "/ES/part-00000/data/index").exists()); // Validate data inside index IndexReader r = IndexReader.open(FSDirectory.open(new File(OUTPUT + "/part-00000/data/index"))); assertEquals(2, r.maxDoc()); int contentAssertions = 0; Set<String> distinctMessages = new HashSet<String>(); for (int i = 0; i < 2; i++) { String document = r.document(i).toString(); distinctMessages.add(document); if (document.contains("user_id:user1")) { assertTrue(document.contains("Hi, this is a message from user1.")); contentAssertions++; } else if (document.contains("user_id:user4")) { assertTrue(document.contains("Hi, this is another message from user4.")); contentAssertions++; } } assertEquals(2, distinctMessages.size()); assertEquals(2, contentAssertions); r = IndexReader.open(FSDirectory.open(new File(OUTPUT + "/FR/part-00000/data/index"))); assertEquals(1, r.maxDoc()); String document = r.document(0).toString(); assertTrue(document.contains("user_id:user3")); assertTrue(document.contains("Oh la l!")); r = IndexReader.open(FSDirectory.open(new File(OUTPUT + "/ES/part-00000/data/index"))); assertEquals(1, r.maxDoc()); document = r.document(0).toString(); assertTrue(document.contains("user_id:user2")); assertTrue(document.contains("Yo no hablo ingls.")); document = r.document(0).toString(); trash(OUTPUT); }
From source file:com.doculibre.constellio.lucene.BaseLuceneIndexHelper.java
License:Open Source License
protected synchronized int getDocNum(T object) { int docNum;//from www . ja v a 2 s. com String uniqueIndexFieldName = getUniqueIndexFieldName(); String uniqueIndexFieldValue = getUniqueIndexFieldValue(object); if (uniqueIndexFieldValue != null) { String query = uniqueIndexFieldName + ":" + uniqueIndexFieldValue; try { Analyzer analyzer = analyzerProvider.getAnalyzer(Locale.FRENCH); QueryParser multiFielsQP = new QueryParser(Version.LUCENE_44, uniqueIndexFieldName, analyzer); Query luceneQuery = multiFielsQP.parse(query); Directory directory = FSDirectory.open(indexDir); IndexReader reader = DirectoryReader.open(directory); IndexSearcher indexSearcher = new IndexSearcher(reader); TopDocs topDocs = indexSearcher.search(luceneQuery, reader.maxDoc()); if (topDocs.totalHits > 0) { docNum = topDocs.scoreDocs[0].doc; } else { docNum = -1; } // indexSearcher.close(); // TODO add finally reader.close(); directory.close(); } catch (ParseException e) { throw new RuntimeException(e); } catch (CorruptIndexException e) { throw new RuntimeException(e); } catch (IOException e) { throw new RuntimeException(e); } } else { docNum = -1; } return docNum; }
From source file:com.downtree.tourbus.search.LocationFilter.java
@Override public BitSet bits(IndexReader reader) throws IOException { BitSet bits = new BitSet(reader.maxDoc()); TermDocs docs = reader.termDocs(new Term("ferret_class", m_type)); while (docs.next()) { Document doc = reader.document(docs.doc()); String value = doc.get("latitude"); if (value == null) continue; try {//from w w w . ja va 2 s . c o m double latitude = Double.parseDouble(value) * DEG2RAD; double longitude = Double.parseDouble(doc.get("longitude")) * DEG2RAD; double x = (Math.sin(latitude) * Math.sin(m_centerLat)) + (Math.cos(latitude) * Math.cos(m_centerLat) * Math.cos(longitude - m_centerLong)); double distance = 0; if (x > -1 && x < 1) { distance = Math.acos(x) * EARTH_RADIUS; } if (distance <= m_radius) { bits.set(docs.doc()); } } catch (Exception e) { SolrException.logOnce(SolrCore.log, "Error in location filter", e); continue; } } return bits; }
From source file:com.duroty.lucene.filter.ChainedFilter.java
License:Apache License
/**
 * Runs every filter of the chain against the reader and folds the results
 * together using one logical operation for the whole chain.
 *
 * @param reader IndexReader
 * @param logic Logical operation
 * @return BitSet
 */
private BitSet bits(IndexReader reader, int logic) throws IOException {
    final BitSet combined;
    final int firstToChain;

    if (logic == AND) {
        // An AND against a freshly created (all-false) bit set would always
        // give zero results, so the accumulator is seeded with a copy of the
        // first filter's bits. Thanks to Daniel Armbrust for pointing this out
        // and suggesting the workaround.
        combined = (BitSet) chain[0].bits(reader).clone();
        firstToChain = 1;
    } else {
        combined = new BitSet(reader.maxDoc());
        firstToChain = 0;
    }

    for (int idx = firstToChain; idx < chain.length; idx++) {
        doChain(combined, reader, logic, chain[idx]);
    }
    return combined;
}
From source file:com.duroty.lucene.filter.ChainedFilter.java
License:Apache License
/**
 * Runs every filter of the chain against the reader and folds the results
 * together, using a separate logical operation for each filter.
 *
 * @param reader IndexReader
 * @param logic Logical operation for each filter; must have exactly one
 *        entry per filter in the chain
 * @return BitSet
 * @throws IllegalArgumentException if {@code logic} and the chain differ in length
 */
private BitSet bits(IndexReader reader, int[] logic) throws IOException {
    if (logic.length != chain.length) {
        throw new IllegalArgumentException("Invalid number of elements in logic array");
    }

    // An AND against a freshly created (all-false) bit set would always give
    // zero results, so when the first operation is AND the accumulator is
    // seeded with a copy of the first filter's bits and chaining starts at the
    // second filter. Thanks to Daniel Armbrust for pointing this out and
    // suggesting the workaround.
    final boolean seedFromFirstFilter = (logic[0] == AND);
    final BitSet combined = seedFromFirstFilter
            ? (BitSet) chain[0].bits(reader).clone()
            : new BitSet(reader.maxDoc());

    for (int idx = seedFromFirstFilter ? 1 : 0; idx < chain.length; idx++) {
        doChain(combined, reader, logic[idx], chain[idx]);
    }
    return combined;
}
From source file:com.esri.gpt.catalog.lucene.AclFilter.java
License:Apache License
/** * Queries for documents that have no values associated with the field. * @param reader the index reader//ww w. ja va 2 s. c om * @return the OpenBitSet (documents with no values set to true) * @throws IOException if an exception is encountered while reading the index */ private OpenBitSet queryNulls(IndexReader reader, String field) throws IOException { int nBits = reader.maxDoc(); OpenBitSet bitSet = new OpenBitSet(nBits); TermEnum termEnum = null; TermDocs termDocs = null; if ((field != null) && (field.trim().length() > 0)) { try { // find all documents that have a term for the field, then flip the bit set termEnum = reader.terms(new Term(field)); termDocs = reader.termDocs(); do { Term term = termEnum.term(); if ((term != null) && term.field().equals(field)) { termDocs.seek(term); while (termDocs.next()) { bitSet.fastSet(termDocs.doc()); } } } while (termEnum.next()); bitSet.flip(0, nBits); if (reader.hasDeletions()) { for (int i = 0; i < nBits; i++) { if (bitSet.get(i) && reader.isDeleted(i)) { bitSet.fastFlip(i); } } } } finally { try { if (termEnum != null) termEnum.close(); } catch (Exception ef) { } try { if (termDocs != null) termDocs.close(); } catch (Exception ef) { } } } return bitSet; }
From source file:com.esri.gpt.catalog.lucene.AclFilter.java
License:Apache License
/**
 * Queries for documents that match one or more of the supplied values.
 *
 * <p>Each value is trimmed and lower-cased before it is looked up as a term
 * of the field. Null entries in {@code values} are skipped.
 *
 * @param reader the index reader
 * @param field the name of the field to match against
 * @param values the candidate values; may be null or empty, in which case no
 *        document matches
 * @return the OpenBitSet (documents with matches are set to true)
 * @throws IOException if an exception is encountered while reading the index
 */
private OpenBitSet queryValues(IndexReader reader, String field, String[] values) throws IOException {
    OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
    if ((values == null) || (values.length == 0)) {
        return bitSet;
    }

    TermDocs termDocs = null;
    try {
        Term baseTerm = new Term(field);
        termDocs = reader.termDocs();
        for (String value : values) {
            if (value == null) {
                // A null entry cannot match any term; skip it rather than fail.
                continue;
            }
            // NOTE(review): toLowerCase() uses the JVM default locale - confirm
            // that the indexing side lower-cases the same way.
            termDocs.seek(baseTerm.createTerm(value.trim().toLowerCase()));
            while (termDocs.next()) {
                bitSet.set(termDocs.doc());
            }
        }
    } finally {
        try {
            if (termDocs != null) {
                termDocs.close();
            }
        } catch (Exception ignored) {
            // best-effort cleanup; a failure to close must not mask the result
        }
    }
    return bitSet;
}
From source file:com.esri.gpt.catalog.lucene.SchemaFilter.java
License:Apache License
/**
 * Collects the documents whose field contains exactly the supplied value.
 *
 * @param reader the index reader
 * @param field the name of the field to match against
 * @param value the term text to look up; a null or empty value matches nothing
 * @return the OpenBitSet (documents with matches are set to true)
 * @throws IOException if an exception is encountered while reading the index
 */
private OpenBitSet queryValue(IndexReader reader, String field, String value) throws IOException {
    final OpenBitSet matches = new OpenBitSet(reader.maxDoc());

    // Nothing to look up: hand back the empty bit set.
    if ((value == null) || (value.length() == 0)) {
        return matches;
    }

    TermDocs postings = null;
    try {
        final Term wanted = new Term(field, value);
        postings = reader.termDocs();
        postings.seek(wanted);
        while (postings.next()) {
            matches.set(postings.doc());
        }
    } finally {
        try {
            if (postings != null) {
                postings.close();
            }
        } catch (Exception ef) {
            // close failures are deliberately ignored
        }
    }
    return matches;
}
From source file:com.esri.gpt.catalog.lucene.stats.GlobalFieldStats.java
License:Apache License
/** * Executes the collection of statistics. * @param request the active statistics request * @param reader the index reader/*from w w w.j a v a 2s . c o m*/ * @throws IOException if an error occurs while communicating with the index */ public void collectStats(StatsRequest request, IndexReader reader) throws IOException { long t1 = System.currentTimeMillis(); TermEnum termEnum = null; TermDocs termDocs = null; try { OpenBitSet documentFilterBitSet = request.getDocumentFilterBitSet(); boolean isUnfiltered = (documentFilterBitSet == null); // return if there are no stats to collect String[] fieldNames = request.getCollectableFieldNames(reader); if (this.determineNumberOfDocsConsidered(reader, documentFilterBitSet) <= 0) { return; } else if ((fieldNames == null) || (fieldNames.length == 0)) { return; } // accumulate field frequencies per document termDocs = reader.termDocs(); for (String fieldName : fieldNames) { termEnum = reader.terms(new Term(fieldName)); OpenBitSet docsWithFieldBitSet = new OpenBitSet(reader.maxDoc()); do { Term term = termEnum.term(); if (term != null && term.field().equals(fieldName)) { termDocs.seek(term); while (termDocs.next()) { int docId = termDocs.doc(); boolean bSet = isUnfiltered || documentFilterBitSet.fastGet(docId); if (bSet) { docsWithFieldBitSet.fastSet(docId); } } } else { break; } } while (termEnum.next()); termEnum.close(); termEnum = null; if (docsWithFieldBitSet.cardinality() > 0) { this.fieldAccumulator.add(fieldName, docsWithFieldBitSet.cardinality()); } } // sort if (this.getSortByFrequency()) { this.fieldAccumulator.sortByFrequency(); } else { this.fieldAccumulator.sortByName(); } } finally { try { if (termEnum != null) termEnum.close(); } catch (Exception ef) { } try { if (termDocs != null) termDocs.close(); } catch (Exception ef) { } this.setTimeMillis(System.currentTimeMillis() - t1); } // print if (request.getResponseWriter() != null) { this.print(request); } }