Usage examples for org.apache.lucene.index.IndexReader#maxDoc()
public abstract int maxDoc();
From source file:com.tamingtext.tagging.LuceneCategoryExtractor.java
License:Apache License
/** dump the values stored in the specified field for each document. * /*from w w w . ja va 2s .co m*/ * <pre>term(tab)document_frequency</pre> * * @param indexDir the index to read. * @param field the name of the field. * @param out the print writer output will be written to * @throws IOException */ public static void dumpDocumentFields(File indexDir, String field, long maxDocs, PrintWriter out) throws IOException { Directory dir = FSDirectory.open(indexDir); IndexReader reader = IndexReader.open(dir, true); int max = reader.maxDoc(); for (int i = 0; i < max; i++) { if (!reader.isDeleted(i)) { Document d = reader.document(i); for (Field f : d.getFields(field)) { if (f.isStored() && !f.isBinary()) { String value = f.stringValue(); if (value != null) { out.printf("%s\n", value); } } } } } }
From source file:com.tamingtext.tagging.LuceneTagExtractor.java
License:Apache License
/**
 * Dumps term-vector-derived tag/term pairs for every non-deleted document
 * in the index via {@code appendVectorTerms}/{@code emitTermsForTags}.
 *
 * @param indexDir the index to read
 * @param out      the print writer output will be written to
 * @param maxDocs  intended cap on documents processed — NOTE(review):
 *                 currently ignored; TODO confirm intent
 * @throws IOException if the index cannot be opened or read
 */
public static void dumpDocs(File indexDir, PrintWriter out, long maxDocs) throws IOException {
    Directory dir = FSDirectory.open(indexDir);
    IndexReader reader = IndexReader.open(dir, true);
    try {
        int max = reader.maxDoc();
        // Reused across documents to avoid reallocating per iteration.
        StringBuilder buf = new StringBuilder();
        for (int i = 0; i < max; i++) {
            // maxDoc() counts deleted slots too, so skip them explicitly.
            if (reader.isDeleted(i)) {
                continue;
            }
            buf.setLength(0);
            appendVectorTerms(buf, reader.getTermFreqVector(i, "description-clustering"));
            appendVectorTerms(buf, reader.getTermFreqVector(i, "extended-clustering"));
            emitTermsForTags(out, buf, reader, reader.getTermFreqVector(i, "tag"));
        }
    } finally {
        // Fix: the original leaked both the reader and the directory.
        reader.close();
        dir.close();
    }
}
From source file:com.Yasna.forum.database.FieldFilter.java
License:Open Source License
public BitSet bits(IndexReader reader) throws IOException { //Create a new BitSet with a capacity equal to the size of the index. BitSet bits = new BitSet(reader.maxDoc()); //Get an enumeration of all the documents that match the specified field //value./*from ww w. jav a 2s.c om*/ TermDocs matchingDocs = reader.termDocs(searchTerm); try { while (matchingDocs.next()) { bits.set(matchingDocs.doc()); } } finally { if (matchingDocs != null) { matchingDocs.close(); } } return bits; }
From source file:com.zimbra.cs.index.RawIndexEditor.java
License:Open Source License
/**
 * Prints every document slot in the index (including deleted ones) by
 * delegating to {@code dumpDocument}.
 *
 * @throws IOException if the index cannot be opened or read
 */
void dumpAll() throws IOException {
    IndexReader reader = IndexReader.open(luceneDirectory);
    try {
        int slotCount = reader.maxDoc();
        System.out.println("There are " + slotCount + " documents in this index.");
        // Deleted slots are dumped too; the flag is passed along so the
        // callee can label them.
        for (int docId = 0; docId < slotCount; docId++) {
            dumpDocument(reader.document(docId), reader.isDeleted(docId));
        }
    } finally {
        reader.close();
    }
}
From source file:com.zimbra.cs.index.TermsFilter.java
License:Open Source License
/**
 * Collects the ids of all documents matching any of the configured terms.
 *
 * @see org.apache.lucene.search.Filter#getDocIdSet(org.apache.lucene.index.IndexReader)
 */
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    OpenBitSet matches = new OpenBitSet(reader.maxDoc());
    // A single TermDocs is reused and re-seeked for each term.
    TermDocs termDocs = reader.termDocs();
    try {
        for (Term term : terms) {
            termDocs.seek(term);
            while (termDocs.next()) {
                matches.set(termDocs.doc());
            }
        }
    } finally {
        Closeables.closeQuietly(termDocs);
    }
    return matches;
}
From source file:com.zimbra.cs.rmgmt.RemoteMailQueue.java
License:Open Source License
private void list0(SearchResult result, IndexReader indexReader, int offset, int limit) throws IOException { if (ZimbraLog.rmgmt.isDebugEnabled()) { ZimbraLog.rmgmt.debug("listing offset=" + offset + " limit=" + limit + " " + this); }//from w ww .java2 s .co m int max = indexReader.maxDoc(); int skip = 0; int listed = 0; for (int i = 0; i < max; i++) { if (indexReader.isDeleted(i)) { continue; } if (skip < offset) { skip++; continue; } Document doc = indexReader.document(i); Map<QueueAttr, String> qitem = docToQueueItem(doc); result.qitems.add(qitem); listed++; if (listed == limit) { break; } } result.hits = getNumMessages(); }
From source file:db.infiniti.config.HighFreqTerms.java
License:Apache License
private String getTheBestMatchingTerm(IndexReader indexReader, TermStats[] terms, ArrayList<String> sentQueries, ArrayList<String> initialQuery) { String specificFreqTerm = ""; if (sentQueries.size() == 0 && terms.length > 0) { String temp = terms[0].term + ""; temp = temp.substring(temp.indexOf(":") + 1, temp.length()); return temp; }/*from w ww .ja v a 2s. co m*/ HashMap<String, Integer> averageOfEach = new HashMap<String, Integer>(); for (int i = 0; i < terms.length; i++) { String temp = terms[i].term + ""; String queryATerm = temp.substring(temp.indexOf(":") + 1, temp.length()); try { // indexReader = IndexReader.open(indexDirectory); IndexSearcher indexSearcher = new IndexSearcher(indexReader); // int n = w.numDocs(); BooleanQuery bq = new BooleanQuery(); int totalNumberOfResults = 0; // double average = 0; for (String queryB : sentQueries) { Term term1 = new Term("text", queryATerm); TermQuery query1 = new TermQuery(term1); Term term2 = new Term("text", queryB); TermQuery query2 = new TermQuery(term2); bq.add(query1, BooleanClause.Occur.MUST); bq.add(query2, BooleanClause.Occur.MUST); int numberOfResults = 0; HitCollector results; int n = indexReader.maxDoc(); TopDocs hits = indexSearcher.search(bq, n); /* if (hits.totalHits > 0 ){ System.out.println(); }*/ totalNumberOfResults = totalNumberOfResults + hits.totalHits; } averageOfEach.put(queryATerm, totalNumberOfResults); // average = (double) totalNumberOfResults / (double) // sentQueries.size(); } catch (CorruptIndexException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } Set<Entry<String, Integer>> set = averageOfEach.entrySet(); List<Entry<String, Integer>> list = new ArrayList<Entry<String, Integer>>(set); Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() { public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) { return (o2.getValue()).compareTo(o1.getValue()); } 
}); System.out.println("sorted by relevance to previous submitted queries: "); for (Map.Entry<String, Integer> entry : list) { System.out.println(entry.getKey() + " ==== " + entry.getValue()); } int i = list.size() - 1; while (i >= 0) { specificFreqTerm = list.get(i).getKey(); if (!initialQuery.contains(specificFreqTerm)) { return specificFreqTerm; } else { i--; } } return specificFreqTerm; }
From source file:de.ks.flatadocdb.index.GlobalIndexTest.java
License:Apache License
@Test public void testRecreateLuceneIndex() throws Exception { IndexReader indexReader = luceneIndex.getIndexReader(); assertEquals(0, indexReader.maxDoc()); luceneIndex.recreate();// ww w . j a va2 s . co m indexReader = luceneIndex.getIndexReader(); assertEquals(COUNT, indexReader.maxDoc()); }
From source file:de.mirkosertic.desktopsearch.LuceneIndexHandler.java
License:Open Source License
public void cleanupDeadContent() throws IOException { searcherManager.maybeRefreshBlocking(); IndexSearcher theSearcher = searcherManager.acquire(); try {//from w w w.j a va 2 s. c o m IndexReader theReader = theSearcher.getIndexReader(); for (int i = 0; i < theReader.maxDoc(); i++) { Document theDocument = theReader.document(i); File theFile = new File(theDocument.getField(IndexFields.FILENAME).stringValue()); if (!theFile.exists()) { LOGGER.info("Removing file " + theFile + " from index as it does not exist anymore."); String theUniqueID = theDocument.getField(IndexFields.UNIQUEID).stringValue(); indexWriter.deleteDocuments(new Term(IndexFields.UNIQUEID, theUniqueID)); } } } finally { searcherManager.release(theSearcher); } }
From source file:de.tudarmstadt.ukp.teaching.uima.nounDecompounding.ranking.TotalFreqAmout.java
License:Open Source License
/** * Adds all frequency values for a special directory * @return/* www.ja va 2s . c o m*/ * @throws IOException */ protected BigInteger countFreq(FSDirectory dir) throws IOException { BigInteger count = BigInteger.valueOf(0); IndexReader reader = IndexReader.open(dir); for (int i = 0; i < reader.maxDoc(); i++) { if (reader.isDeleted(i)) { continue; } Document doc = reader.document(i); count = count.add(new BigInteger(doc.get("freq"))); } return count; }