Example usage for org.apache.lucene.index IndexReader maxDoc

List of usage examples for org.apache.lucene.index IndexReader maxDoc

Introduction

On this page you can find example usages of org.apache.lucene.index IndexReader maxDoc.

Prototype

public abstract int maxDoc();

Source Link

Documentation

Returns one greater than the largest possible document number.

Usage

From source file:com.tamingtext.tagging.LuceneCategoryExtractor.java

License:Apache License

/** dump the values stored in the specified field for each document.
 * /*from   w w  w  . ja  va 2s .co m*/
 *  <pre>term(tab)document_frequency</pre>
 *  
 * @param indexDir the index to read.
 * @param field the name of the field.
 * @param out the print writer output will be written to
 * @throws IOException
 */
public static void dumpDocumentFields(File indexDir, String field, long maxDocs, PrintWriter out)
        throws IOException {
    Directory dir = FSDirectory.open(indexDir);
    IndexReader reader = IndexReader.open(dir, true);
    int max = reader.maxDoc();
    for (int i = 0; i < max; i++) {
        if (!reader.isDeleted(i)) {
            Document d = reader.document(i);
            for (Field f : d.getFields(field)) {
                if (f.isStored() && !f.isBinary()) {
                    String value = f.stringValue();
                    if (value != null) {
                        out.printf("%s\n", value);
                    }
                }
            }
        }
    }
}

From source file:com.tamingtext.tagging.LuceneTagExtractor.java

License:Apache License

/**
 * Walks every live (non-deleted) document of the index and emits its tags
 * together with the terms collected from its clustering term vectors.
 *
 * <p>For each document the terms of the {@code description-clustering} and
 * {@code extended-clustering} term vectors are appended to a shared buffer,
 * which is then handed to {@code emitTermsForTags} along with the
 * {@code tag} term vector.
 *
 * @param indexDir the index to read.
 * @param out the writer passed on to {@code emitTermsForTags}.
 * @param maxDocs not consulted by this method; every live document is visited.
 * @throws IOException if the index cannot be opened, read or closed.
 */
public static void dumpDocs(File indexDir, PrintWriter out, long maxDocs) throws IOException {
    Directory dir = FSDirectory.open(indexDir);
    try {
        IndexReader reader = IndexReader.open(dir, true);
        try {
            int max = reader.maxDoc();

            // Reused across documents; reset at the start of each one.
            StringBuilder buf = new StringBuilder();

            for (int i = 0; i < max; i++) {
                if (reader.isDeleted(i)) {
                    continue;
                }
                buf.setLength(0);
                appendVectorTerms(buf, reader.getTermFreqVector(i, "description-clustering"));
                appendVectorTerms(buf, reader.getTermFreqVector(i, "extended-clustering"));
                emitTermsForTags(out, buf, reader, reader.getTermFreqVector(i, "tag"));
            }
        } finally {
            // Release the reader even when processing a document fails.
            reader.close();
        }
    } finally {
        // The directory was opened here, so it is released here as well.
        dir.close();
    }
}

From source file:com.Yasna.forum.database.FieldFilter.java

License:Open Source License

/**
 * Builds the set of document numbers that match {@code searchTerm}.
 *
 * @param reader the index reader to query
 * @return a bit set, created with capacity {@code reader.maxDoc()}, in which
 *         the bit of every matching document is set
 * @throws IOException if the matching documents cannot be enumerated
 */
public BitSet bits(IndexReader reader) throws IOException {
    // One slot per possible document number in the index.
    final BitSet matches = new BitSet(reader.maxDoc());
    // Enumerates the documents that contain the configured term.
    final TermDocs postings = reader.termDocs(searchTerm);
    try {
        for (boolean more = postings.next(); more; more = postings.next()) {
            matches.set(postings.doc());
        }
    } finally {
        if (postings != null) {
            postings.close();
        }
    }
    return matches;
}

From source file:com.zimbra.cs.index.RawIndexEditor.java

License:Open Source License

/**
 * Prints the document count of the index and then dumps every document
 * number below {@code maxDoc()}, passing each document's deleted flag to
 * {@code dumpDocument}.
 *
 * @throws IOException if the index cannot be opened or read
 */
void dumpAll() throws IOException {
    final IndexReader indexReader = IndexReader.open(luceneDirectory);
    try {
        final int total = indexReader.maxDoc();
        System.out.println("There are " + total + " documents in this index.");

        int docId = 0;
        while (docId < total) {
            dumpDocument(indexReader.document(docId), indexReader.isDeleted(docId));
            docId++;
        }
    } finally {
        // Always release the reader, even if a dump fails midway.
        indexReader.close();
    }
}

From source file:com.zimbra.cs.index.TermsFilter.java

License:Open Source License

/**
 * Collects every document that contains at least one of this filter's terms.
 *
 * @see org.apache.lucene.search.Filter#getDocIdSet(org.apache.lucene.index.IndexReader)
 */
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    final OpenBitSet matches = new OpenBitSet(reader.maxDoc());
    // A single enumerator is repositioned for each term.
    final TermDocs postings = reader.termDocs();
    try {
        final Iterator<Term> pending = terms.iterator();
        while (pending.hasNext()) {
            final Term current = pending.next();
            postings.seek(current);
            for (boolean more = postings.next(); more; more = postings.next()) {
                matches.set(postings.doc());
            }
        }
    } finally {
        Closeables.closeQuietly(postings);
    }
    return matches;
}

From source file:com.zimbra.cs.rmgmt.RemoteMailQueue.java

License:Open Source License

/**
 * Fills {@code result} with one page of queue items read from the index.
 *
 * <p>Deleted documents are ignored. The first {@code offset} live documents
 * are skipped, and listing stops once {@code limit} items have been added.
 * {@code result.hits} is always set from {@code getNumMessages()}.
 *
 * @param result receives the listed items and the hit count
 * @param indexReader the reader to list documents from
 * @param offset number of live documents to skip before listing
 * @param limit number of items after which listing stops
 * @throws IOException if a document cannot be read
 */
private void list0(SearchResult result, IndexReader indexReader, int offset, int limit) throws IOException {
    if (ZimbraLog.rmgmt.isDebugEnabled()) {
        ZimbraLog.rmgmt.debug("listing offset=" + offset + " limit=" + limit + " " + this);
    }

    final int docCount = indexReader.maxDoc();
    int toSkip = offset;
    int emitted = 0;

    for (int docId = 0; docId < docCount; docId++) {
        if (indexReader.isDeleted(docId)) {
            continue;
        }

        // Consume the offset before anything is listed.
        if (toSkip > 0) {
            toSkip--;
            continue;
        }

        final Document current = indexReader.document(docId);
        result.qitems.add(docToQueueItem(current));

        emitted++;
        if (emitted == limit) {
            break;
        }
    }
    result.hits = getNumMessages();
}

From source file:db.infiniti.config.HighFreqTerms.java

License:Apache License

/**
 * Chooses the next term to query, based on how often each candidate
 * co-occurs with the queries that were already sent.
 *
 * <p>If nothing has been sent yet, the first candidate is returned.
 * Otherwise each candidate is scored by the summed number of documents whose
 * {@code text} field contains both the candidate and one previously sent
 * query. The candidates are printed in descending score order, and the
 * candidate with the <em>lowest</em> score that is not part of
 * {@code initialQuery} is returned.
 *
 * @param indexReader the index used to count co-occurrences
 * @param terms the candidate terms; the text after the first {@code ':'} of
 *        each term's string form is used as the term text
 * @param sentQueries the queries that were already submitted
 * @param initialQuery terms that must not be chosen unless no other candidate exists
 * @return the chosen term text; the last candidate examined if every
 *         candidate is in {@code initialQuery}, or an empty string if no
 *         candidate could be scored
 */
private String getTheBestMatchingTerm(IndexReader indexReader, TermStats[] terms, ArrayList<String> sentQueries,
        ArrayList<String> initialQuery) {
    String specificFreqTerm = "";
    if (sentQueries.isEmpty() && terms.length > 0) {
        String first = terms[0].term + "";
        return first.substring(first.indexOf(":") + 1);
    }

    Map<String, Integer> hitsPerCandidate = new HashMap<String, Integer>();
    for (int i = 0; i < terms.length; i++) {
        String raw = terms[i].term + "";
        String queryATerm = raw.substring(raw.indexOf(":") + 1);
        try {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            int totalNumberOfResults = 0;
            for (String queryB : sentQueries) {
                // A fresh query per pair: reusing one BooleanQuery would keep
                // piling up MUST clauses from earlier iterations and narrow
                // every later search to documents matching all of them.
                BooleanQuery bq = new BooleanQuery();
                bq.add(new TermQuery(new Term("text", queryATerm)), BooleanClause.Occur.MUST);
                bq.add(new TermQuery(new Term("text", queryB)), BooleanClause.Occur.MUST);

                // Only totalHits is used, so a single top hit is enough;
                // there is no need to collect up to maxDoc() results.
                TopDocs hits = indexSearcher.search(bq, 1);
                totalNumberOfResults = totalNumberOfResults + hits.totalHits;
            }
            hitsPerCandidate.put(queryATerm, totalNumberOfResults);
        } catch (IOException e) {
            // Best effort: a candidate that cannot be scored is left out.
            // CorruptIndexException is an IOException and is covered here.
            e.printStackTrace();
        }
    }

    List<Map.Entry<String, Integer>> list =
            new ArrayList<Map.Entry<String, Integer>>(hitsPerCandidate.entrySet());
    // Descending by hit count.
    Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() {
        public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {
            return (o2.getValue()).compareTo(o1.getValue());
        }
    });
    System.out.println("sorted by relevance to previous submitted queries: ");
    for (Map.Entry<String, Integer> entry : list) {
        System.out.println(entry.getKey() + " ==== " + entry.getValue());
    }

    // Walk from the end of the descending list, i.e. lowest score first.
    for (int i = list.size() - 1; i >= 0; i--) {
        specificFreqTerm = list.get(i).getKey();
        if (!initialQuery.contains(specificFreqTerm)) {
            return specificFreqTerm;
        }
    }

    return specificFreqTerm;
}

From source file:de.ks.flatadocdb.index.GlobalIndexTest.java

License:Apache License

/**
 * Checks that the index reader reports zero documents before
 * {@code recreate()} is called and {@code COUNT} documents afterwards.
 */
@Test
public void testRecreateLuceneIndex() throws Exception {
    // Before recreation the reader reports no documents.
    assertEquals(0, luceneIndex.getIndexReader().maxDoc());

    luceneIndex.recreate();

    // A reader fetched after recreation must report COUNT documents.
    final IndexReader recreated = luceneIndex.getIndexReader();
    assertEquals(COUNT, recreated.maxDoc());
}

From source file:de.mirkosertic.desktopsearch.LuceneIndexHandler.java

License:Open Source License

/**
 * Removes index entries whose backing file no longer exists.
 *
 * <p>After refreshing the searcher manager, every document number below
 * {@code maxDoc()} is loaded. When the file named by its
 * {@code IndexFields.FILENAME} field is missing, all documents sharing its
 * {@code IndexFields.UNIQUEID} value are deleted through the index writer.
 *
 * @throws IOException if refreshing, reading or deleting fails
 */
public void cleanupDeadContent() throws IOException {
    searcherManager.maybeRefreshBlocking();
    final IndexSearcher searcher = searcherManager.acquire();

    try {
        final IndexReader reader = searcher.getIndexReader();
        for (int docId = 0; docId < reader.maxDoc(); docId++) {
            final Document doc = reader.document(docId);
            final String fileName = doc.getField(IndexFields.FILENAME).stringValue();
            final File theFile = new File(fileName);
            if (theFile.exists()) {
                continue;
            }
            LOGGER.info("Removing file " + theFile + " from index as it does not exist anymore.");
            final String uniqueId = doc.getField(IndexFields.UNIQUEID).stringValue();
            indexWriter.deleteDocuments(new Term(IndexFields.UNIQUEID, uniqueId));
        }
    } finally {
        // The acquired searcher must always be handed back to the manager.
        searcherManager.release(searcher);
    }
}

From source file:de.tudarmstadt.ukp.teaching.uima.nounDecompounding.ranking.TotalFreqAmout.java

License:Open Source License

/**
 * Adds all frequency values for a special directory
 * @return/*  www.ja  va  2s  . c o m*/
 * @throws IOException
 */
protected BigInteger countFreq(FSDirectory dir) throws IOException {
    BigInteger count = BigInteger.valueOf(0);

    IndexReader reader = IndexReader.open(dir);
    for (int i = 0; i < reader.maxDoc(); i++) {
        if (reader.isDeleted(i)) {
            continue;
        }

        Document doc = reader.document(i);
        count = count.add(new BigInteger(doc.get("freq")));
    }

    return count;
}