List of usage examples for org.apache.lucene.index IndexReader numDocs
public abstract int numDocs();
From source file:com.searchcode.app.service.CodeSearcher.java
License:Open Source License
/**
 * Returns the total number of documents that are present in the index at this time.
 *
 * @return the number of live (non-deleted) documents, or 0 if the index cannot be opened
 */
public int getTotalNumberDocumentsIndexed() {
    int numDocs = 0;
    // try-with-resources guarantees the reader is closed even when numDocs() throws;
    // the original skipped reader.close() on the exception path (resource leak)
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)))) {
        numDocs = reader.numDocs();
    } catch (Exception ex) {
        LOGGER.info(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage());
    }
    return numDocs;
}
From source file:com.searchcode.app.service.IndexService.java
License:Open Source License
/** * Returns the total number of documents that are present in the index at this time *///from www . ja v a2 s. c o m public int getTotalNumberDocumentsIndexed() { int numDocs = 0; try { IndexReader reader = DirectoryReader.open(FSDirectory.open(this.INDEX_LOCATION)); numDocs = reader.numDocs(); reader.close(); } catch (IOException ex) { this.logger.info(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage()); } return numDocs; }
From source file:com.stimulus.archiva.search.StandardSearch.java
License:Open Source License
public long getTotalMessageCount(Volume volume) throws MessageSearchException { if (volume == null) throw new MessageSearchException("assertion failure: null volume", logger); //logger.debug("get total no emails {indexpath='"+volume.getIndexPath()+"'}"); int count = 0; File indexDir = new File(volume.getIndexPath()); if (!indexDir.exists()) return 0; IndexReader indexReader = null; try {//from w ww . j a v a2s .co m indexReader = IndexReader.open(indexDir); count += indexReader.numDocs(); indexReader.close(); } catch (IOException e) { logger.debug("failed to open index to calculate total email count", e); //throw new MessageSearchException("failed to open index to calculate total email count",e,logger); } return count; }
From source file:com.sun.javaee.blueprints.carstore.search.UpdateIndex.java
License:Berkeley License
public static Document deleteIndex(String indexFile, String sxDocId) throws IOException { // get document to update, so data can be added SearchIndex si = new SearchIndex(); si.query(indexFile, sxDocId, "uid"); Hits hits = si.getHitsNative();/*from w w w. ja va2s.co m*/ // should only have one return if (hits.length() > 1) { // exception, should only be one throw new IllegalStateException("Should only have one document in index with uid=" + sxDocId); } Document doc = (Document) hits.doc(0); if (bDebug) System.out.println("HAVE DOC " + doc); // Read index and delete targeted doc through a term IndexReader reader = IndexReader.open(indexFile); // delete document by term int delcnt = reader.deleteDocuments(new Term("uid", sxDocId)); if (bDebug) { System.out.println("return Number of items deleted:" + delcnt); int deleted = 0; for (int ii = 0; ii < reader.numDocs(); ii++) { if (reader.isDeleted(ii)) { deleted++; } } if (bDebug) System.out.println("Number of deleted items in the whole index:" + deleted); } reader.close(); return doc; }
From source file:com.sxc.lucene.index.IndexingTest.java
License:Apache License
public void testIndexReader() throws IOException { IndexReader reader = DirectoryReader.open(directory); assertEquals(ids.length, reader.maxDoc()); // 8 assertEquals(ids.length, reader.numDocs()); // 8 reader.close();/*from w w w .j a va 2 s .co m*/ }
From source file:CopulaResources.TermCooccurence.java
/**
 * Builds a term co-occurrence list per class label from the index.
 * For every distinct value of {@code classFieldName}, all documents of that
 * class are fetched, their {@code textFieldName} contents analyzed, and the
 * resulting term pairs trimmed to the [minFreq, maxFreq] frequency band.
 *
 * @param indexReader    open reader over the corpus
 * @param classFieldName field holding the class label
 * @param textFieldName  stored field holding the document text
 * @param analyzer       analyzer used to tokenize the text
 * @param minFreq        minimum pair frequency kept
 * @param maxFreq        maximum pair frequency kept
 * @return the accumulated co-occurrence list
 * @throws IOException if the index cannot be read
 */
public static TermCooccurence generateCooccurencebyClass(IndexReader indexReader, String classFieldName,
        String textFieldName, Analyzer analyzer, int minFreq, int maxFreq) throws IOException {
    System.out.println(":::Generating Term-Pair List:::");
    TermCooccurence CooccurList = new TermCooccurence();
    Terms classes = MultiFields.getTerms(indexReader, classFieldName);
    if (classes != null) {
        // hoisted out of the loop: the searcher is loop-invariant (original
        // rebuilt it once per class term)
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);
        // IndexSearcher.search requires numHits > 0; guard against an index
        // whose live-document count is zero
        int maxHits = Math.max(1, indexReader.numDocs());
        TermsEnum classesEnum = classes.iterator();
        BytesRef nextClass;
        while ((nextClass = classesEnum.next()) != null) {
            if (nextClass.length > 0) {
                Term term = new Term(classFieldName, nextClass);
                String tpClass = nextClass.utf8ToString();
                BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
                booleanQuery.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.MUST));
                TopDocs topDocs = indexSearcher.search(booleanQuery.build(), maxHits);
                for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                    IndexableField[] storableFields = indexSearcher.doc(scoreDoc.doc).getFields(textFieldName);
                    for (IndexableField singleStorableField : storableFields) {
                        if (singleStorableField != null) {
                            // original round-tripped String -> BytesRef -> String; pass the value directly
                            generateCooccurences(singleStorableField.stringValue(), analyzer, CooccurList, tpClass);
                        }
                    }
                }
                // drop pairs outside the requested frequency band for this class
                CooccurList.trimbyFreq(tpClass, minFreq, maxFreq);
            }
        }
    }
    System.out.println(":::Generation Complete:::");
    return CooccurList;
}
From source file:CopulaResources.TermCooccurence.java
public static void generateCooccurencebyClass(IndexReader indexReader, String classFieldName, String textFieldName, Analyzer analyzer, int minFreq, int maxFreq, Path saveDir) throws IOException { System.out.println(":::Generating Term-Pair List:::"); TermCooccurence CooccurList = new TermCooccurence(); Terms classes = MultiFields.getTerms(indexReader, classFieldName); if (classes != null) { TermsEnum classesEnum = classes.iterator(); BytesRef nextClass;//from ww w . j ava 2s .c o m while ((nextClass = classesEnum.next()) != null) { if (nextClass.length > 0) { Term term = new Term(classFieldName, nextClass); String tpClass = nextClass.utf8ToString(); BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder(); booleanQuery.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.MUST)); IndexSearcher indexSearcher = new IndexSearcher(indexReader); TopDocs topDocs; topDocs = indexSearcher.search(booleanQuery.build(), indexReader.numDocs()); for (ScoreDoc scoreDoc : topDocs.scoreDocs) { IndexableField[] storableFields = indexSearcher.doc(scoreDoc.doc).getFields(textFieldName); for (IndexableField singleStorableField : storableFields) { if (singleStorableField != null) { BytesRef text = new BytesRef(singleStorableField.stringValue()); generateCooccurences(text.utf8ToString(), analyzer, CooccurList, tpClass); } } } CooccurList.trimbyFreq(tpClass, minFreq, maxFreq); } } } CooccurList.savetoFile(saveDir); System.out.println(":::Generation Complete:::"); }
From source file:CopulaResources.TermCooccurence.java
public static void generateNCooccurencebyClass(IndexReader indexReader, String classFieldName, String textFieldName, Analyzer analyzer, String direction, double percent, Path saveDir) throws IOException { System.out.println(":::Generating Term-Pair List:::"); TermCooccurence CooccurList = new TermCooccurence(); Terms classes = MultiFields.getTerms(indexReader, classFieldName); if (classes != null) { TermsEnum classesEnum = classes.iterator(); BytesRef nextClass;//from ww w .java2 s.c o m while ((nextClass = classesEnum.next()) != null) { if (nextClass.length > 0) { Term term = new Term(classFieldName, nextClass); String tpClass = nextClass.utf8ToString(); BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder(); booleanQuery.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.MUST)); IndexSearcher indexSearcher = new IndexSearcher(indexReader); TopDocs topDocs; topDocs = indexSearcher.search(booleanQuery.build(), indexReader.numDocs()); for (ScoreDoc scoreDoc : topDocs.scoreDocs) { IndexableField[] storableFields = indexSearcher.doc(scoreDoc.doc).getFields(textFieldName); for (IndexableField singleStorableField : storableFields) { if (singleStorableField != null) { BytesRef text = new BytesRef(singleStorableField.stringValue()); generateCooccurences(text.utf8ToString(), analyzer, CooccurList, tpClass); } } } CooccurList.trimbyPercent(tpClass, direction, percent); } } } CooccurList.savetoFile(saveDir); System.out.println(":::Generation Complete:::"); }
From source file:de.linguatools.disco.Compositionality.java
License:Apache License
/** * Find the most similar words in the DISCO word space for an input word * vector. While the word vector can represent a multi-token word (if it was * produced by one of the methods // w w w. ja va 2 s.co m * <code>Compositionality.composeWordVectors()</code>) the most * similar words will only be single-token words from the index.<br/> * <b>Warning</b>: This method is very time consuming and should only be * used with word spaces that have been loaded into memory! * @param wordvector input word vector * @param disco DISCO word space * @param simMeasure * @return List of all words (with their similarity values) whose similarity * with the <code>wordvector</code> is greater than zero, ordered by * similarity value (highest value first). * @throws java.io.IOException */ public ArrayList<ReturnDataCol> similarWords(HashMap<String, Float> wordvector, DISCO disco, SimilarityMeasures simMeasure) throws IOException { // hole einen IndexReader fuer das indexDir IndexReader ir = disco.getIndexReader(); // durchlaufe alle Dokumente ArrayList<ReturnDataCol> result = new ArrayList(); for (int i = 0; i < ir.numDocs(); i++) { Document doc = null; try { doc = ir.document(i); } catch (CorruptIndexException ex) { continue; } catch (IOException ex) { continue; } // Wortvektor zu Wort Nr. i holen String word = doc.get("word"); HashMap<String, Float> wv = getWordvector(word, disco); // hnlichkeit zwischen Wortvektoren berechnen float sim = semanticSimilarity(wordvector, wv, simMeasure); if (sim > 0.0F) { ReturnDataCol r = new ReturnDataCol(word, sim); result.add(r); } } // nach hchstem hnlichkeitswert sortieren Collections.sort(result, new ValueComparator()); return result; }
From source file:de.linguatools.disco.DISCO.java
License:Apache License
/*************************************************************************** * returns the number of Documents (i.e. words) in the index. * @return number of words in index/*from www . j a v a2 s .c o m*/ * @throws java.io.IOException */ public int numberOfWords() throws IOException { // erzeuge einen IndexReader fuer das indexDir IndexReader ir = IndexReader.open(FSDirectory.open(new File(indexName))); // Hole Anzahl Dokumente im Index return (ir.numDocs()); }