Example usage for org.apache.lucene.index IndexReader numDocs

List of usage examples for org.apache.lucene.index IndexReader numDocs

Introduction

This page lists example usages of org.apache.lucene.index IndexReader.numDocs().

Prototype

public abstract int numDocs();

Document

Returns the number of documents in this index.

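Before the project sources below, here is a minimal, self-contained sketch of calling numDocs() with the Lucene 5+ API; the index path /tmp/index and the class name NumDocsExample are illustrative assumptions, not taken from any of the examples. Note that numDocs() counts only live documents, while maxDoc() also includes deleted ones.

import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.store.FSDirectory;

public class NumDocsExample {
    public static void main(String[] args) throws Exception {
        // open the index read-only and print how many live (non-deleted) documents it contains
        try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/tmp/index")))) {
            System.out.println("Documents in index: " + reader.numDocs());
        }
    }
}
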
Usage

From source file:com.searchcode.app.service.CodeSearcher.java

License:Open Source License

/**
 * Returns the total number of documents that are present in the index at this time
 */
public int getTotalNumberDocumentsIndexed() {
    int numDocs = 0;
    try {
        IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)));

        numDocs = reader.numDocs();
        reader.close();
    } catch (Exception ex) {
        LOGGER.info(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage());
    }

    return numDocs;
}

From source file:com.searchcode.app.service.IndexService.java

License:Open Source License

/**
 * Returns the total number of documents that are present in the index at this time
 */
public int getTotalNumberDocumentsIndexed() {
    int numDocs = 0;
    try {
        IndexReader reader = DirectoryReader.open(FSDirectory.open(this.INDEX_LOCATION));
        numDocs = reader.numDocs();
        reader.close();
    } catch (IOException ex) {
        this.logger.info(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage());
    }

    return numDocs;
}

From source file:com.stimulus.archiva.search.StandardSearch.java

License:Open Source License

public long getTotalMessageCount(Volume volume) throws MessageSearchException {
    if (volume == null)
        throw new MessageSearchException("assertion failure: null volume", logger);
    //logger.debug("get total no emails {indexpath='"+volume.getIndexPath()+"'}");
    int count = 0;
    File indexDir = new File(volume.getIndexPath());
    if (!indexDir.exists())
        return 0;
    IndexReader indexReader = null;
    try {
        indexReader = IndexReader.open(indexDir);
        count += indexReader.numDocs();
        indexReader.close();
    } catch (IOException e) {
        logger.debug("failed to open index to calculate total email count", e);
        //throw new MessageSearchException("failed to open index to calculate total email count",e,logger);
    }
    return count;
}

From source file:com.sun.javaee.blueprints.carstore.search.UpdateIndex.java

License:Berkeley License

public static Document deleteIndex(String indexFile, String sxDocId) throws IOException {

    // get document to update, so data can be added
    SearchIndex si = new SearchIndex();
    si.query(indexFile, sxDocId, "uid");

    Hits hits = si.getHitsNative();
    // should only have one return
    if (hits.length() > 1) {
        // exception, should only be one
        throw new IllegalStateException("Should only have one document in index with uid=" + sxDocId);
    }

    Document doc = (Document) hits.doc(0);
    if (bDebug)
        System.out.println("HAVE DOC " + doc);

    // Read index and delete targeted doc through a term
    IndexReader reader = IndexReader.open(indexFile);
    // delete document by term
    int delcnt = reader.deleteDocuments(new Term("uid", sxDocId));
    if (bDebug) {
        System.out.println("return Number of items deleted:" + delcnt);
        int deleted = 0;
        for (int ii = 0; ii < reader.numDocs(); ii++) {
            if (reader.isDeleted(ii)) {
                deleted++;
            }
        }
        if (bDebug)
            System.out.println("Number of deleted items in the whole index:" + deleted);
    }
    reader.close();
    return doc;
}

From source file:com.sxc.lucene.index.IndexingTest.java

License:Apache License

public void testIndexReader() throws IOException {
    IndexReader reader = DirectoryReader.open(directory);
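    // with no deletions in the index, maxDoc() and numDocs() return the same count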
    assertEquals(ids.length, reader.maxDoc()); // 8
    assertEquals(ids.length, reader.numDocs()); // 8
    reader.close();
}

From source file:CopulaResources.TermCooccurence.java

public static TermCooccurence generateCooccurencebyClass(IndexReader indexReader, String classFieldName,
        String textFieldName, Analyzer analyzer, int minFreq, int maxFreq) throws IOException {
    System.out.println(":::Generating Term-Pair List:::");
    TermCooccurence CooccurList = new TermCooccurence();
    Terms classes = MultiFields.getTerms(indexReader, classFieldName);
    if (classes != null) {
        TermsEnum classesEnum = classes.iterator();
            BytesRef nextClass;
        while ((nextClass = classesEnum.next()) != null) {
            if (nextClass.length > 0) {

                Term term = new Term(classFieldName, nextClass);
                String tpClass = nextClass.utf8ToString();
                BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
                booleanQuery.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.MUST));
                IndexSearcher indexSearcher = new IndexSearcher(indexReader);
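                // pass numDocs() as the hit limit so every document of this class is returned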
                TopDocs topDocs;
                topDocs = indexSearcher.search(booleanQuery.build(), indexReader.numDocs());

                for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                    IndexableField[] storableFields = indexSearcher.doc(scoreDoc.doc).getFields(textFieldName);
                    for (IndexableField singleStorableField : storableFields) {
                        if (singleStorableField != null) {
                            BytesRef text = new BytesRef(singleStorableField.stringValue());
                            generateCooccurences(text.utf8ToString(), analyzer, CooccurList, tpClass);
                        }
                    }
                }
                CooccurList.trimbyFreq(tpClass, minFreq, maxFreq);

            }
        }
    }
    System.out.println(":::Generation Complete:::");
    return CooccurList;
}

From source file:CopulaResources.TermCooccurence.java

public static void generateCooccurencebyClass(IndexReader indexReader, String classFieldName,
        String textFieldName, Analyzer analyzer, int minFreq, int maxFreq, Path saveDir) throws IOException {
    System.out.println(":::Generating Term-Pair List:::");
    TermCooccurence CooccurList = new TermCooccurence();
    Terms classes = MultiFields.getTerms(indexReader, classFieldName);
    if (classes != null) {
        TermsEnum classesEnum = classes.iterator();
            BytesRef nextClass;
        while ((nextClass = classesEnum.next()) != null) {
            if (nextClass.length > 0) {

                Term term = new Term(classFieldName, nextClass);
                String tpClass = nextClass.utf8ToString();
                BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
                booleanQuery.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.MUST));
                IndexSearcher indexSearcher = new IndexSearcher(indexReader);
                TopDocs topDocs;
                topDocs = indexSearcher.search(booleanQuery.build(), indexReader.numDocs());

                for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                    IndexableField[] storableFields = indexSearcher.doc(scoreDoc.doc).getFields(textFieldName);
                    for (IndexableField singleStorableField : storableFields) {
                        if (singleStorableField != null) {
                            BytesRef text = new BytesRef(singleStorableField.stringValue());
                            generateCooccurences(text.utf8ToString(), analyzer, CooccurList, tpClass);
                        }
                    }
                }
                CooccurList.trimbyFreq(tpClass, minFreq, maxFreq);

            }
        }
    }
    CooccurList.savetoFile(saveDir);
    System.out.println(":::Generation Complete:::");
}

From source file:CopulaResources.TermCooccurence.java

public static void generateNCooccurencebyClass(IndexReader indexReader, String classFieldName,
        String textFieldName, Analyzer analyzer, String direction, double percent, Path saveDir)
        throws IOException {
    System.out.println(":::Generating Term-Pair List:::");
    TermCooccurence CooccurList = new TermCooccurence();
    Terms classes = MultiFields.getTerms(indexReader, classFieldName);
    if (classes != null) {
        TermsEnum classesEnum = classes.iterator();
            BytesRef nextClass;
        while ((nextClass = classesEnum.next()) != null) {
            if (nextClass.length > 0) {

                Term term = new Term(classFieldName, nextClass);
                String tpClass = nextClass.utf8ToString();
                BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
                booleanQuery.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.MUST));
                IndexSearcher indexSearcher = new IndexSearcher(indexReader);
                TopDocs topDocs;
                topDocs = indexSearcher.search(booleanQuery.build(), indexReader.numDocs());

                for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                    IndexableField[] storableFields = indexSearcher.doc(scoreDoc.doc).getFields(textFieldName);
                    for (IndexableField singleStorableField : storableFields) {
                        if (singleStorableField != null) {
                            BytesRef text = new BytesRef(singleStorableField.stringValue());
                            generateCooccurences(text.utf8ToString(), analyzer, CooccurList, tpClass);
                        }
                    }
                }
                CooccurList.trimbyPercent(tpClass, direction, percent);

            }
        }
    }
    CooccurList.savetoFile(saveDir);
    System.out.println(":::Generation Complete:::");
}

From source file:de.linguatools.disco.Compositionality.java

License:Apache License

/**
 * Find the most similar words in the DISCO word space for an input word 
 * vector. While the word vector can represent a multi-token word (if it was
 * produced by one of the methods
 * <code>Compositionality.composeWordVectors()</code>) the most
 * similar words will only be single-token words from the index.<br/>
 * <b>Warning</b>: This method is very time consuming and should only be
 * used with word spaces that have been loaded into memory!
 * @param wordvector input word vector
 * @param disco DISCO word space
 * @param simMeasure
 * @return List of all words (with their similarity values) whose similarity
 * with the <code>wordvector</code> is greater than zero, ordered by 
 * similarity value (highest value first).
 * @throws java.io.IOException
 */
public ArrayList<ReturnDataCol> similarWords(HashMap<String, Float> wordvector, DISCO disco,
        SimilarityMeasures simMeasure) throws IOException {

    // get an IndexReader for the index directory
    IndexReader ir = disco.getIndexReader();

    // iterate over all documents
    ArrayList<ReturnDataCol> result = new ArrayList();
    for (int i = 0; i < ir.numDocs(); i++) {
        Document doc = null;
        try {
            doc = ir.document(i);
        } catch (CorruptIndexException ex) {
            continue;
        } catch (IOException ex) {
            continue;
        }
        // fetch the word vector for word no. i
        String word = doc.get("word");
        HashMap<String, Float> wv = getWordvector(word, disco);
        // compute the similarity between the two word vectors
        float sim = semanticSimilarity(wordvector, wv, simMeasure);
        if (sim > 0.0F) {
            ReturnDataCol r = new ReturnDataCol(word, sim);
            result.add(r);
        }
    }

    // sort by similarity value, highest first
    Collections.sort(result, new ValueComparator());

    return result;
}

From source file:de.linguatools.disco.DISCO.java

License:Apache License

/***************************************************************************
 * returns the number of Documents (i.e. words) in the index.
 * @return number of words in index
 * @throws java.io.IOException
 */
public int numberOfWords() throws IOException {
    // create an IndexReader for the index directory
    IndexReader ir = IndexReader.open(FSDirectory.open(new File(indexName)));
    // get the number of documents in the index
    return (ir.numDocs());
}