Usage examples for org.apache.lucene.index.IndexReader.numDocs()
public abstract int numDocs();
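numDocs() returns the number of documents in the index, not counting deletions; contrast maxDoc(), which also counts deleted slots that have not yet been merged away. A minimal sketch of the call, using the Lucene 4.x-style API that the examples below rely on and a placeholder index path:

import java.io.File;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.store.FSDirectory;

public class NumDocsExample {
    public static void main(String[] args) throws Exception {
        // "/tmp/my-index" is a placeholder path used only for illustration.
        try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(new File("/tmp/my-index")))) {
            System.out.println("live documents: " + reader.numDocs()); // excludes deletions
            System.out.println("max doc id:     " + reader.maxDoc());  // includes deleted slots
        }
    }
}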
From source file:lucee.runtime.search.lucene2.LuceneSearchCollection.java
License:Open Source License
private int _countDocs(String col) {
    // FUTURE add support for non-file resources
    int totalDocs;
    IndexReader reader = null;
    try {
        reader = _getReader(col, true);
        totalDocs = reader.numDocs();
    } catch (Exception e) {
        return 0;
    } finally {
        closeEL(reader);
    }
    return totalDocs;
}
From source file:lucee.runtime.search.lucene2.LuceneSearchCollection.java
License:Open Source License
@Override
public int getDocumentCount(String id) {
    try {
        if (!_getIndexDirectory(id, false).exists())
            return 0;
        IndexReader r = null;
        int num = 0;
        try {
            r = _getReader(id, false);
            num = r.numDocs();
        } finally {
            close(r);
        }
        return num;
    } catch (Exception e) {
    }
    return 0;
}
From source file:luceneindexcreator.LuceneIndexCreator.java
public static void main(String[] args) {
    try {
        Comparator<TermStats> comparator = new Comparator<TermStats>() {
            @Override
            public int compare(TermStats t1, TermStats t2) {
                return t1.totalTermFreq < t2.totalTermFreq ? -1 : 1;
            }
        };

        LuceneIndexCreator lw = new LuceneIndexCreator(INDEX_PATH, JSON_FILE_PATH_WEEKLY);
        lw.createIndex();

        // Check the index has been created successfully.
        Directory indexDirectory = FSDirectory.open(new File(INDEX_PATH));
        IndexReader indexReader = DirectoryReader.open(indexDirectory);
        int numDocs = indexReader.numDocs();

        /* Keywords sorted by date: generation of per-date indexes and the
         * associated JSON files of keyword frequencies.
         *
         * ArrayList<String> indexedDates = new ArrayList<String>();
         * for (int i = 0; i < numDocs; i++) {
         *     Document document = indexReader.document(i);
         *     String date = document.get("Date");
         *     if (!contains(indexedDates, date)) {
         *         LuceneIndexCreator lwd = new LuceneIndexCreator(PARENT_INDEX_PATH + date, JSON_FILE_PATH_WEEKLY);
         *         lwd.createSubindexDate(date);
         *         indexedDates.add(date);
         *     }
         *     Directory indexDirectoryDate = FSDirectory.open(new File(PARENT_INDEX_PATH + date));
         *     IndexReader indexReaderDate = DirectoryReader.open(indexDirectoryDate);
         *     HighFreqTerms hTerms = new HighFreqTerms();
         *     JSONArray termResultJSONArray = new JSONArray();
         *     TermStats[] hTermResult = hTerms.getHighFreqTerms(indexReaderDate, 50, "content", comparator);
         *     for (int j = 0; j < hTermResult.length; j++) {
         *         JSONObject termResultJSON = new JSONObject();
         *         termResultJSON.put("Term", hTermResult[j].termtext.utf8ToString());
         *         termResultJSON.put("Frequency", hTermResult[j].totalTermFreq);
         *         termResultJSONArray.add(termResultJSON);
         *     }
         *     try (FileWriter file = new FileWriter("JSONResults/" + date + ".json")) {
         *         file.write(termResultJSONArray.toJSONString());
         *         System.out.println("Successfully Copied JSON Object to File...");
         *         System.out.println("\nJSON Object: " + termResultJSONArray);
         *     }
         * }
         */

        // Keywords sorted by week: generation of per-week indexes and the
        // associated JSON files of keyword frequencies.
        ArrayList<String> indexedWeeks = new ArrayList<String>();
        // Create a subindex for each week seen in the main index.
        for (int i = 0; i < numDocs; i++) {
            Document document = indexReader.document(i);
            String weekNum = document.get("Week_number");
            if (!contains(indexedWeeks, weekNum)) {
                LuceneIndexCreator lww = new LuceneIndexCreator(PARENT_INDEX_PATH + "week" + weekNum,
                        JSON_FILE_PATH_WEEKLY);
                lww.createSubindexWeek(weekNum);
                indexedWeeks.add(weekNum);
            }
        }

        JSONArray json1 = new JSONArray();
        for (String weekNum : indexedWeeks) {
            Directory indexDirectoryWeek = FSDirectory.open(new File(PARENT_INDEX_PATH + "week" + weekNum));
            IndexReader indexReaderWeek = DirectoryReader.open(indexDirectoryWeek);
            HighFreqTerms hTerms = new HighFreqTerms();
            TermStats[] hTermResult = hTerms.getHighFreqTerms(indexReaderWeek, 100, "content", comparator);
            // Build the JSON object for this week.
            JSONObject json2 = new JSONObject();
            json2.put("Week", weekNum);
            JSONArray json3 = new JSONArray();
            for (int j = 0; j < hTermResult.length; j++) {
                JSONObject json4 = new JSONObject();
                json4.put("Term", hTermResult[j].termtext.utf8ToString());
                json4.put("Frequency", hTermResult[j].totalTermFreq);
                json3.add(json4);
            }
            json2.put("Terms", json3);
            json1.add(json2);
        }

        // Output the weekly JSON.
        try (FileWriter file = new FileWriter("JSONResults/allWeeklyTerms.json")) {
            file.write(json1.toJSONString());
            System.out.println("Successfully Copied JSON Object to File...");
            System.out.println("\nJSON Object: " + json1);
        }

        // Get term frequencies over all docs.
        HighFreqTerms hTerms = new HighFreqTerms();
        JSONArray termResultJSONArray = new JSONArray();
        TermStats[] hTermResult = hTerms.getHighFreqTerms(indexReader, 150, "content", comparator);
        for (int i = 0; i < hTermResult.length; i++) {
            JSONObject termResultJSON = new JSONObject();
            termResultJSON.put("Term", hTermResult[i].termtext.utf8ToString());
            termResultJSON.put("Frequency", hTermResult[i].totalTermFreq);
            termResultJSONArray.add(termResultJSON);
        }
        // Output the overall JSON.
        try (FileWriter file = new FileWriter("JSONResults/allTermFreq.json")) {
            file.write(termResultJSONArray.toJSONString());
            System.out.println("Successfully Copied JSON Object to File...");
            System.out.println("\nJSON Object: " + termResultJSONArray);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
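Note that the example above iterates document ids from 0 to numDocs() - 1, which only visits every document while the index has no deletions. When deletions are possible, the safer pattern (used by the TestWang example further down) is to iterate up to maxDoc() and skip ids missing from the live-docs bitset. A short sketch under the same Lucene 4.x assumptions; the class and method names are hypothetical:

import java.io.IOException;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.util.Bits;

public final class LiveDocsIteration {
    /** Visits every non-deleted document; illustrative helper, not part of the project above. */
    static void visitLiveDocs(DirectoryReader reader) throws IOException {
        Bits liveDocs = MultiFields.getLiveDocs(reader); // null when there are no deletions
        for (int i = 0; i < reader.maxDoc(); i++) {
            if (liveDocs != null && !liveDocs.get(i))
                continue; // skip deleted documents
            // process reader.document(i) here
        }
    }
}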
From source file:lucli.LuceneMethods.java
License:Apache License
public void info() throws java.io.IOException {
    IndexReader indexReader = IndexReader.open(indexName);
    getFieldInfo();
    numDocs = indexReader.numDocs();
    message("Index has " + numDocs + " documents ");
    message("All Fields:" + fields.toString());
    message("Indexed Fields:" + indexedFields.toString());
    if (IndexReader.isLocked(indexName)) {
        message("Index is locked");
    }
    //IndexReader.getCurrentVersion(indexName);
    //System.out.println("Version:" + version);
    indexReader.close();
}
From source file:net.conquiris.index.DefaultWriter.java
License:Apache License
/**
 * Default writer.
 * @param log Log context.
 * @param writer Lucene index writer to use.
 * @param overrideCheckpoint Whether to override the checkpoint.
 * @param checkpoint Overridden checkpoint value.
 * @param created Whether the index has been requested to be created.
 */
DefaultWriter(ContextLog log, IndexWriter writer, boolean overrideCheckpoint, @Nullable String checkpoint,
        boolean created) throws IndexException {
    this.log = checkNotNull(log, "The log context must be provided");
    this.writer = checkNotNull(writer, "The index writer must be provided");
    this.properties = new MapMaker().makeMap();
    this.keys = Collections.unmodifiableSet(this.properties.keySet());
    // Read properties
    try {
        final Map<String, String> commitData;
        final int documents;
        if (created) {
            commitData = ImmutableMap.of();
            documents = 0;
        } else {
            final IndexReader reader = IndexReader.open(writer, false);
            boolean threw = true;
            try {
                Map<String, String> data = reader.getIndexCommit().getUserData();
                if (overrideCheckpoint) {
                    final Map<String, String> modified = Maps.newHashMap();
                    if (data != null) {
                        modified.putAll(data);
                    }
                    modified.put(IndexInfo.CHECKPOINT, checkpoint);
                    commitData = modified;
                } else {
                    commitData = data;
                }
                documents = reader.numDocs();
                threw = false;
            } finally {
                Closeables.close(reader, threw);
            }
        }
        this.indexInfo = IndexInfo.fromMap(documents, commitData);
        this.checkpoint = this.indexInfo.getCheckpoint();
        this.targetCheckpoint = this.indexInfo.getTargetCheckpoint();
        this.properties.putAll(this.indexInfo.getProperties());
    } catch (LockObtainFailedException e) {
        indexStatus.compareAndSet(IndexStatus.OK, IndexStatus.LOCKED);
        throw new IndexException(e);
    } catch (CorruptIndexException e) {
        indexStatus.compareAndSet(IndexStatus.OK, IndexStatus.CORRUPT);
        throw new IndexException(e);
    } catch (IOException e) {
        indexStatus.compareAndSet(IndexStatus.OK, IndexStatus.IOERROR);
        throw new IndexException(e);
    } catch (RuntimeException e) {
        indexStatus.compareAndSet(IndexStatus.OK, IndexStatus.ERROR);
        throw e;
    }
}
From source file:net.dataninja.ee.textEngine.MoreLikeThisQuery.java
License:Apache License
/**
 * Condense the same term in multiple fields into a single term with a
 * total score.
 *
 * @param words a map of words keyed on the word (String) with Int objects as the values.
 */
private Map condenseTerms(IndexReader indexReader, Map words) throws IOException {
    HashMap termScoreMap = new HashMap();

    // For reference in score calculations, get the total # of docs in the index.
    int numDocs = indexReader.numDocs();

    // For each term...
    Iterator it = words.keySet().iterator();
    while (it.hasNext()) {
        Term term = (Term) it.next();

        // Filter out words that don't occur enough times in the source doc.
        int tf = ((Int) words.get(term)).x;
        if (minTermFreq > 0 && tf < minTermFreq)
            continue;

        // Filter out words that don't occur in enough docs.
        int docFreq = indexReader.docFreq(term);
        if (minDocFreq > 0 && docFreq < minDocFreq)
            continue;

        // Filter out words that occur in too many docs.
        if (maxDocFreq > 0 && docFreq > maxDocFreq)
            continue;

        // Handle potential index update problem.
        if (docFreq == 0)
            continue;

        // Calculate a score for this term.
        float idf = similarity.idf(docFreq, numDocs);
        float score = tf * idf;

        // Boost if necessary.
        Float found = (Float) boostMap.get(term.field());
        if (found != null)
            score *= found.floatValue();

        // Add the score to our map.
        String word = term.text();
        if (!termScoreMap.containsKey(word))
            termScoreMap.put(word, new Flt());
        Flt cnt = (Flt) termScoreMap.get(word);
        cnt.x += score;
    }
    return termScoreMap;
}
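For reference, the classic DefaultSimilarity that shipped with Lucene in this era derives idf from exactly the two counts gathered above. Assuming that is the similarity configured here (the source does not say), the per-term score expands to roughly:

// DefaultSimilarity-style idf; the actual value depends on the configured Similarity.
float idf = (float) (Math.log(numDocs / (double) (docFreq + 1)) + 1.0);
float score = tf * idf; // raw term frequency weighted by rarity across the index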
From source file:net.jforum.actions.LuceneAdminActions.java
License:Open Source License
/**
 * Shows the main statistics page
 */
public void list() {
    IndexReader indexReader = null;
    ReaderProvider readerProvider = null;
    try {
        SearchFactory searchFactory = Search.createFullTextSession(this.sessionFactory.getCurrentSession())
                .getSearchFactory();
        DirectoryProvider<?> directoryProvider = searchFactory.getDirectoryProviders(Post.class)[0];
        readerProvider = searchFactory.getReaderProvider();
        indexReader = readerProvider.openReader(directoryProvider);

        String indexDirectory = directoryProvider.getDirectory().toString();
        indexDirectory = indexDirectory.substring(indexDirectory.indexOf('@') + 1);
        boolean indexExists = IndexReader.indexExists(indexDirectory);
        this.propertyBag.put("indexExists", indexExists);

        if (indexExists) {
            this.propertyBag.put("numberOfDocs", indexReader.numDocs());
            this.propertyBag.put("indexLocation", indexDirectory);
            this.propertyBag.put("totalMessages", this.forumRepository.getTotalMessages());
            this.propertyBag.put("isLocked", IndexReader.isLocked(indexDirectory));
            this.propertyBag.put("lastModified", new Date(IndexReader.lastModified(indexDirectory)));
        }
    } catch (IOException e) {
        throw new ForumException(e);
    } finally {
        if (readerProvider != null && indexReader != null) {
            readerProvider.closeReader(indexReader);
        }
    }
}
From source file:net.jforum.controllers.LuceneAdminController.java
License:Open Source License
/**
 * Shows the main statistics page
 */
public void list() {
    IndexReader indexReader = null;
    ReaderProvider readerProvider = null;
    try {
        SearchFactory searchFactory = Search.createFullTextSession(this.sessionFactory.getCurrentSession())
                .getSearchFactory();
        DirectoryProvider<?> directoryProvider = searchFactory.getDirectoryProviders(Post.class)[0];
        readerProvider = searchFactory.getReaderProvider();
        indexReader = readerProvider.openReader(directoryProvider);

        String indexDirectory = directoryProvider.getDirectory().toString();
        indexDirectory = indexDirectory.substring(indexDirectory.indexOf('@') + 1);
        boolean indexExists = IndexReader.indexExists(indexDirectory);
        this.result.include("indexExists", indexExists);

        if (indexExists) {
            this.result.include("numberOfDocs", indexReader.numDocs());
            this.result.include("indexLocation", indexDirectory);
            this.result.include("totalMessages", this.forumRepository.getTotalMessages());
            this.result.include("isLocked", IndexReader.isLocked(indexDirectory));
            this.result.include("lastModified", new Date(IndexReader.lastModified(indexDirectory)));
        }
    } catch (IOException e) {
        throw new ForumException(e);
    } finally {
        if (readerProvider != null && indexReader != null) {
            readerProvider.closeReader(indexReader);
        }
    }
}
From source file:net.semanticmetadata.lire.benchmarking.TestNister.java
License:Open Source License
public void testDocLengthIDF(String pathName) throws IOException {
    df = new double[1024];
    int[] len = new int[10200];
    avgDocLength = 0;
    double numDocs = 0;
    for (int i = 0; i < df.length; i++)
        df[i] = 0;
    for (int i = 0; i < len.length; i++)
        len[i] = 0;
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(pathName)));
    for (int i = 0; i < reader.numDocs(); i++) {
        // if (!reader.isDeleted(i)) {
        String s = reader.document(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
        String f = reader.document(i).getValues("featureSURFHistogram")[0];
        SimpleFeature sf = new SimpleFeature();
        sf.setStringRepresentation(f);
        double[] h = sf.getDoubleHistogram();
        for (int j = 0; j < h.length; j++) {
            if (h[j] > 0.0)
                df[j] += 1; // add to the document frequency
            avgDocLength += h[j];
            len[i] += h[j];
        }
        numDocs += 1;
        // }
    }
    // System.out.println("avgDocLength = " + avgDocLength / numDocs);
    // for (int i = 0; i < df.length; i++) System.out.print(df[i] + ",");
    // for (int i = 0; i < len.length; i++) System.out.print(len[i] + ", ");
}
From source file:net.semanticmetadata.lire.benchmarking.TestWang.java
License:Open Source License
public void tttestGetDistribution() throws IOException {
    BufferedWriter bw = new BufferedWriter(new FileWriter("data.csv"));
    IndexReader reader = IndexReader.open(FSDirectory.open(new File(indexPath)));
    CEDD cedd1 = new CEDD();
    FCTH fcth1 = new FCTH();
    CEDD cedd2 = new CEDD();
    FCTH fcth2 = new FCTH();
    JCD jcd1 = new JCD();
    JCD jcd2 = new JCD();
    String[] cls;

    // Needed to check whether a document has been deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    int docs = reader.numDocs();
    for (int i = 0; i < docs; i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.
        Document doc = reader.document(i);
        cls = doc.getValues(DocumentBuilder.FIELD_NAME_CEDD);
        if (cls != null && cls.length > 0)
            cedd1.setStringRepresentation(cls[0]);
        cls = doc.getValues(DocumentBuilder.FIELD_NAME_FCTH);
        if (cls != null && cls.length > 0)
            fcth1.setStringRepresentation(cls[0]);
        for (int j = i + 1; j < docs; j++) {
            if (reader.hasDeletions() && !liveDocs.get(j))
                continue; // if it is deleted, just ignore it.
            Document doc2 = reader.document(j);
            cls = doc2.getValues(DocumentBuilder.FIELD_NAME_CEDD);
            if (cls != null && cls.length > 0)
                cedd2.setStringRepresentation(cls[0]);
            cls = doc2.getValues(DocumentBuilder.FIELD_NAME_FCTH);
            if (cls != null && cls.length > 0)
                fcth2.setStringRepresentation(cls[0]);
            jcd1.init(cedd1, fcth1);
            jcd2.init(cedd2, fcth2);
            bw.write(cedd1.getDistance(cedd2) + ";" + fcth1.getDistance(fcth2) + ";"
                    + jcd1.getDistance(jcd2) + "\n");
        }
        if (i % 100 == 0)
            System.out.println(i + " entries processed ... ");
    }
    reader.close();
    bw.close();
}