Example usage for org.apache.lucene.index IndexReader numDocs

List of usage examples for org.apache.lucene.index IndexReader numDocs

Introduction

On this page you can find example usage for org.apache.lucene.index.IndexReader.numDocs().

Prototype

public abstract int numDocs();

Document

Returns the number of documents in this index, not counting deleted documents (contrast with maxDoc(), which includes them).
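
Below is a minimal sketch of the typical call pattern before the full examples; the index path is a placeholder, and a Lucene 4.x-style API is assumed.

import java.io.File;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.store.FSDirectory;

// Minimal sketch: open a reader over an existing index, read the live
// document count, and close the reader. "/path/to/index" is a placeholder.
DirectoryReader reader = DirectoryReader.open(FSDirectory.open(new File("/path/to/index")));
try {
    int numDocs = reader.numDocs(); // live (non-deleted) documents only
    int maxDoc = reader.maxDoc();   // doc ID upper bound, includes deletions
    System.out.println(numDocs + " of " + maxDoc + " documents are live");
} finally {
    reader.close();
}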

Usage

From source file:lucee.runtime.search.lucene2.LuceneSearchCollection.java

License:Open Source License

private int _countDocs(String col) {
    // FUTURE: add support for non-file resources
    int totalDocs;
    IndexReader reader = null;
    try {
        reader = _getReader(col, true);
        totalDocs = reader.numDocs();
    } catch (Exception e) {
        return 0;
    } finally {
        closeEL(reader);
    }
    return totalDocs;
}

From source file:lucee.runtime.search.lucene2.LuceneSearchCollection.java

License:Open Source License

@Override
public int getDocumentCount(String id) {
    try {
        if (!_getIndexDirectory(id, false).exists())
            return 0;
        IndexReader r = null;
        int num = 0;
        try {
            r = _getReader(id, false);
            num = r.numDocs();
        } finally {
            close(r);
        }
        return num;
    } catch (Exception e) {
        // swallow the failure and fall through: report zero documents
    }
    return 0;
}

From source file:luceneindexcreator.LuceneIndexCreator.java

public static void main(String[] args) {
    try {
        Comparator<TermStats> comparator = new Comparator<TermStats>() {
            @Override
            public int compare(TermStats t1, TermStats t2) {
                return Long.compare(t1.totalTermFreq, t2.totalTermFreq);
            }
        };

        LuceneIndexCreator lw = new LuceneIndexCreator(INDEX_PATH, JSON_FILE_PATH_WEEKLY);
        lw.createIndex();

        //Check the index has been created successfully
        Directory indexDirectory = FSDirectory.open(new File(INDEX_PATH));
        IndexReader indexReader = DirectoryReader.open(indexDirectory);

        int numDocs = indexReader.numDocs();
        /* Keywords SORTED BY DATE
         *      //generation of Date indexes and the associated json files of keyword freq            
         *      ArrayList<String> indexedDates = new ArrayList<String>();
         *      for ( int i = 0; i < numDocs; i++){
         *          Document document = indexReader.document(i);
         *          //indexRader.toString(i);
         *          String date = document.get("Date");
         *          if (!contains(indexedDates, date)) {
         *              LuceneIndexCreator lwd = new LuceneIndexCreator(PARENT_INDEX_PATH + date, JSON_FILE_PATH_WEEKLY);
         *              lwd.createSubindexDate(date);
         *              indexedDates.add(date);
         *          }
         *          Directory indexDirectoryDate = FSDirectory.open(new File(PARENT_INDEX_PATH + date));
         *          IndexReader indexReaderDate = DirectoryReader.open(indexDirectoryDate);
         *          HighFreqTerms hTerms = new HighFreqTerms();
         *          JSONArray termResultJSONArray = new JSONArray();
         *          TermStats[] hTermResult = hTerms.getHighFreqTerms(indexReaderDate, 50, "content", comparator);
         *          //creating json object
         *          for (int j = 0; j < hTermResult.length; j++) {
         *              JSONObject termResultJSON = new JSONObject();
         *              termResultJSON.put("Term", hTermResult[j].termtext.utf8ToString());
         *              termResultJSON.put("Frequency", hTermResult[j].totalTermFreq);
         *              termResultJSONArray.add(termResultJSON);
         *              //System.out.println("" + hTermResult[i].termtext.utf8ToString() + " " +  hTermResult[i].totalTermFreq);
         *          }
         *          //outputting json
         *          try(FileWriter file = new FileWriter("JSONResults/" + date + ".json")) {
         *              file.write(termResultJSONArray.toJSONString());
         *              System.out.println("Successfully Copied JSON Object to File...");
         *              System.out.println("\nJSON Object: " + termResultJSONArray );
         *
         *          }
         *              //date = date.substring(5, 16).trim();
         *              //System.out.println( "d=" + document.get("content"));
         *              //System.out.println("date: " + date + ".");
         *      }
        */

        // keywords sorted by week
        //generation of Date indexes and the associated json files of keyword freq                      
        ArrayList<String> indexedWeeks = new ArrayList<String>();

        //creating subindexes for each week
        // Note: iterating doc IDs up to numDocs() assumes the index contains no deletions.
        for (int i = 0; i < numDocs; i++) {
            Document document = indexReader.document(i);
            //System.out.println(document.get("Week_number"));
            //System.out.println(document.get("Date"));
            String weekNum = document.get("Week_number");
            //System.out.println(weekNum);
            if (!contains(indexedWeeks, weekNum)) {
                LuceneIndexCreator lww = new LuceneIndexCreator(PARENT_INDEX_PATH + "week" + weekNum,
                        JSON_FILE_PATH_WEEKLY);
                lww.createSubindexWeek(weekNum);
                indexedWeeks.add(weekNum);
            }
        }
        JSONArray json1 = new JSONArray();
        for (String weekNum : indexedWeeks) {
            Directory indexDirectoryWeek = FSDirectory.open(new File(PARENT_INDEX_PATH + "week" + weekNum));
            IndexReader indexReaderWeek = DirectoryReader.open(indexDirectoryWeek);
            // getHighFreqTerms is static, so no HighFreqTerms instance is needed.
            TermStats[] hTermResult = HighFreqTerms.getHighFreqTerms(indexReaderWeek, 100, "content", comparator);

            //creating json object 
            JSONObject json2 = new JSONObject();
            json2.put("Week", weekNum);
            JSONArray json3 = new JSONArray();
            for (int j = 0; j < hTermResult.length; j++) {
                JSONObject json4 = new JSONObject();
                json4.put("Term", hTermResult[j].termtext.utf8ToString());
                json4.put("Frequency", hTermResult[j].totalTermFreq);
                json3.add(json4);
            }
            json2.put("Terms", json3);
            json1.add(json2);
            indexReaderWeek.close(); // close the per-week reader to avoid leaking file handles
        }
        //output json
        try (FileWriter file = new FileWriter("JSONResults/allWeeklyTerms.json")) {
            file.write(json1.toJSONString());
            System.out.println("Successfully Copied JSON Object to File...");
            System.out.println("\nJSON Object: " + json1);
        }

        // gets term freq for all docs 
        JSONArray termResultJSONArray = new JSONArray();

        // array of TermStats for the most frequent terms across the whole index
        TermStats[] hTermResult = HighFreqTerms.getHighFreqTerms(indexReader, 150, "content", comparator);

        //creating json object
        for (int i = 0; i < hTermResult.length; i++) {
            JSONObject termResultJSON = new JSONObject();
            termResultJSON.put("Term", hTermResult[i].termtext.utf8ToString());
            termResultJSON.put("Frequency", hTermResult[i].totalTermFreq);
            termResultJSONArray.add(termResultJSON);
            //System.out.println("" + hTermResult[i].termtext.utf8ToString() + " " +  hTermResult[i].totalTermFreq);
        }
        //outputting json
        try (FileWriter file = new FileWriter("JSONResults/allTermFreq.json")) {
            file.write(termResultJSONArray.toJSONString());
            System.out.println("Successfully Copied JSON Object to File...");
            System.out.println("\nJSON Object: " + termResultJSONArray);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:lucli.LuceneMethods.java

License:Apache License

public void info() throws java.io.IOException {
    IndexReader indexReader = IndexReader.open(indexName);

    getFieldInfo();
    numDocs = indexReader.numDocs();
    message("Index has " + numDocs + " documents ");
    message("All Fields:" + fields.toString());
    message("Indexed Fields:" + indexedFields.toString());

    if (IndexReader.isLocked(indexName)) {
        message("Index is locked");
    }
    //IndexReader.getCurrentVersion(indexName);
    //System.out.println("Version:" + version);

    indexReader.close();
}

From source file:net.conquiris.index.DefaultWriter.java

License:Apache License

/**
 * Default writer.
 * @param log Log context.
 * @param writer Lucene index writer to use.
 * @param overrideCheckpoint Whether to override the checkpoint.
 * @param checkpoint Overridden checkpoint value.
 * @param created Whether the index has been requested to be created.
 */
DefaultWriter(ContextLog log, IndexWriter writer, boolean overrideCheckpoint, @Nullable String checkpoint,
        boolean created) throws IndexException {
    this.log = checkNotNull(log, "The log context must be provided");
    this.writer = checkNotNull(writer, "The index writer must be provided");
    this.properties = new MapMaker().makeMap();
    this.keys = Collections.unmodifiableSet(this.properties.keySet());
    // Read properties
    try {
        final Map<String, String> commitData;
        final int documents;
        if (created) {
            commitData = ImmutableMap.of();
            documents = 0;
        } else {
            final IndexReader reader = IndexReader.open(writer, false);
            boolean threw = true;
            try {
                Map<String, String> data = reader.getIndexCommit().getUserData();
                if (overrideCheckpoint) {
                    final Map<String, String> modified = Maps.newHashMap();
                    if (data != null) {
                        modified.putAll(data);
                    }
                    modified.put(IndexInfo.CHECKPOINT, checkpoint);
                    commitData = modified;
                } else {
                    commitData = data;
                }
                documents = reader.numDocs();
                threw = false;
            } finally {
                Closeables.close(reader, threw);
            }
        }
        this.indexInfo = IndexInfo.fromMap(documents, commitData);
        this.checkpoint = this.indexInfo.getCheckpoint();
        this.targetCheckpoint = this.indexInfo.getTargetCheckpoint();
        this.properties.putAll(this.indexInfo.getProperties());
    } catch (LockObtainFailedException e) {
        indexStatus.compareAndSet(IndexStatus.OK, IndexStatus.LOCKED);
        throw new IndexException(e);
    } catch (CorruptIndexException e) {
        indexStatus.compareAndSet(IndexStatus.OK, IndexStatus.CORRUPT);
        throw new IndexException(e);
    } catch (IOException e) {
        indexStatus.compareAndSet(IndexStatus.OK, IndexStatus.IOERROR);
        throw new IndexException(e);
    } catch (RuntimeException e) {
        indexStatus.compareAndSet(IndexStatus.OK, IndexStatus.ERROR);
        throw e;
    }
}

From source file:net.dataninja.ee.textEngine.MoreLikeThisQuery.java

License:Apache License

/**
 * Condense the same term in multiple fields into a single term with a
 * total score.
 *
 * @param words a map keyed on the term (Term) with Int objects as the values.
 */
private Map condenseTerms(IndexReader indexReader, Map words) throws IOException {
    HashMap termScoreMap = new HashMap();

    // For reference in score calculations, get the total # of docs in index
    int numDocs = indexReader.numDocs();

    // For each term...
    Iterator it = words.keySet().iterator();
    while (it.hasNext()) {
        Term term = (Term) it.next();

        // Filter out words that don't occur enough times in the source doc
        int tf = ((Int) words.get(term)).x;
        if (minTermFreq > 0 && tf < minTermFreq)
            continue;

        // Filter out words that don't occur in enough docs
        int docFreq = indexReader.docFreq(term);
        if (minDocFreq > 0 && docFreq < minDocFreq)
            continue;

        // Filter out words that occur in too many docs
        if (maxDocFreq > 0 && docFreq > maxDocFreq)
            continue;

        // Handle potential index update problem
        if (docFreq == 0)
            continue;

        // Calculate a score for this term.
        float idf = similarity.idf(docFreq, numDocs);
        float score = tf * idf;

        // Boost if necessary.
        Float found = (Float) boostMap.get(term.field());
        if (found != null)
            score *= found.floatValue();

        // Add the score to our map.
        String word = term.text();
        if (!termScoreMap.containsKey(word))
            termScoreMap.put(word, new Flt());
        Flt cnt = (Flt) termScoreMap.get(word);
        cnt.x += score;
    }

    return termScoreMap;
}
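
The score computed above rewards terms that are frequent in the source document but rare in the index. For reference, if similarity is Lucene's classic DefaultSimilarity, the per-term computation reduces to roughly the following sketch (an illustration, not this project's actual code):

// Sketch of the per-term score under the classic DefaultSimilarity
// formula: idf = 1 + ln(numDocs / (docFreq + 1)).
static float termScore(int tf, int docFreq, int numDocs) {
    float idf = (float) (Math.log(numDocs / (double) (docFreq + 1)) + 1.0);
    return tf * idf; // frequent-in-doc, rare-in-index terms score highest
}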

From source file:net.jforum.actions.LuceneAdminActions.java

License:Open Source License

/**
 * Shows the main statistics page.
 */
public void list() {
    IndexReader indexReader = null;
    ReaderProvider readerProvider = null;

    try {
        SearchFactory searchFactory = Search.createFullTextSession(this.sessionFactory.getCurrentSession())
                .getSearchFactory();

        DirectoryProvider<?> directoryProvider = searchFactory.getDirectoryProviders(Post.class)[0];
        readerProvider = searchFactory.getReaderProvider();
        indexReader = readerProvider.openReader(directoryProvider);

        String indexDirectory = directoryProvider.getDirectory().toString();
        indexDirectory = indexDirectory.substring(indexDirectory.indexOf('@') + 1);

        boolean indexExists = IndexReader.indexExists(indexDirectory);

        this.propertyBag.put("indexExists", indexExists);

        if (indexExists) {
            this.propertyBag.put("numberOfDocs", indexReader.numDocs());
            this.propertyBag.put("indexLocation", indexDirectory);
            this.propertyBag.put("totalMessages", this.forumRepository.getTotalMessages());
            this.propertyBag.put("isLocked", IndexReader.isLocked(indexDirectory));
            this.propertyBag.put("lastModified", new Date(IndexReader.lastModified(indexDirectory)));
        }
    } catch (IOException e) {
        throw new ForumException(e);
    } finally {
        if (readerProvider != null && indexReader != null) {
            readerProvider.closeReader(indexReader);
        }
    }
}

From source file:net.jforum.controllers.LuceneAdminController.java

License:Open Source License

/**
 * Shows the main statistics page.
 */
public void list() {
    IndexReader indexReader = null;
    ReaderProvider readerProvider = null;

    try {
        SearchFactory searchFactory = Search.createFullTextSession(this.sessionFactory.getCurrentSession())
                .getSearchFactory();

        DirectoryProvider<?> directoryProvider = searchFactory.getDirectoryProviders(Post.class)[0];
        readerProvider = searchFactory.getReaderProvider();
        indexReader = readerProvider.openReader(directoryProvider);

        String indexDirectory = directoryProvider.getDirectory().toString();
        indexDirectory = indexDirectory.substring(indexDirectory.indexOf('@') + 1);

        boolean indexExists = IndexReader.indexExists(indexDirectory);

        this.result.include("indexExists", indexExists);

        if (indexExists) {
            this.result.include("numberOfDocs", indexReader.numDocs());
            this.result.include("indexLocation", indexDirectory);
            this.result.include("totalMessages", this.forumRepository.getTotalMessages());
            this.result.include("isLocked", IndexReader.isLocked(indexDirectory));
            this.result.include("lastModified", new Date(IndexReader.lastModified(indexDirectory)));
        }
    } catch (IOException e) {
        throw new ForumException(e);
    } finally {
        if (readerProvider != null && indexReader != null) {
            readerProvider.closeReader(indexReader);
        }
    }
}

From source file:net.semanticmetadata.lire.benchmarking.TestNister.java

License:Open Source License

public void testDocLengthIDF(String pathName) throws IOException {
    df = new double[1024];
    int[] len = new int[10200];

    avgDocLength = 0;
    double numDocs = 0;
    for (int i = 0; i < df.length; i++)
        df[i] = 0;
    for (int i = 0; i < len.length; i++)
        len[i] = 0;
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(pathName)));
    for (int i = 0; i < reader.numDocs(); i++) {
        //            if (!reader.isDeleted(i)) {
        String s = reader.document(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
        String f = reader.document(i).getValues("featureSURFHistogram")[0];
        SimpleFeature sf = new SimpleFeature();
        sf.setStringRepresentation(f);
        double[] h = sf.getDoubleHistogram();
        for (int j = 0; j < h.length; j++) {
            if (h[j] > 0.0)
                df[j] += 1; // add to the document frequency
            avgDocLength += h[j];
            len[i] += h[j];
        }
        numDocs += 1;
        //            }
    }
    //        System.out.println("avgDocLength = " + avgDocLength/numDocs);
    //        for (int i = 0; i < df.length; i++)
    //            System.out.print(df[i] + ",");
    //        System.out.println();
    //        for (int i = 0; i < len.length; i++)
    //            System.out.print(len[i] + ", ");
    //        System.out.println();
}
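
One caveat about the loop above: iterating doc IDs up to reader.numDocs() is only safe when the index contains no deletions, because doc IDs stop being contiguous once documents are deleted. A minimal sketch of the deletion-safe pattern, assuming a Lucene 4.x reader (the next example below uses the same idea):

import org.apache.lucene.index.MultiFields;
import org.apache.lucene.util.Bits;

// Sketch: loop to maxDoc() and skip deleted doc IDs via the live-docs bitset.
Bits liveDocs = MultiFields.getLiveDocs(reader);
for (int i = 0; i < reader.maxDoc(); i++) {
    if (liveDocs != null && !liveDocs.get(i))
        continue; // deleted document
    // ... process reader.document(i) ...
}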

From source file:net.semanticmetadata.lire.benchmarking.TestWang.java

License:Open Source License

public void tttestGetDistribution() throws IOException {
    BufferedWriter bw = new BufferedWriter(new FileWriter("data.csv"));
    IndexReader reader = IndexReader.open(FSDirectory.open(new File(indexPath)));
    // get the first document:
    //        if (!IndexReader.indexExists(reader.directory()))
    //            throw new FileNotFoundException("No index found at this specific location.");

    CEDD cedd1 = new CEDD();
    FCTH fcth1 = new FCTH();

    CEDD cedd2 = new CEDD();
    FCTH fcth2 = new FCTH();

    JCD jcd1 = new JCD();
    JCD jcd2 = new JCD();
    String[] cls;

    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    int docs = reader.numDocs();
    for (int i = 0; i < docs; i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.

        Document doc = reader.document(i);
        cls = doc.getValues(DocumentBuilder.FIELD_NAME_CEDD);
        if (cls != null && cls.length > 0)
            cedd1.setStringRepresentation(cls[0]);
        cls = doc.getValues(DocumentBuilder.FIELD_NAME_FCTH);
        if (cls != null && cls.length > 0)
            fcth1.setStringRepresentation(cls[0]);

        for (int j = i + 1; j < docs; j++) {
            if (reader.hasDeletions() && !liveDocs.get(j))
                continue; // if it is deleted, just ignore it.
            Document doc2 = reader.document(j);
            cls = doc2.getValues(DocumentBuilder.FIELD_NAME_CEDD);
            if (cls != null && cls.length > 0)
                cedd2.setStringRepresentation(cls[0]);
            cls = doc2.getValues(DocumentBuilder.FIELD_NAME_FCTH);
            if (cls != null && cls.length > 0)
                fcth2.setStringRepresentation(cls[0]);
            jcd1.init(cedd1, fcth1);
            jcd2.init(cedd2, fcth2);
            bw.write(cedd1.getDistance(cedd2) + ";" + fcth1.getDistance(fcth2) + ";" + jcd1.getDistance(jcd2)
                    + "\n");
        }
        if (i % 100 == 0)
            System.out.println(i + " entries processed ... ");
    }
    bw.close();
}