Example usage for org.apache.lucene.index.IndexReader#numDocs()

Introduction

This page collects example usages of the numDocs() method of org.apache.lucene.index.IndexReader.

Prototype

public abstract int numDocs();

Source Link

Document

Returns the number of documents in this index.

Usage

From source file:ContentBasedAnalysis.java

License:Apache License

/**
 * Creates one {@code TermWeight} for every term in the "contents" term vector of a document.
 *
 * <p>The maximum term frequency of the document and the number of documents in the index
 * are computed as well, but neither value is used: each weight is built from the term
 * text alone.
 *
 * @param reader index reader the term vector is read from
 * @param docId  number of the document whose terms are processed
 * @return one {@code TermWeight} per term, in the order returned by {@code getTerms()}
 * @throws Exception declared by the signature; propagated from the reader calls
 */
private static TermWeight[] toTfIdf(IndexReader reader, int docId) throws Exception {
    TermFreqVector vector = reader.getTermFreqVector(docId, "contents");
    String[] terms = vector.getTerms();
    int[] freqs = vector.getTermFrequencies();
    TermWeight[] weights = new TermWeight[terms.length];

    // highest frequency of any single term in the document (not used below)
    int fmax = freqs[0];
    for (int freq : freqs) {
        fmax = Math.max(fmax, freq);
    }

    // number of documents in the index (not used below)
    int nDocs = reader.numDocs();

    int slot = 0;
    for (String term : terms) {
        weights[slot++] = new TermWeight(term);
    }
    return weights;
}

From source file:TestWang.java

License:Open Source License

/**
 * Returns the first document whose identifier field ends with
 * {@code File.separator + file}.
 *
 * <p>Documents that carry no value for {@code DocumentBuilder.FIELD_NAME_IDENTIFIER}
 * are skipped instead of aborting the scan with an array-index or null-pointer error.
 *
 * @param reader index to scan
 * @param file   file name (without directory) to look for
 * @return the first matching document, or {@code null} if none matches
 * @throws IOException if a document cannot be loaded from the index
 */
private Document findDoc(IndexReader reader, String file) throws IOException {
    String suffix = File.separator + file;
    // NOTE(review): numDocs() is a document count, not a doc-id bound; if this index can
    // contain deletions the scan should be bounded by maxDoc() and skip deleted ids - confirm.
    int docCount = reader.numDocs();
    for (int i = 0; i < docCount; i++) {
        Document document = reader.document(i);
        String[] identifiers = document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER);
        // a document without an identifier can never match
        if (identifiers == null || identifiers.length == 0 || identifiers[0] == null) {
            continue;
        }
        if (identifiers[0].endsWith(suffix)) {
            return document;
        }
    }
    return null;
}

From source file:TestWang.java

License:Open Source License

/**
 * Looks up one document per requested file name.
 *
 * <p>A document matches {@code file[j]} when its identifier ends with a path separator
 * followed by that name. Both the backslash used originally and the platform separator
 * (as used by {@code findDoc}) are accepted. If several documents match the same name,
 * the last one scanned wins. Documents without an identifier value are skipped.
 *
 * @param reader index to scan
 * @param file   file names (without directory) to look for
 * @return array parallel to {@code file}; an entry is {@code null} when nothing matched
 * @throws IOException if a document cannot be loaded from the index
 */
private Document[] findDocs(IndexReader reader, String[] file) throws IOException {
    Document[] result = new Document[file.length];
    // NOTE(review): numDocs() is a document count, not a doc-id bound; if this index can
    // contain deletions the scan should be bounded by maxDoc() and skip deleted ids - confirm.
    int docCount = reader.numDocs();
    for (int i = 0; i < docCount; i++) {
        Document document = reader.document(i);
        String[] identifiers = document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER);
        // a document without an identifier can never match
        if (identifiers == null || identifiers.length == 0 || identifiers[0] == null) {
            continue;
        }
        String identifier = identifiers[0];
        for (int j = 0; j < result.length; j++) {
            boolean matches = identifier.endsWith("\\" + file[j])
                    || identifier.endsWith(java.io.File.separator + file[j]);
            if (matches) {
                result[j] = document;
            }
        }
    }
    return result;
}

From source file:TfIdfViewer.java

License:Apache License

/**
 * Computes a TF-IDF weight for every term in the "contents" term vector of a document.
 *
 * <p>For each term: {@code tf} is the term's frequency divided by the highest term
 * frequency in the document, {@code idf} is {@code log10(numDocs / docFreq)}, and the
 * weight is their product.
 *
 * @param reader index reader supplying the term vector and the document count
 * @param docId  number of the document whose terms are weighted
 * @return one {@code TermWeight} per term, in the order returned by {@code getTerms()}
 * @throws Exception declared by the signature; propagated from the reader calls
 */
private static TermWeight[] toTfIdf(IndexReader reader, int docId) throws Exception {
    // term vector of the document, split into parallel term / frequency arrays
    TermFreqVector vector = reader.getTermFreqVector(docId, "contents");
    String[] terms = vector.getTerms();
    int[] freqs = vector.getTermFrequencies();
    TermWeight[] weights = new TermWeight[terms.length];

    // highest frequency of any term in the document; kept as double so the
    // tf division below is a floating-point division
    double maxFreq = freqs[0];
    for (int freq : freqs) {
        if (freq > maxFreq) {
            maxFreq = freq;
        }
    }

    // number of documents in the index
    int totalDocs = reader.numDocs();
    for (int i = 0; i < weights.length; i++) {
        double docsWithTerm = docFreq(reader, terms[i]);
        double tf = freqs[i] / maxFreq;
        double idf = Math.log10(totalDocs / docsWithTerm);
        weights[i] = new TermWeight(terms[i], Double.valueOf(tf * idf));
    }
    return weights;
}

From source file:action.indexing.IndexingTest.java

License:Apache License

/**
 * Opens a reader on {@code directory} and checks that both {@code maxDoc()} and
 * {@code numDocs()} equal {@code ids.length}.
 *
 * @throws IOException if the reader cannot be opened or closed
 */
public void testIndexReader() throws IOException {
    IndexReader reader = IndexReader.open(directory);
    try {
        assertEquals(ids.length, reader.maxDoc()); //8
        assertEquals(ids.length, reader.numDocs()); //8
    } finally {
        // release the reader even when an assertion above fails
        reader.close();
    }
}

From source file:at.lux.fotoretrieval.GraphDistanceVisualizationThread.java

License:Open Source License

/**
 * Builds the graph distance visualization and hands it to {@code parent}.
 *
 * <p>Steps, as far as this method shows them:
 * <ol>
 *   <li>When {@code fileList} is empty, open the path index derived from {@code dir} and
 *       feed every indexed graph into {@code st} and {@code gvs}.</li>
 *   <li>Disable {@code parent}, show a progress window and fill the graph/file lists.</li>
 *   <li>Create a distance matrix over {@code graphList}, run FastMap on it in two
 *       dimensions and add the resulting panel to {@code parent}.</li>
 * </ol>
 *
 * <p>The index reader is closed when the method ends. {@code parent} is re-enabled and the
 * progress window hidden on every exit path once the window has been requested, including
 * an {@code IOException} while the lists are being prepared.
 */
public void run() {
    // TODO: make this based on files to support the visualization of result lists ...
    IndexReader ir = null;
    try {
        if (fileList.isEmpty()) {
            String indexDirectory = LucenePathIndexRetrievalEngine.parsePathIndexDirectory(dir);
            if (!IndexReader.indexExists(indexDirectory)) {
                JOptionPane.showMessageDialog(parent, "Chosen repositors directory does not exist.");
                return;
            }
            ir = IndexReader.open(indexDirectory);
            st = new SuffixTree(SuffixTree.RelationType.FullRelations);
            // a vector space model for nodes and triples ...
            gvs = new GraphVectorSimilarity(GraphVectorSimilarity.Type.BM25, 1);
            // doc ids run up to maxDoc(); numDocs() is smaller when the index has deletions
            int maxDoc = ir.maxDoc();
            for (int i = 0; i < maxDoc; i++) {
                if (ir.isDeleted(i)) {
                    continue;
                }
                Graph g_idx = new Graph(ir.document(i).getField("graph").stringValue());
                Field[] files = ir.document(i).getFields("file");
                // the graph is added once per "file" field of the document
                for (Field file1 : files) {
                    st.addCorpusDocument(SuffixTree.createSuffixTreeDocument(g_idx));
                    gvs.addToCorpus(g_idx);
                }
            }
        }

        parent.setEnabled(false);
        ProgressWindow pw = null;
        try {
            pw = new ProgressWindow(parent, progress);
            pw.pack();
            Dimension d = Toolkit.getDefaultToolkit().getScreenSize();
            pw.setLocation((d.width - pw.getWidth()) / 2, (d.height - pw.getHeight()) / 2);
            pw.setVisible(true);
            long stime, ftime;
            stime = System.currentTimeMillis();
            parent.setStatus("Loading Graphs");

            progress.setMinimum(0);
            progress.setMaximum(3);
            progress.setValue(0);

            if (fileList.size() > 0) {
                prepareGraphAndFileLists();
            } else if (ir != null) {
                prepareGraphAndFileLists(ir);
            } else {
                // NOTE(review): processing continues after this message, as in the
                // original flow - confirm whether an early return is intended here.
                JOptionPane.showMessageDialog(parent, "No data found!");
                pw.setVisible(false);
                parent.setEnabled(true);
            }
            progress.setValue(1);

            try {
                // create matrixFastmapFastmap with mcs distance for graphs.
                FastmapDistanceMatrix matrixFastmap = createDistanceMatrix(graphList);

                FastMap fm = new FastMap(matrixFastmap, 2);
                progress.setValue(2);
                long ms = System.currentTimeMillis();
                fm.run();
                ms = System.currentTimeMillis() - ms;
                progress.setValue(3);
                System.out.println("Time for " + matrixFastmap.getDimension() + " images: " + ms + " ms");

                stime = System.currentTimeMillis() - stime;
                ftime = System.currentTimeMillis();
                parent.setStatus("Formatting results ...");
                Visualization2DPanelWithFdp panel = new Visualization2DPanelWithFdp(fm.getPoints(), matrixFastmap,
                        fileList, autoStartFDP);
                ftime = System.currentTimeMillis() - ftime;
                parent.addVisualization(panel);
                parent.setStatus("Searched for " + df.format(stime / 1000.0) + " s, formatting lasted "
                        + df.format(ftime / 1000.0) + " s");
            } catch (Exception e) {
                e.printStackTrace();
            }
        } finally {
            // always give the UI back, also when preparing the lists failed
            if (pw != null) {
                pw.setVisible(false);
            }
            parent.setEnabled(true);
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        if (ir != null) {
            try {
                ir.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

}

From source file:at.lux.fotoretrieval.GraphDistanceVisualizationThread.java

License:Open Source License

/**
 * Replaces {@code graphList} with the graphs stored in the index and appends the
 * matching file names to {@code fileList}.
 *
 * <p>For every live document the first "graph" value is parsed into a {@code Graph} and
 * the first "file" value is taken as its file name. Document ids are enumerated up to
 * {@code maxDoc()} and deleted ids are skipped, so an index with deletions is read in
 * full instead of stopping early or failing on a deleted id.
 *
 * @param ir reader on the index holding the "graph" and "file" fields
 * @throws IOException if a document cannot be loaded from the index
 */
private void prepareGraphAndFileLists(IndexReader ir) throws IOException {
    graphList = new LinkedList<Graph>();
    int maxDoc = ir.maxDoc();
    for (int i = 0; i < maxDoc; i++) {
        if (ir.isDeleted(i)) {
            continue;
        }
        Graph g = new Graph(ir.document(i).getValues("graph")[0]);
        String graphFile = ir.document(i).getValues("file")[0];
        graphList.add(g);
        fileList.add(graphFile);
    }
}

From source file:at.lux.fotoretrieval.retrievalengines.LucenePathIndexRetrievalEngineTest.java

License:Open Source License

/**
 * Indexes {@code pathToIndex} through {@code engine} and prints the "graph" field of
 * every live document in the resulting {@code idx_paths} index.
 *
 * <p>Fails the test on any {@code IOException}. The reader is closed once the listing
 * is done.
 */
public void testCreateIndex() {
    engine.indexFilesSemantically(pathToIndex, null);
    try {
        IndexReader reader = IndexReader.open(pathToIndex + "/idx_paths");
        try {
            // doc ids run up to maxDoc(); deleted ids cannot be loaded
            int maxDoc = reader.maxDoc();
            for (int i = 0; i < maxDoc; i++) {
                if (reader.isDeleted(i)) {
                    continue;
                }
                System.out.println(reader.document(i).get("graph"));
            }
        } finally {
            reader.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
        fail(e.toString());
    }
}

From source file:at.lux.fotoretrieval.retrievalengines.LucenePathIndexRetrievalEngineTest.java

License:Open Source License

/**
 * Runs {@code testQuery} once for every live document of the {@code idx_paths} index,
 * using the graph stored in that document as the query.
 *
 * <p>The repository location is a fixed Windows path. The searcher and the reader are
 * both closed when the loop ends, whether or not a query fails.
 */
public void testPrecisionAndRecall() {
    try {
        String repository = "C:\\Java\\JavaProjects\\CaliphEmir\\testdata";
        IndexSearcher is = new IndexSearcher(repository + "\\idx_paths");
        try {
            IndexReader ir = IndexReader.open(repository + "\\idx_paths");
            try {
                // doc ids run up to maxDoc(); deleted ids cannot be loaded
                int maxDoc = ir.maxDoc();
                for (int i = 0; i < maxDoc; i++) {
                    if (ir.isDeleted(i)) {
                        continue;
                    }
                    testQuery(ir, new Graph(ir.document(i).getValues("graph")[0]), is);
                }
            } finally {
                ir.close();
            }
        } finally {
            is.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
        fail(e.toString());
    } catch (ParseException e) {
        // NOTE(review): unlike IOException this does not fail the test - confirm intent
        e.printStackTrace();
    }
}

From source file:at.lux.fotoretrieval.retrievalengines.LucenePathIndexRetrievalEngineTest.java

License:Open Source License

/**
 * Runs {@code testDirectQuery} once for every live document of the {@code idx_paths}
 * index, using the graph stored in that document as the query.
 *
 * <p>The repository location is a fixed Windows path. The searcher and the reader are
 * both closed when the loop ends, whether or not a query fails.
 */
public void testPrecisionAndRecallFullText() {
    try {
        String repository = "C:\\Java\\JavaProjects\\CaliphEmir\\testdata";
        IndexSearcher is = new IndexSearcher(repository + "\\idx_paths");
        try {
            IndexReader ir = IndexReader.open(repository + "\\idx_paths");
            try {
                // doc ids run up to maxDoc(); deleted ids cannot be loaded
                int maxDoc = ir.maxDoc();
                for (int i = 0; i < maxDoc; i++) {
                    if (ir.isDeleted(i)) {
                        continue;
                    }
                    testDirectQuery(ir, new Graph(ir.document(i).getValues("graph")[0]), is);
                }
            } finally {
                ir.close();
            }
        } finally {
            is.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
        fail(e.toString());
    } catch (ParseException e) {
        // NOTE(review): unlike IOException this does not fail the test - confirm intent
        e.printStackTrace();
    }
}