Example usage for org.apache.lucene.index IndexReader numDocs

List of usage examples for org.apache.lucene.index IndexReader numDocs

Introduction

In this page you can find the example usage for org.apache.lucene.index IndexReader numDocs.

Prototype

public abstract int numDocs();

Source Link

Document

Returns the number of documents in this index.

Usage

From source file:ContentBasedAnalysis.java

License:Apache License

private static TermWeight[] toTfIdf(IndexReader reader, int docId) throws Exception {
    // get Lucene representation of a Term-Frequency vector
    TermFreqVector tfv = reader.getTermFreqVector(docId, "contents");
    String[] terms = tfv.getTerms();
    int[] freqs = tfv.getTermFrequencies();
    TermWeight[] tw = new TermWeight[terms.length];

    // Maximum Frequency of a term in the document
    int fmax = freqs[0];
    for (int i = 1; i < freqs.length; i++) {
        if (freqs[i] > fmax)
            fmax = freqs[i];//from  w  w w.j  a v a  2 s . c o m
    }

    // number of docs in the index
    int nDocs = reader.numDocs();
    for (int i = 0; i < tw.length; i++) {
        tw[i] = new TermWeight(terms[i]);
    }
    return tw;
}

From source file:TestWang.java

License:Open Source License

private Document findDoc(IndexReader reader, String file) throws IOException {
    for (int i = 0; i < reader.numDocs(); i++) {
        Document document = reader.document(i);
        String s = document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
        if (s.endsWith(File.separator + file)) {
            //                System.out.println("s = " + s);
            return document;
        }/*from  ww  w  .ja v a  2 s. c o  m*/
    }
    return null;
}

From source file:TestWang.java

License:Open Source License

private Document[] findDocs(IndexReader reader, String[] file) throws IOException {
    Document[] result = new Document[file.length];
    for (int i = 0; i < reader.numDocs(); i++) {
        Document document = reader.document(i);
        String s = document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
        for (int j = 0; j < result.length; j++) {
            if (s.endsWith("\\" + file[j])) {
                //                System.out.println("s = " + s);
                result[j] = document;/* w w w  .  j  a v a2 s.  co m*/
            }
        }
    }
    return result;
}

From source file:TfIdfViewer.java

License:Apache License

private static TermWeight[] toTfIdf(IndexReader reader, int docId) throws Exception {
    // get Lucene representation of a Term-Frequency vector
    TermFreqVector tfv = reader.getTermFreqVector(docId, "contents");
    // split it into two Arrays: one for terms, one for frequencies;
    // Lucene guarantees that terms are sorted
    String[] terms = tfv.getTerms();
    int[] freqs = tfv.getTermFrequencies();
    TermWeight[] tw = new TermWeight[terms.length];
    // compute the maximum frequence of a term in the document
    double fmax = freqs[0];
    for (int i = 1; i < freqs.length; i++) {
        if (freqs[i] > fmax)
            fmax = freqs[i];/*from  ww w .  j  a  v a 2 s .c om*/
    }

    // number of docs in the index
    int nDocs = reader.numDocs();
    Double tf;
    Double idf;
    for (int i = 0; i < tw.length; i++) {
        double df = docFreq(reader, terms[i]);
        tf = freqs[i] / fmax;
        idf = Math.log10((nDocs / df));
        Double tf_idf = tf * idf;
        tw[i] = new TermWeight(terms[i], tf_idf);
    }
    return tw;
}

From source file:action.indexing.IndexingTest.java

License:Apache License

public void testIndexReader() throws IOException {
    IndexReader reader = IndexReader.open(directory);
    assertEquals(ids.length, reader.maxDoc()); //8
    assertEquals(ids.length, reader.numDocs()); //8
    reader.close();/*from   w ww.j  a  v  a2  s  .c o m*/
}

From source file:at.lux.fotoretrieval.GraphDistanceVisualizationThread.java

License:Open Source License

public void run() {
    // TODO: make this based on files to support the visualization of result lists ...
    try {/*from w  ww.j  a  va2s .c om*/
        IndexReader ir = null;
        if (fileList.isEmpty()) {
            String indexDirectory = LucenePathIndexRetrievalEngine.parsePathIndexDirectory(dir);
            if (!IndexReader.indexExists(indexDirectory)) {
                JOptionPane.showMessageDialog(parent, "Chosen repositors directory does not exist.");
                return;
            } else {
                ir = IndexReader.open(indexDirectory);
                st = new SuffixTree(SuffixTree.RelationType.FullRelations);
                // a vector space model for nodes and triples ...
                gvs = new GraphVectorSimilarity(GraphVectorSimilarity.Type.BM25, 1);
                for (int i = 0; i < ir.numDocs(); i++) {
                    Graph g_idx = new Graph(ir.document(i).getField("graph").stringValue());
                    Field[] files = ir.document(i).getFields("file");
                    for (Field file1 : files) {
                        st.addCorpusDocument(SuffixTree.createSuffixTreeDocument(g_idx));
                        gvs.addToCorpus(g_idx);
                    }
                }

            }
        }

        parent.setEnabled(false);
        ProgressWindow pw;
        pw = new ProgressWindow(parent, progress);
        pw.pack();
        Dimension d = Toolkit.getDefaultToolkit().getScreenSize();
        pw.setLocation((d.width - pw.getWidth()) / 2, (d.height - pw.getHeight()) / 2);
        pw.setVisible(true);
        long stime, ftime;
        stime = System.currentTimeMillis();
        parent.setStatus("Loading Graphs");

        progress.setMinimum(0);
        progress.setMaximum(3);
        progress.setValue(0);

        if (fileList.size() > 0)
            prepareGraphAndFileLists();
        else if (ir != null)
            prepareGraphAndFileLists(ir);
        else {
            JOptionPane.showMessageDialog(parent, "No data found!");
            pw.setVisible(false);
            parent.setEnabled(true);
        }
        progress.setValue(1);

        try {
            // create matrixFastmapFastmap with mcs distance for graphs.
            FastmapDistanceMatrix matrixFastmap = createDistanceMatrix(graphList);

            FastMap fm = new FastMap(matrixFastmap, 2);
            progress.setValue(2);
            long ms = System.currentTimeMillis();
            fm.run();
            ms = System.currentTimeMillis() - ms;
            progress.setValue(3);
            System.out.println("Time for " + matrixFastmap.getDimension() + " images: " + ms + " ms");

            //        Vector results = engine.getSimilarImages(visualDescriptor, dir, recursive, progress);
            stime = System.currentTimeMillis() - stime;
            ftime = System.currentTimeMillis();
            parent.setStatus("Formatting results ...");
            Visualization2DPanelWithFdp panel = new Visualization2DPanelWithFdp(fm.getPoints(), matrixFastmap,
                    fileList, autoStartFDP);
            //        ResultsPanel rp = new ResultsPanel(results, progress);
            ftime = System.currentTimeMillis() - ftime;
            parent.addVisualization(panel);
            parent.setStatus("Searched for " + df.format(stime / 1000.0) + " s, formatting lasted "
                    + df.format(ftime / 1000.0) + " s");
        } catch (Exception e) {
            e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
        } finally {
            pw.setVisible(false);
            parent.setEnabled(true);
        }
    } catch (IOException e) {
        e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
    }

}

From source file:at.lux.fotoretrieval.GraphDistanceVisualizationThread.java

License:Open Source License

private void prepareGraphAndFileLists(IndexReader ir) throws IOException {
    graphList = new LinkedList<Graph>();
    for (int i = 0; i < ir.numDocs(); i++) {
        Graph g = new Graph(ir.document(i).getValues("graph")[0]);
        String graphFile = ir.document(i).getValues("file")[0];
        graphList.add(g);/*from  ww w .  j av  a2s .c o m*/
        fileList.add(graphFile);
    }
}

From source file:at.lux.fotoretrieval.retrievalengines.LucenePathIndexRetrievalEngineTest.java

License:Open Source License

public void testCreateIndex() {
    engine.indexFilesSemantically(pathToIndex, null);
    try {//from  w ww .  j  ava2  s . com
        IndexReader reader = IndexReader.open(pathToIndex + "/idx_paths");
        for (int i = 0; i < reader.numDocs(); i++) {
            System.out.println(reader.document(i).get("graph"));
        }
    } catch (IOException e) {
        e.printStackTrace();
        fail(e.toString());
    }
}

From source file:at.lux.fotoretrieval.retrievalengines.LucenePathIndexRetrievalEngineTest.java

License:Open Source License

public void testPrecisionAndRecall() {
    try {/*from  www.  ja  v a  2 s  .c o m*/
        String repository = "C:\\Java\\JavaProjects\\CaliphEmir\\testdata";
        //            String repository = "C:\\Dokumente und Einstellungen\\Mathias\\Eigene Dateien\\JavaProjects\\Caliph\\testdata";
        IndexSearcher is = new IndexSearcher(repository + "\\idx_paths");
        IndexReader ir = IndexReader.open(repository + "\\idx_paths");

        for (int i = 0; i < ir.numDocs(); i++) {
            testQuery(ir, new Graph(ir.document(i).getValues("graph")[0]), is);
        }

    } catch (IOException e) {
        e.printStackTrace();
        fail(e.toString());
    } catch (ParseException e) {
        e.printStackTrace();
    }
}

From source file:at.lux.fotoretrieval.retrievalengines.LucenePathIndexRetrievalEngineTest.java

License:Open Source License

public void testPrecisionAndRecallFullText() {
    try {//from  w  w  w  .j  a  v  a  2  s  . c o m
        String repository = "C:\\Java\\JavaProjects\\CaliphEmir\\testdata";
        //            String repository = "C:\\Dokumente und Einstellungen\\Mathias\\Eigene Dateien\\JavaProjects\\Caliph\\testdata";
        IndexSearcher is = new IndexSearcher(repository + "\\idx_paths");
        IndexReader ir = IndexReader.open(repository + "\\idx_paths");

        for (int i = 0; i < ir.numDocs(); i++) {
            testDirectQuery(ir, new Graph(ir.document(i).getValues("graph")[0]), is);
        }

    } catch (IOException e) {
        e.printStackTrace();
        fail(e.toString());
    } catch (ParseException e) {
        e.printStackTrace();
    }
}