Example usage for org.apache.lucene.index IndexReader document

List of usage examples for org.apache.lucene.index IndexReader document

Introduction

On this page you can find example usage of org.apache.lucene.index IndexReader.document.

Prototype




public final Document document(int docID) throws IOException 

Source Link

Document

Returns the stored fields of the nth Document in this index.

Usage

From source file:at.lux.fotoretrieval.GraphDistanceVisualizationThread.java

License:Open Source License

public void run() {
    // Builds a 2D FastMap embedding of the pairwise graph distances found in the
    // current repository (or explicit file list) and hands the resulting
    // visualization panel to the parent frame. Runs on its own thread; GUI state
    // (progress window, parent enabled state) is managed around the computation.
    // TODO: make this based on files to support the visualization of result lists ...
    try {
        IndexReader ir = null;
        if (fileList.isEmpty()) {
            // no explicit files given: read every graph from the path index
            String indexDirectory = LucenePathIndexRetrievalEngine.parsePathIndexDirectory(dir);
            if (!IndexReader.indexExists(indexDirectory)) {
                // bug fix: corrected typo "repositors" -> "repository" in the user message
                JOptionPane.showMessageDialog(parent, "Chosen repository directory does not exist.");
                return;
            } else {
                ir = IndexReader.open(indexDirectory);
                st = new SuffixTree(SuffixTree.RelationType.FullRelations);
                // a vector space model for nodes and triples ...
                gvs = new GraphVectorSimilarity(GraphVectorSimilarity.Type.BM25, 1);
                for (int i = 0; i < ir.numDocs(); i++) {
                    // fetch the stored document once per iteration (was fetched twice)
                    org.apache.lucene.document.Document doc = ir.document(i);
                    Graph g_idx = new Graph(doc.getField("graph").stringValue());
                    Field[] files = doc.getFields("file");
                    // NOTE(review): the loop variable is unused, so the same graph is
                    // added to the corpus once per associated file -- presumably a
                    // deliberate frequency weighting; confirm this is intended.
                    for (Field file1 : files) {
                        st.addCorpusDocument(SuffixTree.createSuffixTreeDocument(g_idx));
                        gvs.addToCorpus(g_idx);
                    }
                }
            }
        }

        parent.setEnabled(false);
        ProgressWindow pw;
        pw = new ProgressWindow(parent, progress);
        pw.pack();
        // center the progress window on the screen
        Dimension d = Toolkit.getDefaultToolkit().getScreenSize();
        pw.setLocation((d.width - pw.getWidth()) / 2, (d.height - pw.getHeight()) / 2);
        pw.setVisible(true);
        long stime, ftime;
        stime = System.currentTimeMillis();
        parent.setStatus("Loading Graphs");

        progress.setMinimum(0);
        progress.setMaximum(3);
        progress.setValue(0);

        if (fileList.size() > 0)
            prepareGraphAndFileLists();
        else if (ir != null)
            prepareGraphAndFileLists(ir);
        else {
            JOptionPane.showMessageDialog(parent, "No data found!");
            pw.setVisible(false);
            parent.setEnabled(true);
            // bug fix: without this return the code fell through and ran the
            // FastMap computation on empty graph/file lists
            return;
        }
        // bug fix: the reader was never closed (file-handle leak); all index
        // reads are finished at this point
        if (ir != null)
            ir.close();
        progress.setValue(1);

        try {
            // create matrixFastmap with mcs distance for graphs.
            FastmapDistanceMatrix matrixFastmap = createDistanceMatrix(graphList);

            // project the distance matrix down to 2 dimensions
            FastMap fm = new FastMap(matrixFastmap, 2);
            progress.setValue(2);
            long ms = System.currentTimeMillis();
            fm.run();
            ms = System.currentTimeMillis() - ms;
            progress.setValue(3);
            System.out.println("Time for " + matrixFastmap.getDimension() + " images: " + ms + " ms");

            stime = System.currentTimeMillis() - stime; // search duration
            ftime = System.currentTimeMillis();
            parent.setStatus("Formatting results ...");
            Visualization2DPanelWithFdp panel = new Visualization2DPanelWithFdp(fm.getPoints(), matrixFastmap,
                    fileList, autoStartFDP);
            ftime = System.currentTimeMillis() - ftime; // formatting duration
            parent.addVisualization(panel);
            parent.setStatus("Searched for " + df.format(stime / 1000.0) + " s, formatting lasted "
                    + df.format(ftime / 1000.0) + " s");
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // always restore the GUI, even when the computation fails
            pw.setVisible(false);
            parent.setEnabled(true);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }

}

From source file:at.lux.fotoretrieval.GraphDistanceVisualizationThread.java

License:Open Source License

/**
 * Loads every graph and its first associated file name from the given index
 * into {@code graphList} and {@code fileList}.
 *
 * @param ir open reader over the path index; not closed here (caller owns it)
 * @throws IOException if a stored document cannot be read
 */
private void prepareGraphAndFileLists(IndexReader ir) throws IOException {
    graphList = new LinkedList<Graph>();
    for (int i = 0; i < ir.numDocs(); i++) {
        // fetch the stored document once per iteration (was fetched twice)
        org.apache.lucene.document.Document doc = ir.document(i);
        Graph g = new Graph(doc.getValues("graph")[0]);
        String graphFile = doc.getValues("file")[0];
        graphList.add(g);
        fileList.add(graphFile);
    }
}

From source file:at.lux.fotoretrieval.retrievalengines.LucenePathIndexRetrievalEngineTest.java

License:Open Source License

/**
 * Indexes the test repository semantically and dumps the stored "graph"
 * field of every document for manual inspection.
 */
public void testCreateIndex() {
    engine.indexFilesSemantically(pathToIndex, null);
    IndexReader reader = null;
    try {
        reader = IndexReader.open(pathToIndex + "/idx_paths");
        for (int i = 0; i < reader.numDocs(); i++) {
            System.out.println(reader.document(i).get("graph"));
        }
    } catch (IOException e) {
        e.printStackTrace();
        fail(e.toString());
    } finally {
        // bug fix: the reader was never closed (file-handle leak)
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException ignored) {
                // best effort cleanup in a test
            }
        }
    }
}

From source file:at.lux.fotoretrieval.retrievalengines.LucenePathIndexRetrievalEngineTest.java

License:Open Source License

/**
 * Runs {@code testQuery} with every graph in the path index as the query,
 * printing precision/recall data for each.
 * NOTE(review): the hard-coded absolute repository path makes this test
 * machine-specific.
 */
public void testPrecisionAndRecall() {
    IndexSearcher is = null;
    IndexReader ir = null;
    try {
        String repository = "C:\\Java\\JavaProjects\\CaliphEmir\\testdata";
        is = new IndexSearcher(repository + "\\idx_paths");
        ir = IndexReader.open(repository + "\\idx_paths");

        for (int i = 0; i < ir.numDocs(); i++) {
            testQuery(ir, new Graph(ir.document(i).getValues("graph")[0]), is);
        }

    } catch (IOException e) {
        e.printStackTrace();
        fail(e.toString());
    } catch (ParseException e) {
        e.printStackTrace();
    } finally {
        // bug fix: searcher and reader were never closed (file-handle leak)
        try {
            if (is != null)
                is.close();
            if (ir != null)
                ir.close();
        } catch (IOException ignored) {
            // best effort cleanup in a test
        }
    }
}

From source file:at.lux.fotoretrieval.retrievalengines.LucenePathIndexRetrievalEngineTest.java

License:Open Source License

/**
 * Runs {@code testDirectQuery} with every graph in the path index as the
 * query, comparing MCS ranking against full-text search.
 * NOTE(review): the hard-coded absolute repository path makes this test
 * machine-specific.
 */
public void testPrecisionAndRecallFullText() {
    IndexSearcher is = null;
    IndexReader ir = null;
    try {
        String repository = "C:\\Java\\JavaProjects\\CaliphEmir\\testdata";
        is = new IndexSearcher(repository + "\\idx_paths");
        ir = IndexReader.open(repository + "\\idx_paths");

        for (int i = 0; i < ir.numDocs(); i++) {
            testDirectQuery(ir, new Graph(ir.document(i).getValues("graph")[0]), is);
        }

    } catch (IOException e) {
        e.printStackTrace();
        fail(e.toString());
    } catch (ParseException e) {
        e.printStackTrace();
    } finally {
        // bug fix: searcher and reader were never closed (file-handle leak)
        try {
            if (is != null)
                is.close();
            if (ir != null)
                ir.close();
        } catch (IOException ignored) {
            // best effort cleanup in a test
        }
    }
}

From source file:at.lux.fotoretrieval.retrievalengines.LucenePathIndexRetrievalEngineTest.java

License:Open Source License

/**
 * Compares the MCS-based graph ranking with the Lucene path-index ranking
 * for the given query graph and prints a precision/recall plot.
 *
 * @param ir    open reader over the path index (not closed here)
 * @param query query graph
 * @param is    searcher over the same index (not closed here)
 * @throws IOException    if the index cannot be read
 * @throws ParseException if the generated path query cannot be parsed
 */
private void testQuery(IndexReader ir, Graph query, IndexSearcher is) throws IOException, ParseException {
    // create results from mcs, remembering each model's string form so the
    // hit loop below can do a hash lookup instead of re-parsing every graph:
    LinkedList<ResultHolder> resultsMcs = new LinkedList<ResultHolder>();
    HashMap<String, Integer> graph2docId = new HashMap<String, Integer>();
    for (int j = 0; j < ir.numDocs(); j++) {
        Graph model = new Graph(ir.document(j).getValues("graph")[0]);
        float mcsSimilarity = query.getMcsSimilarity(model);
        resultsMcs.add(new ResultHolder(j, model.toString(), mcsSimilarity));
        // last writer wins, matching the original linear-scan semantics
        graph2docId.put(model.toString(), j);
    }
    Collections.sort(resultsMcs);

    // create results from search:

    // set to another similarity if necessary:
    is.setSimilarity(new TermFrequencySimilarity());

    LucenePathIndexRetrievalEngine engine = new LucenePathIndexRetrievalEngine(50);
    String gQuery = LucenePathIndexRetrievalEngine.createLucenePathQuery(query);
    QueryParser qParse = new QueryParser("paths", new WhitespaceAnalyzer());
    Query q = qParse.parse(gQuery);
    Hits hits = is.search(q);
    LinkedList<ResultHolder> resultsSearch = new LinkedList<ResultHolder>();
    for (int i = 0; i < hits.length(); i++) {
        String graph = hits.doc(i).getValues("graph")[0];
        // performance fix: was O(hits * numDocs) with a full Graph parse per
        // comparison; now a single hash lookup per hit
        Integer docID = graph2docId.get(graph);
        resultsSearch.add(new ResultHolder(docID != null ? docID : -1, graph, hits.score(i)));
    }
    Collections.sort(resultsSearch);
    printPrecisionRecallPlot(resultsMcs, resultsSearch);
}

From source file:at.lux.fotoretrieval.retrievalengines.LucenePathIndexRetrievalEngineTest.java

License:Open Source License

/**
 * Compares the MCS-based ranking with a plain full-text search built from
 * the textual labels of the query graph's nodes, on a file-name basis.
 *
 * @param ir    open reader over the path index (not closed here)
 * @param query query graph
 * @param is    unused; kept for signature parity with {@code testQuery}
 * @throws IOException    if an index cannot be read
 * @throws ParseException if the label query cannot be parsed
 */
private void testDirectQuery(IndexReader ir, Graph query, IndexSearcher is) throws IOException, ParseException {
    IndexReader reader = IndexReader.open("C:\\Java\\JavaProjects\\CaliphEmir\\testdata\\idx_semantic");
    IndexSearcher searcher = new IndexSearcher("C:\\Java\\JavaProjects\\CaliphEmir\\testdata\\idx_fulltext");
    try {
        // map semantic node ids to their textual labels:
        HashMap<Integer, String> node2label = new HashMap<Integer, String>();
        for (int j = 0; j < reader.numDocs(); j++) {
            // fetch the stored document once per iteration (was fetched twice)
            org.apache.lucene.document.Document semDoc = reader.document(j);
            String id = semDoc.getValues("id")[0];
            String label = semDoc.getValues("label")[0];
            node2label.put(Integer.parseInt(id), label);
        }
        // create results from mcs, one result per file referencing the graph:
        LinkedList<ResultHolder> resultsMcs = new LinkedList<ResultHolder>();
        for (int j = 0; j < ir.numDocs(); j++) {
            org.apache.lucene.document.Document doc = ir.document(j);
            Graph model = new Graph(doc.getValues("graph")[0]);
            float mcsSimilarity = query.getMcsSimilarity(model);
            for (String s : doc.getValues("file")) {
                resultsMcs.add(new ResultHolder(mcsSimilarity, s));
            }
        }
        Collections.sort(resultsMcs);

        // create results from search: whitespace-joined node labels as query
        StringBuilder qBuilder = new StringBuilder(64);
        for (Iterator<Node> iterator = query.getNodes().iterator(); iterator.hasNext();) {
            Node node = iterator.next();
            qBuilder.append(node2label.get(node.getNodeID()));
            qBuilder.append(" ");
        }
        QueryParser qParse = new QueryParser("all", new WhitespaceAnalyzer());
        Query q = qParse.parse(qBuilder.toString());
        Hits hits = searcher.search(q);
        LinkedList<ResultHolder> resultsSearch = new LinkedList<ResultHolder>();
        for (int i = 0; i < hits.length(); i++) {
            String graph = hits.doc(i).getValues("file")[0];
            resultsSearch.add(new ResultHolder(hits.score(i), graph));
        }
        Collections.sort(resultsSearch);
        printPrecisionRecallPlotFileBased(resultsMcs, resultsSearch);
    } finally {
        // bug fix: reader and searcher were never closed (file-handle leak)
        searcher.close();
        reader.close();
    }
}

From source file:at.lux.retrieval.vectorspace.ElementTextVectorSimilarityTest.java

License:Open Source License

/**
 * Exercises ElementTextVectorSimilarity: prints pairwise similarities of the
 * test documents d1/d2 without a corpus, loads the corpus from the test
 * index, then prints the similarities under TfIdf and BM25 weighting.
 */
public void testSimilarity() throws IOException, JDOMException {
    ElementTextVectorSimilarity sim = new ElementTextVectorSimilarity();
    double distance = sim.getSimilarity(d1, d1);
    System.out.println("distance = " + distance);
    distance = sim.getSimilarity(d1, d2);
    System.out.println("distance = " + distance);
    distance = sim.getSimilarity(d2, d1);
    System.out.println("distance = " + distance);

    IndexReader reader = IndexReader.open("testdata/idx_paths");
    try {
        System.out.println("Loading documents and adding them to corpus ...");
        for (int i = 0; i < reader.numDocs(); i++) {
            Field[] files = reader.document(i).getFields("file");
            for (Field file : files) {
                Document d = saxBuilder.build(file.stringValue());
                sim.addToCorpus(d);
            }
        }
    } finally {
        // bug fix: the reader was never closed (file-handle leak)
        reader.close();
    }

    System.out.println("");
    printPairwiseSimilarities(sim, ElementTextVectorSimilarity.WeightType.TfIdf);

    System.out.println("");
    printPairwiseSimilarities(sim, ElementTextVectorSimilarity.WeightType.BM25);

}

/** Prints the four pairwise d1/d2 similarities under the given weighting. */
private void printPairwiseSimilarities(ElementTextVectorSimilarity sim,
        ElementTextVectorSimilarity.WeightType type) {
    double distance = sim.getSimilarity(d1, d1, type);
    System.out.println("distance = " + distance);
    distance = sim.getSimilarity(d1, d2, type);
    System.out.println("distance = " + distance);
    distance = sim.getSimilarity(d2, d1, type);
    System.out.println("distance = " + distance);
    distance = sim.getSimilarity(d2, d2, type);
    System.out.println("distance = " + distance);
}

From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.termExtract.LuceneTopTermExtract.java

License:Apache License

/**
 * Returns the 10 most important terms in the document with the specified
 * id./*from  w  w w .j  ava  2s .com*/
 *
 * @param docNr
 * @param reader
 * @param numberOfTerms
 * @return
 */
public Map<String, Double> getTopTerms(int docNr, IndexReader reader, int numberOfTerms) {
    try {
        Map<String, Double> termFreq = new HashMap<>(200);
        Map<String, Integer> docFreqs = new HashMap<>(200);

        Document doc = reader.document(docNr);

        updateFrequenciesMapsForReader(termFreq, docFreqs, "title", doc.get("title"), reader, 2);
        updateFrequenciesMapsForReader(termFreq, docFreqs, "description", doc.get("description"), reader, 1.5);
        updateFrequenciesMapsForReader(termFreq, docFreqs, "text", doc.get("text"), reader, 1);

        updateFrequenciesMapsForRegex(termFreq, docFreqs, "title", doc.get("title"), reader, 2);
        updateFrequenciesMapsForRegex(termFreq, docFreqs, "description", doc.get("description"), reader, 1.5);
        updateFrequenciesMapsForRegex(termFreq, docFreqs, "text", doc.get("text"), reader, 1);

        PriorityQueue<TermScorePair> pq = getTermScores(termFreq, docFreqs, reader);

        int n = (pq.size() < numberOfTerms ? pq.size() : numberOfTerms);
        int i = 0;
        TermScorePair tsp = pq.poll();
        Map<String, Double> returnTerms = new HashMap<>(n);
        while (i < n && tsp != null) {
            returnTerms.put(tsp.getTerm(), tsp.getScore());
            tsp = pq.poll();
            i++;
        }
        return returnTerms;
    } catch (IOException ex) {
        logger.error(ex);
        return new HashMap<>(0);
    }
}

From source file:be.ugent.tiwi.sleroux.newsrec.recommendationstester.LuceneTopTermExtract.java

License:Apache License

/**
 * Extracts the highest-scored terms of the given document, weighting title
 * hits twice and description hits 1.5x as much as body text.
 *
 * @param docNr         Lucene document number to analyze
 * @param reader        reader used for document access and frequencies
 * @param numberOfTerms upper bound on the number of returned terms
 * @return term-to-score map; empty when the index cannot be read
 */
public Map<String, Double> getTopTerms(int docNr, IndexReader reader, int numberOfTerms) {
    try {
        Map<String, Double> frequencies = new HashMap<>(200);
        Map<String, Integer> documentFrequencies = new HashMap<>(200);

        Document document = reader.document(docNr);

        // accumulate weighted term statistics per field
        updateFrequenciesMaps(frequencies, documentFrequencies, "title", document.get("title"), reader, 2);
        updateFrequenciesMaps(frequencies, documentFrequencies, "description", document.get("description"), reader,
                1.5);
        updateFrequenciesMaps(frequencies, documentFrequencies, "text", document.get("text"), reader, 1);

        PriorityQueue<TermScorePair> queue = getTermScores(frequencies, documentFrequencies, reader);

        // take at most numberOfTerms entries off the top of the queue
        int limit = Math.min(queue.size(), numberOfTerms);
        TermScorePair pair = queue.poll();
        Map<String, Double> result = new HashMap<>(limit);
        for (int taken = 0; taken < limit && pair != null; taken++) {
            result.put(pair.getTerm(), pair.getScore());
            pair = queue.poll();
        }
        return result;
    } catch (IOException ex) {
        // best-effort contract: log and return an empty map on index errors
        logger.error(ex);
        return new HashMap<>(0);
    }
}