List of usage examples for org.apache.lucene.index IndexReader document
public final Document document(int docID) throws IOException
Returns the n-th Document in this index.

From source file: at.lux.fotoretrieval.GraphDistanceVisualizationThread.java
License:Open Source License
public void run() { // TODO: make this based on files to support the visualization of result lists ... try {//from ww w . j a v a2s . co m IndexReader ir = null; if (fileList.isEmpty()) { String indexDirectory = LucenePathIndexRetrievalEngine.parsePathIndexDirectory(dir); if (!IndexReader.indexExists(indexDirectory)) { JOptionPane.showMessageDialog(parent, "Chosen repositors directory does not exist."); return; } else { ir = IndexReader.open(indexDirectory); st = new SuffixTree(SuffixTree.RelationType.FullRelations); // a vector space model for nodes and triples ... gvs = new GraphVectorSimilarity(GraphVectorSimilarity.Type.BM25, 1); for (int i = 0; i < ir.numDocs(); i++) { Graph g_idx = new Graph(ir.document(i).getField("graph").stringValue()); Field[] files = ir.document(i).getFields("file"); for (Field file1 : files) { st.addCorpusDocument(SuffixTree.createSuffixTreeDocument(g_idx)); gvs.addToCorpus(g_idx); } } } } parent.setEnabled(false); ProgressWindow pw; pw = new ProgressWindow(parent, progress); pw.pack(); Dimension d = Toolkit.getDefaultToolkit().getScreenSize(); pw.setLocation((d.width - pw.getWidth()) / 2, (d.height - pw.getHeight()) / 2); pw.setVisible(true); long stime, ftime; stime = System.currentTimeMillis(); parent.setStatus("Loading Graphs"); progress.setMinimum(0); progress.setMaximum(3); progress.setValue(0); if (fileList.size() > 0) prepareGraphAndFileLists(); else if (ir != null) prepareGraphAndFileLists(ir); else { JOptionPane.showMessageDialog(parent, "No data found!"); pw.setVisible(false); parent.setEnabled(true); } progress.setValue(1); try { // create matrixFastmapFastmap with mcs distance for graphs. 
FastmapDistanceMatrix matrixFastmap = createDistanceMatrix(graphList); FastMap fm = new FastMap(matrixFastmap, 2); progress.setValue(2); long ms = System.currentTimeMillis(); fm.run(); ms = System.currentTimeMillis() - ms; progress.setValue(3); System.out.println("Time for " + matrixFastmap.getDimension() + " images: " + ms + " ms"); // Vector results = engine.getSimilarImages(visualDescriptor, dir, recursive, progress); stime = System.currentTimeMillis() - stime; ftime = System.currentTimeMillis(); parent.setStatus("Formatting results ..."); Visualization2DPanelWithFdp panel = new Visualization2DPanelWithFdp(fm.getPoints(), matrixFastmap, fileList, autoStartFDP); // ResultsPanel rp = new ResultsPanel(results, progress); ftime = System.currentTimeMillis() - ftime; parent.addVisualization(panel); parent.setStatus("Searched for " + df.format(stime / 1000.0) + " s, formatting lasted " + df.format(ftime / 1000.0) + " s"); } catch (Exception e) { e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. } finally { pw.setVisible(false); parent.setEnabled(true); } } catch (IOException e) { e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. } }
From source file:at.lux.fotoretrieval.GraphDistanceVisualizationThread.java
License:Open Source License
private void prepareGraphAndFileLists(IndexReader ir) throws IOException { graphList = new LinkedList<Graph>(); for (int i = 0; i < ir.numDocs(); i++) { Graph g = new Graph(ir.document(i).getValues("graph")[0]); String graphFile = ir.document(i).getValues("file")[0]; graphList.add(g);/* w w w .j ava 2 s .com*/ fileList.add(graphFile); } }
From source file:at.lux.fotoretrieval.retrievalengines.LucenePathIndexRetrievalEngineTest.java
License:Open Source License
public void testCreateIndex() { engine.indexFilesSemantically(pathToIndex, null); try {//from w w w . ja v a 2 s . c om IndexReader reader = IndexReader.open(pathToIndex + "/idx_paths"); for (int i = 0; i < reader.numDocs(); i++) { System.out.println(reader.document(i).get("graph")); } } catch (IOException e) { e.printStackTrace(); fail(e.toString()); } }
From source file:at.lux.fotoretrieval.retrievalengines.LucenePathIndexRetrievalEngineTest.java
License:Open Source License
public void testPrecisionAndRecall() { try {//ww w . j av a2s . c o m String repository = "C:\\Java\\JavaProjects\\CaliphEmir\\testdata"; // String repository = "C:\\Dokumente und Einstellungen\\Mathias\\Eigene Dateien\\JavaProjects\\Caliph\\testdata"; IndexSearcher is = new IndexSearcher(repository + "\\idx_paths"); IndexReader ir = IndexReader.open(repository + "\\idx_paths"); for (int i = 0; i < ir.numDocs(); i++) { testQuery(ir, new Graph(ir.document(i).getValues("graph")[0]), is); } } catch (IOException e) { e.printStackTrace(); fail(e.toString()); } catch (ParseException e) { e.printStackTrace(); } }
From source file:at.lux.fotoretrieval.retrievalengines.LucenePathIndexRetrievalEngineTest.java
License:Open Source License
public void testPrecisionAndRecallFullText() { try {//from ww w . ja v a 2 s . c o m String repository = "C:\\Java\\JavaProjects\\CaliphEmir\\testdata"; // String repository = "C:\\Dokumente und Einstellungen\\Mathias\\Eigene Dateien\\JavaProjects\\Caliph\\testdata"; IndexSearcher is = new IndexSearcher(repository + "\\idx_paths"); IndexReader ir = IndexReader.open(repository + "\\idx_paths"); for (int i = 0; i < ir.numDocs(); i++) { testDirectQuery(ir, new Graph(ir.document(i).getValues("graph")[0]), is); } } catch (IOException e) { e.printStackTrace(); fail(e.toString()); } catch (ParseException e) { e.printStackTrace(); } }
From source file:at.lux.fotoretrieval.retrievalengines.LucenePathIndexRetrievalEngineTest.java
License:Open Source License
private void testQuery(IndexReader ir, Graph query, IndexSearcher is) throws IOException, ParseException { // create results from mcs: LinkedList<ResultHolder> resultsMcs = new LinkedList<ResultHolder>(); for (int j = 0; j < ir.numDocs(); j++) { Graph model = new Graph(ir.document(j).getValues("graph")[0]); float mcsSimilarity = query.getMcsSimilarity(model); resultsMcs.add(new ResultHolder(j, model.toString(), mcsSimilarity)); }/* w w w .ja v a2 s .c o m*/ Collections.sort(resultsMcs); // for (Iterator<ResultHolder> iterator = resultsMcs.iterator(); iterator.hasNext();) { // ResultHolder r = iterator.next(); // System.out.println(r.getDocumentNumber() + ": " + r.getSimilarity()); // } // create results from search: // set to another similarity if necessary: is.setSimilarity(new TermFrequencySimilarity()); // is.setSimilarity(new SimpleTfIdfSimilarity()); LucenePathIndexRetrievalEngine engine = new LucenePathIndexRetrievalEngine(50); String gQuery = LucenePathIndexRetrievalEngine.createLucenePathQuery(query); // System.out.println(query); QueryParser qParse = new QueryParser("paths", new WhitespaceAnalyzer()); Query q = qParse.parse(gQuery); Hits hits = is.search(q); LinkedList<ResultHolder> resultsSearch = new LinkedList<ResultHolder>(); for (int i = 0; i < hits.length(); i++) { String graph = hits.doc(i).getValues("graph")[0]; int docID = -1; for (int j = 0; j < ir.numDocs(); j++) { Graph model = new Graph(ir.document(j).getValues("graph")[0]); if (model.toString().equals(graph)) docID = j; } resultsSearch.add(new ResultHolder(docID, graph, hits.score(i))); } Collections.sort(resultsSearch); printPrecisionRecallPlot(resultsMcs, resultsSearch); }
From source file:at.lux.fotoretrieval.retrievalengines.LucenePathIndexRetrievalEngineTest.java
License:Open Source License
private void testDirectQuery(IndexReader ir, Graph query, IndexSearcher is) throws IOException, ParseException { IndexReader reader = IndexReader.open("C:\\Java\\JavaProjects\\CaliphEmir\\testdata\\idx_semantic"); IndexSearcher searcher = new IndexSearcher("C:\\Java\\JavaProjects\\CaliphEmir\\testdata\\idx_fulltext"); HashMap<Integer, String> node2label = new HashMap<Integer, String>(); for (int j = 0; j < reader.numDocs(); j++) { String id = reader.document(j).getValues("id")[0]; String label = reader.document(j).getValues("label")[0]; node2label.put(Integer.parseInt(id), label); }/*from www . j ava 2 s .co m*/ // create results from mcs: LinkedList<ResultHolder> resultsMcs = new LinkedList<ResultHolder>(); for (int j = 0; j < ir.numDocs(); j++) { Graph model = new Graph(ir.document(j).getValues("graph")[0]); float mcsSimilarity = query.getMcsSimilarity(model); String[] file = ir.document(j).getValues("file"); for (int i = 0; i < file.length; i++) { String s = file[i]; resultsMcs.add(new ResultHolder(mcsSimilarity, s)); } } Collections.sort(resultsMcs); // for (Iterator<ResultHolder> iterator = resultsMcs.iterator(); iterator.hasNext();) { // ResultHolder r = iterator.next(); // System.out.println(r.getDocumentNumber() + ": " + r.getSimilarity()); // } // create results from search: StringBuilder qBuilder = new StringBuilder(64); for (Iterator<Node> iterator = query.getNodes().iterator(); iterator.hasNext();) { Node node = iterator.next(); // qBuilder.append("\""); qBuilder.append(node2label.get(node.getNodeID())); qBuilder.append(" "); // qBuilder.append("\" "); } // System.out.println(query); QueryParser qParse = new QueryParser("all", new WhitespaceAnalyzer()); Query q = qParse.parse(qBuilder.toString()); Hits hits = searcher.search(q); LinkedList<ResultHolder> resultsSearch = new LinkedList<ResultHolder>(); for (int i = 0; i < hits.length(); i++) { String graph = hits.doc(i).getValues("file")[0]; // int docID = -1; // for (int j = 0; j < ir.numDocs(); j++) { 
// Graph model = new Graph(ir.document(j).getValues("graph")[0]); // if (model.toString().equals(graph)) docID = j; // } resultsSearch.add(new ResultHolder(hits.score(i), graph)); } Collections.sort(resultsSearch); printPrecisionRecallPlotFileBased(resultsMcs, resultsSearch); }
From source file:at.lux.retrieval.vectorspace.ElementTextVectorSimilarityTest.java
License:Open Source License
public void testSimilarity() throws IOException, JDOMException { ElementTextVectorSimilarity sim = new ElementTextVectorSimilarity(); double distance = sim.getSimilarity(d1, d1); System.out.println("distance = " + distance); distance = sim.getSimilarity(d1, d2); System.out.println("distance = " + distance); distance = sim.getSimilarity(d2, d1); System.out.println("distance = " + distance); IndexReader reader = IndexReader.open("testdata/idx_paths"); System.out.println("Loading documents and adding them to corpus ..."); for (int i = 0; i < reader.numDocs(); i++) { // Graph g_idx = new Graph(reader.document(i).getField("graph").stringValue()); Field[] files = reader.document(i).getFields("file"); for (Field file : files) { Document d = saxBuilder.build(file.stringValue()); sim.addToCorpus(d);//from w w w. j a v a 2 s . c o m } } System.out.println(""); distance = sim.getSimilarity(d1, d1, ElementTextVectorSimilarity.WeightType.TfIdf); System.out.println("distance = " + distance); distance = sim.getSimilarity(d1, d2, ElementTextVectorSimilarity.WeightType.TfIdf); System.out.println("distance = " + distance); distance = sim.getSimilarity(d2, d1, ElementTextVectorSimilarity.WeightType.TfIdf); System.out.println("distance = " + distance); distance = sim.getSimilarity(d2, d2, ElementTextVectorSimilarity.WeightType.TfIdf); System.out.println("distance = " + distance); System.out.println(""); distance = sim.getSimilarity(d1, d1, ElementTextVectorSimilarity.WeightType.BM25); System.out.println("distance = " + distance); distance = sim.getSimilarity(d1, d2, ElementTextVectorSimilarity.WeightType.BM25); System.out.println("distance = " + distance); distance = sim.getSimilarity(d2, d1, ElementTextVectorSimilarity.WeightType.BM25); System.out.println("distance = " + distance); distance = sim.getSimilarity(d2, d2, ElementTextVectorSimilarity.WeightType.BM25); System.out.println("distance = " + distance); }
From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.termExtract.LuceneTopTermExtract.java
License:Apache License
/** * Returns the 10 most important terms in the document with the specified * id./*from w w w .j ava 2s .com*/ * * @param docNr * @param reader * @param numberOfTerms * @return */ public Map<String, Double> getTopTerms(int docNr, IndexReader reader, int numberOfTerms) { try { Map<String, Double> termFreq = new HashMap<>(200); Map<String, Integer> docFreqs = new HashMap<>(200); Document doc = reader.document(docNr); updateFrequenciesMapsForReader(termFreq, docFreqs, "title", doc.get("title"), reader, 2); updateFrequenciesMapsForReader(termFreq, docFreqs, "description", doc.get("description"), reader, 1.5); updateFrequenciesMapsForReader(termFreq, docFreqs, "text", doc.get("text"), reader, 1); updateFrequenciesMapsForRegex(termFreq, docFreqs, "title", doc.get("title"), reader, 2); updateFrequenciesMapsForRegex(termFreq, docFreqs, "description", doc.get("description"), reader, 1.5); updateFrequenciesMapsForRegex(termFreq, docFreqs, "text", doc.get("text"), reader, 1); PriorityQueue<TermScorePair> pq = getTermScores(termFreq, docFreqs, reader); int n = (pq.size() < numberOfTerms ? pq.size() : numberOfTerms); int i = 0; TermScorePair tsp = pq.poll(); Map<String, Double> returnTerms = new HashMap<>(n); while (i < n && tsp != null) { returnTerms.put(tsp.getTerm(), tsp.getScore()); tsp = pq.poll(); i++; } return returnTerms; } catch (IOException ex) { logger.error(ex); return new HashMap<>(0); } }
From source file:be.ugent.tiwi.sleroux.newsrec.recommendationstester.LuceneTopTermExtract.java
License:Apache License
public Map<String, Double> getTopTerms(int docNr, IndexReader reader, int numberOfTerms) { try {/* w ww . j a v a2s . co m*/ Map<String, Double> termFreq = new HashMap<>(200); Map<String, Integer> docFreqs = new HashMap<>(200); Document doc = reader.document(docNr); updateFrequenciesMaps(termFreq, docFreqs, "title", doc.get("title"), reader, 2); updateFrequenciesMaps(termFreq, docFreqs, "description", doc.get("description"), reader, 1.5); updateFrequenciesMaps(termFreq, docFreqs, "text", doc.get("text"), reader, 1); PriorityQueue<TermScorePair> pq = getTermScores(termFreq, docFreqs, reader); int n = (pq.size() < numberOfTerms ? pq.size() : numberOfTerms); int i = 0; TermScorePair tsp = pq.poll(); Map<String, Double> returnTerms = new HashMap<>(n); while (i < n && tsp != null) { returnTerms.put(tsp.getTerm(), tsp.getScore()); tsp = pq.poll(); i++; } return returnTerms; } catch (IOException ex) { logger.error(ex); return new HashMap<>(0); } }