List of usage examples for org.apache.lucene.index IndexReader numDocs
/**
 * Returns the number of documents in this index.
 *
 * NOTE(review): presumably this excludes deleted documents (unlike maxDoc()) —
 * confirm against the Lucene Javadoc for the version in use.
 */
public abstract int numDocs();
From source file:ContentBasedAnalysis.java
License:Apache License
private static TermWeight[] toTfIdf(IndexReader reader, int docId) throws Exception { // get Lucene representation of a Term-Frequency vector TermFreqVector tfv = reader.getTermFreqVector(docId, "contents"); String[] terms = tfv.getTerms(); int[] freqs = tfv.getTermFrequencies(); TermWeight[] tw = new TermWeight[terms.length]; // Maximum Frequency of a term in the document int fmax = freqs[0]; for (int i = 1; i < freqs.length; i++) { if (freqs[i] > fmax) fmax = freqs[i];//from w w w.j a v a 2 s . c o m } // number of docs in the index int nDocs = reader.numDocs(); for (int i = 0; i < tw.length; i++) { tw[i] = new TermWeight(terms[i]); } return tw; }
From source file:TestWang.java
License:Open Source License
private Document findDoc(IndexReader reader, String file) throws IOException { for (int i = 0; i < reader.numDocs(); i++) { Document document = reader.document(i); String s = document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]; if (s.endsWith(File.separator + file)) { // System.out.println("s = " + s); return document; }/*from ww w .ja v a 2 s. c o m*/ } return null; }
From source file:TestWang.java
License:Open Source License
private Document[] findDocs(IndexReader reader, String[] file) throws IOException { Document[] result = new Document[file.length]; for (int i = 0; i < reader.numDocs(); i++) { Document document = reader.document(i); String s = document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]; for (int j = 0; j < result.length; j++) { if (s.endsWith("\\" + file[j])) { // System.out.println("s = " + s); result[j] = document;/* w w w . j a v a2 s. co m*/ } } } return result; }
From source file:TfIdfViewer.java
License:Apache License
private static TermWeight[] toTfIdf(IndexReader reader, int docId) throws Exception { // get Lucene representation of a Term-Frequency vector TermFreqVector tfv = reader.getTermFreqVector(docId, "contents"); // split it into two Arrays: one for terms, one for frequencies; // Lucene guarantees that terms are sorted String[] terms = tfv.getTerms(); int[] freqs = tfv.getTermFrequencies(); TermWeight[] tw = new TermWeight[terms.length]; // compute the maximum frequence of a term in the document double fmax = freqs[0]; for (int i = 1; i < freqs.length; i++) { if (freqs[i] > fmax) fmax = freqs[i];/*from ww w . j a v a 2 s .c om*/ } // number of docs in the index int nDocs = reader.numDocs(); Double tf; Double idf; for (int i = 0; i < tw.length; i++) { double df = docFreq(reader, terms[i]); tf = freqs[i] / fmax; idf = Math.log10((nDocs / df)); Double tf_idf = tf * idf; tw[i] = new TermWeight(terms[i], tf_idf); } return tw; }
From source file:action.indexing.IndexingTest.java
License:Apache License
public void testIndexReader() throws IOException { IndexReader reader = IndexReader.open(directory); assertEquals(ids.length, reader.maxDoc()); //8 assertEquals(ids.length, reader.numDocs()); //8 reader.close();/*from w ww.j a v a2 s .c o m*/ }
From source file:at.lux.fotoretrieval.GraphDistanceVisualizationThread.java
License:Open Source License
public void run() { // TODO: make this based on files to support the visualization of result lists ... try {/*from w ww.j a va2s .c om*/ IndexReader ir = null; if (fileList.isEmpty()) { String indexDirectory = LucenePathIndexRetrievalEngine.parsePathIndexDirectory(dir); if (!IndexReader.indexExists(indexDirectory)) { JOptionPane.showMessageDialog(parent, "Chosen repositors directory does not exist."); return; } else { ir = IndexReader.open(indexDirectory); st = new SuffixTree(SuffixTree.RelationType.FullRelations); // a vector space model for nodes and triples ... gvs = new GraphVectorSimilarity(GraphVectorSimilarity.Type.BM25, 1); for (int i = 0; i < ir.numDocs(); i++) { Graph g_idx = new Graph(ir.document(i).getField("graph").stringValue()); Field[] files = ir.document(i).getFields("file"); for (Field file1 : files) { st.addCorpusDocument(SuffixTree.createSuffixTreeDocument(g_idx)); gvs.addToCorpus(g_idx); } } } } parent.setEnabled(false); ProgressWindow pw; pw = new ProgressWindow(parent, progress); pw.pack(); Dimension d = Toolkit.getDefaultToolkit().getScreenSize(); pw.setLocation((d.width - pw.getWidth()) / 2, (d.height - pw.getHeight()) / 2); pw.setVisible(true); long stime, ftime; stime = System.currentTimeMillis(); parent.setStatus("Loading Graphs"); progress.setMinimum(0); progress.setMaximum(3); progress.setValue(0); if (fileList.size() > 0) prepareGraphAndFileLists(); else if (ir != null) prepareGraphAndFileLists(ir); else { JOptionPane.showMessageDialog(parent, "No data found!"); pw.setVisible(false); parent.setEnabled(true); } progress.setValue(1); try { // create matrixFastmapFastmap with mcs distance for graphs. 
FastmapDistanceMatrix matrixFastmap = createDistanceMatrix(graphList); FastMap fm = new FastMap(matrixFastmap, 2); progress.setValue(2); long ms = System.currentTimeMillis(); fm.run(); ms = System.currentTimeMillis() - ms; progress.setValue(3); System.out.println("Time for " + matrixFastmap.getDimension() + " images: " + ms + " ms"); // Vector results = engine.getSimilarImages(visualDescriptor, dir, recursive, progress); stime = System.currentTimeMillis() - stime; ftime = System.currentTimeMillis(); parent.setStatus("Formatting results ..."); Visualization2DPanelWithFdp panel = new Visualization2DPanelWithFdp(fm.getPoints(), matrixFastmap, fileList, autoStartFDP); // ResultsPanel rp = new ResultsPanel(results, progress); ftime = System.currentTimeMillis() - ftime; parent.addVisualization(panel); parent.setStatus("Searched for " + df.format(stime / 1000.0) + " s, formatting lasted " + df.format(ftime / 1000.0) + " s"); } catch (Exception e) { e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. } finally { pw.setVisible(false); parent.setEnabled(true); } } catch (IOException e) { e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. } }
From source file:at.lux.fotoretrieval.GraphDistanceVisualizationThread.java
License:Open Source License
/**
 * Rebuilds {@code graphList} from the index and appends the matching file names to
 * {@code fileList}, one entry per live document, in document-id order.
 *
 * @param ir open reader on the path index; the first "graph" and "file" value of each
 *           document is used
 * @throws IOException if a document cannot be loaded from the index
 */
private void prepareGraphAndFileLists(IndexReader ir) throws IOException {
    graphList = new LinkedList<Graph>();
    // Document ids span [0, maxDoc()); deleted slots must be skipped explicitly.
    int maxDoc = ir.maxDoc();
    for (int i = 0; i < maxDoc; i++) {
        if (ir.isDeleted(i)) {
            continue;
        }
        // Load the stored document once and read both fields from it.
        org.apache.lucene.document.Document doc = ir.document(i);
        Graph g = new Graph(doc.getValues("graph")[0]);
        String graphFile = doc.getValues("file")[0];
        graphList.add(g);
        fileList.add(graphFile);
    }
}
From source file:at.lux.fotoretrieval.retrievalengines.LucenePathIndexRetrievalEngineTest.java
License:Open Source License
public void testCreateIndex() { engine.indexFilesSemantically(pathToIndex, null); try {//from w ww . j ava2 s . com IndexReader reader = IndexReader.open(pathToIndex + "/idx_paths"); for (int i = 0; i < reader.numDocs(); i++) { System.out.println(reader.document(i).get("graph")); } } catch (IOException e) { e.printStackTrace(); fail(e.toString()); } }
From source file:at.lux.fotoretrieval.retrievalengines.LucenePathIndexRetrievalEngineTest.java
License:Open Source License
public void testPrecisionAndRecall() { try {/*from www. ja v a 2 s .c o m*/ String repository = "C:\\Java\\JavaProjects\\CaliphEmir\\testdata"; // String repository = "C:\\Dokumente und Einstellungen\\Mathias\\Eigene Dateien\\JavaProjects\\Caliph\\testdata"; IndexSearcher is = new IndexSearcher(repository + "\\idx_paths"); IndexReader ir = IndexReader.open(repository + "\\idx_paths"); for (int i = 0; i < ir.numDocs(); i++) { testQuery(ir, new Graph(ir.document(i).getValues("graph")[0]), is); } } catch (IOException e) { e.printStackTrace(); fail(e.toString()); } catch (ParseException e) { e.printStackTrace(); } }
From source file:at.lux.fotoretrieval.retrievalengines.LucenePathIndexRetrievalEngineTest.java
License:Open Source License
public void testPrecisionAndRecallFullText() { try {//from w w w .j a v a 2 s . c o m String repository = "C:\\Java\\JavaProjects\\CaliphEmir\\testdata"; // String repository = "C:\\Dokumente und Einstellungen\\Mathias\\Eigene Dateien\\JavaProjects\\Caliph\\testdata"; IndexSearcher is = new IndexSearcher(repository + "\\idx_paths"); IndexReader ir = IndexReader.open(repository + "\\idx_paths"); for (int i = 0; i < ir.numDocs(); i++) { testDirectQuery(ir, new Graph(ir.document(i).getValues("graph")[0]), is); } } catch (IOException e) { e.printStackTrace(); fail(e.toString()); } catch (ParseException e) { e.printStackTrace(); } }