List of usage examples for org.apache.lucene.index IndexReader document
public final Document document(int docID) throws IOException

Returns the stored fields of the nth Document in this index.
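Before the full examples, a minimal, self-contained sketch of the call itself. The index path and the "title" field name are placeholders (they are not taken from the examples below); note that document(int) returns only stored fields and the docID should refer to a live, non-deleted document.

import java.io.File;
import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.FSDirectory;

public class ReadStoredFieldsExample {
    public static void main(String[] args) throws IOException {
        // Placeholder path: any existing Lucene index directory.
        IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("/path/to/index")));
        try {
            if (reader.maxDoc() > 0) {
                Document doc = reader.document(0); // stored fields of the first document
                // get(...) returns null if the field is missing or was not stored.
                System.out.println("title = " + doc.get("title"));
            }
        } finally {
            reader.close();
        }
    }
}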
From source file: net.semanticmetadata.lire.indexing.HashingTest.java
License: Open Source License
public void testImageSearcher() throws IOException {
    BitSamplingImageSearcher is = new BitSamplingImageSearcher(60, DocumentBuilder.FIELD_NAME_PHOG,
            DocumentBuilder.FIELD_NAME_PHOG + "_hash", new PHOG(), 500);
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("E:\\wipo1m-idx")));
    Document queryDoc = reader.document(1);
    ImageSearchHits search = is.search(queryDoc, reader);
    long ms = System.currentTimeMillis();
    int runs = 50;
    for (int i = 0; i < runs; i++)
        search = is.search(queryDoc, reader);
    ms = System.currentTimeMillis() - ms;
    // String file = FileUtils.saveImageResultsToHtml("wipo", search, queryDoc.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
    // FileUtils.browseUri(file);
    System.out.println(((double) ms) / ((double) runs) + " ms per search.");
}
From source file: net.semanticmetadata.lire.indexing.HashingTest.java
License: Open Source License
private String printToHtml(TopDocs topDocs, IndexReader reader) throws IOException {
    String fileName = "results-" + System.currentTimeMillis() / 1000 + ".html";
    BufferedWriter bw = new BufferedWriter(new FileWriter(fileName));
    bw.write("<html>\n" + "<head><title>Search Results</title></head>\n" + "<body bgcolor=\"#FFFFFF\">\n");
    bw.write("<h3>query</h3>\n");
    bw.write("<a href=\"" + queryFile + "\"><img src=\"" + queryFile + "\"></a><p>\n");
    bw.write("<h3>results</h3>\n<table>");
    int elems = Math.min(topDocs.scoreDocs.length, 50);
    for (int i = 0; i < elems; i++) {
        if (i % 3 == 0)
            bw.write("<tr>");
        String s = reader.document(topDocs.scoreDocs[i].doc).get("descriptorImageIdentifier");
        s = new File(s).getAbsolutePath();
        bw.write("<td><a href=\"" + s + "\"><img style=\"max-width:220px\" src=\"" + s + "\"></a></td>\n");
        if (i % 3 == 2)
            bw.write("</tr>");
    }
    if (elems % 3 != 0) {
        // pad the last row so every row has three cells
        if (elems % 3 == 1) {
            bw.write("<td>-</td>\n");
            bw.write("<td>-</td>\n");
        } else if (elems % 3 == 2) {
            bw.write("<td>-</td>\n");
        }
        bw.write("</tr>");
    }
    bw.write("</table></body>\n" + "</html>");
    bw.close();
    return new File(fileName).getPath();
}
From source file: net.semanticmetadata.lire.indexing.IndexVisualWordsTest.java
License: Open Source License
public void testIndexMissingFiles() throws IOException {
    // first delete some of the existing ones ...
    System.out.println("Deleting visual words from docs ...");
    IndexReader ir = DirectoryReader.open(FSDirectory.open(new File(index)));
    IndexWriter iw = LuceneUtils.createIndexWriter(index, false);
    int maxDocs = ir.maxDoc();
    for (int i = 0; i < maxDocs / 10; i++) {
        Document d = ir.document(i);
        d.removeFields(DocumentBuilder.FIELD_NAME_SURF + DocumentBuilder.FIELD_NAME_BOVW);
        // d.removeFields(DocumentBuilder.FIELD_NAME_SURF_LOCAL_FEATURE_HISTOGRAM);
        d.removeFields(DocumentBuilder.FIELD_NAME_SURF + DocumentBuilder.FIELD_NAME_BOVW_VECTOR);
        // d.removeFields(DocumentBuilder.FIELD_NAME_SURF);
        iw.updateDocument(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER,
                d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), d);
    }
    System.out.println("# of deleted docs: " + maxDocs / 10);
    System.out.println("Optimizing and closing ...");
    iw.close();
    ir.close();
    System.out.println("Creating new visual words ...");
    BOVWBuilder surfFeatureHistogramBuilder = new BOVWBuilder(
            DirectoryReader.open(FSDirectory.open(new File(index))), new SurfFeature(), numSamples, clusters);
    // surfFeatureHistogramBuilder.indexMissing();
    // System.out.println("Finished.");
}
From source file: net.semanticmetadata.lire.indexing.LocalitySensitiveHashingTest.java
License: Open Source License
public double singleSearch(int docNum) throws IOException, InstantiationException, IllegalAccessException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    // -----------
    String query = reader.document(docNum).getValues("hash")[0];
    CEDD ceddQuery = new CEDD();
    ceddQuery.setByteArrayRepresentation(
            reader.document(docNum).getField(DocumentBuilder.FIELD_NAME_CEDD).binaryValue().bytes,
            reader.document(docNum).getField(DocumentBuilder.FIELD_NAME_CEDD).binaryValue().offset,
            reader.document(docNum).getField(DocumentBuilder.FIELD_NAME_CEDD).binaryValue().length);
    // -----------
    HashSet<String> gold = new HashSet<String>(numImagesEval);
    ImageSearcher cis = ImageSearcherFactory.createCEDDImageSearcher(100);
    ImageSearchHits hits = cis.search(reader.document(docNum), reader);
    for (int i = 0; i < 10; i++) {
        gold.add(hits.doc(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
    }
    // ------------
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setSimilarity(new SimilarityBase() {
        @Override
        protected float score(BasicStats basicStats, float freq, float v2) {
            return 1;
        }

        @Override
        public String toString() {
            return null;
        }
    });
    TopDocs topDocs = searcher.search(createQuery(query), 500);
    topDocs = rerank(topDocs, ceddQuery, reader);
    // System.out.println("topDocs.scoreDocs.length = " + topDocs.scoreDocs.length);
    double numMatches = 0;
    for (int i = 0; i < topDocs.scoreDocs.length; i++) {
        ScoreDoc scoreDoc = topDocs.scoreDocs[i];
        // System.out.print(scoreDoc.score + ": ");
        String file = reader.document(scoreDoc.doc).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
        // System.out.println(file.substring(file.lastIndexOf('/') + 1) + (gold.contains(file) ? " x" : " o"));
        if (gold.contains(file))
            numMatches++;
    }
    return numMatches;
}
From source file: net.semanticmetadata.lire.indexing.LocalitySensitiveHashingTest.java
License: Open Source License
public void testOutputSearchResults() throws IOException, InstantiationException, IllegalAccessException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    int docNum = 0; // doc to search for.
    // -----------
    String query = reader.document(docNum).getValues("hash")[0];
    CEDD ceddQuery = new CEDD();
    ceddQuery.setByteArrayRepresentation(
            reader.document(docNum).getField(DocumentBuilder.FIELD_NAME_CEDD).binaryValue().bytes,
            reader.document(docNum).getField(DocumentBuilder.FIELD_NAME_CEDD).binaryValue().offset,
            reader.document(docNum).getField(DocumentBuilder.FIELD_NAME_CEDD).binaryValue().length);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(createQuery(query), numImagesEval);
    FileUtils.saveImageResultsToPng("result_lsh", topDocs,
            reader.document(docNum).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0], reader);
}
From source file: net.semanticmetadata.lire.indexing.LocalitySensitiveHashingTest.java
License: Open Source License
private TopDocs rerank(TopDocs docs, LireFeature feature, IndexReader reader)
        throws IOException, IllegalAccessException, InstantiationException {
    LireFeature tmp = new CEDD();
    ArrayList<ScoreDoc> res = new ArrayList<ScoreDoc>(docs.scoreDocs.length);
    float maxScore = 0f;
    for (int i = 0; i < docs.scoreDocs.length; i++) {
        tmp.setByteArrayRepresentation(
                reader.document(docs.scoreDocs[i].doc).getField(DocumentBuilder.FIELD_NAME_CEDD)
                        .binaryValue().bytes,
                reader.document(docs.scoreDocs[i].doc).getField(DocumentBuilder.FIELD_NAME_CEDD)
                        .binaryValue().offset,
                reader.document(docs.scoreDocs[i].doc).getField(DocumentBuilder.FIELD_NAME_CEDD)
                        .binaryValue().length);
        maxScore = Math.max(1 / tmp.getDistance(feature), maxScore);
        res.add(new ScoreDoc(docs.scoreDocs[i].doc, 1 / tmp.getDistance(feature)));
    }
    // sorting res ...
    Collections.sort(res, new Comparator<ScoreDoc>() {
        @Override
        public int compare(ScoreDoc o1, ScoreDoc o2) {
            return (int) Math.signum(o2.score - o1.score);
        }
    });
    return new TopDocs(numImagesEval, res.toArray(new ScoreDoc[res.size()]), maxScore);
}
From source file: net.semanticmetadata.lire.indexing.MetricSpacesInvertedListIndexing.java
License: Open Source License
/**
 * Creates a set of reference objects and stores it in a new index (hashFunctionsFileName "<indexPath>-ro").
 * Then creates ordered lists of reference object positions for each data item in the index with the given
 * feature. Finally a new index (hashFunctionsFileName "<indexPath>-ms") is created where all the original
 * documents as well as the new data are stored.
 *
 * @param indexPath the path to the original index
 * @throws IOException
 */
public void createIndex(String indexPath) throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    int numDocs = reader.numDocs();
    if (numDocs < numReferenceObjects) {
        throw new UnsupportedOperationException("Too few documents in index.");
    }
    // progress report
    progress.setNumDocsAll(numDocs);
    progress.setCurrentState(State.RoSelection);

    boolean hasDeletions = reader.hasDeletions();

    // init reference objects:
    IndexWriter iw = LuceneUtils.createIndexWriter(indexPath + "-ro", true);
    HashSet<Integer> referenceObjsIds = new HashSet<Integer>(numReferenceObjects);
    double numDocsDouble = (double) numDocs;
    while (referenceObjsIds.size() < numReferenceObjects) {
        referenceObjsIds.add((int) (numDocsDouble * Math.random()));
    }
    int count = 0;
    if (hasDeletions) {
        System.err.println("WARNING: There are deleted docs in your index. You should "
                + "optimize your index before using this method.");
    }

    // progress report
    progress.setCurrentState(State.RoIndexing);

    // find them in the index and put them into a separate index:
    for (int i : referenceObjsIds) {
        count++;
        Document document = reader.document(i);
        document.add(new Field("ro-id", count + "", StringField.TYPE_STORED));
        iw.addDocument(document);
    }
    iw.commit();
    iw.close();

    // progress report
    progress.setCurrentState(State.Indexing);

    // now find the reference objects for each entry ;)
    IndexReader readerRo = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro")));
    ImageSearcher searcher = new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
    Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
    analyzerPerField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    PerFieldAnalyzerWrapper aWrapper = new PerFieldAnalyzerWrapper(
            new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), analyzerPerField);

    iw = new IndexWriter(FSDirectory.open(new File(indexPath)),
            new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper)
                    .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
    StringBuilder sb = new StringBuilder(256);
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    for (int i = 0; i < numDocs; i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.
        Document document = reader.document(i);
        ImageSearchHits hits = searcher.search(document, readerRo);
        sb.delete(0, sb.length());
        for (int j = 0; j < numReferenceObjectsUsed; j++) {
            sb.append(hits.doc(j).getValues("ro-id")[0]);
            sb.append(' ');
        }
        // System.out.println(sb.toString());
        document.add(new TextField("ro-order", sb.toString(), Field.Store.YES));
        iw.updateDocument(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER,
                document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), document);
        // progress report
        progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1);
    }
    iw.commit();
    iw.close();

    // progress report
    progress.setCurrentState(State.Idle);
}
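The "ro-order" field above is deliberately indexed with a WhitespaceAnalyzer so that each space-separated reference-object id becomes its own term. For orientation, here is a hedged, hypothetical sketch of what the matching query side could look like; this is not LIRE's actual MetricSpacesInvertedListIndexing.search, and the helper name and boost scheme are assumptions for illustration.

// Hypothetical helper, for illustration only: builds a boosted OR query over the
// reference-object ids nearest to the query image, so documents sharing many close
// reference objects score higher. Uses org.apache.lucene.search.{Query, BooleanQuery,
// BooleanClause, TermQuery} and org.apache.lucene.index.Term from Lucene 4.x.
private Query createRoOrderQuery(String[] nearestRoIds) {
    BooleanQuery query = new BooleanQuery();
    for (int rank = 0; rank < nearestRoIds.length; rank++) {
        TermQuery tq = new TermQuery(new Term("ro-order", nearestRoIds[rank]));
        tq.setBoost(nearestRoIds.length - rank); // earlier (closer) reference objects weigh more
        query.add(tq, BooleanClause.Occur.SHOULD);
    }
    return query;
}

Candidates retrieved this way would then be re-ranked with the actual feature distance, much as the rerank(...) example above does for hashing.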
From source file: net.semanticmetadata.lire.indexing.MetricSpacesInvertedListIndexing.java
License: Open Source License
/**
 * We assume that the initial indexing has been done and a set of reference objects has been
 * found and indexed in the separate fileList. However, further documents were added and they
 * now need to get a ranked list of reference objects. So we (i) get all these new documents
 * missing the field "ro-order" and (ii) add this field.
 *
 * @param indexPath the index to update
 * @throws IOException
 */
public void updateIndex(String indexPath) throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    int numDocs = reader.numDocs();
    boolean hasDeletions = reader.hasDeletions();
    int countUpdated = 0;

    IndexReader readerRo = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro")));
    ImageSearcher searcher = new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
    Map<String, Analyzer> perField = new HashMap<String, Analyzer>(1);
    perField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    PerFieldAnalyzerWrapper aWrapper = new PerFieldAnalyzerWrapper(
            new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), perField);

    IndexWriter iw = new IndexWriter(FSDirectory.open(new File(indexPath)),
            new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper)
                    .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
    StringBuilder sb = new StringBuilder(256);
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    for (int i = 0; i < numDocs; i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.
        Document document = reader.document(i);
        if (document.getField("ro-order") == null) { // if the field is not here we create it.
            ImageSearchHits hits = searcher.search(document, readerRo);
            sb.delete(0, sb.length());
            for (int j = 0; j < numReferenceObjectsUsed; j++) {
                sb.append(hits.doc(j).getValues("ro-id")[0]);
                sb.append(' ');
            }
            // System.out.println(sb.toString());
            document.add(new TextField("ro-order", sb.toString(), Field.Store.YES));
            iw.updateDocument(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER,
                    document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), document);
            countUpdated++;
        }
        // progress report
        progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1);
        // debug: System.out.println("countUpdated = " + countUpdated);
    }
    iw.commit();
    iw.close();
}
From source file: net.semanticmetadata.lire.indexing.MetricSpacesTest.java
License: Open Source License
public void testSearch() throws IOException {
    int docNumber = 1;
    MetricSpacesInvertedListIndexing ms = MetricSpacesInvertedListIndexing.getDefaultInstance();
    MetricSpacesInvertedListIndexing.numReferenceObjectsUsed = 10;
    MetricSpacesInvertedListIndexing.numReferenceObjects = 50;
    IndexReader reader = ms.getIndexReader(indexPath);
    TopDocs docs = ms.search(reader.document(docNumber), indexPath);
    // print the results
    BufferedWriter bw = new BufferedWriter(new FileWriter("out.html"));
    bw.write("<html><body>");
    for (int i = 0; i < docs.scoreDocs.length; i++) {
        ScoreDoc scoreDoc = docs.scoreDocs[i];
        bw.write("<img title=\"ID: " + scoreDoc.doc + ", " + "Score: " + scoreDoc.score + "\" src=\"file:///"
                + reader.document(scoreDoc.doc).getValues("descriptorImageIdentifier")[0] + "\"> ");
    }
    bw.write("</body></html>");
    bw.close();
    showUrl("out.html");
}
From source file: net.semanticmetadata.lire.indexing.MetricSpacesTest.java
License: Open Source License
public void testPerformance() throws IOException {
    MetricSpacesInvertedListIndexing mes = MetricSpacesInvertedListIndexing.getDefaultInstance();
    int numSearches = 10;
    IndexReader reader = mes.getIndexReader(indexPath);
    System.out.println(reader.maxDoc() + " documents");
    TopDocs docs;
    long ms = System.currentTimeMillis();
    for (int i = 0; i < numSearches; i++) {
        docs = mes.search(reader.document(i), indexPath);
    }
    ms = System.currentTimeMillis() - ms;
    System.out.println("ms = " + ms);
    ImageSearcher ceddSearcher = ImageSearcherFactory.createCEDDImageSearcher(100);
    ms = System.currentTimeMillis();
    for (int i = 0; i < numSearches; i++) {
        ceddSearcher.search(reader.document(i), reader);
    }
    ms = System.currentTimeMillis() - ms;
    System.out.println("ms = " + ms);
}