List of usage examples for org.apache.lucene.index.IndexReader.document(int)
public final Document document(int docID) throws IOException
Returns the stored fields of the nth Document in this index.
From source file:net.semanticmetadata.lire.impl.ParallelImageSearcher.java
License:Open Source License
/** * @param reader//from w w w. ja v a2 s . c o m * @param lireFeature * @return the maximum distance found for normalizing. * @throws java.io.IOException */ @SuppressWarnings("unchecked") private float[] findSimilar(IndexReader reader, LireFeature[] lireFeature) throws IOException { float[] maxDistance = new float[lireFeature.length]; float[] overallMaxDistance = new float[lireFeature.length]; for (int i = 0; i < overallMaxDistance.length; i++) { overallMaxDistance[i] = -1f; maxDistance[i] = -1f; } parDocs = new TreeSet[lireFeature.length]; for (int i = 0; i < parDocs.length; i++) { parDocs[i] = new TreeSet<SimpleResult>(); } // Needed for check whether the document is deleted. Bits liveDocs = MultiFields.getLiveDocs(reader); // clear result set ... int docs = reader.numDocs(); for (int i = 0; i < docs; i++) { if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it. Document d = reader.document(i); float[] distance = getDistance(d, lireFeature); // calculate the overall max distance to normalize score afterwards for (int j = 0; j < distance.length; j++) { float f = distance[j]; if (overallMaxDistance[j] < f) { overallMaxDistance[j] = f; } // if it is the first document: if (maxDistance[j] < 0) { maxDistance[j] = f; } // if the array is not full yet: if (this.parDocs[j].size() < maxHits) { this.parDocs[j].add(new SimpleResult(f, d, i)); if (f > maxDistance[j]) { maxDistance[j] = f; } } else if (f < maxDistance[j]) { // if it is nearer to the sample than at least on of the current set: // remove the last one ... this.parDocs[j].remove(this.parDocs[j].last()); // add the new one ... this.parDocs[j].add(new SimpleResult(f, d, i)); // and set our new distance border ... maxDistance[j] = this.parDocs[j].last().getDistance(); } } } return maxDistance; }
From source file:net.semanticmetadata.lire.impl.searcher.BitSamplingImageSearcher.java
License:Open Source License
private ImageSearchHits search(String[] hashes, LireFeature queryFeature, IndexReader reader) throws IOException { // first search by text: IndexSearcher searcher = new IndexSearcher(reader); // searcher.setSimilarity(new BaseSimilarity()); BooleanQuery query = new BooleanQuery(); for (int i = 0; i < hashes.length; i++) { // be aware that the hashFunctionsFileName of the field must match the one you put the hashes in before. if (partialHashes) { if (Math.random() < 0.5) query.add(new BooleanClause(new TermQuery(new Term(hashesFieldName, hashes[i] + "")), BooleanClause.Occur.SHOULD)); } else/*from w ww . j av a 2s . c o m*/ query.add(new BooleanClause(new TermQuery(new Term(hashesFieldName, hashes[i] + "")), BooleanClause.Occur.SHOULD)); } TopDocs docs = searcher.search(query, maxResultsHashBased); // System.out.println(docs.totalHits); // then re-rank TreeSet<SimpleResult> resultScoreDocs = new TreeSet<SimpleResult>(); float maxDistance = -1f; float tmpScore; for (int i = 0; i < docs.scoreDocs.length; i++) { feature.setByteArrayRepresentation( reader.document(docs.scoreDocs[i].doc).getBinaryValue(featureFieldName).bytes, reader.document(docs.scoreDocs[i].doc).getBinaryValue(featureFieldName).offset, reader.document(docs.scoreDocs[i].doc).getBinaryValue(featureFieldName).length); tmpScore = queryFeature.getDistance(feature); assert (tmpScore >= 0); if (resultScoreDocs.size() < maximumHits) { resultScoreDocs.add( new SimpleResult(tmpScore, reader.document(docs.scoreDocs[i].doc), docs.scoreDocs[i].doc)); maxDistance = Math.max(maxDistance, tmpScore); } else if (tmpScore < maxDistance) { // if it is nearer to the sample than at least one of the current set: // remove the last one ... resultScoreDocs.remove(resultScoreDocs.last()); // add the new one ... resultScoreDocs.add( new SimpleResult(tmpScore, reader.document(docs.scoreDocs[i].doc), docs.scoreDocs[i].doc)); // and set our new distance border ... 
maxDistance = resultScoreDocs.last().getDistance(); } } assert (resultScoreDocs.size() <= maximumHits); return new SimpleImageSearchHits(resultScoreDocs, maxDistance); }
From source file:net.semanticmetadata.lire.impl.searcher.GenericFastImageSearcher.java
License:Open Source License
/** * @param reader//from w ww . j av a2 s . c o m * @param lireFeature * @return the maximum distance found for normalizing. * @throws java.io.IOException */ protected float findSimilar(IndexReader reader, LireFeature lireFeature) throws IOException { maxDistance = -1f; // overallMaxDistance = -1f; // clear result set ... docs.clear(); // Needed for check whether the document is deleted. Bits liveDocs = MultiFields.getLiveDocs(reader); Document d; float tmpDistance; int docs = reader.numDocs(); if (!isCaching) { // we read each and every document from the index and then we compare it to the query. for (int i = 0; i < docs; i++) { if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it. d = reader.document(i); tmpDistance = getDistance(d, lireFeature); assert (tmpDistance >= 0); // if it is the first document: if (maxDistance < 0) { maxDistance = tmpDistance; } // if the array is not full yet: if (this.docs.size() < maxHits) { this.docs.add(new SimpleResult(tmpDistance, d, i)); if (tmpDistance > maxDistance) maxDistance = tmpDistance; } else if (tmpDistance < maxDistance) { // if it is nearer to the sample than at least on of the current set: // remove the last one ... this.docs.remove(this.docs.last()); // add the new one ... this.docs.add(new SimpleResult(tmpDistance, d, i)); // and set our new distance border ... maxDistance = this.docs.last().getDistance(); } } } else { // we use the in-memory cache to find the matching docs from the index. int count = 0; for (Iterator<byte[]> iterator = featureCache.iterator(); iterator.hasNext();) { cachedInstance.setByteArrayRepresentation(iterator.next()); if (reader.hasDeletions() && !liveDocs.get(count)) { count++; continue; // if it is deleted, just ignore it. 
} else { tmpDistance = lireFeature.getDistance(cachedInstance); assert (tmpDistance >= 0); // if it is the first document: if (maxDistance < 0) { maxDistance = tmpDistance; } // if the array is not full yet: if (this.docs.size() < maxHits) { this.docs.add(new SimpleResult(tmpDistance, reader.document(count), count)); if (tmpDistance > maxDistance) maxDistance = tmpDistance; } else if (tmpDistance < maxDistance) { // if it is nearer to the sample than at least on of the current set: // remove the last one ... this.docs.remove(this.docs.last()); // add the new one ... this.docs.add(new SimpleResult(tmpDistance, reader.document(count), count)); // and set our new distance border ... maxDistance = this.docs.last().getDistance(); } count++; } } } return maxDistance; }
From source file:net.semanticmetadata.lire.impl.searcher.KeyWordsImageSearcher.java
License:Open Source License
public ImageSearchHits search(BufferedImage image, ImageInfo imageInfo, IndexReader reader) throws IOException { if (imageInfo.getTitle() == null || imageInfo.getTitle().length() == 0) return null; SimpleImageSearchHits sh = null;//from w w w.j a v a 2 s .c o m //LuceneIndexReaderIndexSearcher IndexSearcher isearcher = new IndexSearcher(reader); //? String queryString = imageInfo.getTitle(); Query tq = null; try { //?? tq = qp.parse(queryString); // TopDocs docs = isearcher.search(tq, numMaxHits); LinkedList<SimpleResult> res = new LinkedList<SimpleResult>(); float maxDistance = 0; //???? for (int i = 0; i < docs.scoreDocs.length; i++) { float d = 1f / docs.scoreDocs[i].score; maxDistance = Math.max(d, maxDistance); SimpleResult sr = new SimpleResult(d, reader.document(docs.scoreDocs[i].doc), i); res.add(sr); } // sh = new SimpleImageSearchHits(res, maxDistance); } catch (ParseException e) { System.err.println(queryString); e.printStackTrace(); } return sh; }
From source file:net.semanticmetadata.lire.impl.searcher.LocationBasedImageSearcher.java
License:Open Source License
/** * @param reader//from w ww.j ava 2 s . co m * @param lireFeature * @return the maximum distance found for normalizing. * @throws java.io.IOException */ protected float findSimilar(IndexReader reader, ImageInfo imageInfo) throws IOException { float maxDistance = -1f, allMaxDistance = -1f; float tmpDistance = 0f; docs.clear(); //???? Bits liveDocs = MultiFields.getLiveDocs(reader); int docNumber = reader.numDocs(); Document d = null; for (int i = 0; i < docNumber; i++) { if (reader.hasDeletions() && !liveDocs.get(i)) continue; //? d = reader.document(i);//? tmpDistance = getDistance(d, imageInfo);//? //???? if (tmpDistance < 0 || tmpDistance > this.threshold) continue; //???? if (allMaxDistance < tmpDistance) { allMaxDistance = tmpDistance; } //?? if (maxDistance < 0) { maxDistance = tmpDistance; } //???? if (this.docs.size() < maxHits) { this.docs.add(new SimpleResult(tmpDistance, d, i)); if (tmpDistance > maxDistance) maxDistance = tmpDistance; //????? } else if (tmpDistance < maxDistance) { //??? this.docs.remove(this.docs.size() - 1); this.docs.add(new SimpleResult(tmpDistance, d, i)); //? maxDistance = tmpDistance; Collections.sort(docs); } } //? return maxDistance; }
From source file:net.semanticmetadata.lire.impl.searcher.TopDocsImageSearcher.java
License:Open Source License
/** * @param results//from w w w . j a v a 2 s . c o m * @param reader * @param lireFeature * @return the maximum distance found for normalizing. * @throws java.io.IOException */ protected float findSimilar(TopDocs results, IndexReader reader, LireFeature lireFeature) throws IOException { float maxDistance = -1f, overallMaxDistance = -1f; boolean hasDeletions = reader.hasDeletions(); // clear result set ... docs.clear(); // Needed for check whether the document is deleted. Bits liveDocs = MultiFields.getLiveDocs(reader); int docs = results.totalHits; for (int i = 0; i < docs; i++) { if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it. Document d = reader.document(results.scoreDocs[i].doc); float distance = getDistance(d, lireFeature); assert (distance >= 0); // calculate the overall max distance to normalize score afterwards if (overallMaxDistance < distance) { overallMaxDistance = distance; } // if it is the first document: if (maxDistance < 0) { maxDistance = distance; } // if the array is not full yet: if (this.docs.size() < maxHits) { this.docs.add(new SimpleResult(distance, d, i)); if (distance > maxDistance) maxDistance = distance; } else if (distance < maxDistance) { // if it is nearer to the sample than at least on of the current set: // remove the last one ... this.docs.remove(this.docs.last()); // add the new one ... this.docs.add(new SimpleResult(distance, d, i)); // and set our new distance border ... maxDistance = this.docs.last().getDistance(); } } return maxDistance; }
From source file:net.semanticmetadata.lire.impl.searcher.VisualWordsImageSearcher.java
License:Open Source License
/**
 * Searches the index using the first value of field {@code fieldName} of the
 * given document as a text query and converts the Lucene scores into
 * distances ({@code 1 / score}).
 *
 * @param doc    the query document; its first {@code fieldName} value is parsed as the query.
 * @param reader the index to search.
 * @return the hits with their distances, or {@code null} if the query string cannot be parsed.
 * @throws IOException if the index cannot be read.
 */
public ImageSearchHits search(Document doc, IndexReader reader) throws IOException {
    SimpleImageSearchHits sh = null;
    IndexSearcher isearcher = new IndexSearcher(reader);
    isearcher.setSimilarity(similarity);
    String queryString = doc.getValues(fieldName)[0];
    try {
        Query tq = qp.parse(queryString);
        TopDocs docs = isearcher.search(tq, numMaxHits);
        LinkedList<SimpleResult> res = new LinkedList<SimpleResult>();
        float maxDistance = 0;
        for (int i = 0; i < docs.scoreDocs.length; i++) {
            int docId = docs.scoreDocs[i].doc;
            // a higher score means a better match, so the distance is its reciprocal
            float d = 1f / docs.scoreDocs[i].score;
            maxDistance = Math.max(d, maxDistance);
            // Store the Lucene document number (not the rank in the hit list) so the
            // result identifies its document the same way the other searchers do.
            res.add(new SimpleResult(d, reader.document(docId), docId));
        }
        sh = new SimpleImageSearchHits(res, maxDistance);
    } catch (ParseException e) {
        // an unparsable query yields no result; the offending query is reported on stderr
        System.err.println(queryString);
        e.printStackTrace();
    }
    return sh;
}
From source file:net.semanticmetadata.lire.impl.SimpleImageSearcher.java
License:Open Source License
/** * @param reader/*w ww . j a va2 s. co m*/ * @param cl * @param sc * @param eh * @return the maximum distance found for normalizing. * @throws IOException */ private float findSimilar(IndexReader reader, ColorLayoutImpl cl, ScalableColorImpl sc, EdgeHistogramImplementation eh) throws IOException { float maxDistance = -1f, overallMaxDistance = -1f; boolean hasDeletions = reader.hasDeletions(); // clear result set ... docs.clear(); int docs = reader.numDocs(); for (int i = 0; i < docs; i++) { // bugfix by Roman Kern Document d = reader.document(i); float distance = getDistance(d, cl, sc, eh); // calculate the overall max distance to normalize score afterwards if (overallMaxDistance < distance) { overallMaxDistance = distance; } // if it is the first document: if (maxDistance < 0) { maxDistance = distance; } // if the array is not full yet: if (this.docs.size() < maxHits) { this.docs.add(new SimpleResult(distance, d)); if (distance > maxDistance) maxDistance = distance; } else if (distance < maxDistance) { // if it is nearer to the sample than at least on of the current set: // remove the last one ... this.docs.remove(this.docs.last()); // add the new one ... this.docs.add(new SimpleResult(distance, d)); // and set our new distance border ... maxDistance = this.docs.last().getDistance(); } } return maxDistance; }
From source file:net.semanticmetadata.lire.impl.SimpleImageSearcher.java
License:Open Source License
public ImageDuplicates findDuplicates(IndexReader reader) throws IOException { // get the first document: if (!IndexReader.indexExists(reader.directory())) throw new FileNotFoundException("No index found at this specific location."); Document doc = reader.document(0); ScalableColorImpl sc = null;// w w w . j av a 2 s .c om ColorLayoutImpl cl = null; EdgeHistogramImplementation eh = null; String[] cls = doc.getValues(DocumentBuilder.FIELD_NAME_COLORLAYOUT); if (cls != null && cls.length > 0) cl = new ColorLayoutImpl(cls[0]); String[] scs = doc.getValues(DocumentBuilder.FIELD_NAME_SCALABLECOLOR); if (scs != null && scs.length > 0) sc = new ScalableColorImpl(scs[0]); String[] ehs = doc.getValues(DocumentBuilder.FIELD_NAME_EDGEHISTOGRAM); if (ehs != null && ehs.length > 0) eh = new EdgeHistogramImplementation(ehs[0]); HashMap<Float, List<String>> duplicates = new HashMap<Float, List<String>>(); // find duplicates ... boolean hasDeletions = reader.hasDeletions(); int docs = reader.numDocs(); int numDuplicates = 0; for (int i = 0; i < docs; i++) { Document d = reader.document(i); float distance = getDistance(d, cl, sc, eh); if (!duplicates.containsKey(distance)) { duplicates.put(distance, new LinkedList<String>()); } else { numDuplicates++; } duplicates.get(distance).add(d.getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue()); } if (numDuplicates == 0) return null; LinkedList<List<String>> results = new LinkedList<List<String>>(); for (float f : duplicates.keySet()) { if (duplicates.get(f).size() > 1) { results.add(duplicates.get(f)); } } return new SimpleImageDuplicates(results); }
From source file:net.semanticmetadata.lire.indexing.HashingTest.java
License:Open Source License
private TopDocs rerank(TopDocs docs, LireFeature feature, IndexReader reader) throws IOException, IllegalAccessException, InstantiationException { LireFeature tmp = feature.getClass().newInstance(); ArrayList<ScoreDoc> res = new ArrayList<ScoreDoc>(docs.scoreDocs.length); float maxScore = 0f; for (int i = 0; i < docs.scoreDocs.length; i++) { tmp.setByteArrayRepresentation(reader.document(docs.scoreDocs[i].doc) .getBinaryValue(DocumentBuilder.FIELD_NAME_OPPONENT_HISTOGRAM).bytes); maxScore = Math.max(1 / tmp.getDistance(feature), maxScore); res.add(new ScoreDoc(docs.scoreDocs[i].doc, 1 / tmp.getDistance(feature))); }/*w w w.ja va 2 s . c o m*/ // sorting res ... Collections.sort(res, new Comparator<ScoreDoc>() { @Override public int compare(ScoreDoc o1, ScoreDoc o2) { return (int) Math.signum(o2.score - o1.score); } }); return new TopDocs(50, (ScoreDoc[]) res.toArray(new ScoreDoc[res.size()]), maxScore); }