net.semanticmetadata.lire.impl.SimpleImageSearcher.java Source code

Java tutorial

Introduction

Here is the source code for net.semanticmetadata.lire.impl.SimpleImageSearcher.java

Source

package net.semanticmetadata.lire.impl;

import at.lux.imageanalysis.ColorLayoutImpl;
import at.lux.imageanalysis.EdgeHistogramImplementation;
import at.lux.imageanalysis.ScalableColorImpl;
import net.semanticmetadata.lire.AbstractImageSearcher;
import net.semanticmetadata.lire.DocumentBuilder;
import net.semanticmetadata.lire.ImageDuplicates;
import net.semanticmetadata.lire.ImageSearchHits;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;

import java.awt.image.BufferedImage;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.TreeSet;
/*
 * This file is part of the Caliph and Emir project: http://www.SemanticMetadata.net.
 *
 * Caliph & Emir is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * Caliph & Emir is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Caliph & Emir; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 * Copyright statement:
 * --------------------
 * (c) 2002-2006 by Mathias Lux (mathias@juggle.at)
 * http://www.juggle.at, http://www.SemanticMetadata.net
 */

/**
 * This file is part of the Caliph and Emir project: http://www.SemanticMetadata.net
 * <br>Date: 01.02.2006
 * <br>Time: 00:17:02
 *
 * @author Mathias Lux, mathias@juggle.at
 */
public class SimpleImageSearcher extends AbstractImageSearcher {
    private int maxHits = 10;
    float colorHistogramWeight = 1f;
    float colorDistributionWeight = 1f;
    float textureWeight = 1f;
    private TreeSet<SimpleResult> docs;

    public SimpleImageSearcher(int maxHits) {
        this.maxHits = maxHits;
        docs = new TreeSet<SimpleResult>();
    }

    public SimpleImageSearcher(int maxHits, float colorHistogramWeight, float colorDistributionWeight,
            float textureWeight) {
        this.maxHits = maxHits;
        docs = new TreeSet<SimpleResult>();
        this.colorDistributionWeight = colorDistributionWeight;
        this.colorHistogramWeight = colorHistogramWeight;
        this.textureWeight = textureWeight;
    }

    public ImageSearchHits search(BufferedImage image, IndexReader reader) throws IOException {
        ScalableColorImpl sc = null;
        ColorLayoutImpl cl = null;
        EdgeHistogramImplementation eh = null;
        if (colorHistogramWeight > 0f)
            sc = new ScalableColorImpl(image);
        if (colorDistributionWeight > 0f)
            cl = new ColorLayoutImpl(image);
        if (textureWeight > 0f)
            eh = new EdgeHistogramImplementation(image);

        float maxDistance = findSimilar(reader, cl, sc, eh);
        return new SimpleImageSearchHits(this.docs, maxDistance);
    }

    /**
     * @param reader
     * @param cl
     * @param sc
     * @param eh
     * @return the maximum distance found for normalizing.
     * @throws IOException
     */
    private float findSimilar(IndexReader reader, ColorLayoutImpl cl, ScalableColorImpl sc,
            EdgeHistogramImplementation eh) throws IOException {
        float maxDistance = -1f, overallMaxDistance = -1f;
        boolean hasDeletions = reader.hasDeletions();

        // clear result set ...
        docs.clear();

        int docs = reader.numDocs();
        for (int i = 0; i < docs; i++) {
            // bugfix by Roman Kern

            Document d = reader.document(i);
            float distance = getDistance(d, cl, sc, eh);
            // calculate the overall max distance to normalize score afterwards
            if (overallMaxDistance < distance) {
                overallMaxDistance = distance;
            }
            // if it is the first document:
            if (maxDistance < 0) {
                maxDistance = distance;
            }
            // if the array is not full yet:
            if (this.docs.size() < maxHits) {
                this.docs.add(new SimpleResult(distance, d));
                if (distance > maxDistance)
                    maxDistance = distance;
            } else if (distance < maxDistance) {
                // if it is nearer to the sample than at least on of the current set:
                // remove the last one ...
                this.docs.remove(this.docs.last());
                // add the new one ...
                this.docs.add(new SimpleResult(distance, d));
                // and set our new distance border ...
                maxDistance = this.docs.last().getDistance();
            }
        }
        return maxDistance;
    }

    private float getDistance(Document d, ColorLayoutImpl cl, ScalableColorImpl sc,
            EdgeHistogramImplementation eh) {
        float distance = 0f;
        int descriptorCount = 0;

        if (cl != null) {
            String[] cls = d.getValues(DocumentBuilder.FIELD_NAME_COLORLAYOUT);
            if (cls != null && cls.length > 0) {
                ColorLayoutImpl clsi = new ColorLayoutImpl(cls[0]);
                distance += cl.getDistance(clsi) * colorDistributionWeight;
                descriptorCount++;
            }
        }

        if (sc != null) {
            String[] scs = d.getValues(DocumentBuilder.FIELD_NAME_SCALABLECOLOR);
            if (scs != null && scs.length > 0) {
                ScalableColorImpl scsi = new ScalableColorImpl(scs[0]);
                distance += sc.getDistance(scsi) * colorHistogramWeight;
                descriptorCount++;
            }
        }

        if (eh != null) {
            String[] ehs = d.getValues(DocumentBuilder.FIELD_NAME_EDGEHISTOGRAM);
            if (ehs != null && ehs.length > 0) {
                EdgeHistogramImplementation ehsi = new EdgeHistogramImplementation(ehs[0]);
                distance += eh.getDistance(ehsi) * textureWeight;
                descriptorCount++;
            }
        }

        if (descriptorCount > 0) {
            // TODO: find some better scoring mechanism, e.g. some normalization. One thing would be linearization of the features!
            // For now: Averaging ...
            distance = distance / (float) descriptorCount;
        }
        return distance;
    }

    public ImageSearchHits search(Document doc, IndexReader reader) throws IOException {
        ScalableColorImpl sc = null;
        ColorLayoutImpl cl = null;
        EdgeHistogramImplementation eh = null;

        String[] cls = doc.getValues(DocumentBuilder.FIELD_NAME_COLORLAYOUT);
        if (cls != null && cls.length > 0)
            cl = new ColorLayoutImpl(cls[0]);
        String[] scs = doc.getValues(DocumentBuilder.FIELD_NAME_SCALABLECOLOR);
        if (scs != null && scs.length > 0)
            sc = new ScalableColorImpl(scs[0]);
        String[] ehs = doc.getValues(DocumentBuilder.FIELD_NAME_EDGEHISTOGRAM);
        if (ehs != null && ehs.length > 0)
            eh = new EdgeHistogramImplementation(ehs[0]);

        float maxDistance = findSimilar(reader, cl, sc, eh);

        return new SimpleImageSearchHits(this.docs, maxDistance);
    }

    public ImageDuplicates findDuplicates(IndexReader reader) throws IOException {
        // get the first document:
        if (!IndexReader.indexExists(reader.directory()))
            throw new FileNotFoundException("No index found at this specific location.");
        Document doc = reader.document(0);
        ScalableColorImpl sc = null;
        ColorLayoutImpl cl = null;
        EdgeHistogramImplementation eh = null;

        String[] cls = doc.getValues(DocumentBuilder.FIELD_NAME_COLORLAYOUT);
        if (cls != null && cls.length > 0)
            cl = new ColorLayoutImpl(cls[0]);
        String[] scs = doc.getValues(DocumentBuilder.FIELD_NAME_SCALABLECOLOR);
        if (scs != null && scs.length > 0)
            sc = new ScalableColorImpl(scs[0]);
        String[] ehs = doc.getValues(DocumentBuilder.FIELD_NAME_EDGEHISTOGRAM);
        if (ehs != null && ehs.length > 0)
            eh = new EdgeHistogramImplementation(ehs[0]);

        HashMap<Float, List<String>> duplicates = new HashMap<Float, List<String>>();

        // find duplicates ...
        boolean hasDeletions = reader.hasDeletions();

        int docs = reader.numDocs();
        int numDuplicates = 0;
        for (int i = 0; i < docs; i++) {
            Document d = reader.document(i);
            float distance = getDistance(d, cl, sc, eh);

            if (!duplicates.containsKey(distance)) {
                duplicates.put(distance, new LinkedList<String>());
            } else {
                numDuplicates++;
            }
            duplicates.get(distance).add(d.getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }

        if (numDuplicates == 0)
            return null;

        LinkedList<List<String>> results = new LinkedList<List<String>>();
        for (float f : duplicates.keySet()) {
            if (duplicates.get(f).size() > 1) {
                results.add(duplicates.get(f));
            }
        }
        return new SimpleImageDuplicates(results);
    }

}