at.lux.fotoretrieval.retrievalengines.LucenePathIndexRetrievalEngineTest.java Source code

Java tutorial

Introduction

Here is the source code for at.lux.fotoretrieval.retrievalengines.LucenePathIndexRetrievalEngineTest.java

Source

/*
 * This file is part of Caliph & Emir.
 *
 * Caliph & Emir is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * Caliph & Emir is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Caliph & Emir; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 * Copyright statement:
 * --------------------
 * (c) 2002-2005 by Mathias Lux (mathias@juggle.at)
 * http://www.juggle.at, http://caliph-emir.sourceforge.net
 */
package at.lux.fotoretrieval.retrievalengines;

import at.lux.fotoretrieval.ResultListEntry;
import at.lux.fotoretrieval.lucene.Graph;
import at.lux.fotoretrieval.lucene.Node;
import at.lux.fotoretrieval.lucene.similarity.TermFrequencySimilarity;
import junit.framework.TestCase;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;

import java.io.IOException;
import java.util.*;

/**
 * JUnit 3 tests for {@link LucenePathIndexRetrievalEngine}: index creation, raw
 * path-index search, semantic search and two precision evaluations that print
 * their results to standard output.
 * <p>
 * All index locations are resolved relative to {@link #pathToIndex}, so the tests
 * do not depend on a machine-specific absolute path.
 * <p>
 * Date: 26.03.2005
 * Time: 00:14:14
 *
 * @author Mathias Lux, mathias@juggle.at
 */
public class LucenePathIndexRetrievalEngineTest extends TestCase {
    /** Name of the path index directory below {@link #pathToIndex}. */
    private static final String PATHS_INDEX = "idx_paths";
    /** Name of the semantic (node id / label) index directory below {@link #pathToIndex}. */
    private static final String SEMANTIC_INDEX = "idx_semantic";
    /** Name of the full-text index directory below {@link #pathToIndex}. */
    private static final String FULLTEXT_INDEX = "idx_fulltext";
    /** Number of top-ranked reference results that are treated as relevant. */
    private static final int NUM_LEVELS = 10;

    private LucenePathIndexRetrievalEngine engine;
    private final String pathToIndex = "testdata";

    /**
     * Creates a fresh retrieval engine before each test.
     */
    protected void setUp() throws Exception {
        super.setUp();
        engine = new LucenePathIndexRetrievalEngine(50);
    }

    /**
     * Builds the semantic index for {@link #pathToIndex} and prints the "graph"
     * field of every document found in the resulting path index.
     */
    public void testCreateIndex() {
        engine.indexFilesSemantically(pathToIndex, null);
        IndexReader reader = null;
        try {
            reader = IndexReader.open(indexLocation(PATHS_INDEX));
            for (int i = 0; i < reader.numDocs(); i++) {
                System.out.println(reader.document(i).get("graph"));
            }
        } catch (IOException e) {
            e.printStackTrace();
            fail(e.toString());
        } finally {
            closeQuietly(reader);
        }
    }

    /**
     * Runs a wildcard query against the "graph" field of the path index and
     * prints score and graph of every hit.
     */
    public void testSearch() {
        IndexSearcher search = null;
        try {
            QueryParser qParser = new QueryParser("graph", new WhitespaceAnalyzer());
            // Same index that testCreateIndex() writes, instead of a hard-coded absolute path.
            search = new IndexSearcher(indexLocation(PATHS_INDEX));
            Hits h = search.search(qParser.parse("_*_0_1"));
            for (int i = 0; i < h.length(); i++) {
                System.out.println(h.score(i) + ": " + h.doc(i).get("graph"));
            }
        } catch (IOException e) {
            e.printStackTrace();
            fail(e.toString());
        } catch (ParseException e) {
            e.printStackTrace();
            fail(e.toString());
        } finally {
            closeQuietly(search);
        }
    }

    /**
     * Runs one semantic query through the engine and prints relevance and
     * description path of every result.
     */
    public void testSemanticSearch() {
        List<ResultListEntry> result = engine.getImagesBySemantics(
                "[\"Mathias Lux\"] [Talking] [\"Michael Granitzer\"] patientOf 1 2 agent 2 3", null, pathToIndex,
                true, null);
        for (ResultListEntry entry : result) {
            System.out.println(entry.getRelevance() + ": " + entry.getDescriptionPath());
        }
    }

    /**
     * Uses every indexed graph as a query against the path index and prints one
     * precision line per query (see {@link #testQuery}).
     */
    public void testPrecisionAndRecall() {
        IndexSearcher is = null;
        IndexReader ir = null;
        try {
            is = new IndexSearcher(indexLocation(PATHS_INDEX));
            ir = IndexReader.open(indexLocation(PATHS_INDEX));

            for (int i = 0; i < ir.numDocs(); i++) {
                testQuery(ir, new Graph(ir.document(i).getValues("graph")[0]), is);
            }
        } catch (IOException e) {
            e.printStackTrace();
            fail(e.toString());
        } catch (ParseException e) {
            // A query that cannot be parsed is a test failure, not something to ignore.
            e.printStackTrace();
            fail(e.toString());
        } finally {
            closeQuietly(is);
            closeQuietly(ir);
        }
    }

    /**
     * Uses every indexed graph as a query against the full-text index and prints
     * one precision line per query (see {@link #testDirectQuery}).
     */
    public void testPrecisionAndRecallFullText() {
        IndexSearcher is = null;
        IndexReader ir = null;
        try {
            is = new IndexSearcher(indexLocation(PATHS_INDEX));
            ir = IndexReader.open(indexLocation(PATHS_INDEX));

            for (int i = 0; i < ir.numDocs(); i++) {
                testDirectQuery(ir, new Graph(ir.document(i).getValues("graph")[0]), is);
            }
        } catch (IOException e) {
            e.printStackTrace();
            fail(e.toString());
        } catch (ParseException e) {
            // A query that cannot be parsed is a test failure, not something to ignore.
            e.printStackTrace();
            fail(e.toString());
        } finally {
            closeQuietly(is);
            closeQuietly(ir);
        }
    }

    /**
     * Ranks all indexed graphs once by {@code Graph.getMcsSimilarity} (reference
     * ranking) and once by a Lucene search on the "paths" field, then prints the
     * precision of the search ranking against the reference ranking.
     *
     * @param ir    reader on the path index, used to enumerate all graphs
     * @param query graph used as the query
     * @param is    searcher on the path index; its similarity is replaced by
     *              {@link TermFrequencySimilarity}
     * @throws IOException    if the index cannot be read
     * @throws ParseException if the generated path query cannot be parsed
     */
    private void testQuery(IndexReader ir, Graph query, IndexSearcher is) throws IOException, ParseException {
        // Reference ranking; remember which document number belongs to which graph
        // string so hits can be mapped back without re-reading the whole index per hit.
        LinkedList<ResultHolder> resultsMcs = new LinkedList<ResultHolder>();
        HashMap<String, Integer> docIdByGraph = new HashMap<String, Integer>();
        for (int j = 0; j < ir.numDocs(); j++) {
            Graph model = new Graph(ir.document(j).getValues("graph")[0]);
            String modelString = model.toString();
            // Later documents overwrite earlier ones, i.e. the last matching document wins.
            docIdByGraph.put(modelString, j);
            resultsMcs.add(new ResultHolder(j, modelString, query.getMcsSimilarity(model)));
        }
        Collections.sort(resultsMcs);

        // Search ranking. Set to another similarity here if necessary.
        is.setSimilarity(new TermFrequencySimilarity());
        String gQuery = LucenePathIndexRetrievalEngine.createLucenePathQuery(query);
        QueryParser qParse = new QueryParser("paths", new WhitespaceAnalyzer());
        Query q = qParse.parse(gQuery);
        Hits hits = is.search(q);

        LinkedList<ResultHolder> resultsSearch = new LinkedList<ResultHolder>();
        for (int i = 0; i < hits.length(); i++) {
            String graph = hits.doc(i).getValues("graph")[0];
            Integer docId = docIdByGraph.get(graph);
            // -1 marks a hit whose graph string matches no enumerated document.
            resultsSearch.add(new ResultHolder(docId == null ? -1 : docId.intValue(), graph, hits.score(i)));
        }
        Collections.sort(resultsSearch);
        printPrecisionRecallPlot(resultsMcs, resultsSearch);
    }

    /**
     * Ranks all files once by {@code Graph.getMcsSimilarity} of their graph
     * (reference ranking) and once by a full-text search for the labels of the
     * query's nodes, then prints the precision of the search ranking.
     *
     * @param ir    reader on the path index, used to enumerate graphs and their files
     * @param query graph whose node labels form the full-text query
     * @param is    unused; kept so the signature mirrors {@link #testQuery}
     * @throws IOException    if an index cannot be read
     * @throws ParseException if the generated label query cannot be parsed
     */
    private void testDirectQuery(IndexReader ir, Graph query, IndexSearcher is) throws IOException, ParseException {
        IndexReader reader = null;
        IndexSearcher searcher = null;
        try {
            reader = IndexReader.open(indexLocation(SEMANTIC_INDEX));
            searcher = new IndexSearcher(indexLocation(FULLTEXT_INDEX));

            // Node id -> label lookup taken from the semantic index.
            HashMap<Integer, String> node2label = new HashMap<Integer, String>();
            for (int j = 0; j < reader.numDocs(); j++) {
                String id = reader.document(j).getValues("id")[0];
                String label = reader.document(j).getValues("label")[0];
                node2label.put(Integer.parseInt(id), label);
            }

            // Reference ranking: every file inherits the similarity of its graph.
            LinkedList<ResultHolder> resultsMcs = new LinkedList<ResultHolder>();
            for (int j = 0; j < ir.numDocs(); j++) {
                Graph model = new Graph(ir.document(j).getValues("graph")[0]);
                float mcsSimilarity = query.getMcsSimilarity(model);
                String[] files = ir.document(j).getValues("file");
                for (String file : files) {
                    resultsMcs.add(new ResultHolder(mcsSimilarity, file));
                }
            }
            Collections.sort(resultsMcs);

            // Search ranking: whitespace-separated labels of all query nodes.
            StringBuilder qBuilder = new StringBuilder(64);
            for (Node node : query.getNodes()) {
                qBuilder.append(node2label.get(node.getNodeID()));
                qBuilder.append(" ");
            }
            QueryParser qParse = new QueryParser("all", new WhitespaceAnalyzer());
            Query q = qParse.parse(qBuilder.toString());
            Hits hits = searcher.search(q);

            LinkedList<ResultHolder> resultsSearch = new LinkedList<ResultHolder>();
            for (int i = 0; i < hits.length(); i++) {
                String file = hits.doc(i).getValues("file")[0];
                resultsSearch.add(new ResultHolder(hits.score(i), file));
            }
            Collections.sort(resultsSearch);
            printPrecisionRecallPlotFileBased(resultsMcs, resultsSearch);
        } finally {
            // This method runs once per indexed document, so leaking here adds up quickly.
            closeQuietly(searcher);
            closeQuietly(reader);
        }
    }

    /**
     * Prints the precision of {@code search} at every rank where it returns one
     * of the top reference results, identified by document number.
     * <p>
     * The first (at most {@value #NUM_LEVELS}) entries of {@code mcs} are taken as
     * the relevant set; fewer entries are accepted.
     *
     * @param mcs    reference ranking, best result first
     * @param search ranking under evaluation, best result first
     * @return always the empty string; the plot line is written to standard output
     */
    public static String printPrecisionRecallPlot(LinkedList<ResultHolder> mcs, LinkedList<ResultHolder> search) {
        int relevantCount = Math.min(NUM_LEVELS, mcs.size());
        HashSet<Integer> relevantDocIds = new HashSet<Integer>(NUM_LEVELS);
        for (ResultHolder r : mcs.subList(0, relevantCount)) {
            relevantDocIds.add(r.getDocumentNumber());
        }

        LinkedList<Integer> foundInSearch = new LinkedList<Integer>();
        int position = 1;
        for (ResultHolder r : search) {
            if (relevantDocIds.contains(r.getDocumentNumber())) {
                foundInSearch.add(position);
            }
            position++;
        }
        printPrecisionLine(foundInSearch);
        return "";
    }

    /**
     * Prints the precision of {@code search} at every rank where it returns one
     * of the top reference results, identified by file name.
     * <p>
     * The first (at most {@value #NUM_LEVELS}) entries of {@code mcs} are taken as
     * the relevant set; fewer entries are accepted.
     *
     * @param mcs    reference ranking, best result first
     * @param search ranking under evaluation, best result first
     * @return always the empty string; the plot line is written to standard output
     */
    public static String printPrecisionRecallPlotFileBased(LinkedList<ResultHolder> mcs,
            LinkedList<ResultHolder> search) {
        int relevantCount = Math.min(NUM_LEVELS, mcs.size());
        HashSet<String> relevantFiles = new HashSet<String>(NUM_LEVELS);
        for (ResultHolder r : mcs.subList(0, relevantCount)) {
            relevantFiles.add(r.getFile());
        }

        LinkedList<Integer> foundInSearch = new LinkedList<Integer>();
        int position = 1;
        for (ResultHolder r : search) {
            if (relevantFiles.contains(r.getFile())) {
                foundInSearch.add(position);
            }
            position++;
        }
        printPrecisionLine(foundInSearch);
        return "";
    }

    /**
     * Writes one "precision ..." line to standard output: the k-th value is
     * k divided by the 1-based rank at which the k-th relevant result was found.
     * Decimal points are replaced by commas in the printed line.
     *
     * @param foundPositions 1-based ranks of the relevant results, ascending
     */
    private static void printPrecisionLine(List<Integer> foundPositions) {
        StringBuilder line = new StringBuilder(256);
        line.append("precision 1 ");
        int found = 1;
        for (Integer rank : foundPositions) {
            float precision = ((float) found) / rank.floatValue();
            line.append(precision);
            line.append(" ");
            found++;
        }
        System.out.println(line.toString().replace('.', ','));
    }

    /** Resolves the location of the named index directory below {@link #pathToIndex}. */
    private String indexLocation(String indexName) {
        return pathToIndex + "/" + indexName;
    }

    /** Closes the reader if it was opened; a failure to close is only reported. */
    private static void closeQuietly(IndexReader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Closes the searcher if it was opened; a failure to close is only reported. */
    private static void closeQuietly(IndexSearcher searcher) {
        if (searcher == null) {
            return;
        }
        try {
            searcher.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

}

/**
 * One entry of a ranked result list: a similarity score together with either a
 * document number and graph string, or a file name.
 * <p>
 * The natural ordering is by descending similarity, so sorting a list puts the
 * best result first. Entries whose similarity is NaN are ordered after all others.
 */
class ResultHolder implements Comparable {
    private float similarity;
    private int documentNumber;
    private String graph;
    private String file;

    /**
     * Creates an entry identified by document number and graph; the file stays {@code null}.
     *
     * @param documentNumber number of the document in the index
     * @param graph          string form of the document's graph
     * @param similarity     score of the document
     */
    public ResultHolder(int documentNumber, String graph, float similarity) {
        this.documentNumber = documentNumber;
        this.graph = graph;
        this.similarity = similarity;
    }

    /**
     * Creates an entry identified by file name; the document number stays 0 and
     * the graph stays {@code null}.
     *
     * @param similarity score of the file
     * @param file       name of the file
     */
    public ResultHolder(float similarity, String file) {
        this.file = file;
        this.similarity = similarity;
    }

    public String getFile() {
        return file;
    }

    public int getDocumentNumber() {
        return documentNumber;
    }

    public void setDocumentNumber(int documentNumber) {
        this.documentNumber = documentNumber;
    }

    public String getGraph() {
        return graph;
    }

    public void setGraph(String graph) {
        this.graph = graph;
    }

    public float getSimilarity() {
        return similarity;
    }

    public void setSimilarity(float similarity) {
        this.similarity = similarity;
    }

    /**
     * Orders entries by descending similarity.
     * <p>
     * The comparison does not subtract the scores: a difference involving NaN is
     * NaN, which would make a NaN entry compare equal to every other entry and
     * break the ordering {@code Collections.sort} relies on. NaN entries are
     * placed last instead.
     *
     * @param o the object to compare with
     * @return a negative value if this entry ranks before {@code o}, a positive
     *         value if it ranks after, and 0 if both rank equally or {@code o}
     *         is not a {@code ResultHolder}
     */
    public int compareTo(Object o) {
        if (!(o instanceof ResultHolder)) {
            return 0;
        }
        ResultHolder other = (ResultHolder) o;
        boolean thisIsNaN = Float.isNaN(similarity);
        boolean otherIsNaN = Float.isNaN(other.similarity);
        if (thisIsNaN || otherIsNaN) {
            if (thisIsNaN && otherIsNaN) {
                return 0;
            }
            return thisIsNaN ? 1 : -1;
        }
        // Arguments swapped on purpose: higher similarity sorts first.
        return Float.compare(other.similarity, similarity);
    }
}