org.apache.lucene.benchmark.quality.trec.TrecJudge.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.lucene.benchmark.quality.trec.TrecJudge.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.benchmark.quality.trec;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.StringTokenizer;

import org.apache.lucene.benchmark.quality.Judge;
import org.apache.lucene.benchmark.quality.QualityQuery;

/**
 * Judge if given document is relevant to given quality query, based on Trec format for judgements.
 */
public class TrecJudge implements Judge {

    HashMap<String, QRelJudgement> judgements;

    /**
     * Constructor from a reader.
     * <p>
     * Expected input format:
     * <pre>
     *     qnum  0   doc-name     is-relevant
     * </pre> 
     * Two sample lines:
     * <pre> 
     *     19    0   doc303       1
     *     19    0   doc7295      0
     * </pre> 
     * @param reader where judgements are read from.
     * @throws IOException If there is a low-level I/O error.
     */
    public TrecJudge(BufferedReader reader) throws IOException {
        judgements = new HashMap<>();
        QRelJudgement curr = null;
        String zero = "0";
        String line;

        try {
            while (null != (line = reader.readLine())) {
                line = line.trim();
                if (line.length() == 0 || '#' == line.charAt(0)) {
                    continue;
                }
                StringTokenizer st = new StringTokenizer(line);
                String queryID = st.nextToken();
                st.nextToken();
                String docName = st.nextToken();
                boolean relevant = !zero.equals(st.nextToken());
                assert !st.hasMoreTokens() : "wrong format: " + line + "  next: " + st.nextToken();
                if (relevant) { // only keep relevant docs
                    if (curr == null || !curr.queryID.equals(queryID)) {
                        curr = judgements.get(queryID);
                        if (curr == null) {
                            curr = new QRelJudgement(queryID);
                            judgements.put(queryID, curr);
                        }
                    }
                    curr.addRelevandDoc(docName);
                }
            }
        } finally {
            reader.close();
        }
    }

    // inherit javadocs
    @Override
    public boolean isRelevant(String docName, QualityQuery query) {
        QRelJudgement qrj = judgements.get(query.getQueryID());
        return qrj != null && qrj.isRelevant(docName);
    }

    /** single Judgement of a trec quality query */
    private static class QRelJudgement {
        private String queryID;
        private HashMap<String, String> relevantDocs;

        QRelJudgement(String queryID) {
            this.queryID = queryID;
            relevantDocs = new HashMap<>();
        }

        public void addRelevandDoc(String docName) {
            relevantDocs.put(docName, docName);
        }

        boolean isRelevant(String docName) {
            return relevantDocs.containsKey(docName);
        }

        public int maxRecall() {
            return relevantDocs.size();
        }
    }

    // inherit javadocs
    @Override
    public boolean validateData(QualityQuery[] qq, PrintWriter logger) {
        HashMap<String, QRelJudgement> missingQueries = new HashMap<>(judgements);
        ArrayList<String> missingJudgements = new ArrayList<>();
        for (int i = 0; i < qq.length; i++) {
            String id = qq[i].getQueryID();
            if (missingQueries.containsKey(id)) {
                missingQueries.remove(id);
            } else {
                missingJudgements.add(id);
            }
        }
        boolean isValid = true;
        if (missingJudgements.size() > 0) {
            isValid = false;
            if (logger != null) {
                logger.println("WARNING: " + missingJudgements.size() + " queries have no judgments! - ");
                for (int i = 0; i < missingJudgements.size(); i++) {
                    logger.println("   " + missingJudgements.get(i));
                }
            }
        }
        if (missingQueries.size() > 0) {
            isValid = false;
            if (logger != null) {
                logger.println("WARNING: " + missingQueries.size() + " judgments match no query! - ");
                for (final String id : missingQueries.keySet()) {
                    logger.println("   " + id);
                }
            }
        }
        return isValid;
    }

    // inherit javadocs
    @Override
    public int maxRecall(QualityQuery query) {
        QRelJudgement qrj = judgements.get(query.getQueryID());
        if (qrj != null) {
            return qrj.maxRecall();
        }
        return 0;
    }
}