io.anserini.search.SimpleSearcher.java Source code

Java tutorial

Introduction

Here is the source code for io.anserini.search.SimpleSearcher.java

Source

/**
 * Anserini: An information retrieval toolkit built on Lucene
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.anserini.search;

import io.anserini.index.generator.LuceneDocumentGenerator;
import io.anserini.util.AnalyzerUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.similarities.LMDirichletSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.FSDirectory;

import java.io.Closeable;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

/**
 * Minimal search facade over a Lucene index stored on the local file system.
 *
 * <p>Queries are analyzed with an {@link EnglishAnalyzer}, turned into a bag-of-words query over
 * {@code LuceneDocumentGenerator.FIELD_BODY}, and scored with an {@link LMDirichletSimilarity}
 * constructed with {@code 1000.0f}.
 *
 * <p>Instances own the index directory, the index reader and the analyzer; call {@link #close()}
 * (or use try-with-resources) to release them.
 */
public class SimpleSearcher implements Closeable {
    private static final Logger LOG = LogManager.getLogger(SimpleSearcher.class);

    // Kept so close() can release it: closing the reader does not close the directory it was opened on.
    private final FSDirectory directory;
    private final IndexReader reader;
    private final Similarity similarity;
    private final Analyzer analyzer;

    /** A single hit returned by {@link #search(String, int)}. */
    protected class Result {
        /** Value of the stored {@code FIELD_ID} field, or {@code null} if the hit has no such field. */
        public String docid;
        /** Lucene document number of the hit, as reported in {@link ScoreDoc#doc}. */
        public int ldocid;
        /** Retrieval score of the hit, as reported in {@link ScoreDoc#score}. */
        public float score;
        /** Value of the stored {@code FIELD_RAW} field, or {@code null} if the hit has no such field. */
        public String content;

        public Result(String docid, int ldocid, float score, String content) {
            this.docid = docid;
            this.ldocid = ldocid;
            this.score = score;
            this.content = content;
        }
    }

    /**
     * Opens the index located at {@code indexDir}.
     *
     * @param indexDir path of the directory holding the Lucene index
     * @throws IllegalArgumentException if the path is not an existing, readable directory
     * @throws IOException if the index cannot be opened
     */
    public SimpleSearcher(String indexDir) throws IOException {
        Path indexPath = Paths.get(indexDir);

        // Files.isDirectory is already false for a non-existent path, so no separate exists() check.
        if (!Files.isDirectory(indexPath) || !Files.isReadable(indexPath)) {
            throw new IllegalArgumentException(
                    indexDir + " does not exist, is not a directory, or is not readable.");
        }

        FSDirectory openedDirectory = FSDirectory.open(indexPath);
        IndexReader openedReader;
        try {
            openedReader = DirectoryReader.open(openedDirectory);
        } catch (IOException | RuntimeException e) {
            // Do not leak the directory when the reader cannot be opened (e.g. no index in the dir).
            try {
                openedDirectory.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }

        this.directory = openedDirectory;
        this.reader = openedReader;
        this.similarity = new LMDirichletSimilarity(1000.0f);
        this.analyzer = new EnglishAnalyzer();
    }

    /**
     * Releases the index reader, the analyzer and the underlying directory.
     *
     * @throws IOException if closing the reader or the directory fails
     */
    @Override
    public void close() throws IOException {
        // Nested finally blocks so that a failure in one close() does not skip the others.
        try {
            reader.close();
        } finally {
            try {
                analyzer.close();
            } finally {
                directory.close();
            }
        }
    }

    /**
     * Searches the index and returns at most 10 hits.
     *
     * @param q query text
     * @return the hits in the order returned by Lucene
     * @throws IOException if reading the index fails
     */
    public Result[] search(String q) throws IOException {
        return search(q, 10);
    }

    /**
     * Searches the index and returns at most {@code k} hits.
     *
     * @param q query text; analyzed into a bag-of-words query over {@code FIELD_BODY}
     * @param k maximum number of hits requested from Lucene
     * @return the hits in the order returned by Lucene; the array length is the number of hits found
     * @throws IOException if reading the index fails
     */
    public Result[] search(String q, int k) throws IOException {
        IndexSearcher searcher = new IndexSearcher(reader);
        searcher.setSimilarity(similarity);
        Query query = AnalyzerUtils.buildBagOfWordsQuery(LuceneDocumentGenerator.FIELD_BODY, analyzer, q);

        TopDocs rs = searcher.search(query, k);
        ScoreDoc[] hits = rs.scoreDocs;

        Result[] results = new Result[hits.length];
        for (int i = 0; i < hits.length; i++) {
            Document doc = searcher.doc(hits[i].doc);
            // Either stored field may be absent; report null instead of failing the whole search.
            String docid = storedValue(doc, LuceneDocumentGenerator.FIELD_ID);
            String content = storedValue(doc, LuceneDocumentGenerator.FIELD_RAW);
            results[i] = new Result(docid, hits[i].doc, hits[i].score, content);
        }

        return results;
    }

    /**
     * Returns the stored {@code FIELD_RAW} value of a document.
     *
     * @param ldocid Lucene document number, e.g. {@link Result#ldocid}
     * @return the raw content, or {@code null} if the document has no such field or an
     *         {@link IOException} occurred while reading it (the failure is logged)
     */
    public String doc(int ldocid) {
        Document doc;
        try {
            doc = reader.document(ldocid);
        } catch (IOException e) {
            // Best-effort lookup: keep returning null, but leave a trace of what went wrong.
            LOG.warn("Unable to read document " + ldocid + " from the index.", e);
            return null;
        }

        return storedValue(doc, LuceneDocumentGenerator.FIELD_RAW);
    }

    /** Returns the string value of the named stored field, or {@code null} if the field is absent. */
    private static String storedValue(Document doc, String fieldName) {
        IndexableField field = doc.getField(fieldName);
        return field == null ? null : field.stringValue();
    }
}