nl.inl.blacklab.search.Hit.java Source code

Java tutorial

Introduction

Here is the source code for nl.inl.blacklab.search.Hit.java

Source

/*******************************************************************************
 * Copyright (c) 2010, 2012 Institute for Dutch Lexicology
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/
package nl.inl.blacklab.search;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.spans.Spans;

import nl.inl.blacklab.search.lucene.BLSpans;

/**
 * Class for a hit. Normally, hits are iterated over in a Lucene Spans object, but in some places,
 * it makes sense to place hits in separate objects: when caching or sorting hits, or just for
 * convenience in client code.
 *
 * This class has public members for the sake of efficiency; this makes a non-trivial difference
 * when iterating over hundreds of thousands of hits.
 */
public class Hit implements Comparable<Hit>, Cloneable {
    /**
     * Get the hit object from a Spans object.
     *
     * This method makes sure Hit objects aren't reinstantiated unnecessarily, as well as making
     * sure Hit subclass objects aren't squashed back into regular Hit objects.
     *
     * Subclasses of Hit should implement their own version of this function to make sure they
     * return the proper type.
     *
     * @param spans
     *            the Spans to get the Hit from
     * @return the Hit [subclass] object
     * @deprecated use BLSpans.getHit()
     */
    @Deprecated
    public static Hit getHit(BLSpans spans) {
        return spans.getHit();
    }

    /**
     * Retrieve a list of Hit objects from a Spans.
     *
     * @param spans
     *            where to retrieve the hits
     * @return the list of hits
     * @deprecated use Hits class
     */
    @Deprecated
    public static List<Hit> hitList(BLSpans spans) {
        List<Hit> result = new ArrayList<>();
        try {
            while (spans.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
                    result.add(spans.getHit());
                }
            }
            return result;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    @Override
    public boolean equals(Object with) {
        if (this == with)
            return true;
        if (with instanceof Hit) {
            Hit o = (Hit) with;
            return doc == o.doc && start == o.start && end == o.end;
        }
        return false;
    }

    @Override
    public int compareTo(Hit o) {
        if (this == o)
            return 0;
        if (doc == o.doc) {
            if (start == o.start) {
                return end - o.end;
            }
            return start - o.start;
        }
        return doc - o.doc;
    }

    /** The Lucene doc this hits occurs in */
    public int doc;

    /** End of this hit's span (in word positions).
     *
     *  Note that this actually points to the first word not in the hit (just like Spans).
     */
    public int end;

    /** Start of this hit's span (in word positions) */
    public int start;

    //   /** Context information */
    //   public int[] context;
    //
    //   /** Where in the context array the hit text starts */
    //   public int contextHitStart;
    //
    //   /** Where in the context array the right context starts */
    //   public int contextRightStart;
    //
    //   /** How many words one context takes up (context[] may contain multiple contexts) */
    //   public int contextLength;

    /**
     * Construct a hit object
     *
     * @param doc
     *            the document
     * @param start
     *            start of the hit (word positions)
     * @param end
     *            end of the hit (word positions)
     */
    public Hit(int doc, int start, int end) {
        this.doc = doc;
        this.start = start;
        this.end = end;
    }

    @Override
    public String toString() {
        return String.format("doc %d, words %d-%d", doc, start, end);
    }

    @Override
    public int hashCode() {
        return (doc * 17 + start) * 31 + end;
    }

    @Override
    protected Object clone() {
        Hit hit = new Hit(doc, start, end);
        //      hit.context = context;
        //      hit.contextHitStart = contextHitStart;
        //      hit.contextRightStart = contextRightStart;
        //      hit.contextLength = contextLength;
        return hit;
    }

    public Collection<byte[]> getPayload() {
        // FIXME: option to store payload in Hit, probably using subclass
        return null;
    }

    public boolean isPayloadAvailable() {
        return false;
    }

}