Example usage for org.apache.lucene.index FieldInvertState getNumOverlap

List of usage examples for org.apache.lucene.index FieldInvertState getNumOverlap

Introduction

On this page you can find example usages of org.apache.lucene.index FieldInvertState getNumOverlap.

Prototype

public int getNumOverlap() 

Source Link

Document

Get the number of terms with positionIncrement == 0.

Usage

From source file:BM25LSimilarity.java

License:Apache License

/**
 * Encodes the norm for a field from its boost and its term count.
 *
 * <p>The term count starts as {@link FieldInvertState#getLength()}; when
 * {@code discountOverlaps} is set, {@link FieldInvertState#getNumOverlap()}
 * (the number of terms with positionIncrement == 0) is subtracted from it.
 *
 * @param state the inversion state of the field being indexed
 * @return the value produced by {@code encodeNormValue} for the boost and term count
 */
@Override
public final long computeNorm(FieldInvertState state) {
    int termCount = state.getLength();
    if (discountOverlaps) {
        termCount -= state.getNumOverlap();
    }
    return encodeNormValue(state.getBoost(), termCount);
}

From source file:com.core.nlp.similarity.DefaultSimilarity.java

License:Apache License

/**
 * Implemented as/*w w w .java 2 s .c om*/
 * <code>state.getBoost()*lengthNorm(numTerms)</code>, where
 * <code>numTerms</code> is {@link FieldInvertState#getLength()} if {@link
 * #setDiscountOverlaps} is false, else it's {@link
 * FieldInvertState#getLength()} - {@link
 * FieldInvertState#getNumOverlap()}.
 *
 * @lucene.experimental
 */
@Override
public float lengthNorm(FieldInvertState state) {
    final int numTerms;
    if (discountOverlaps)
        numTerms = state.getLength() - state.getNumOverlap();
    else
        numTerms = state.getLength();
    return state.getBoost() * ((float) (1.0 / Math.sqrt(numTerms)));
}

From source file:com.jaeksoft.searchlib.schema.FairSimilarity.java

License:Open Source License

/**
 * Computes a linear length norm: the field boost multiplied by the
 * reciprocal of the term count.
 *
 * <p>The term count is {@link FieldInvertState#getLength()}, reduced by
 * {@link FieldInvertState#getNumOverlap()} when {@code discountOverlaps}
 * is set.
 *
 * @param field the field name (not used by this computation)
 * @param state the inversion state of the field being indexed
 * @return {@code state.getBoost() * (1 / termCount)}
 */
@Override
final public float computeNorm(String field, FieldInvertState state) {
    int termCount = state.getLength();
    if (discountOverlaps) {
        termCount -= state.getNumOverlap();
    }
    final float inverseLength = (float) (1.0 / termCount);
    return state.getBoost() * inverseLength;
}

From source file:elhuyar.bilakit.SimilarityCLIRFactory.java

License:Open Source License

/** Returns the field boost scaled by the inverse square root of the term count,
 *  i.e. <code>state.getBoost() * (1 / sqrt(numTerms))</code>.
 *  <code>numTerms</code> is {@link FieldInvertState#getLength()} if {@link
 *  #setDiscountOverlaps} is false, else it's {@link
 *  FieldInvertState#getLength()} - {@link
 *  FieldInvertState#getNumOverlap()}.
 *
 *  @lucene.experimental */
public float lengthNorm(FieldInvertState state) {
    final int numTerms = discountOverlaps
            ? state.getLength() - state.getNumOverlap()
            : state.getLength();
    final float inverseRoot = (float) (1.0 / Math.sqrt(numTerms));
    return state.getBoost() * inverseRoot;
}

From source file:eu.europeana.ranking.bm25f.similarity.BM25FSimilarity.java

License:Apache License

/**
 * Stores the encoded norm for a field into {@code norm}.
 *
 * <p>The term count is {@link FieldInvertState#getLength()}, minus
 * {@link FieldInvertState#getNumOverlap()} when {@code discountOverlaps}
 * is set. The result of {@code encodeNormValue} for the boost and that
 * count is written with {@code norm.setByte}.
 *
 * @param state the inversion state of the field being indexed
 * @param norm  the destination that receives the encoded value
 */
@Override
public final void computeNorm(FieldInvertState state, Norm norm) {
    int termCount = state.getLength();
    if (discountOverlaps) {
        termCount -= state.getNumOverlap();
    }
    norm.setByte(encodeNormValue(state.getBoost(), termCount));
}

From source file:io.anserini.search.similarity.F2LogSimilarity.java

License:Apache License

/**
 * Encodes the norm for a field from its boost and its term count.
 *
 * @param state the inversion state of the field being indexed
 * @return {@code encodeNormValue(boost, termCount)}, where the term count
 *         excludes {@link FieldInvertState#getNumOverlap()} when
 *         {@code discountOverlaps} is set
 */
@Override
public long computeNorm(FieldInvertState state) {
    final int fieldLength = state.getLength();
    final int termCount;
    if (discountOverlaps) {
        termCount = fieldLength - state.getNumOverlap();
    } else {
        termCount = fieldLength;
    }
    return encodeNormValue(state.getBoost(), termCount);
}

From source file:main.BM25VASimilarity.java

License:Apache License

/**
 * Encodes the norm for a field from its boost and its term count.
 *
 * <p>The term count is {@link FieldInvertState#getLength()}, minus
 * {@link FieldInvertState#getNumOverlap()} when {@code discountOverlaps}
 * is set.
 *
 * @param state the inversion state of the field being indexed
 * @return the value produced by {@code encodeNormValue} for the boost and term count
 */
@Override
public final long computeNorm(FieldInvertState state) {
    final int numTerms = discountOverlaps ? state.getLength() - state.getNumOverlap() : state.getLength();
    return encodeNormValue(state.getBoost(), numTerms);
}

From source file:org.archive.jbs.lucene.WebSimilarity.java

License:Apache License

/**
 * Length normalization chosen per field name. Called at index time.
 *
 * <ul>
 *   <li>{@code "url"}: linear normalization, {@code 1 / length}.</li>
 *   <li>{@code "content"}: delegates to the superclass with the length
 *       raised to at least {@code MIN_CONTENT_LENGTH}.</li>
 *   <li>anything else: delegates to the superclass unchanged.</li>
 * </ul>
 */
public float computeNorm(String fieldName, FieldInvertState state) {
    final int length = state.getLength();

    // URL: prefer short by using linear normalization
    if ("url".equals(fieldName)) {
        return 1.0f / length;
    }

    // Every field other than content uses the default
    if (!"content".equals(fieldName)) {
        return super.computeNorm(fieldName, state);
    }

    // Content: penalize short, by treating short as longer

    // TODO: Is creating a new FieldInvertState object good, or
    //       should we modify the existing one?
    final FieldInvertState padded = new FieldInvertState(state.getPosition(),
            Math.max(length, MIN_CONTENT_LENGTH), state.getNumOverlap(), state.getOffset(),
            state.getBoost());
    return super.computeNorm(fieldName, padded);
}

From source file:org.elasticsearch.index.similarity.ScriptedSimilarity.java

License:Apache License

/**
 * Computes the norm from the field's term count alone.
 *
 * <p>The term count is {@link FieldInvertState#getLength()}, minus
 * {@link FieldInvertState#getNumOverlap()} when {@code discountOverlaps}
 * is set.
 *
 * @param state the inversion state of the field being indexed
 * @return the result of {@code SmallFloat.intToByte4} applied to the term count
 */
@Override
public long computeNorm(FieldInvertState state) {
    int termCount = state.getLength();
    if (discountOverlaps) {
        termCount -= state.getNumOverlap();
    }
    return SmallFloat.intToByte4(termCount);
}

From source file:org.opencms.search.CmsSearchSimilarity.java

License:Open Source License

/**
 * Special implementation for "compute norm" to reduce the significance of this factor 
 * for the <code>{@link CmsSearchField#FIELD_CONTENT}</code> field, while 
 * keeping the Lucene default for all other fields.<p>
 * /*  w w  w. j  a va  2 s.c om*/
 * @see org.apache.lucene.search.DefaultSimilarity#computeNorm(java.lang.String, org.apache.lucene.index.FieldInvertState)
 */
@Override
public float computeNorm(String fieldName, FieldInvertState state) {

    if (fieldName.equals(CmsSearchField.FIELD_CONTENT)) {
        final int numTerms = state.getLength() - state.getNumOverlap();
        // special length norm for content
        return (float) (3.0 / (Math.log(1000 + numTerms) / LOG10));
    }
    // all other fields use the default Lucene implementation
    return super.computeNorm(fieldName, state);
}