List of usage examples for org.apache.lucene.index FieldInvertState getNumOverlap
public int getNumOverlap()
Gets the number of terms with positionIncrement == 0. From source file:BM25LSimilarity.java
License:Apache License
/**
 * Encodes the norm for a field from its boost and its term count.
 *
 * <p>The term count starts as {@code state.getLength()}; when
 * {@code discountOverlaps} is set, {@code state.getNumOverlap()} is
 * subtracted from it before encoding.
 *
 * @param state inverted-field statistics supplying length, overlap count and boost
 * @return the value produced by {@code encodeNormValue} for the boost and term count
 */
@Override
public final long computeNorm(FieldInvertState state) {
    int termCount = state.getLength();
    if (discountOverlaps) {
        termCount -= state.getNumOverlap();
    }
    return encodeNormValue(state.getBoost(), termCount);
}
From source file:com.core.nlp.similarity.DefaultSimilarity.java
License:Apache License
/** * Implemented as/*w w w .java 2 s .c om*/ * <code>state.getBoost()*lengthNorm(numTerms)</code>, where * <code>numTerms</code> is {@link FieldInvertState#getLength()} if {@link * #setDiscountOverlaps} is false, else it's {@link * FieldInvertState#getLength()} - {@link * FieldInvertState#getNumOverlap()}. * * @lucene.experimental */ @Override public float lengthNorm(FieldInvertState state) { final int numTerms; if (discountOverlaps) numTerms = state.getLength() - state.getNumOverlap(); else numTerms = state.getLength(); return state.getBoost() * ((float) (1.0 / Math.sqrt(numTerms))); }
From source file:com.jaeksoft.searchlib.schema.FairSimilarity.java
License:Open Source License
@Override final public float computeNorm(String field, FieldInvertState state) { final int numTerms; if (discountOverlaps) numTerms = state.getLength() - state.getNumOverlap(); else// w w w . ja va 2 s . c o m numTerms = state.getLength(); return state.getBoost() * ((float) (1.0 / numTerms)); }
From source file:elhuyar.bilakit.SimilarityCLIRFactory.java
License:Open Source License
/**
 * Implemented as
 * <code>state.getBoost()*lengthNorm(numTerms)</code>, where
 * <code>numTerms</code> is {@link FieldInvertState#getLength()} if {@link
 * #setDiscountOverlaps} is false, else it's {@link
 * FieldInvertState#getLength()} - {@link
 * FieldInvertState#getNumOverlap()}.
 *
 * @param state inverted-field statistics supplying length, overlap count and boost
 * @return the field boost multiplied by the inverse square root of the term count
 * @lucene.experimental
 */
public float lengthNorm(FieldInvertState state) {
    final int termCount = discountOverlaps
            ? state.getLength() - state.getNumOverlap()
            : state.getLength();
    final float inverseSqrt = (float) (1.0 / Math.sqrt(termCount));
    return state.getBoost() * inverseSqrt;
}
From source file:eu.europeana.ranking.bm25f.similarity.BM25FSimilarity.java
License:Apache License
/**
 * Stores the encoded norm for a field into the supplied {@code Norm}.
 *
 * <p>The term count is {@code state.getLength()}, reduced by
 * {@code state.getNumOverlap()} when {@code discountOverlaps} is set. The
 * boost and term count are passed to {@code encodeNormValue} and the result
 * is written with {@code norm.setByte}.
 *
 * @param state inverted-field statistics supplying length, overlap count and boost
 * @param norm  destination that receives the encoded norm
 */
@Override
public final void computeNorm(FieldInvertState state, Norm norm) {
    int termCount = state.getLength();
    if (discountOverlaps) {
        termCount -= state.getNumOverlap();
    }
    norm.setByte(encodeNormValue(state.getBoost(), termCount));
}
From source file:io.anserini.search.similarity.F2LogSimilarity.java
License:Apache License
/**
 * Encodes the norm for a field from its boost and its term count.
 *
 * <p>When {@code discountOverlaps} is set, {@code state.getNumOverlap()} is
 * subtracted from {@code state.getLength()}; otherwise the length is used
 * as is.
 *
 * @param state inverted-field statistics supplying length, overlap count and boost
 * @return the value produced by {@code encodeNormValue} for the boost and term count
 */
@Override
public long computeNorm(FieldInvertState state) {
    final int termCount;
    if (discountOverlaps) {
        termCount = state.getLength() - state.getNumOverlap();
    } else {
        termCount = state.getLength();
    }
    return encodeNormValue(state.getBoost(), termCount);
}
From source file:main.BM25VASimilarity.java
License:Apache License
@Override public final long computeNorm(FieldInvertState state) { final int numTerms = discountOverlaps ? state.getLength() - state.getNumOverlap() : state.getLength(); state.getUniqueTermCount();//from ww w. j a v a 2s . c o m //System.out.println(state.getName()); //System.out.println(state.getUniqueTermCount()); return encodeNormValue(state.getBoost(), numTerms); }
From source file:org.archive.jbs.lucene.WebSimilarity.java
License:Apache License
/** Normalize field by length. Called at index time. */ public float computeNorm(String fieldName, FieldInvertState state) { int numTokens = state.getLength(); if ("url".equals(fieldName)) { // URL: prefer short by using linear normalization return 1.0f / numTokens; } else if ("content".equals(fieldName)) { // Content: penalize short, by treating short as longer // TODO: Is creating a new FieldInvertState object good, or // should we modify the existing one? return super.computeNorm(fieldName, new FieldInvertState(state.getPosition(), Math.max(numTokens, MIN_CONTENT_LENGTH), state.getNumOverlap(), state.getOffset(), state.getBoost())); } else {//from w ww. j a v a 2s .c o m // use default return super.computeNorm(fieldName, state); } }
From source file:org.elasticsearch.index.similarity.ScriptedSimilarity.java
License:Apache License
/**
 * Encodes the field's term count as its norm.
 *
 * <p>The term count is {@code state.getLength()}, reduced by
 * {@code state.getNumOverlap()} when {@code discountOverlaps} is set, and is
 * then passed through {@code SmallFloat.intToByte4}.
 *
 * @param state inverted-field statistics supplying length and overlap count
 * @return the result of {@code SmallFloat.intToByte4} for the term count
 */
@Override
public long computeNorm(FieldInvertState state) {
    int termCount = state.getLength();
    if (discountOverlaps) {
        termCount -= state.getNumOverlap();
    }
    return SmallFloat.intToByte4(termCount);
}
From source file:org.opencms.search.CmsSearchSimilarity.java
License:Open Source License
/** * Special implementation for "compute norm" to reduce the significance of this factor * for the <code>{@link CmsSearchField#FIELD_CONTENT}</code> field, while * keeping the Lucene default for all other fields.<p> * /* w w w. j a va 2 s.c om*/ * @see org.apache.lucene.search.DefaultSimilarity#computeNorm(java.lang.String, org.apache.lucene.index.FieldInvertState) */ @Override public float computeNorm(String fieldName, FieldInvertState state) { if (fieldName.equals(CmsSearchField.FIELD_CONTENT)) { final int numTerms = state.getLength() - state.getNumOverlap(); // special length norm for content return (float) (3.0 / (Math.log(1000 + numTerms) / LOG10)); } // all other fields use the default Lucene implementation return super.computeNorm(fieldName, state); }