Example usage for org.apache.lucene.analysis.payloads PayloadHelper decodeFloat

List of usage examples for org.apache.lucene.analysis.payloads PayloadHelper decodeFloat

Introduction

On this page you can find an example usage of org.apache.lucene.analysis.payloads PayloadHelper decodeFloat.

Prototype

public static final float decodeFloat(byte[] bytes, int offset) 

Source Link

Document

Decodes the payload that was encoded using encodeFloat(float).

Usage

From source file:aos.lucene.search.ext.payloads.BoostingSimilarity.java

License:Apache License

/**
 * Scores a term occurrence by its payload: interprets the payload bytes as a
 * float encoded via PayloadHelper#encodeFloat, or returns the neutral boost
 * 1.0 when no payload is attached.
 */
public float scorePayload(int docID, String fieldName, int start, int end, byte[] payload, int offset,
        int length) {
    if (payload == null) {
        return 1.0F;
    }
    return PayloadHelper.decodeFloat(payload, offset);
}

From source file:com.factweavers.elasticsearch.payloadscorefunction.PayloadScoringFunction.java

License:Apache License

/**
 * Computes a payload-based score for the given document.
 *
 * <p>If term vectors with payloads are available for {@code field}, iterates the
 * document's terms, and for each term contained in {@code values} adds its decoded
 * float payload (or {@code defaultValue} when the position has no payload). Otherwise
 * falls back to the index-lookup API and sums each value's payload as a float.
 *
 * @param docId         document to score
 * @param subQueryScore sub-query score (unused by this function)
 * @return the accumulated payload score (partial on I/O failure)
 */
@Override
public double score(int docId, float subQueryScore) {
    indexLookup.setNextDocId(docId);
    float score = 0;
    int obtainedTerms = 0;
    try {
        Fields termVectors = indexLookup.termVectors();
        // Use primitive boolean, not boxed Boolean, for a simple flag.
        boolean usePayloadTermVectors = termVectors != null && termVectors.terms(field) != null
                && termVectors.terms(field).hasPayloads();

        if (usePayloadTermVectors) {
            TermsEnum iterator = termVectors.terms(field).iterator(null);
            BytesRef term = iterator.next();
            // Stop once every requested value has been scored or the terms run out.
            while (term != null && obtainedTerms < values.size()) {
                String currentValue = term.utf8ToString();
                if (!values.contains(currentValue)) {
                    term = iterator.next();
                    continue;
                }
                obtainedTerms++;
                DocsAndPositionsEnum docsAndPositions = iterator.docsAndPositions(null, null);
                docsAndPositions.nextDoc();
                docsAndPositions.nextPosition();
                BytesRef payload = docsAndPositions.getPayload();
                if (payload != null) {
                    score += PayloadHelper.decodeFloat(payload.bytes, payload.offset);
                } else {
                    score += defaultValue;
                }
                term = iterator.next();
            }
        } else {
            IndexField fieldObject = indexLookup.get(field);
            // Fixed: null-check fieldObject BEFORE dereferencing it. The original
            // called fieldObject.get(...) first and only then tested it for null,
            // making the check useless and risking an NPE.
            if (fieldObject != null) {
                for (String value : values) {
                    IndexFieldTerm tokens = fieldObject.get(value,
                            IndexLookup.FLAG_CACHE | IndexLookup.FLAG_PAYLOADS);
                    if (tokens != null && tokens.iterator().hasNext()) {
                        score += tokens.iterator().next().payloadAsFloat(defaultValue);
                    }
                }
            }
        }
    } catch (IOException e) {
        // Best-effort: on I/O failure return whatever score accumulated so far.
        e.printStackTrace();
    }
    // float widens to double implicitly — no need for the boxed `new Double(score)`.
    return score;
}

From source file:edu.rpi.tw.linkipedia.search.similarity.MySimilarity.java

License:Open Source License

/**
 * Returns the float boost stored in this term position's payload, or the
 * neutral score 1.0 when the position carries no payload.
 */
@Override
public float scorePayload(int doc, int start, int end, BytesRef payload) {
    return payload == null ? 1.0F : PayloadHelper.decodeFloat(payload.bytes, payload.offset);
}

From source file:elhuyar.bilakit.SimilarityCLIRFactory.java

License:Open Source License

/**
 * Decodes the per-position payload into a float score; positions without a
 * payload score the neutral 1.0.
 */
public float scorePayload(int doc, int start, int end, BytesRef payload) {
    if (payload != null) {
        return PayloadHelper.decodeFloat(payload.bytes, payload.offset);
    }
    return 1.0F;
}

From source file:org.apache.solr.util.PayloadUtils.java

License:Apache License

/**
 * Builds a PayloadDecoder matching the payload encoder configured on the
 * field type: "integer" and "float" map to the corresponding PayloadHelper
 * decoders; any other encoder (including "identity", as used by
 * DelimitedTokenFilterFactory encoder="identity", or none) yields null.
 */
public static PayloadDecoder getPayloadDecoder(FieldType fieldType) {
    String encoder = getPayloadEncoder(fieldType);
    PayloadDecoder decoder = null;

    if (encoder != null) {
        switch (encoder) {
        case "integer":
            decoder = (int doc, int start, int end, BytesRef payload) -> PayloadHelper.decodeInt(payload.bytes,
                    payload.offset);
            break;
        case "float":
            decoder = (int doc, int start, int end, BytesRef payload) -> PayloadHelper.decodeFloat(payload.bytes,
                    payload.offset);
            break;
        default:
            // e.g. "identity" — no numeric decoding applies.
            break;
        }
    }

    // TODO: support pluggable payload decoders?

    return decoder;
}

From source file:org.dice.solrenhancements.morelikethis.MoreLikeThis.java

License:Apache License

/**
 * Tokenizes the text from {@code reader} and accumulates a weight per term into
 * {@code termWeightMap}. Each surviving token contributes its payload-decoded
 * float when the field is a payload field and a payload is present, otherwise 1.0.
 * Blank tokens and noise words are skipped; analysis stops after
 * {@code maxNumTokensParsedPerField} tokens.
 *
 * @param reader        a source of text to be tokenized
 * @param termWeightMap a Map of terms and their accumulated weights, updated in place
 * @param fieldName     used by the analyzer for any special per-field analysis
 */
private void addTermWeights(Reader reader, Map<String, Flt> termWeightMap, String fieldName)
        throws IOException {
    if (analyzer == null) {
        throw new UnsupportedOperationException(
                "To use MoreLikeThis without " + "term vectors, you must provide an Analyzer");
    }

    TokenStream stream = analyzer.tokenStream(fieldName, reader);
    try {
        CharTermAttribute termAttribute = stream.addAttribute(CharTermAttribute.class);
        PayloadAttribute payloadAttribute = stream.addAttribute(PayloadAttribute.class);
        TypeAttribute typeAttribute = stream.addAttribute(TypeAttribute.class);

        stream.reset();
        int parsedTokens = 0;
        while (stream.incrementToken()) {
            String term = termAttribute.toString();
            parsedTokens++;
            // Cap the amount of text analyzed per field.
            if (parsedTokens > maxNumTokensParsedPerField) {
                break;
            }
            if (term.trim().length() == 0 || isNoiseWord(term)) {
                continue;
            }

            // 1.0, or the payload value if set and this is a payload field.
            BytesRef payload = payloadAttribute.getPayload();
            float tokenWeight = (isPayloadField(fieldName) && payload != null)
                    ? PayloadHelper.decodeFloat(payload.bytes, payload.offset)
                    : 1.0f;

            // Accumulate the weight for this term.
            Flt accumulated = termWeightMap.get(term);
            if (accumulated == null) {
                termWeightMap.put(term, new Flt(tokenWeight));
            } else {
                accumulated.x += tokenWeight;
            }
        }
        stream.end();
    } finally {
        IOUtils.closeWhileHandlingException(stream);
    }
}

From source file:org.dice.solrenhancements.unsupervisedfeedback.UnsupervisedFeedback.java

License:Apache License

/**
 * Tokenizes the text from {@code r} and accumulates a weight per term into
 * {@code termWeightMap}. Each surviving token contributes its payload-decoded
 * float when the field is a payload field and a payload is present, otherwise 1.0.
 * Noise words are skipped; analysis stops after {@code maxNumTokensParsedPerField}
 * tokens.
 *
 * @param r             a source of text to be tokenized
 * @param termWeightMap a Map of terms and their accumulated weights, updated in place
 * @param fieldName     used by the analyzer for any special per-field analysis
 */
private void addTermWeights(Reader r, Map<String, Flt> termWeightMap, String fieldName) throws IOException {
    if (analyzer == null) {
        throw new UnsupportedOperationException(
                "To use MoreLikeThis without " + "term vectors, you must provide an Analyzer");
    }
    TokenStream stream = analyzer.tokenStream(fieldName, r);
    try {
        CharTermAttribute termAttribute = stream.addAttribute(CharTermAttribute.class);
        PayloadAttribute payloadAttribute = stream.addAttribute(PayloadAttribute.class);

        stream.reset();
        int parsedTokens = 0;
        while (stream.incrementToken()) {
            String term = termAttribute.toString();
            parsedTokens++;
            // Cap the amount of text analyzed per field.
            if (parsedTokens > maxNumTokensParsedPerField) {
                break;
            }
            if (isNoiseWord(term)) {
                continue;
            }

            // 1.0, or the payload value if set and this is a payload field.
            BytesRef payload = payloadAttribute.getPayload();
            float tokenWeight = (isPayloadField(fieldName) && payload != null)
                    ? PayloadHelper.decodeFloat(payload.bytes, payload.offset)
                    : 1.0f;

            // Accumulate the weight for this term.
            Flt accumulated = termWeightMap.get(term);
            if (accumulated == null) {
                termWeightMap.put(term, new Flt(tokenWeight));
            } else {
                accumulated.x += tokenWeight;
            }
        }
        stream.end();
    } finally {
        IOUtils.closeWhileHandlingException(stream);
    }
}

From source file:org.elasticsearch.action.termvector.GetTermVectorTests.java

License:Apache License

/**
 * Reconstructs the analyzed text for a document: each token is emitted, and when
 * its i-th payload is non-empty it is appended after {@code delimiter}, rendered
 * according to {@code encoding} (0 = float, 1 = int, 2 = UTF-8 string). Every
 * token is followed by a trailing space.
 *
 * @throws ElasticsearchException if {@code encoding} is not 0, 1, or 2
 */
private String createString(String[] tokens, Map<String, List<BytesRef>> payloads, int encoding,
        char delimiter) {
    // Build with StringBuilder instead of repeated String concatenation in the
    // loop (O(n) instead of O(n^2) character copying).
    StringBuilder result = new StringBuilder();
    // Tracks how many times each token has been seen so far, selecting the
    // matching payload from that token's payload list.
    ObjectIntOpenHashMap<String> payloadCounter = new ObjectIntOpenHashMap<String>();
    for (String token : tokens) {
        // Simplified counter update: first occurrence -> 0, otherwise previous + 1.
        int occurrence = payloadCounter.containsKey(token) ? payloadCounter.get(token) + 1 : 0;
        payloadCounter.put(token, occurrence);
        result.append(token);
        BytesRef payload = payloads.get(token).get(occurrence);
        if (payload.length > 0) {
            result.append(delimiter);
            switch (encoding) {
            case 0:
                result.append(Float.toString(PayloadHelper.decodeFloat(payload.bytes, payload.offset)));
                break;
            case 1:
                result.append(Integer.toString(PayloadHelper.decodeInt(payload.bytes, payload.offset)));
                break;
            case 2:
                result.append(payload.utf8ToString());
                break;
            default:
                throw new ElasticsearchException("unsupported encoding type");
            }
        }
        result.append(' ');
    }
    return result.toString();
}

From source file:org.elasticsearch.action.termvectors.GetTermVectorsIT.java

License:Apache License

/**
 * Reconstructs the analyzed text for a document: each token is emitted, and when
 * its i-th payload is non-empty it is appended after {@code delimiter}, rendered
 * according to {@code encoding} (0 = float, 1 = int, 2 = UTF-8 string). Every
 * token is followed by a trailing space.
 *
 * @throws ElasticsearchException if {@code encoding} is not 0, 1, or 2
 */
private String createString(String[] tokens, Map<String, List<BytesRef>> payloads, int encoding,
        char delimiter) {
    // Build with StringBuilder instead of repeated String concatenation in the
    // loop (O(n) instead of O(n^2) character copying).
    StringBuilder result = new StringBuilder();
    // Tracks how many times each token has been seen so far, selecting the
    // matching payload from that token's payload list.
    ObjectIntHashMap<String> payloadCounter = new ObjectIntHashMap<>();
    for (String token : tokens) {
        // Simplified counter update: first occurrence -> 0, otherwise previous + 1.
        int occurrence = payloadCounter.containsKey(token) ? payloadCounter.get(token) + 1 : 0;
        payloadCounter.put(token, occurrence);
        result.append(token);
        BytesRef payload = payloads.get(token).get(occurrence);
        if (payload.length > 0) {
            result.append(delimiter);
            switch (encoding) {
            case 0:
                result.append(Float.toString(PayloadHelper.decodeFloat(payload.bytes, payload.offset)));
                break;
            case 1:
                result.append(Integer.toString(PayloadHelper.decodeInt(payload.bytes, payload.offset)));
                break;
            case 2:
                result.append(payload.utf8ToString());
                break;
            default:
                throw new ElasticsearchException("unsupported encoding type");
            }
        }
        result.append(' ');
    }
    return result.toString();
}

From source file:org.elasticsearch.action.termvectors.GetTermVectorsTests.java

License:Apache License

/**
 * Reconstructs the analyzed text for a document: each token is emitted, and when
 * its i-th payload is non-empty it is appended after {@code delimiter}, rendered
 * according to {@code encoding} (0 = float, 1 = int, 2 = UTF-8 string). Every
 * token is followed by a trailing space.
 *
 * @throws ElasticsearchException if {@code encoding} is not 0, 1, or 2
 */
private String createString(String[] tokens, Map<String, List<BytesRef>> payloads, int encoding,
        char delimiter) {
    // Build with StringBuilder instead of repeated String concatenation in the
    // loop (O(n) instead of O(n^2) character copying).
    StringBuilder result = new StringBuilder();
    // Tracks how many times each token has been seen so far, selecting the
    // matching payload from that token's payload list.
    ObjectIntOpenHashMap<String> payloadCounter = new ObjectIntOpenHashMap<>();
    for (String token : tokens) {
        // Simplified counter update: first occurrence -> 0, otherwise previous + 1.
        int occurrence = payloadCounter.containsKey(token) ? payloadCounter.get(token) + 1 : 0;
        payloadCounter.put(token, occurrence);
        result.append(token);
        BytesRef payload = payloads.get(token).get(occurrence);
        if (payload.length > 0) {
            result.append(delimiter);
            switch (encoding) {
            case 0:
                result.append(Float.toString(PayloadHelper.decodeFloat(payload.bytes, payload.offset)));
                break;
            case 1:
                result.append(Integer.toString(PayloadHelper.decodeInt(payload.bytes, payload.offset)));
                break;
            case 2:
                result.append(payload.utf8ToString());
                break;
            default:
                throw new ElasticsearchException("unsupported encoding type");
            }
        }
        result.append(' ');
    }
    return result.toString();
}