Usage examples for org.apache.lucene.analysis.payloads.PayloadHelper#decodeFloat
public static final float decodeFloat(byte[] bytes, int offset)
From source file:aos.lucene.search.ext.payloads.BoostingSimilarity.java
License:Apache License
public float scorePayload(int docID, String fieldName, int start, int end, byte[] payload, int offset, int length) { if (payload != null) { return PayloadHelper.decodeFloat(payload, offset); } else {//from w w w. j a v a 2 s . c o m return 1.0F; } }
From source file:com.factweavers.elasticsearch.payloadscorefunction.PayloadScoringFunction.java
License:Apache License
@Override public double score(int docId, float subQueryScore) { indexLookup.setNextDocId(docId);/*from w w w . java2 s. c o m*/ float score = 0; int obtainedTerms = 0; try { Fields termVectors = indexLookup.termVectors(); Boolean isPayloadOrIndex = false; TermsEnum iterator = null; if (termVectors != null && termVectors.terms(field) != null && termVectors.terms(field).hasPayloads()) { isPayloadOrIndex = true; Terms fields = termVectors.terms(field); iterator = fields.iterator(null); } if (isPayloadOrIndex) { BytesRef firstElement = iterator.next(); while (firstElement != null && (obtainedTerms < values.size())) { String currentValue = firstElement.utf8ToString(); if (!values.contains(currentValue)) { //logger.info("Payload Skipping " + currentValue); firstElement = iterator.next(); continue; } else { obtainedTerms++; } //logger.info("Payload processing value is " + currentValue); DocsAndPositionsEnum docsAndPositions = iterator.docsAndPositions(null, null); docsAndPositions.nextDoc(); docsAndPositions.nextPosition(); BytesRef payload = docsAndPositions.getPayload(); if (payload != null) { score += PayloadHelper.decodeFloat(payload.bytes, payload.offset); //logger.info("Score " + score); } else { score += defaultValue; } firstElement = iterator.next(); } } else { IndexField fieldObject = indexLookup.get(field); for (String value : values) { IndexFieldTerm tokens = fieldObject.get(value, IndexLookup.FLAG_CACHE | IndexLookup.FLAG_PAYLOADS); if (fieldObject != null && tokens != null) { //logger.info("Processing docID=" + docId + " " + field // + " for " + value + " , " + tokens); if (tokens.iterator().hasNext()) { score += tokens.iterator().next().payloadAsFloat(defaultValue); } } } } } catch (IOException e) { //logger.info("Exception in Term Vectors"); e.printStackTrace(); } return new Double(score); }
From source file:edu.rpi.tw.linkipedia.search.similarity.MySimilarity.java
License:Open Source License
@Override public float scorePayload(int doc, int start, int end, BytesRef payload) { if (payload != null) { return PayloadHelper.decodeFloat(payload.bytes, payload.offset); } else/*from ww w. j a va2 s . c o m*/ return 1.0F; // return 1; }
From source file:elhuyar.bilakit.SimilarityCLIRFactory.java
License:Open Source License
/**
 * Decodes the term payload into a float boost, defaulting to 1.0 when the
 * occurrence has no payload attached.
 */
public float scorePayload(int doc, int start, int end, BytesRef payload) {
    if (payload != null) {
        return PayloadHelper.decodeFloat(payload.bytes, payload.offset);
    }
    return 1.0F;
}
From source file:org.apache.solr.util.PayloadUtils.java
License:Apache License
public static PayloadDecoder getPayloadDecoder(FieldType fieldType) { PayloadDecoder decoder = null;// ww w .j a va 2s .c o m String encoder = getPayloadEncoder(fieldType); if ("integer".equals(encoder)) { decoder = (int doc, int start, int end, BytesRef payload) -> PayloadHelper.decodeInt(payload.bytes, payload.offset); } if ("float".equals(encoder)) { decoder = (int doc, int start, int end, BytesRef payload) -> PayloadHelper.decodeFloat(payload.bytes, payload.offset); } // encoder could be "identity" at this point, in the case of DelimitedTokenFilterFactory encoder="identity" // TODO: support pluggable payload decoders? return decoder; }
From source file:org.dice.solrenhancements.morelikethis.MoreLikeThis.java
License:Apache License
/** * Adds term weights found by tokenizing text from reader into the Map words * * @param reader a source of text to be tokenized * @param termWeightMap a Map of terms and their weights * @param fieldName Used by analyzer for any special per-field analysis *///from w w w . j ava2s . c om private void addTermWeights(Reader reader, Map<String, Flt> termWeightMap, String fieldName) throws IOException { if (analyzer == null) { throw new UnsupportedOperationException( "To use MoreLikeThis without " + "term vectors, you must provide an Analyzer"); } TokenStream ts = analyzer.tokenStream(fieldName, reader); try { int tokenCount = 0; // for every token CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); PayloadAttribute payloadAttr = ts.addAttribute(PayloadAttribute.class); TypeAttribute typeAttr = ts.addAttribute(TypeAttribute.class); ts.reset(); while (ts.incrementToken()) { String word = termAtt.toString(); tokenCount++; if (tokenCount > maxNumTokensParsedPerField) { break; } if (word.trim().length() == 0) { continue; } if (isNoiseWord(word)) { continue; } BytesRef payload = payloadAttr.getPayload(); float tokenWeight = 1.0f; // 1.0 or payload if set and a payload field if (isPayloadField(fieldName) && payload != null) { tokenWeight = PayloadHelper.decodeFloat(payload.bytes, payload.offset); } // increment frequency Flt termWeight = termWeightMap.get(word); if (termWeight == null) { termWeightMap.put(word, new Flt(tokenWeight)); } else { termWeight.x += tokenWeight; } } ts.end(); } finally { IOUtils.closeWhileHandlingException(ts); } }
From source file:org.dice.solrenhancements.unsupervisedfeedback.UnsupervisedFeedback.java
License:Apache License
/** * Adds term weights found by tokenizing text from reader into the Map words * * @param r a source of text to be tokenized * @param termWeightMap a Map of terms and their weights * @param fieldName Used by analyzer for any special per-field analysis *//* ww w. j a va2s .co m*/ private void addTermWeights(Reader r, Map<String, Flt> termWeightMap, String fieldName) throws IOException { if (analyzer == null) { throw new UnsupportedOperationException( "To use MoreLikeThis without " + "term vectors, you must provide an Analyzer"); } TokenStream ts = analyzer.tokenStream(fieldName, r); try { int tokenCount = 0; // for every token CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); PayloadAttribute payloadAttr = ts.addAttribute(PayloadAttribute.class); ts.reset(); while (ts.incrementToken()) { String word = termAtt.toString(); tokenCount++; if (tokenCount > maxNumTokensParsedPerField) { break; } if (isNoiseWord(word)) { continue; } BytesRef payload = payloadAttr.getPayload(); float tokenWeight = 1.0f; // 1.0 or payload if set and a payload field if (isPayloadField(fieldName) && payload != null) { tokenWeight = PayloadHelper.decodeFloat(payload.bytes, payload.offset); } // increment frequency Flt termWeight = termWeightMap.get(word); if (termWeight == null) { termWeightMap.put(word, new Flt(tokenWeight)); } else { termWeight.x += tokenWeight; } } ts.end(); } finally { IOUtils.closeWhileHandlingException(ts); } }
From source file:org.elasticsearch.action.termvector.GetTermVectorTests.java
License:Apache License
/**
 * Reassembles the token sequence into a single space-separated string, appending
 * each token's payload (when non-empty) after {@code delimiter}, decoded per
 * {@code encoding}: 0 = float, 1 = int, 2 = raw UTF-8 text.
 *
 * @throws ElasticsearchException if the encoding is not 0, 1 or 2
 */
private String createString(String[] tokens, Map<String, List<BytesRef>> payloads, int encoding,
        char delimiter) {
    // StringBuilder replaces repeated String concatenation (was O(n^2)).
    StringBuilder result = new StringBuilder();
    // Counts prior occurrences of each token to select its i-th payload.
    ObjectIntOpenHashMap<String> payloadCounter = new ObjectIntOpenHashMap<String>();
    for (String token : tokens) {
        if (!payloadCounter.containsKey(token)) {
            payloadCounter.putIfAbsent(token, 0);
        } else {
            payloadCounter.put(token, payloadCounter.get(token) + 1);
        }
        result.append(token);
        BytesRef payload = payloads.get(token).get(payloadCounter.get(token));
        if (payload.length > 0) {
            result.append(delimiter);
            switch (encoding) {
            case 0:
                result.append(Float.toString(PayloadHelper.decodeFloat(payload.bytes, payload.offset)));
                break;
            case 1:
                result.append(Integer.toString(PayloadHelper.decodeInt(payload.bytes, payload.offset)));
                break;
            case 2:
                result.append(payload.utf8ToString());
                break;
            default:
                throw new ElasticsearchException("unsupported encoding type");
            }
        }
        result.append(' ');
    }
    return result.toString();
}
From source file:org.elasticsearch.action.termvectors.GetTermVectorsIT.java
License:Apache License
private String createString(String[] tokens, Map<String, List<BytesRef>> payloads, int encoding, char delimiter) { String resultString = ""; ObjectIntHashMap<String> payloadCounter = new ObjectIntHashMap<>(); for (String token : tokens) { if (!payloadCounter.containsKey(token)) { payloadCounter.putIfAbsent(token, 0); } else {// w ww. j a v a 2 s . co m payloadCounter.put(token, payloadCounter.get(token) + 1); } resultString = resultString + token; BytesRef payload = payloads.get(token).get(payloadCounter.get(token)); if (payload.length > 0) { resultString = resultString + delimiter; switch (encoding) { case 0: { resultString = resultString + Float.toString(PayloadHelper.decodeFloat(payload.bytes, payload.offset)); break; } case 1: { resultString = resultString + Integer.toString(PayloadHelper.decodeInt(payload.bytes, payload.offset)); break; } case 2: { resultString = resultString + payload.utf8ToString(); break; } default: { throw new ElasticsearchException("unsupported encoding type"); } } } resultString = resultString + " "; } return resultString; }
From source file:org.elasticsearch.action.termvectors.GetTermVectorsTests.java
License:Apache License
/**
 * Reassembles the token sequence into a single space-separated string, appending
 * each token's payload (when non-empty) after {@code delimiter}, decoded per
 * {@code encoding}: 0 = float, 1 = int, 2 = raw UTF-8 text.
 *
 * @throws ElasticsearchException if the encoding is not 0, 1 or 2
 */
private String createString(String[] tokens, Map<String, List<BytesRef>> payloads, int encoding,
        char delimiter) {
    // StringBuilder replaces repeated String concatenation (was O(n^2)).
    StringBuilder result = new StringBuilder();
    // Counts prior occurrences of each token to select its i-th payload.
    ObjectIntOpenHashMap<String> payloadCounter = new ObjectIntOpenHashMap<>();
    for (String token : tokens) {
        if (!payloadCounter.containsKey(token)) {
            payloadCounter.putIfAbsent(token, 0);
        } else {
            payloadCounter.put(token, payloadCounter.get(token) + 1);
        }
        result.append(token);
        BytesRef payload = payloads.get(token).get(payloadCounter.get(token));
        if (payload.length > 0) {
            result.append(delimiter);
            switch (encoding) {
            case 0:
                result.append(Float.toString(PayloadHelper.decodeFloat(payload.bytes, payload.offset)));
                break;
            case 1:
                result.append(Integer.toString(PayloadHelper.decodeInt(payload.bytes, payload.offset)));
                break;
            case 2:
                result.append(payload.utf8ToString());
                break;
            default:
                throw new ElasticsearchException("unsupported encoding type");
            }
        }
        result.append(' ');
    }
    return result.toString();
}