List of usage examples for org.apache.lucene.util BytesRef utf8ToString
public String utf8ToString()
From source file:io.crate.operation.scalar.SubstrFunctionTest.java
License:Apache License
/**
 * Evaluates the substr scalar on the constant input "cratedata": first with a start
 * position of 6 (expects "data"), then with the same start and a count of 2 (expects "da").
 */
@Test
@SuppressWarnings("unchecked")
public void testEvaluate() throws Exception {
    final Literal<Long> startPos = Literal.newLiteral(6L);
    final Literal<Long> count = Literal.newLiteral(2L);

    final Input<Object> textInput = new Input<Object>() {
        @Override
        public Object value() {
            return new BytesRef("cratedata");
        }
    };
    final Input<Object> startInput = new Input<Object>() {
        @Override
        public Object value() {
            return startPos.value();
        }
    };
    final Input<Object> countInput = new Input<Object>() {
        @Override
        public Object value() {
            return count.value();
        }
    };

    // Two-argument form: substr(tag, start).
    List<Symbol> twoArgs = Arrays.<Symbol>asList(createReference("tag", DataTypes.STRING), startPos);
    Function twoArgFunction = createFunction(SubstrFunction.NAME, DataTypes.STRING, twoArgs);
    Scalar<BytesRef, Object> twoArgScalar =
            (Scalar<BytesRef, Object>) functions.get(twoArgFunction.info().ident());
    BytesRef tail = twoArgScalar.evaluate(textInput, startInput);
    assertThat(tail.utf8ToString(), is("data"));

    // Three-argument form: substr(tag, start, count).
    List<Symbol> threeArgs =
            Arrays.<Symbol>asList(createReference("tag", DataTypes.STRING), startPos, count);
    Function threeArgFunction = createFunction(SubstrFunction.NAME, DataTypes.STRING, threeArgs);
    Scalar<BytesRef, Object> threeArgScalar =
            (Scalar<BytesRef, Object>) functions.get(threeArgFunction.info().ident());
    BytesRef slice = threeArgScalar.evaluate(textInput, startInput, countInput);
    assertThat(slice.utf8ToString(), is("da"));
}
From source file:io.crate.operation.scalar.SubstrFunctionTest.java
License:Apache License
@Test @SuppressWarnings("unchecked") public void testEvaluateWithArgsAsNonLiterals() throws Exception { List<Symbol> args = Arrays.<Symbol>asList(createReference("tag", DataTypes.STRING), createReference("start", DataTypes.LONG), createReference("end", DataTypes.LONG)); Function function = createFunction(SubstrFunction.NAME, DataTypes.STRING, args); Scalar<BytesRef, Object> format = (Scalar<BytesRef, Object>) functions.get(function.info().ident()); Input<Object> arg1 = new Input<Object>() { @Override//from ww w. j ava 2s.c o m public Object value() { return new BytesRef("cratedata"); } }; Input<Object> arg2 = new Input<Object>() { @Override public Object value() { return 1L; } }; Input<Object> arg3 = new Input<Object>() { @Override public Object value() { return 5L; } }; BytesRef result = format.evaluate(arg1, arg2, arg3); assertThat(result.utf8ToString(), is("crate")); }
From source file:io.crate.operation.scalar.SubstrFunctionTest.java
License:Apache License
/**
 * Evaluates the three-argument substr scalar with references typed STRING, INTEGER and SHORT.
 * The text argument is supplied once as a {@code BytesRef} and once as a {@code String};
 * both evaluations are expected to give "crate".
 */
@Test
@SuppressWarnings("unchecked")
public void testEvaluateWithArgsAsNonLiteralsIntShort() throws Exception {
    List<Symbol> references = Arrays.<Symbol>asList(
            createReference("tag", DataTypes.STRING),
            createReference("start", DataTypes.INTEGER),
            createReference("end", DataTypes.SHORT));
    Function substr = createFunction(SubstrFunction.NAME, DataTypes.STRING, references);
    Scalar<BytesRef, Object> scalar =
            (Scalar<BytesRef, Object>) functions.get(substr.info().ident());

    // Text argument passed as BytesRef.
    BytesRef fromBytesRef = scalar.evaluate(generateInputs(new BytesRef("cratedata"), 1, 5));
    assertThat(fromBytesRef.utf8ToString(), is("crate"));

    // Text argument passed as java.lang.String.
    BytesRef fromString = scalar.evaluate(generateInputs("cratedata", 1, 5));
    assertThat(fromString.utf8ToString(), is("crate"));
}
From source file:io.crate.planner.symbol.StringLiteral.java
License:Apache License
/**
 * Converts a {@code BytesRef} value to the requested type.
 *
 * @param type  the target type
 * @param value the value to convert
 * @return {@code value} itself when {@code type} is the very same instance as
 *         {@code valueType()}; otherwise the result of the String-based overload applied
 *         to the UTF-8 decoded text of {@code value}
 */
@Override
public Object convertValueTo(DataType type, BytesRef value) {
    return valueType() == type
            ? value
            : convertValueTo(type, value.utf8ToString());
}
From source file:io.crate.planner.symbol.TimestampLiteral.java
License:Apache License
/**
 * Creates a timestamp literal from a {@code BytesRef} by decoding it with
 * {@code utf8ToString()} and delegating to the constructor that takes the decoded string.
 *
 * @param value the UTF-8 encoded value; dereferenced without a null check
 */
public TimestampLiteral(BytesRef value) { this(value.utf8ToString()); }
From source file:io.crate.testing.BytesRefUtils.java
License:Apache License
private static String[] setToStringArray(Set<BytesRef> values) { String[] strings = new String[values.size()]; int idx = 0;//from w w w . j a v a2s . c o m for (BytesRef value : values) { strings[idx] = value == null ? null : value.utf8ToString(); idx++; } return strings; }
From source file:io.crate.types.IpType.java
License:Apache License
/**
 * Rejects values that {@code isValid} does not accept.
 *
 * @param ip the candidate address
 * @throws IllegalArgumentException if {@code isValid(ip)} returns {@code false}
 */
private void validate(BytesRef ip) {
    if (isValid(ip)) {
        return;
    }
    final String message =
            "Failed to validate ip [" + ip.utf8ToString() + "], not a valid ipv4 address";
    throw new IllegalArgumentException(message);
}
From source file:io.crate.types.LongType.java
License:Apache License
/** * parses the utf-8 encoded bytesRef argument as signed decimal {@code long}. * All characters in the string must be decimal digits, except the first which may be an ASCII minus sign to indicate * a negative value or or a plus sign to indicate a positive value. * * mostly copied from {@link Long#parseLong(String s, int radix)} */// w w w .j a v a2 s. c om private long parseLong(BytesRef value) { assert value != null : "value must not be null"; boolean negative = false; long result = 0; int i = 0; int len = value.length; int radix = 10; long limit = -Long.MAX_VALUE; long multmin; byte[] bytes = value.bytes; int digit; if (len <= 0) { throw new NumberFormatException(value.utf8ToString()); } char firstChar = (char) bytes[i]; if (firstChar < '0') { if (firstChar == '-') { negative = true; limit = Long.MIN_VALUE; } else if (firstChar != '+') { throw new NumberFormatException(value.utf8ToString()); } if (len == 1) { // lone '+' or '-' throw new NumberFormatException(value.utf8ToString()); } i++; ; } multmin = limit / radix; while (i < len) { digit = Character.digit((char) bytes[i], radix); i++; if (digit < 0) { throw new NumberFormatException(value.utf8ToString()); } if (result < multmin) { throw new NumberFormatException(value.utf8ToString()); } result *= radix; if (result < limit + digit) { throw new NumberFormatException(value.utf8ToString()); } result -= digit; } return negative ? result : -result; }
From source file:it.cnr.isti.hpc.dexter.lucene.LuceneHelper.java
License:Apache License
/** * Returns the cosine similarity between two documents * //from w ww. j a v a2 s.c o m * @param x * - the WikiId of the first document * @param y * - the WikiId of the first document * @param field * - the field on which to compute the similarity * * @return a double between 0 (not similar) and 1 (same content), * representing the similarity between the 2 documents */ public double getCosineSimilarity(int x, int y, String field) { IndexReader reader = getReader(); Terms tfvX = null; Terms tfvY = null; try { tfvX = reader.getTermVector(getLuceneId(x), field); tfvY = reader.getTermVector(getLuceneId(y), field); // try { // tfvX = reader.document(idX).getBinaryValue("asd") // getTermFreqVectors(idX); // tfvY = reader.getTermFreqVectors(idY); } catch (IOException e) { logger.error("computing cosine similarity ({}) ", e.toString()); System.exit(-1); } Map<String, Integer> xfrequencies = new HashMap<String, Integer>(); Map<String, Integer> yfrequencies = new HashMap<String, Integer>(); TermsEnum xtermsEnum = null; try { xtermsEnum = tfvX.iterator(null); BytesRef text; while ((text = xtermsEnum.next()) != null) { String term = text.utf8ToString(); int freq = (int) xtermsEnum.totalTermFreq(); xfrequencies.put(term, freq); } TermsEnum ytermsEnum = tfvY.iterator(null); while ((text = ytermsEnum.next()) != null) { String term = text.utf8ToString(); int freq = (int) ytermsEnum.totalTermFreq(); yfrequencies.put(term, freq); } } catch (IOException e) { logger.error("computing cosine similarity ({}) ", e.toString()); System.exit(-1); } Map<String, Double> xTfidf = new HashMap<String, Double>(); Map<String, Double> yTfidf = new HashMap<String, Double>(); double xnorm = tfidfVector(xTfidf, xfrequencies, field); double ynorm = tfidfVector(yTfidf, yfrequencies, field); double dotproduct = 0; for (Map.Entry<String, Double> k : xTfidf.entrySet()) { if (yTfidf.containsKey(k.getKey())) { logger.info("key {}", k.getKey()); logger.info("key x {} y {} ", k.getValue(), 
yTfidf.get(k.getKey())); dotproduct += k.getValue() * yTfidf.get(k.getKey()); logger.info("dotproduct {} ", dotproduct); } } return dotproduct / (xnorm * ynorm); }
From source file:lab_mri.RocchioExpander.java
private List<Entry<String, Float>> getTermScoreList(List<SearchResult> results) throws CorruptIndexException, IOException { Map<String, Float> termScoreMap = new HashMap<>(); try (IndexReader idxreader = IndexReader .open(FSDirectory.open(new File("/home/luigi/NetBeansProjects/LAB_mri/inv_index")))) { int docsnum = idxreader.numDocs(); for (SearchResult res : results) { Terms termVector = idxreader.getTermVector(Integer.parseInt(res.getId()) - 1, field); // index starts from zero TermsEnum itr = null;//from ww w .j a v a2 s . c o m if (termVector != null) { itr = termVector.iterator(null); BytesRef term = null; while ((term = itr.next()) != null) { String termTxt = term.utf8ToString(); double tf = itr.totalTermFreq(); double df = idxreader.docFreq(new Term("abst", term)); float idf = (float) Math.log(docsnum / df); float tfidf = (float) (tf * idf); termScoreMap.put(termTxt, beta * tfidf); } } } return new ArrayList<Entry<String, Float>>(termScoreMap.entrySet()); } }