Example usage for org.apache.lucene.util BytesRef utf8ToString

Introduction

On this page you can find example usages of org.apache.lucene.util.BytesRef#utf8ToString.

Prototype

public String utf8ToString()

Source Link

Document

Interprets the stored bytes as UTF-8 and returns the resulting string.

Usage

From source file:io.crate.operation.scalar.SubstrFunctionTest.java

License:Apache License

/**
 * Evaluates the function registered under {@code SubstrFunction.NAME} on the input
 * "cratedata": first with only a start argument of 6 (expects "data"), then with an
 * additional count argument of 2 (expects "da").
 */
@Test
@SuppressWarnings("unchecked")
public void testEvaluate() throws Exception {
    final Literal<Long> startPos = Literal.newLiteral(6L);

    // Two-argument form: (string reference, start literal).
    List<Symbol> startOnlyArgs = Arrays.<Symbol>asList(createReference("tag", DataTypes.STRING), startPos);
    Function startOnlyFunction = createFunction(SubstrFunction.NAME, DataTypes.STRING, startOnlyArgs);
    Scalar<BytesRef, Object> startOnlyImpl =
            (Scalar<BytesRef, Object>) functions.get(startOnlyFunction.info().ident());

    Input<Object> textInput = new Input<Object>() {
        @Override
        public Object value() {
            return new BytesRef("cratedata");
        }
    };
    Input<Object> startInput = new Input<Object>() {
        @Override
        public Object value() {
            return startPos.value();
        }
    };

    BytesRef tail = startOnlyImpl.evaluate(textInput, startInput);
    assertThat(tail.utf8ToString(), is("data"));

    final Literal<Long> count = Literal.newLiteral(2L);

    // Three-argument form: (string reference, start literal, count literal).
    List<Symbol> startAndCountArgs =
            Arrays.<Symbol>asList(createReference("tag", DataTypes.STRING), startPos, count);
    Function startAndCountFunction =
            createFunction(SubstrFunction.NAME, DataTypes.STRING, startAndCountArgs);
    Scalar<BytesRef, Object> startAndCountImpl =
            (Scalar<BytesRef, Object>) functions.get(startAndCountFunction.info().ident());

    Input<Object> countInput = new Input<Object>() {
        @Override
        public Object value() {
            return count.value();
        }
    };

    BytesRef limited = startAndCountImpl.evaluate(textInput, startInput, countInput);
    assertThat(limited.utf8ToString(), is("da"));
}

From source file:io.crate.operation.scalar.SubstrFunctionTest.java

License:Apache License

/**
 * Evaluates the function registered under {@code SubstrFunction.NAME} when all three
 * arguments are declared as references rather than literals; with the inputs
 * "cratedata", 1 and 5 the expected result is "crate".
 */
@Test
@SuppressWarnings("unchecked")
public void testEvaluateWithArgsAsNonLiterals() throws Exception {
    List<Symbol> referenceArgs = Arrays.<Symbol>asList(
            createReference("tag", DataTypes.STRING),
            createReference("start", DataTypes.LONG),
            createReference("end", DataTypes.LONG));
    Function substrFunction = createFunction(SubstrFunction.NAME, DataTypes.STRING, referenceArgs);
    Scalar<BytesRef, Object> substrImpl =
            (Scalar<BytesRef, Object>) functions.get(substrFunction.info().ident());

    Input<Object> textInput = new Input<Object>() {
        @Override
        public Object value() {
            return new BytesRef("cratedata");
        }
    };
    Input<Object> startInput = new Input<Object>() {
        @Override
        public Object value() {
            return 1L;
        }
    };
    Input<Object> countInput = new Input<Object>() {
        @Override
        public Object value() {
            return 5L;
        }
    };

    BytesRef prefix = substrImpl.evaluate(textInput, startInput, countInput);
    assertThat(prefix.utf8ToString(), is("crate"));
}

From source file:io.crate.operation.scalar.SubstrFunctionTest.java

License:Apache License

/**
 * Evaluates the function registered under {@code SubstrFunction.NAME} with the second
 * and third arguments declared as INTEGER and SHORT references. The text argument is
 * supplied once as a {@link BytesRef} and once as a {@link String}; both calls are
 * expected to yield "crate".
 */
@Test
@SuppressWarnings("unchecked")
public void testEvaluateWithArgsAsNonLiteralsIntShort() throws Exception {
    List<Symbol> referenceArgs = Arrays.<Symbol>asList(
            createReference("tag", DataTypes.STRING),
            createReference("start", DataTypes.INTEGER),
            createReference("end", DataTypes.SHORT));
    Function substrFunction = createFunction(SubstrFunction.NAME, DataTypes.STRING, referenceArgs);
    Scalar<BytesRef, Object> substrImpl =
            (Scalar<BytesRef, Object>) functions.get(substrFunction.info().ident());

    // Text passed as a BytesRef.
    BytesRef fromBytesRef = substrImpl.evaluate(generateInputs(new BytesRef("cratedata"), 1, 5));
    assertThat(fromBytesRef.utf8ToString(), is("crate"));

    // Text passed as a plain String.
    BytesRef fromString = substrImpl.evaluate(generateInputs("cratedata", 1, 5));
    assertThat(fromString.utf8ToString(), is("crate"));
}

From source file:io.crate.planner.symbol.StringLiteral.java

License:Apache License

/**
 * Converts {@code value} to the requested type.
 *
 * @param type  the target type
 * @param value the value to convert
 * @return {@code value} itself when {@code type} is this literal's own value type;
 *         otherwise the result of the String-based {@code convertValueTo} overload
 *         applied to the UTF-8 decoded value
 */
@Override
public Object convertValueTo(DataType type, BytesRef value) {
    if (valueType() != type) {
        // Different target type: decode to a String and delegate to the String overload.
        return convertValueTo(type, value.utf8ToString());
    }
    return value;
}

From source file:io.crate.planner.symbol.TimestampLiteral.java

License:Apache License

/**
 * Creates a timestamp literal from UTF-8 encoded bytes by decoding them to a
 * {@link String} and delegating to the String-based constructor.
 *
 * @param value the UTF-8 encoded value; must not be null, since it is dereferenced
 */
public TimestampLiteral(BytesRef value) {
    this(value.utf8ToString());
}

From source file:io.crate.testing.BytesRefUtils.java

License:Apache License

/**
 * Decodes every element of {@code values} to a String, in the set's iteration order.
 *
 * @param values the UTF-8 encoded values; individual elements may be null
 * @return an array of the same size as {@code values}, holding {@code null} wherever
 *         the corresponding element was null
 */
private static String[] setToStringArray(Set<BytesRef> values) {
    String[] decoded = new String[values.size()];
    int next = 0;
    for (BytesRef ref : values) {
        decoded[next++] = (ref != null) ? ref.utf8ToString() : null;
    }
    return decoded;
}

From source file:io.crate.types.IpType.java

License:Apache License

/**
 * Rejects values that {@code isValid} does not accept.
 *
 * @param ip the UTF-8 encoded address to check
 * @throws IllegalArgumentException if {@code isValid(ip)} returns false
 */
private void validate(BytesRef ip) {
    if (isValid(ip)) {
        return;
    }
    String message = "Failed to validate ip [" + ip.utf8ToString() + "], not a valid ipv4 address";
    throw new IllegalArgumentException(message);
}

From source file:io.crate.types.LongType.java

License:Apache License

/**
 * Parses the UTF-8 encoded bytesRef argument as a signed decimal {@code long}.
 * All characters in the string must be decimal digits, except the first, which may be an ASCII minus sign to
 * indicate a negative value or a plus sign to indicate a positive value.
 *
 * Only the slice {@code bytes[offset .. offset + length)} of the argument is read, so a
 * {@code BytesRef} that is a view into a larger array is parsed correctly.
 *
 * mostly copied from {@link Long#parseLong(String s, int radix)}
 *
 * @param value the bytes to parse; must not be null
 * @return the parsed value
 * @throws NumberFormatException if the slice is empty, is a lone sign, contains a non-digit,
 *                               or does not fit into a {@code long}
 */
private long parseLong(BytesRef value) {
    assert value != null : "value must not be null";
    boolean negative = false;
    long result = 0;

    // A BytesRef is a slice of its backing array: start at offset, stop at offset + length.
    int i = value.offset;
    int len = value.length;
    int end = value.offset + len;
    int radix = 10;
    long limit = -Long.MAX_VALUE;
    long multmin;
    byte[] bytes = value.bytes;
    int digit;

    if (len <= 0) {
        throw new NumberFormatException(value.utf8ToString());
    }

    char firstChar = (char) bytes[i];
    if (firstChar < '0') { // possible leading '+' or '-'
        if (firstChar == '-') {
            negative = true;
            limit = Long.MIN_VALUE;
        } else if (firstChar != '+') {
            throw new NumberFormatException(value.utf8ToString());
        }

        if (len == 1) { // lone '+' or '-'
            throw new NumberFormatException(value.utf8ToString());
        }

        i++;
    }
    // The result is accumulated as a negative number so that Long.MIN_VALUE can be represented.
    multmin = limit / radix;
    while (i < end) {
        digit = Character.digit((char) bytes[i], radix);
        i++;
        if (digit < 0) {
            throw new NumberFormatException(value.utf8ToString());
        }
        // Multiplying by radix would overflow past the limit.
        if (result < multmin) {
            throw new NumberFormatException(value.utf8ToString());
        }
        result *= radix;
        // Subtracting the digit would overflow past the limit.
        if (result < limit + digit) {
            throw new NumberFormatException(value.utf8ToString());
        }
        result -= digit;
    }
    return negative ? result : -result;
}

From source file:it.cnr.isti.hpc.dexter.lucene.LuceneHelper.java

License:Apache License

/**
 * Returns the cosine similarity between two documents.
 *
 * @param x
 *            - the WikiId of the first document
 * @param y
 *            - the WikiId of the second document
 * @param field
 *            - the field on which to compute the similarity
 *
 * @return a double between 0 (not similar) and 1 (same content),
 *         representing the similarity between the 2 documents; 0 is also
 *         returned when either document has no term vector for
 *         {@code field}, or when the product of the two values returned by
 *         {@code tfidfVector} is 0
 */
public double getCosineSimilarity(int x, int y, String field) {

    IndexReader reader = getReader();
    Terms tfvX = null;
    Terms tfvY = null;
    try {
        tfvX = reader.getTermVector(getLuceneId(x), field);
        tfvY = reader.getTermVector(getLuceneId(y), field);
    } catch (IOException e) {
        logger.error("computing cosine similarity ({}) ", e.toString());
        System.exit(-1);
    }

    // getTermVector yields null when no term vector is available for the
    // document/field; without this guard the iterator call below would
    // throw a NullPointerException.
    if (tfvX == null || tfvY == null) {
        return 0;
    }

    // Raw term frequencies of each document, keyed by the decoded term text.
    Map<String, Integer> xfrequencies = new HashMap<String, Integer>();
    Map<String, Integer> yfrequencies = new HashMap<String, Integer>();
    try {
        BytesRef text;

        TermsEnum xtermsEnum = tfvX.iterator(null);
        while ((text = xtermsEnum.next()) != null) {
            String term = text.utf8ToString();
            int freq = (int) xtermsEnum.totalTermFreq();
            xfrequencies.put(term, freq);
        }

        TermsEnum ytermsEnum = tfvY.iterator(null);
        while ((text = ytermsEnum.next()) != null) {
            String term = text.utf8ToString();
            int freq = (int) ytermsEnum.totalTermFreq();
            yfrequencies.put(term, freq);
        }

    } catch (IOException e) {
        logger.error("computing cosine similarity ({}) ", e.toString());
        System.exit(-1);
    }

    // tfidfVector fills the given map and returns a value used here as the
    // vector's norm (NOTE(review): inferred from usage - confirm against tfidfVector).
    Map<String, Double> xTfidf = new HashMap<String, Double>();
    Map<String, Double> yTfidf = new HashMap<String, Double>();
    double xnorm = tfidfVector(xTfidf, xfrequencies, field);
    double ynorm = tfidfVector(yTfidf, yfrequencies, field);

    double dotproduct = 0;

    // Only terms present in both vectors contribute to the dot product.
    for (Map.Entry<String, Double> k : xTfidf.entrySet()) {
        Double yWeight = yTfidf.get(k.getKey());
        if (yWeight != null) {
            logger.info("key {}", k.getKey());
            logger.info("key x {} y {} ", k.getValue(), yWeight);
            dotproduct += k.getValue() * yWeight;
            logger.info("dotproduct {} ", dotproduct);
        }
    }

    // Avoid 0/0 = NaN when one of the vectors is empty or all-zero.
    double denominator = xnorm * ynorm;
    if (denominator == 0) {
        return 0;
    }
    return dotproduct / denominator;

}

From source file:lab_mri.RocchioExpander.java

/**
 * Collects a score for every term found in the term vectors of the given results.
 *
 * For each result the term vector of {@code field} is read from the index opened at a
 * fixed path; each term is scored as {@code beta * tf * log(numDocs / df)}, where
 * {@code df} is looked up in the "abst" field.
 *
 * NOTE(review): a term occurring in several results keeps only the score computed last,
 * since scores are stored with {@code put} and not accumulated - confirm this is intended.
 * NOTE(review): the term vector comes from {@code field} while the document frequency is
 * always taken from "abst" - confirm these are meant to be the same field.
 *
 * @param results the search results whose ids identify the documents to read
 * @return the (term, score) entries, in the iteration order of the backing hash map
 * @throws CorruptIndexException declared by the signature; raised by the index calls
 * @throws IOException           if the index cannot be opened or read
 */
private List<Entry<String, Float>> getTermScoreList(List<SearchResult> results)
        throws CorruptIndexException, IOException {
    Map<String, Float> scores = new HashMap<>();
    try (IndexReader indexReader = IndexReader
            .open(FSDirectory.open(new File("/home/luigi/NetBeansProjects/LAB_mri/inv_index")))) {
        int totalDocs = indexReader.numDocs();

        for (SearchResult hit : results) {
            // The result id is shifted by one because the index starts from zero.
            int docNumber = Integer.parseInt(hit.getId()) - 1;
            Terms vector = indexReader.getTermVector(docNumber, field);
            if (vector == null) {
                continue;
            }

            TermsEnum terms = vector.iterator(null);
            for (BytesRef termBytes = terms.next(); termBytes != null; termBytes = terms.next()) {
                String termText = termBytes.utf8ToString();
                double tf = terms.totalTermFreq();
                double df = indexReader.docFreq(new Term("abst", termBytes));
                float idf = (float) Math.log(totalDocs / df);
                float tfidf = (float) (tf * idf);
                scores.put(termText, beta * tfidf);
            }
        }

        List<Entry<String, Float>> entries = new ArrayList<Entry<String, Float>>(scores.entrySet());
        return entries;
    }
}