Example usage for org.apache.lucene.util BytesRef BytesRef

List of usage examples for org.apache.lucene.util BytesRef BytesRef

Introduction

On this page you can find example usage for org.apache.lucene.util BytesRef BytesRef.

Prototype

public BytesRef(CharSequence text) 

Source Link

Document

Initialize the byte[] from the UTF8 bytes for the provided String.

Usage

From source file:com.meizu.nlp.classification.KNearestNeighborClassifier.java

License:Apache License

/**
 * Converts the k nearest neighbours in {@code topDocs} into a list of
 * classification results, one per distinct class label.
 *
 * @param topDocs the top-k documents returned by the neighbour search
 * @return one {@link ClassificationResult} per class, scored as the fraction
 *         of the k neighbours carrying that class
 * @throws IOException if reading a stored document from the index fails
 */
private List<ClassificationResult<BytesRef>> buildListFromTopDocs(TopDocs topDocs) throws IOException {
    // Tally how many of the retrieved neighbours carry each class label.
    Map<BytesRef, Integer> classCounts = new HashMap<>();
    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
        BytesRef cl = new BytesRef(indexSearcher.doc(scoreDoc.doc).getField(classFieldName).stringValue());
        // Idiomatic counting: replaces the manual get / null-check / put dance.
        classCounts.merge(cl, 1, Integer::sum);
    }
    List<ClassificationResult<BytesRef>> returnList = new ArrayList<>(classCounts.size());
    int sumdoc = 0;
    for (Map.Entry<BytesRef, Integer> entry : classCounts.entrySet()) {
        Integer count = entry.getValue();
        // clone() so the result owns its bytes independently of the map key.
        returnList.add(new ClassificationResult<>(entry.getKey().clone(), count / (double) k));
        sumdoc += count;
    }

    // Correction: if fewer than k neighbours were actually retrieved, scale the
    // scores up so they are relative to the documents seen rather than to k.
    if (sumdoc < k) {
        for (ClassificationResult<BytesRef> cr : returnList) {
            cr.setScore(cr.getScore() * (double) k / (double) sumdoc);
        }
    }
    return returnList;
}

From source file:com.o19s.solr.swan.nodes.SwanRangeNode.java

License:Apache License

/**
 * Populates {@code bounds} for a range comparison, or returns a ready-made
 * query for the operations that are not expressible as one-sided bounds.
 *
 * @param bounds    the range bounds to fill in for inequality operations
 * @param operation the comparison operator; {@code null} yields {@code null}
 * @param value     the comparison value; {@code null} yields {@code null}
 * @return a Query for EQUAL / NOT_EQUAL, otherwise {@code null} (bounds are
 *         mutated in place for the inequality operations)
 */
private Query setSwanRangeBounds(SwanRangeBounds bounds, Operation operation, String value) {
    // Nothing to do without both an operator and a comparison value.
    if (value == null || operation == null)
        return null;

    switch (operation) {
    case LESS_THAN:       // @key <  value
    case LESS_THAN_EQUAL: // @key <= value
        bounds.ref_upper = new BytesRef(value);
        bounds.inc_upper = (operation == Operation.LESS_THAN_EQUAL);
        break;
    case GREATER_THAN:       // @key >  value
    case GREATER_THAN_EQUAL: // @key >= value
        bounds.ref_lower = new BytesRef(value);
        bounds.inc_lower = (operation == Operation.GREATER_THAN_EQUAL);
        break;
    case EQUAL: {
        // Equality is answered by a plain term query, not by range bounds.
        SwanTermNode eqNode = new SwanTermNode(value);
        eqNode.setField(_field);
        eqNode.setSchema(schema);
        return eqNode.getQuery();
    }
    case NOT_EQUAL: {
        // "not equal" decomposes into strictly-less OR strictly-greater.
        SwanOrOperationNode orNode = new SwanOrOperationNode(new SwanRangeNode(_field, "<", value),
                new SwanRangeNode(_field, ">", value));
        orNode.setSchema(schema);
        return orNode.getQuery();
    }
    default:
        break;
    }

    return null;
}

From source file:com.quentinxxz.lucene.kv.IntPayloadTokenizer.java

License:Open Source License

@Override
public final boolean incrementToken() throws IOException {
    // Per-token reset of all attributes before producing the next token.
    clearAttributes();
    int length = 0;
    int start = -1; // this variable is always initialized
    char[] buffer = termAtt.buffer();
    while (true) {
        // Refill the I/O buffer when the current chunk is exhausted.
        if (bufferIndex >= dataLen) {
            offset += dataLen;
            charUtils.fill(ioBuffer, input); // read supplementary char aware with CharacterUtils
            if (ioBuffer.getLength() == 0) {
                dataLen = 0; // so next offset += dataLen won't decrement offset
                if (length > 0) {
                    // End of input but a token is pending: emit it.
                    break;
                } else {
                    // End of input, nothing buffered: stream is done.
                    return false;
                }
            }
            dataLen = ioBuffer.getLength();
            bufferIndex = 0;
        }
        // use CharacterUtils here to support < 3.1 UTF-16 code unit behavior if the char based methods are gone
        final int c = charUtils.codePointAt(ioBuffer.getBuffer(), bufferIndex, ioBuffer.getLength());
        final int charCount = Character.charCount(c);
        bufferIndex += charCount;

        if (isTokenChar(c)) { // if it's a token char
            if (length == 0) { // start of token
                assert start == -1;
                start = offset + bufferIndex - charCount;
            } else if (length >= buffer.length - 1) { // check if a supplementary could run out of bounds
                buffer = termAtt.resizeBuffer(2 + length); // make sure a supplementary fits in the buffer
            }
            length += Character.toChars(normalize(c), buffer, length); // buffer it, normalized
            if (length >= MAX_WORD_LEN) // buffer overflow! make sure to check for >= surrogate pair could break ==
                                        // test
                break;
        } else if (length > 0) // at non-Letter w/ chars
            break; // return 'em
    }
    // The raw token may carry an integer weight suffix separated by
    // weightSplitChar, e.g. "label<sep>42"; split it off here.
    String mayTerm = new String(buffer, 0, length);
    int index = mayTerm.indexOf(weightSplitChar);
    String label = mayTerm;
    int actualLength = length;
    int actualWeight = 0; // 0 = default weight when no separator is present
    if (index != -1) {
        label = mayTerm.substring(0, index);
        actualLength = index;
        // NOTE(review): parseInt throws NumberFormatException on a malformed
        // weight suffix — confirm upstream input is always well-formed.
        actualWeight = Integer.parseInt(mayTerm.substring(index + 1));
    }
    assert start != -1;

    // Skip duplicate labels by recursing to the next token.
    // NOTE(review): recursion depth grows with consecutive duplicates —
    // a long run of repeated labels could deepen the stack; verify inputs.
    if (labels.contains(label)) {
        return incrementToken();
    } else {
        labels.add(label);
    }

    // Truncate the term to the label portion (weight suffix excluded).
    termAtt.setLength(actualLength);

    // Encode the weight as a 4-byte big-endian payload on the token.
    payload.setPayload(new BytesRef(Ints.toByteArray(actualWeight)));
    return true;
}

From source file:com.querydsl.lucene4.LuceneSerializer.java

License:Apache License

/**
 * Encodes a numeric value into Lucene's prefix-coded term bytes so it can be
 * compared against indexed numeric terms.
 *
 * <p>Integral 32-bit types are int-prefix-coded; Double/BigDecimal and
 * Long/BigInteger are long-prefix-coded (doubles via their sortable-long
 * form); Float goes through its sortable-int form.
 *
 * @param number the numeric value to encode
 * @return the prefix-coded bytes for {@code number}
 * @throws IllegalArgumentException if the concrete Number type is unsupported
 */
private BytesRef convertNumber(Number number) {
    // Branch order matters: BigDecimal must hit the double path before the
    // long path, so the Double/BigDecimal test precedes Long/BigInteger.
    if (number instanceof Integer || number instanceof Byte || number instanceof Short) {
        BytesRef encoded = new BytesRef(NumericUtils.BUF_SIZE_INT);
        NumericUtils.intToPrefixCoded(number.intValue(), 0, encoded);
        return encoded;
    }
    if (number instanceof Double || number instanceof BigDecimal) {
        BytesRef encoded = new BytesRef(NumericUtils.BUF_SIZE_LONG);
        long sortable = NumericUtils.doubleToSortableLong(number.doubleValue());
        NumericUtils.longToPrefixCoded(sortable, 0, encoded);
        return encoded;
    }
    if (number instanceof Long || number instanceof BigInteger) {
        BytesRef encoded = new BytesRef(NumericUtils.BUF_SIZE_LONG);
        NumericUtils.longToPrefixCoded(number.longValue(), 0, encoded);
        return encoded;
    }
    if (number instanceof Float) {
        BytesRef encoded = new BytesRef(NumericUtils.BUF_SIZE_INT);
        int sortable = NumericUtils.floatToSortableInt(number.floatValue());
        NumericUtils.intToPrefixCoded(sortable, 0, encoded);
        return encoded;
    }
    throw new IllegalArgumentException("Unsupported numeric type " + number.getClass().getName());
}

From source file:com.querydsl.lucene5.LuceneQueryTest.java

License:Apache License

/**
 * Builds (or refreshes) a test Document with title/author/text/year/gross
 * fields plus their doc-values counterparts for sorting.
 *
 * <p>Field instances are cached on first use and only have their values
 * updated on subsequent calls, avoiding re-allocation per document.
 *
 * @param docTitle  title text (also indexed as sorted doc values)
 * @param docAuthor author text (also indexed as sorted doc values)
 * @param docText   body text (also indexed as sorted doc values)
 * @param docYear   year (also indexed as numeric doc values)
 * @param docGross  gross amount (also indexed as double doc values)
 * @return a Document containing all five field pairs
 */
private Document createDocument(final String docTitle, final String docAuthor, final String docText,
        final int docYear, final double docGross) {
    Document doc = new Document();

    // Title: reuse cached fields when available, otherwise create them.
    if (titleField != null) {
        titleField.setStringValue(docTitle);
        titleSortedField.setBytesValue(new BytesRef(docTitle));
        doc.add(titleField);
        doc.add(titleSortedField);
    } else {
        titleField = new TextField("title", docTitle, Store.YES);
        doc.add(titleField);
        titleSortedField = new SortedDocValuesField("title", new BytesRef(docTitle));
        doc.add(titleSortedField);
    }

    // Author: same reuse pattern as title.
    if (authorField != null) {
        authorField.setStringValue(docAuthor);
        authorSortedField.setBytesValue(new BytesRef(docAuthor));
        doc.add(authorField);
        doc.add(authorSortedField);
    } else {
        authorField = new TextField("author", docAuthor, Store.YES);
        doc.add(authorField);
        authorSortedField = new SortedDocValuesField("author", new BytesRef(docAuthor));
        doc.add(authorSortedField);
    }

    // Text body: same reuse pattern.
    if (textField != null) {
        textField.setStringValue(docText);
        textSortedField.setBytesValue(new BytesRef(docText));
        doc.add(textField);
        doc.add(textSortedField);
    } else {
        textField = new TextField("text", docText, Store.YES);
        doc.add(textField);
        textSortedField = new SortedDocValuesField("text", new BytesRef(docText));
        doc.add(textSortedField);
    }

    // Year: numeric field plus numeric doc values for sorting.
    if (yearField != null) {
        yearField.setIntValue(docYear);
        yearSortedField.setLongValue(docYear);
        doc.add(yearField);
        doc.add(yearSortedField);
    } else {
        yearField = new IntField("year", docYear, Store.YES);
        doc.add(yearField);
        yearSortedField = new NumericDocValuesField("year", docYear);
        doc.add(yearSortedField);
    }

    // Gross: double field plus double doc values for sorting.
    if (grossField != null) {
        grossField.setDoubleValue(docGross);
        grossSortedField.setDoubleValue(docGross);
        doc.add(grossField);
        doc.add(grossSortedField);
    } else {
        grossField = new DoubleField("gross", docGross, Store.YES);
        doc.add(grossField);
        grossSortedField = new DoubleDocValuesField("gross", docGross);
        doc.add(grossSortedField);
    }

    return doc;
}

From source file:com.qwazr.search.bench.test.FullText.ShortAbstractLuceneTest.java

License:Apache License

/**
 * Indexes one TTL line: the subject becomes the document key (URL term and
 * string field) and the object is indexed as the short-abstract full text.
 */
@Override
final public void accept(final TtlLineReader lineReader, final LuceneRecord.Indexable record) {
    final BytesRef subjectBytes = new BytesRef(lineReader.subject);
    // Reset the record keyed on the subject so re-indexing updates in place.
    record.reset(new Term(URL, subjectBytes));
    record.document.add(new StringField(URL, subjectBytes, Field.Store.NO));
    record.document.add(new TextField(SHORT_ABSTRACT, lineReader.object, Field.Store.NO));
}

From source file:com.qwazr.search.bench.test.Merging.UpdateBinaryDocValuesTest.java

License:Apache License

/**
 * Builds the binary doc-values field under test: name "dv", value the
 * UTF-8 bytes of "dv" concatenated with the document id.
 */
@Override
protected BinaryDocValuesField getField(String id) {
    final BytesRef docValue = new BytesRef("dv" + id);
    return new BinaryDocValuesField("dv", docValue);
}

From source file:com.qwazr.search.bench.test.SortedSetFacet.SortedSetFacetLuceneTest.java

License:Apache License

/**
 * Indexes one TTL line for the sorted-set-facet benchmark: the subject is
 * the document key, the predicate becomes a sorted-set facet dimension.
 */
@Override
final public void accept(final TtlLineReader lineReader, final LuceneRecord.Indexable record) {
    final BytesRef subjectBytes = new BytesRef(lineReader.subject);
    // Reset the record keyed on the subject so re-indexing updates in place.
    record.reset(new Term(URL, subjectBytes));
    record.document.add(new StringField(URL, subjectBytes, Field.Store.NO));
    record.document.add(new SortedSetDocValuesFacetField(PREDICATE, lineReader.predicate));
}

From source file:com.qwazr.search.bench.test.TaxonomyFacet.TaxonomyFacetLuceneTest.java

License:Apache License

/**
 * Indexes one TTL line for the taxonomy-facet benchmark: the subject is
 * the document key, the predicate becomes a taxonomy facet dimension.
 */
@Override
final public void accept(final TtlLineReader lineReader, final LuceneRecord.Indexable record) {
    final BytesRef subjectBytes = new BytesRef(lineReader.subject);
    // Reset the record keyed on the subject so re-indexing updates in place.
    record.reset(new Term(URL, subjectBytes));
    record.document.add(new StringField(URL, subjectBytes, Field.Store.NO));
    record.document.add(new FacetField(PREDICATE, lineReader.predicate));
}

From source file:com.qwazr.search.field.BinaryDocValuesType.java

License:Apache License

/**
 * Stores the value's string form as the UTF-8 bytes of a binary
 * doc-values field and hands the field to the consumer.
 */
@Override
final public void fillValue(final Object value, final FieldConsumer consumer) {
    final BytesRef encoded = new BytesRef(value.toString());
    consumer.accept(new BinaryDocValuesField(fieldName, encoded));
}