Example usage for org.apache.lucene.util BytesRef BytesRef

List of usage examples for org.apache.lucene.util BytesRef BytesRef

Introduction

On this page you can find example usage for org.apache.lucene.util BytesRef BytesRef.

Prototype

public BytesRef(CharSequence text) 

Source Link

Document

Initialize the byte[] from the UTF8 bytes for the provided String.

Usage

From source file:com.meizu.nlp.classification.KNearestNeighborClassifier.java

License:Apache License

/**
 * Converts the k nearest neighbours in {@code topDocs} into a list of
 * classification results, one per distinct class label.
 *
 * @param topDocs the top-k documents returned by the neighbour search
 * @return one {@link ClassificationResult} per class, scored as the fraction
 *         of the k neighbours carrying that class
 * @throws IOException if reading a stored document from the index fails
 */
private List<ClassificationResult<BytesRef>> buildListFromTopDocs(TopDocs topDocs) throws IOException {
    // Tally how many of the retrieved neighbours carry each class label.
    Map<BytesRef, Integer> classCounts = new HashMap<>();
    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
        BytesRef cl = new BytesRef(indexSearcher.doc(scoreDoc.doc).getField(classFieldName).stringValue());
        // Idiomatic counting: replaces the manual get / null-check / put dance.
        classCounts.merge(cl, 1, Integer::sum);
    }
    List<ClassificationResult<BytesRef>> returnList = new ArrayList<>(classCounts.size());
    int sumdoc = 0;
    for (Map.Entry<BytesRef, Integer> entry : classCounts.entrySet()) {
        Integer count = entry.getValue();
        // clone() so the result owns its bytes independently of the map key.
        returnList.add(new ClassificationResult<>(entry.getKey().clone(), count / (double) k));
        sumdoc += count;
    }

    // Correction: if fewer than k neighbours were actually retrieved, scale the
    // scores up so they are relative to the documents seen rather than to k.
    if (sumdoc < k) {
        for (ClassificationResult<BytesRef> cr : returnList) {
            cr.setScore(cr.getScore() * (double) k / (double) sumdoc);
        }
    }
    return returnList;
}

From source file:com.o19s.solr.swan.nodes.SwanRangeNode.java

License:Apache License

/**
 * Populates {@code bounds} for a range comparison, or returns a ready-made
 * query for the operations that are not expressible as one-sided bounds.
 *
 * @param bounds    the range bounds to fill in for inequality operations
 * @param operation the comparison operator; {@code null} yields {@code null}
 * @param value     the comparison value; {@code null} yields {@code null}
 * @return a Query for EQUAL / NOT_EQUAL, otherwise {@code null} (bounds are
 *         mutated in place for the inequality operations)
 */
private Query setSwanRangeBounds(SwanRangeBounds bounds, Operation operation, String value) {
    // Nothing to do without both an operator and a comparison value.
    if (value == null || operation == null)
        return null;

    switch (operation) {
    case LESS_THAN:       // @key <  value
    case LESS_THAN_EQUAL: // @key <= value
        bounds.ref_upper = new BytesRef(value);
        bounds.inc_upper = (operation == Operation.LESS_THAN_EQUAL);
        break;
    case GREATER_THAN:       // @key >  value
    case GREATER_THAN_EQUAL: // @key >= value
        bounds.ref_lower = new BytesRef(value);
        bounds.inc_lower = (operation == Operation.GREATER_THAN_EQUAL);
        break;
    case EQUAL: {
        // Equality is answered by a plain term query, not by range bounds.
        SwanTermNode eqNode = new SwanTermNode(value);
        eqNode.setField(_field);
        eqNode.setSchema(schema);
        return eqNode.getQuery();
    }
    case NOT_EQUAL: {
        // "not equal" decomposes into strictly-less OR strictly-greater.
        SwanOrOperationNode orNode = new SwanOrOperationNode(new SwanRangeNode(_field, "<", value),
                new SwanRangeNode(_field, ">", value));
        orNode.setSchema(schema);
        return orNode.getQuery();
    }
    default:
        break;
    }

    return null;
}

From source file:com.quentinxxz.lucene.kv.IntPayloadTokenizer.java

License:Open Source License

@Override
public final boolean incrementToken() throws IOException {
    // Per-token reset of all attributes before producing the next token.
    clearAttributes();
    int length = 0;
    int start = -1; // this variable is always initialized
    char[] buffer = termAtt.buffer();
    while (true) {
        // Refill the I/O buffer when the current chunk is exhausted.
        if (bufferIndex >= dataLen) {
            offset += dataLen;
            charUtils.fill(ioBuffer, input); // read supplementary char aware with CharacterUtils
            if (ioBuffer.getLength() == 0) {
                dataLen = 0; // so next offset += dataLen won't decrement offset
                if (length > 0) {
                    // End of input but a token is pending: emit it.
                    break;
                } else {
                    // End of input, nothing buffered: stream is done.
                    return false;
                }
            }
            dataLen = ioBuffer.getLength();
            bufferIndex = 0;
        }
        // use CharacterUtils here to support < 3.1 UTF-16 code unit behavior if the char based methods are gone
        final int c = charUtils.codePointAt(ioBuffer.getBuffer(), bufferIndex, ioBuffer.getLength());
        final int charCount = Character.charCount(c);
        bufferIndex += charCount;

        if (isTokenChar(c)) { // if it's a token char
            if (length == 0) { // start of token
                assert start == -1;
                start = offset + bufferIndex - charCount;
            } else if (length >= buffer.length - 1) { // check if a supplementary could run out of bounds
                buffer = termAtt.resizeBuffer(2 + length); // make sure a supplementary fits in the buffer
            }
            length += Character.toChars(normalize(c), buffer, length); // buffer it, normalized
            if (length >= MAX_WORD_LEN) // buffer overflow! make sure to check for >= surrogate pair could break ==
                                        // test
                break;
        } else if (length > 0) // at non-Letter w/ chars
            break; // return 'em
    }
    // The raw token may carry an integer weight suffix separated by
    // weightSplitChar, e.g. "label<sep>42"; split it off here.
    String mayTerm = new String(buffer, 0, length);
    int index = mayTerm.indexOf(weightSplitChar);
    String label = mayTerm;
    int actualLength = length;
    int actualWeight = 0; // 0 = default weight when no separator is present
    if (index != -1) {
        label = mayTerm.substring(0, index);
        actualLength = index;
        // NOTE(review): parseInt throws NumberFormatException on a malformed
        // weight suffix — confirm upstream input is always well-formed.
        actualWeight = Integer.parseInt(mayTerm.substring(index + 1));
    }
    assert start != -1;

    // Skip duplicate labels by recursing to the next token.
    // NOTE(review): recursion depth grows with consecutive duplicates —
    // a long run of repeated labels could deepen the stack; verify inputs.
    if (labels.contains(label)) {
        return incrementToken();
    } else {
        labels.add(label);
    }

    // Truncate the term to the label portion (weight suffix excluded).
    termAtt.setLength(actualLength);

    // Encode the weight as a 4-byte big-endian payload on the token.
    payload.setPayload(new BytesRef(Ints.toByteArray(actualWeight)));
    return true;
}

From source file:com.querydsl.lucene4.LuceneSerializer.java

License:Apache License

/**
 * Encodes a numeric value into Lucene's prefix-coded term bytes so it can be
 * compared against indexed numeric terms.
 *
 * <p>Integral 32-bit types are int-prefix-coded; Double/BigDecimal and
 * Long/BigInteger are long-prefix-coded (doubles via their sortable-long
 * form); Float goes through its sortable-int form.
 *
 * @param number the numeric value to encode
 * @return the prefix-coded bytes for {@code number}
 * @throws IllegalArgumentException if the concrete Number type is unsupported
 */
private BytesRef convertNumber(Number number) {
    // Branch order matters: BigDecimal must hit the double path before the
    // long path, so the Double/BigDecimal test precedes Long/BigInteger.
    if (number instanceof Integer || number instanceof Byte || number instanceof Short) {
        BytesRef encoded = new BytesRef(NumericUtils.BUF_SIZE_INT);
        NumericUtils.intToPrefixCoded(number.intValue(), 0, encoded);
        return encoded;
    }
    if (number instanceof Double || number instanceof BigDecimal) {
        BytesRef encoded = new BytesRef(NumericUtils.BUF_SIZE_LONG);
        long sortable = NumericUtils.doubleToSortableLong(number.doubleValue());
        NumericUtils.longToPrefixCoded(sortable, 0, encoded);
        return encoded;
    }
    if (number instanceof Long || number instanceof BigInteger) {
        BytesRef encoded = new BytesRef(NumericUtils.BUF_SIZE_LONG);
        NumericUtils.longToPrefixCoded(number.longValue(), 0, encoded);
        return encoded;
    }
    if (number instanceof Float) {
        BytesRef encoded = new BytesRef(NumericUtils.BUF_SIZE_INT);
        int sortable = NumericUtils.floatToSortableInt(number.floatValue());
        NumericUtils.intToPrefixCoded(sortable, 0, encoded);
        return encoded;
    }
    throw new IllegalArgumentException("Unsupported numeric type " + number.getClass().getName());
}

From source file:com.querydsl.lucene5.LuceneQueryTest.java

License:Apache License

/**
 * Builds (or refreshes) a test Document with title/author/text/year/gross
 * fields plus their doc-values counterparts for sorting.
 *
 * <p>Field instances are cached on first use and only have their values
 * updated on subsequent calls, avoiding re-allocation per document.
 *
 * @param docTitle  title text (also indexed as sorted doc values)
 * @param docAuthor author text (also indexed as sorted doc values)
 * @param docText   body text (also indexed as sorted doc values)
 * @param docYear   year (also indexed as numeric doc values)
 * @param docGross  gross amount (also indexed as double doc values)
 * @return a Document containing all five field pairs
 */
private Document createDocument(final String docTitle, final String docAuthor, final String docText,
        final int docYear, final double docGross) {
    Document doc = new Document();

    // Title: reuse cached fields when available, otherwise create them.
    if (titleField != null) {
        titleField.setStringValue(docTitle);
        titleSortedField.setBytesValue(new BytesRef(docTitle));
        doc.add(titleField);
        doc.add(titleSortedField);
    } else {
        titleField = new TextField("title", docTitle, Store.YES);
        doc.add(titleField);
        titleSortedField = new SortedDocValuesField("title", new BytesRef(docTitle));
        doc.add(titleSortedField);
    }

    // Author: same reuse pattern as title.
    if (authorField != null) {
        authorField.setStringValue(docAuthor);
        authorSortedField.setBytesValue(new BytesRef(docAuthor));
        doc.add(authorField);
        doc.add(authorSortedField);
    } else {
        authorField = new TextField("author", docAuthor, Store.YES);
        doc.add(authorField);
        authorSortedField = new SortedDocValuesField("author", new BytesRef(docAuthor));
        doc.add(authorSortedField);
    }

    // Text body: same reuse pattern.
    if (textField != null) {
        textField.setStringValue(docText);
        textSortedField.setBytesValue(new BytesRef(docText));
        doc.add(textField);
        doc.add(textSortedField);
    } else {
        textField = new TextField("text", docText, Store.YES);
        doc.add(textField);
        textSortedField = new SortedDocValuesField("text", new BytesRef(docText));
        doc.add(textSortedField);
    }

    // Year: numeric field plus numeric doc values for sorting.
    if (yearField != null) {
        yearField.setIntValue(docYear);
        yearSortedField.setLongValue(docYear);
        doc.add(yearField);
        doc.add(yearSortedField);
    } else {
        yearField = new IntField("year", docYear, Store.YES);
        doc.add(yearField);
        yearSortedField = new NumericDocValuesField("year", docYear);
        doc.add(yearSortedField);
    }

    // Gross: double field plus double doc values for sorting.
    if (grossField != null) {
        grossField.setDoubleValue(docGross);
        grossSortedField.setDoubleValue(docGross);
        doc.add(grossField);
        doc.add(grossSortedField);
    } else {
        grossField = new DoubleField("gross", docGross, Store.YES);
        doc.add(grossField);
        grossSortedField = new DoubleDocValuesField("gross", docGross);
        doc.add(grossSortedField);
    }

    return doc;
}

From source file:com.qwazr.search.bench.test.FullText.ShortAbstractLuceneTest.java

License:Apache License

/**
 * Indexes one TTL line: the subject becomes the document key (URL term and
 * string field) and the object is indexed as the short-abstract full text.
 */
@Override
final public void accept(final TtlLineReader lineReader, final LuceneRecord.Indexable record) {
    final BytesRef subjectBytes = new BytesRef(lineReader.subject);
    // Reset the record keyed on the subject so re-indexing updates in place.
    record.reset(new Term(URL, subjectBytes));
    record.document.add(new StringField(URL, subjectBytes, Field.Store.NO));
    record.document.add(new TextField(SHORT_ABSTRACT, lineReader.object, Field.Store.NO));
}

From source file:com.qwazr.search.bench.test.Merging.UpdateBinaryDocValuesTest.java

License:Apache License

/**
 * Builds the binary doc-values field under test: name "dv", value the
 * UTF-8 bytes of "dv" concatenated with the document id.
 */
@Override
protected BinaryDocValuesField getField(String id) {
    final BytesRef docValue = new BytesRef("dv" + id);
    return new BinaryDocValuesField("dv", docValue);
}

From source file:com.qwazr.search.bench.test.SortedSetFacet.SortedSetFacetLuceneTest.java

License:Apache License

/**
 * Indexes one TTL line for the sorted-set-facet benchmark: the subject is
 * the document key, the predicate becomes a sorted-set facet dimension.
 */
@Override
final public void accept(final TtlLineReader lineReader, final LuceneRecord.Indexable record) {
    final BytesRef subjectBytes = new BytesRef(lineReader.subject);
    // Reset the record keyed on the subject so re-indexing updates in place.
    record.reset(new Term(URL, subjectBytes));
    record.document.add(new StringField(URL, subjectBytes, Field.Store.NO));
    record.document.add(new SortedSetDocValuesFacetField(PREDICATE, lineReader.predicate));
}

From source file:com.qwazr.search.bench.test.TaxonomyFacet.TaxonomyFacetLuceneTest.java

License:Apache License

/**
 * Indexes one TTL line for the taxonomy-facet benchmark: the subject is
 * the document key, the predicate becomes a taxonomy facet dimension.
 */
@Override
final public void accept(final TtlLineReader lineReader, final LuceneRecord.Indexable record) {
    final BytesRef subjectBytes = new BytesRef(lineReader.subject);
    // Reset the record keyed on the subject so re-indexing updates in place.
    record.reset(new Term(URL, subjectBytes));
    record.document.add(new StringField(URL, subjectBytes, Field.Store.NO));
    record.document.add(new FacetField(PREDICATE, lineReader.predicate));
}

From source file:com.qwazr.search.field.BinaryDocValuesType.java

License:Apache License

/**
 * Stores the value's string form as the UTF-8 bytes of a binary
 * doc-values field and hands the field to the consumer.
 */
@Override
final public void fillValue(final Object value, final FieldConsumer consumer) {
    final BytesRef encoded = new BytesRef(value.toString());
    consumer.accept(new BinaryDocValuesField(fieldName, encoded));
}