Usage examples for the org.apache.lucene.util.BytesRef constructor BytesRef(CharSequence)
public BytesRef(CharSequence text)
From source file:com.meizu.nlp.classification.KNearestNeighborClassifier.java
License:Apache License
private List<ClassificationResult<BytesRef>> buildListFromTopDocs(TopDocs topDocs) throws IOException { Map<BytesRef, Integer> classCounts = new HashMap<>(); for (ScoreDoc scoreDoc : topDocs.scoreDocs) { BytesRef cl = new BytesRef(indexSearcher.doc(scoreDoc.doc).getField(classFieldName).stringValue()); Integer count = classCounts.get(cl); if (count != null) { classCounts.put(cl, count + 1); } else {/*from w w w . j a v a2 s .c o m*/ classCounts.put(cl, 1); } } List<ClassificationResult<BytesRef>> returnList = new ArrayList<>(); int sumdoc = 0; for (Map.Entry<BytesRef, Integer> entry : classCounts.entrySet()) { Integer count = entry.getValue(); returnList.add(new ClassificationResult<>(entry.getKey().clone(), count / (double) k)); sumdoc += count; } //correction if (sumdoc < k) { for (ClassificationResult<BytesRef> cr : returnList) { cr.setScore(cr.getScore() * (double) k / (double) sumdoc); } } return returnList; }
From source file:com.o19s.solr.swan.nodes.SwanRangeNode.java
License:Apache License
private Query setSwanRangeBounds(SwanRangeBounds bounds, Operation operation, String value) { if (value == null || operation == null) return null; switch (operation) { case LESS_THAN: // @key < value bounds.inc_upper = false;//from www . java2 s. c o m bounds.ref_upper = new BytesRef(value); break; case LESS_THAN_EQUAL: // @key <= value bounds.inc_upper = true; bounds.ref_upper = new BytesRef(value); break; case GREATER_THAN: bounds.inc_lower = false; bounds.ref_lower = new BytesRef(value); break; case GREATER_THAN_EQUAL: bounds.inc_lower = true; bounds.ref_lower = new BytesRef(value); break; case EQUAL: SwanTermNode termNode = new SwanTermNode(value); termNode.setField(_field); termNode.setSchema(schema); return termNode.getQuery(); case NOT_EQUAL: SwanOrOperationNode node = new SwanOrOperationNode(new SwanRangeNode(_field, "<", value), new SwanRangeNode(_field, ">", value)); node.setSchema(schema); return node.getQuery(); default: break; } return null; }
From source file:com.quentinxxz.lucene.kv.IntPayloadTokenizer.java
License:Open Source License
@Override public final boolean incrementToken() throws IOException { clearAttributes();/* w w w .j av a 2 s . co m*/ int length = 0; int start = -1; // this variable is always initialized char[] buffer = termAtt.buffer(); while (true) { if (bufferIndex >= dataLen) { offset += dataLen; charUtils.fill(ioBuffer, input); // read supplementary char aware with CharacterUtils if (ioBuffer.getLength() == 0) { dataLen = 0; // so next offset += dataLen won't decrement offset if (length > 0) { break; } else { return false; } } dataLen = ioBuffer.getLength(); bufferIndex = 0; } // use CharacterUtils here to support < 3.1 UTF-16 code unit behavior if the char based methods are gone final int c = charUtils.codePointAt(ioBuffer.getBuffer(), bufferIndex, ioBuffer.getLength()); final int charCount = Character.charCount(c); bufferIndex += charCount; if (isTokenChar(c)) { // if it's a token char if (length == 0) { // start of token assert start == -1; start = offset + bufferIndex - charCount; } else if (length >= buffer.length - 1) { // check if a supplementary could run out of bounds buffer = termAtt.resizeBuffer(2 + length); // make sure a supplementary fits in the buffer } length += Character.toChars(normalize(c), buffer, length); // buffer it, normalized if (length >= MAX_WORD_LEN) // buffer overflow! make sure to check for >= surrogate pair could break == // test break; } else if (length > 0) // at non-Letter w/ chars break; // return 'em } String mayTerm = new String(buffer, 0, length); int index = mayTerm.indexOf(weightSplitChar); String label = mayTerm; int actualLength = length; int actualWeight = 0; // 0 if (index != -1) { label = mayTerm.substring(0, index); actualLength = index; actualWeight = Integer.parseInt(mayTerm.substring(index + 1)); } assert start != -1; if (labels.contains(label)) { return incrementToken(); } else { labels.add(label); } termAtt.setLength(actualLength); payload.setPayload(new BytesRef(Ints.toByteArray(actualWeight))); return true; }
From source file:com.querydsl.lucene4.LuceneSerializer.java
License:Apache License
private BytesRef convertNumber(Number number) { if (Integer.class.isInstance(number) || Byte.class.isInstance(number) || Short.class.isInstance(number)) { BytesRef bytes = new BytesRef(NumericUtils.BUF_SIZE_INT); NumericUtils.intToPrefixCoded(number.intValue(), 0, bytes); return bytes; } else if (Double.class.isInstance(number) || BigDecimal.class.isInstance(number)) { BytesRef bytes = new BytesRef(NumericUtils.BUF_SIZE_LONG); long l = NumericUtils.doubleToSortableLong(number.doubleValue()); NumericUtils.longToPrefixCoded(l, 0, bytes); return bytes; } else if (Long.class.isInstance(number) || BigInteger.class.isInstance(number)) { BytesRef bytes = new BytesRef(NumericUtils.BUF_SIZE_LONG); NumericUtils.longToPrefixCoded(number.longValue(), 0, bytes); return bytes; } else if (Float.class.isInstance(number)) { BytesRef bytes = new BytesRef(NumericUtils.BUF_SIZE_INT); int i = NumericUtils.floatToSortableInt(number.floatValue()); NumericUtils.intToPrefixCoded(i, 0, bytes); return bytes; } else {// w ww . java 2s. c o m throw new IllegalArgumentException("Unsupported numeric type " + number.getClass().getName()); } }
From source file:com.querydsl.lucene5.LuceneQueryTest.java
License:Apache License
private Document createDocument(final String docTitle, final String docAuthor, final String docText, final int docYear, final double docGross) { Document doc = new Document(); // Reusing field for performance if (titleField == null) { titleField = new TextField("title", docTitle, Store.YES); doc.add(titleField);//from w w w . j a v a 2s.co m titleSortedField = new SortedDocValuesField("title", new BytesRef(docTitle)); doc.add(titleSortedField); } else { titleField.setStringValue(docTitle); titleSortedField.setBytesValue(new BytesRef(docTitle)); doc.add(titleField); doc.add(titleSortedField); } if (authorField == null) { authorField = new TextField("author", docAuthor, Store.YES); doc.add(authorField); authorSortedField = new SortedDocValuesField("author", new BytesRef(docAuthor)); doc.add(authorSortedField); } else { authorField.setStringValue(docAuthor); authorSortedField.setBytesValue(new BytesRef(docAuthor)); doc.add(authorField); doc.add(authorSortedField); } if (textField == null) { textField = new TextField("text", docText, Store.YES); doc.add(textField); textSortedField = new SortedDocValuesField("text", new BytesRef(docText)); doc.add(textSortedField); } else { textField.setStringValue(docText); textSortedField.setBytesValue(new BytesRef(docText)); doc.add(textField); doc.add(textSortedField); } if (yearField == null) { yearField = new IntField("year", docYear, Store.YES); doc.add(yearField); yearSortedField = new NumericDocValuesField("year", docYear); doc.add(yearSortedField); } else { yearField.setIntValue(docYear); yearSortedField.setLongValue(docYear); doc.add(yearField); doc.add(yearSortedField); } if (grossField == null) { grossField = new DoubleField("gross", docGross, Store.YES); doc.add(grossField); grossSortedField = new DoubleDocValuesField("gross", docGross); doc.add(grossSortedField); } else { grossField.setDoubleValue(docGross); grossSortedField.setDoubleValue(docGross); doc.add(grossField); doc.add(grossSortedField); } return doc; }
From source file:com.qwazr.search.bench.test.FullText.ShortAbstractLuceneTest.java
License:Apache License
/**
 * Fills the record from one parsed line: the subject becomes both the record's term and an
 * unstored {@code URL} string field, and the object becomes an unstored
 * {@code SHORT_ABSTRACT} text field.
 *
 * @param lineReader source of the {@code subject} and {@code object} values
 * @param record     record that is reset and whose document receives the fields
 */
@Override
final public void accept(final TtlLineReader lineReader, final LuceneRecord.Indexable record) {
    final BytesRef subjectBytes = new BytesRef(lineReader.subject);
    final Term urlTerm = new Term(URL, subjectBytes);
    record.reset(urlTerm);

    final StringField urlField = new StringField(URL, subjectBytes, Field.Store.NO);
    record.document.add(urlField);

    final TextField abstractField = new TextField(SHORT_ABSTRACT, lineReader.object, Field.Store.NO);
    record.document.add(abstractField);
}
From source file:com.qwazr.search.bench.test.Merging.UpdateBinaryDocValuesTest.java
License:Apache License
/**
 * Creates the binary doc-values field named {@code "dv"} whose value is {@code "dv"}
 * followed by the given id.
 *
 * @param id identifier appended to the value prefix
 * @return a new field instance
 */
@Override
protected BinaryDocValuesField getField(String id) {
    final BytesRef fieldValue = new BytesRef("dv" + id);
    return new BinaryDocValuesField("dv", fieldValue);
}
From source file:com.qwazr.search.bench.test.SortedSetFacet.SortedSetFacetLuceneTest.java
License:Apache License
/**
 * Fills the record from one parsed line: the subject becomes both the record's term and an
 * unstored {@code URL} string field, and the predicate is added as a sorted-set facet under
 * {@code PREDICATE}.
 *
 * @param lineReader source of the {@code subject} and {@code predicate} values
 * @param record     record that is reset and whose document receives the fields
 */
@Override
final public void accept(final TtlLineReader lineReader, final LuceneRecord.Indexable record) {
    final BytesRef subjectBytes = new BytesRef(lineReader.subject);
    final Term urlTerm = new Term(URL, subjectBytes);
    record.reset(urlTerm);

    final StringField urlField = new StringField(URL, subjectBytes, Field.Store.NO);
    record.document.add(urlField);

    final SortedSetDocValuesFacetField facet =
            new SortedSetDocValuesFacetField(PREDICATE, lineReader.predicate);
    record.document.add(facet);
}
From source file:com.qwazr.search.bench.test.TaxonomyFacet.TaxonomyFacetLuceneTest.java
License:Apache License
/**
 * Fills the record from one parsed line: the subject becomes both the record's term and an
 * unstored {@code URL} string field, and the predicate is added as a facet field under
 * {@code PREDICATE}.
 *
 * @param lineReader source of the {@code subject} and {@code predicate} values
 * @param record     record that is reset and whose document receives the fields
 */
@Override
final public void accept(final TtlLineReader lineReader, final LuceneRecord.Indexable record) {
    final BytesRef subjectBytes = new BytesRef(lineReader.subject);
    final Term urlTerm = new Term(URL, subjectBytes);
    record.reset(urlTerm);

    final StringField urlField = new StringField(URL, subjectBytes, Field.Store.NO);
    record.document.add(urlField);

    final FacetField facet = new FacetField(PREDICATE, lineReader.predicate);
    record.document.add(facet);
}
From source file:com.qwazr.search.field.BinaryDocValuesType.java
License:Apache License
/**
 * Passes the consumer a binary doc-values field for {@code fieldName} whose bytes come from
 * the value's {@code toString()} text.
 *
 * @param value    object whose string form is stored
 * @param consumer receiver of the created field
 */
@Override
final public void fillValue(final Object value, final FieldConsumer consumer) {
    final BytesRef textBytes = new BytesRef(value.toString());
    final BinaryDocValuesField docValues = new BinaryDocValuesField(fieldName, textBytes);
    consumer.accept(docValues);
}