List of usage examples for the org.apache.lucene.util.BytesRef constructor BytesRef
public BytesRef(CharSequence text)
From source file:SearcherTest.java
/** * TermRangeQuery /* w w w. j a v a2s . c o m*/ * TermRangeQuery??? * ?ASC??ASC? * ?ASC?? * ASC??TermRangeQuery * ?NumericRangeQuery * ????? * * @throws Exception */ @Test public void testTermRangeQuery() throws Exception { String searchField = "contents"; String q = "1000001----1000002"; String lowerTermString = "1000001"; String upperTermString = "1000003"; /** * field * lowerterm - *upperterm -? *includelower -lowerterm *includeupper -upperterm *https://yq.aliyun.com/articles/45353 */ Query query = new TermRangeQuery(searchField, new BytesRef(lowerTermString), new BytesRef(upperTermString), true, true); TopDocs hits = is.search(query, 10); System.out.println("? '" + q + "'" + hits.totalHits + ""); for (ScoreDoc scoreDoc : hits.scoreDocs) { Document doc = is.doc(scoreDoc.doc); System.out.println(doc.get("fullPath")); } }
From source file:DVBench.java
License:Apache License
static void doBench(int bpv) throws Exception { File file = new File("/data/indices/dvbench"); file.mkdirs();//from w w w .j a v a 2 s. co m Directory dir = FSDirectory.open(file); IndexWriterConfig config = new IndexWriterConfig(null); config.setOpenMode(OpenMode.CREATE); config.setMergeScheduler(new SerialMergeScheduler()); config.setMergePolicy(new LogDocMergePolicy()); config.setMaxBufferedDocs(25000); IndexWriter writer = new IndexWriter(dir, config); MyRandom r = new MyRandom(); int numdocs = 400000; Document doc = new Document(); Field dv = new NumericDocValuesField("dv", 0); Field inv = new LongField("inv", 0, Field.Store.NO); Field boxed = new BinaryDocValuesField("boxed", new BytesRef(8)); Field boxed2 = new BinaryDocValuesField("boxed2", new BytesRef(8)); doc.add(dv); doc.add(inv); doc.add(boxed); doc.add(boxed2); for (int i = 0; i < numdocs; i++) { // defeat blockpackedwriter final long value; if (i % 8192 == 0) { value = bpv == 64 ? Long.MIN_VALUE : 0; } else if (i % 8192 == 1) { value = bpv == 64 ? 
Long.MAX_VALUE : (1L << bpv) - 1; } else { value = r.nextLong(bpv); } dv.setLongValue(value); inv.setLongValue(value); box(value, boxed.binaryValue()); box(value, boxed2.binaryValue()); boxed2.binaryValue().length = (bpv + 7) / 8; // fixed length writer.addDocument(doc); } writer.close(); // run dv search tests String description = "dv (bpv=" + bpv + ")"; DirectoryReader reader = DirectoryReader.open(dir); IndexSearcher searcher = new IndexSearcher(reader); searcher.setQueryCache(null); // don't bench the cache int hash = 0; // warmup hash += search(description, searcher, "dv", 300, true); hash += search(description, searcher, "dv", 300, false); // Uninverting Map<String, UninvertingReader.Type> mapping = Collections.singletonMap("inv", UninvertingReader.Type.LONG); DirectoryReader uninv = UninvertingReader.wrap(reader, mapping); IndexSearcher searcher2 = new IndexSearcher(uninv); searcher2.setQueryCache(null); // don't bench the cache description = "fc (bpv=" + bpv + ")"; // warmup hash += search(description, searcher2, "inv", 300, true); hash += search(description, searcher2, "inv", 300, false); // Boxed inside binary DirectoryReader boxedReader = new BinaryAsVLongReader(reader); IndexSearcher searcher3 = new IndexSearcher(boxedReader); searcher3.setQueryCache(null); // don't bench the cache description = "boxed (bpv=" + bpv + ")"; // warmup hash += search(description, searcher3, "boxed", 300, true); hash += search(description, searcher3, "boxed", 300, false); description = "boxed fixed-length (bpv=" + bpv + ")"; // warmup hash += search(description, searcher3, "boxed2", 300, true); hash += search(description, searcher3, "boxed2", 300, false); if (hash == 3) { // wont happen System.out.println("hash=" + hash); } reader.close(); dir.close(); }
From source file:BuildFST.java
License:Apache License
/**
 * Builds an FST from the command-line arguments and prints it to standard output in dot
 * format.
 *
 * <p>Each argument is either {@code input} or {@code input/output}, split at the last
 * {@code '/'}. If every output parses as a long, the FST uses {@link PositiveIntOutputs}
 * (negative values are rejected); otherwise all outputs are stored as byte sequences.
 *
 * @param args the inputs, optionally with {@code /output} suffixes
 * @throws IOException if building or printing the FST fails
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
public static void main(String[] args) throws IOException {
    // First pass: decide whether all outputs are numeric; stop at the first one that is not.
    boolean allNumeric = true;
    boolean sawNegative = false;
    for (String arg : args) {
        int slash = arg.lastIndexOf('/');
        if (slash == -1) {
            continue;
        }
        try {
            if (Long.parseLong(arg.substring(slash + 1)) < 0) {
                sawNegative = true;
            }
        } catch (NumberFormatException nfe) {
            allNumeric = false;
            break;
        }
    }

    Outputs outputs;
    if (!allNumeric) {
        outputs = ByteSequenceOutputs.getSingleton();
    } else if (sawNegative) {
        throw new RuntimeException("can only handle numeric outputs >= 0");
    } else {
        outputs = PositiveIntOutputs.getSingleton();
    }

    // Second pass: turn every argument into an (input, output) pair.
    Pair<?>[] inputs = new Pair[args.length];
    for (int idx = 0; idx < args.length; idx++) {
        String arg = args[idx];
        int slash = arg.lastIndexOf('/');
        String input;
        Object output;
        if (slash != -1) {
            input = arg.substring(0, slash);
            String rawOutput = arg.substring(slash + 1);
            if (allNumeric) {
                output = Long.parseLong(rawOutput);
            } else {
                output = new BytesRef(rawOutput);
            }
        } else {
            // no explicit output: use the "no output" value of the chosen Outputs
            input = arg;
            output = outputs.getNoOutput();
        }
        inputs[idx] = new Pair(new BytesRef(input), output);
    }

    // The inputs are sorted before being added to the builder.
    Arrays.sort(inputs);

    FST<?> fst;
    if (allNumeric) {
        Builder<Long> longBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
        for (Pair entry : inputs) {
            IntsRefBuilder scratch = new IntsRefBuilder();
            Util.toIntsRef(entry.input, scratch);
            longBuilder.add(scratch.get(), (Long) entry.output);
        }
        fst = longBuilder.finish();
    } else {
        Builder<BytesRef> bytesBuilder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, outputs);
        for (Pair entry : inputs) {
            IntsRefBuilder scratch = new IntsRefBuilder();
            Util.toIntsRef(entry.input, scratch);
            bytesBuilder.add(scratch.get(), (BytesRef) entry.output);
        }
        fst = bytesBuilder.finish();
    }

    PrintWriter stdout = new PrintWriter(System.out);
    Util.toDot(fst, stdout, true, true);
}
From source file:IndexTaxis.java
License:Apache License
static void addOneField(Document doc, String fieldName, String rawValue) { // nocommit//from w w w. ja va2 s .c o m /* if (fieldName.equals("pick_up_lat")) { double value = Double.parseDouble(rawValue); doc.add(new DoublePoint(fieldName, value)); doc.add(new SortedNumericDocValuesField(fieldName, NumericUtils.doubleToSortableLong(value))); } */ switch (fieldName) { case "vendor_id": case "cab_color": case "payment_type": case "trip_type": case "rate_code": case "store_and_fwd_flag": doc.add(new StringField(fieldName, rawValue, Field.Store.NO)); doc.add(new SortedSetDocValuesField(fieldName, new BytesRef(rawValue))); break; case "vendor_name": doc.add(new TextField(fieldName, rawValue, Field.Store.NO)); break; case "pick_up_date_time": case "drop_off_date_time": { long value = Long.parseLong(rawValue); doc.add(new LongPoint(fieldName, value)); doc.add(new SortedNumericDocValuesField(fieldName, value)); } break; case "passenger_count": { int value = Integer.parseInt(rawValue); doc.add(new IntPoint(fieldName, value)); doc.add(new SortedNumericDocValuesField(fieldName, value)); } break; case "trip_distance": case "pick_up_lat": case "pick_up_lon": case "drop_off_lat": case "drop_off_lon": case "fare_amount": case "surcharge": case "mta_tax": case "extra": case "ehail_fee": case "improvement_surcharge": case "tip_amount": case "tolls_amount": case "total_amount": { double value; try { value = Double.parseDouble(rawValue); } catch (NumberFormatException nfe) { System.out.println( "WARNING: failed to parse \"" + rawValue + "\" as double for field \"" + fieldName + "\""); return; } doc.add(new DoublePoint(fieldName, value)); doc.add(new SortedNumericDocValuesField(fieldName, NumericUtils.doubleToSortableLong(value))); } break; default: throw new AssertionError("failed to handle field \"" + fieldName + "\""); } }
From source file:KNearestNeighborClassifier.java
License:Apache License
/** * build a list of classification results from search results * @param topDocs the search results as a {@link TopDocs} object * @return a {@link List} of {@link ClassificationResult}, one for each existing class * @throws IOException if it's not possible to get the stored value of class field *///from w ww . j a v a 2 s. c o m protected List<ClassificationResult<BytesRef>> buildListFromTopDocs(TopDocs topDocs) throws IOException { Map<BytesRef, Integer> classCounts = new HashMap<>(); Map<BytesRef, Double> classBoosts = new HashMap<>(); // this is a boost based on class ranking positions in topDocs float maxScore = topDocs.getMaxScore(); for (ScoreDoc scoreDoc : topDocs.scoreDocs) { IndexableField storableField = indexSearcher.doc(scoreDoc.doc).getField(classFieldName); if (storableField != null) { BytesRef cl = new BytesRef(storableField.stringValue()); //update count Integer count = classCounts.get(cl); if (count != null) { classCounts.put(cl, count + 1); } else { classCounts.put(cl, 1); } //update boost, the boost is based on the best score Double totalBoost = classBoosts.get(cl); double singleBoost = scoreDoc.score / maxScore; if (totalBoost != null) { classBoosts.put(cl, totalBoost + singleBoost); } else { classBoosts.put(cl, singleBoost); } } } List<ClassificationResult<BytesRef>> returnList = new ArrayList<>(); List<ClassificationResult<BytesRef>> temporaryList = new ArrayList<>(); int sumdoc = 0; for (Map.Entry<BytesRef, Integer> entry : classCounts.entrySet()) { Integer count = entry.getValue(); Double normBoost = classBoosts.get(entry.getKey()) / count; //the boost is normalized to be 0<b<1 temporaryList.add(new ClassificationResult<>(entry.getKey().clone(), (count * normBoost) / (double) k)); sumdoc += count; } //correction if (sumdoc < k) { for (ClassificationResult<BytesRef> cr : temporaryList) { returnList.add( new ClassificationResult<>(cr.getAssignedClass(), cr.getScore() * k / (double) sumdoc)); } } else { returnList = temporaryList; } return 
returnList; }
From source file:at.ac.univie.mminf.luceneSKOS.analysis.AbstractMeSHFilter.java
License:Apache License
/** * Replaces the current term (attributes) with term (attributes) from the * stack/* w w w .ja va 2s .c om*/ * * @throws IOException */ protected void processTermOnStack() throws IOException { ExpandedTerm expandedTerm = termStack.pop(); String term = expandedTerm.getTerm(); SKOSType termType = expandedTerm.getTermType(); String sTerm = ""; try { sTerm = analyze(analyzer, term, new CharsRef()).toString(); } catch (IllegalArgumentException e) { // skip this term return; } /* * copies the values of all attribute implementations from this state into * the implementations of the target stream */ restoreState(current); /* * Adds the expanded term to the term buffer */ termAtt.setEmpty().append(sTerm); /* * set position increment to zero to put multiple terms into the same * position */ posIncrAtt.setPositionIncrement(0); /* * sets the type of the expanded term (pref, alt, broader, narrower, etc.) */ skosAtt.setSkosType(termType); /* * converts the SKOS Attribute to a payload, which is propagated to the * index */ byte[] bytes = PayloadHelper.encodeInt(skosAtt.getSkosType().ordinal()); payloadAtt.setPayload(new BytesRef(bytes)); }
From source file:at.ac.univie.mminf.luceneSKOS.analysis.SNOMEDFilter.java
License:Apache License
/** * Replaces the current term (attributes) with term (attributes) from the * stack//from ww w . jav a2s . c o m * * @throws IOException */ protected void processTermOnStack() throws IOException { ExpandedTerm expandedTerm = termStack.pop(); String term = expandedTerm.getTerm(); SKOSType termType = expandedTerm.getTermType(); String sTerm = ""; try { sTerm = analyze(analyzer, term, new CharsRef()).toString(); } catch (IllegalArgumentException e) { // skip this term return; } /* * copies the values of all attribute implementations from this state * into the implementations of the target stream */ restoreState(current); /* * Adds the expanded term to the term buffer */ termAtt.setEmpty().append(sTerm); /* * set position increment to zero to put multiple terms into the same * position */ posIncrAtt.setPositionIncrement(0); /* * sets the type of the expanded term (pref, alt, broader, narrower, * etc.) */ skosAtt.setSkosType(termType); /* * converts the SKOS Attribute to a payload, which is propagated to the * index */ byte[] bytes = PayloadHelper.encodeInt(skosAtt.getSkosType().ordinal()); payloadAtt.setPayload(new BytesRef(bytes)); }
From source file:cc.pp.analyzer.ik.query.IKQueryExpressionParser.java
License:Apache License
/** * TermRangeQuery/*from ww w . j ava2 s. c om*/ * @param elements * @return */ private TermRangeQuery toTermRangeQuery(Element fieldNameEle, LinkedList<Element> elements) { boolean includeFirst = false; boolean includeLast = false; String firstValue = null; String lastValue = null; //?[{ Element first = elements.getFirst(); if ('[' == first.type) { includeFirst = true; } else if ('{' == first.type) { includeFirst = false; } else { throw new IllegalStateException("?"); } //??]} Element last = elements.getLast(); if (']' == last.type) { includeLast = true; } else if ('}' == last.type) { includeLast = false; } else { throw new IllegalStateException("?, RangeQuery??"); } if (elements.size() < 4 || elements.size() > 5) { throw new IllegalStateException("?, RangeQuery "); } // Element e2 = elements.get(1); if ('\'' == e2.type) { firstValue = e2.toString(); // Element e3 = elements.get(2); if (',' != e3.type) { throw new IllegalStateException("?, RangeQuery?"); } // Element e4 = elements.get(3); if ('\'' == e4.type) { lastValue = e4.toString(); } else if (e4 != last) { throw new IllegalStateException("?RangeQuery?"); } } else if (',' == e2.type) { firstValue = null; // Element e3 = elements.get(2); if ('\'' == e3.type) { lastValue = e3.toString(); } else { throw new IllegalStateException("?RangeQuery?"); } } else { throw new IllegalStateException("?, RangeQuery?"); } /** * lucene4.6? */ TermRangeQuery query = null; try { query = new TermRangeQuery(fieldNameEle.toString(), new BytesRef(firstValue.getBytes("UTF8")), new BytesRef(lastValue.getBytes("UTF8")), includeFirst, includeLast); } catch (UnsupportedEncodingException e) { throw new IllegalStateException("?, ??"); } return query; }
From source file:cn.codepub.redis.directory.Main.java
License:Apache License
/**
 * Creates a sample document with fifteen fields, {@code key1} to {@code key15}.
 *
 * <p>The numeric fields are derived from {@code i} (multiplied by 100000 or 50000); the
 * remaining text and doc-values fields are filled with random data.
 *
 * @param i seed value for the derived fields
 * @return the populated document
 */
private static Document addDocument(int i) {
    // The initializer is evaluated in order, so fields are created and added in key order.
    Field[] fields = {
        new StringField("key1", "key" + i, Field.Store.YES),
        new IntField("key2", i * 100000, Field.Store.YES),
        new FloatField("key3", (float) i * 100000, Field.Store.YES),
        new LongField("key4", (long) i * 100000, Field.Store.YES),
        new DoubleField("key5", (double) i * 100000, Field.Store.YES),
        new TextField("key6", RandomStringUtils.randomAlphabetic(10), Field.Store.YES),
        new StringField("key7", RandomStringUtils.randomAlphabetic(5), Field.Store.YES),
        new BinaryDocValuesField("key8", new BytesRef(RandomStringUtils.randomAlphabetic(5))),
        new DoubleDocValuesField("key9", RandomUtils.nextDouble(0, 1000)),
        new FloatDocValuesField("key10", RandomUtils.nextFloat(0, 1000)),
        new LongField("key11", (long) i * 50000, Field.Store.YES),
        new IntField("key12", i * 50000, Field.Store.YES),
        new FloatField("key13", (float) i * 50000, Field.Store.YES),
        new DoubleField("key14", (double) i * 50000, Field.Store.YES),
        new StringField("key15", RandomStringUtils.randomAlphabetic(6), Field.Store.YES),
    };

    Document document = new Document();
    for (Field field : fields) {
        document.add(field);
    }
    return document;
}
From source file:com.b2international.index.lucene.BooleanIndexField.java
License:Apache License
/**
 * Converts the given value to a {@link BytesRef} built from its string form, as produced by
 * {@code convertToString}.
 *
 * @param value the value to convert
 * @return a new {@code BytesRef} holding the string form of {@code value}
 */
@Override
protected BytesRef toBytesRef(Boolean value) {
    final String text = convertToString(value);
    return new BytesRef(text);
}