Example usage for the org.apache.lucene.util.BytesRef constructor BytesRef(CharSequence)

Introduction

This page collects example usages of the org.apache.lucene.util.BytesRef constructor BytesRef(CharSequence).

Prototype

public BytesRef(CharSequence text)

Source Link

Document

Initializes the byte[] with the UTF-8 encoding of the provided text.

Usage

From source file:SearcherTest.java

/**
 * TermRangeQuery /* w  w w. j  a  v  a2s . c o m*/
 * TermRangeQuery???
 * ?ASC??ASC?
 * ?ASC??
 * ASC??TermRangeQuery
 * ?NumericRangeQuery
 * ?????
 *
 * @throws Exception
 */
@Test
public void testTermRangeQuery() throws Exception {
    String searchField = "contents";
    String q = "1000001----1000002";
    String lowerTermString = "1000001";
    String upperTermString = "1000003";
    /**
     * field  
     * lowerterm -
     *upperterm -?
     *includelower -lowerterm
     *includeupper -upperterm
     *https://yq.aliyun.com/articles/45353
     */
    Query query = new TermRangeQuery(searchField, new BytesRef(lowerTermString), new BytesRef(upperTermString),
            true, true);
    TopDocs hits = is.search(query, 10);
    System.out.println("? '" + q + "'" + hits.totalHits + "");
    for (ScoreDoc scoreDoc : hits.scoreDocs) {
        Document doc = is.doc(scoreDoc.doc);
        System.out.println(doc.get("fullPath"));
    }
}

From source file:DVBench.java

License:Apache License

/**
 * Builds a throw-away index under {@code /data/indices/dvbench} and runs the {@code search}
 * helper against several views of it.
 * <p>
 * Every document carries the same generated value in four fields: {@code dv} (numeric doc
 * values), {@code inv} (indexed long), and {@code boxed} / {@code boxed2} (binary doc values
 * filled by the {@code box} helper). After indexing, {@code search} is invoked twice per
 * field on a plain reader, an {@link UninvertingReader} view of {@code inv}, and a
 * {@code BinaryAsVLongReader} view of the binary fields.
 *
 * @param bpv bit width of the generated values (presumably "bits per value"); 64 is
 *            special-cased so the extremes are {@code Long.MIN_VALUE} / {@code Long.MAX_VALUE}
 * @throws Exception if creating, writing or searching the index fails
 */
static void doBench(int bpv) throws Exception {
    File file = new File("/data/indices/dvbench");
    file.mkdirs();
    Directory dir = FSDirectory.open(file);
    IndexWriterConfig config = new IndexWriterConfig(null);
    // Always start from an empty index.
    config.setOpenMode(OpenMode.CREATE);
    config.setMergeScheduler(new SerialMergeScheduler());
    config.setMergePolicy(new LogDocMergePolicy());
    config.setMaxBufferedDocs(25000);
    IndexWriter writer = new IndexWriter(dir, config);

    MyRandom r = new MyRandom();
    int numdocs = 400000;
    // One Document and one set of Field instances are reused for every added document;
    // only the field values are changed inside the loop.
    Document doc = new Document();
    Field dv = new NumericDocValuesField("dv", 0);
    Field inv = new LongField("inv", 0, Field.Store.NO);
    Field boxed = new BinaryDocValuesField("boxed", new BytesRef(8));
    Field boxed2 = new BinaryDocValuesField("boxed2", new BytesRef(8));

    doc.add(dv);
    doc.add(inv);
    doc.add(boxed);
    doc.add(boxed2);
    for (int i = 0; i < numdocs; i++) {
        // defeat blockpackedwriter
        // (every 8192 documents the minimum and then the maximum value for this bit width
        // are written; all other documents get a value from MyRandom.nextLong(bpv))
        final long value;
        if (i % 8192 == 0) {
            value = bpv == 64 ? Long.MIN_VALUE : 0;
        } else if (i % 8192 == 1) {
            value = bpv == 64 ? Long.MAX_VALUE : (1L << bpv) - 1;
        } else {
            value = r.nextLong(bpv);
        }
        dv.setLongValue(value);
        inv.setLongValue(value);
        // box(...) is defined elsewhere; presumably it encodes the value into the field's
        // BytesRef in place - TODO confirm.
        box(value, boxed.binaryValue());
        box(value, boxed2.binaryValue());
        boxed2.binaryValue().length = (bpv + 7) / 8; // fixed length
        writer.addDocument(doc);
    }

    writer.close();

    // run dv search tests
    String description = "dv (bpv=" + bpv + ")";
    DirectoryReader reader = DirectoryReader.open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setQueryCache(null); // don't bench the cache

    // Accumulates the values returned by search(...); it is only inspected by the
    // "hash == 3" check at the end (presumably so the results count as used).
    int hash = 0;
    // warmup
    hash += search(description, searcher, "dv", 300, true);
    hash += search(description, searcher, "dv", 300, false);

    // Uninverting
    Map<String, UninvertingReader.Type> mapping = Collections.singletonMap("inv", UninvertingReader.Type.LONG);
    DirectoryReader uninv = UninvertingReader.wrap(reader, mapping);
    IndexSearcher searcher2 = new IndexSearcher(uninv);
    searcher2.setQueryCache(null); // don't bench the cache

    description = "fc (bpv=" + bpv + ")";
    // warmup
    hash += search(description, searcher2, "inv", 300, true);
    hash += search(description, searcher2, "inv", 300, false);

    // Boxed inside binary
    DirectoryReader boxedReader = new BinaryAsVLongReader(reader);
    IndexSearcher searcher3 = new IndexSearcher(boxedReader);
    searcher3.setQueryCache(null); // don't bench the cache
    description = "boxed (bpv=" + bpv + ")";
    // warmup
    hash += search(description, searcher3, "boxed", 300, true);
    hash += search(description, searcher3, "boxed", 300, false);

    description = "boxed fixed-length (bpv=" + bpv + ")";
    // warmup
    hash += search(description, searcher3, "boxed2", 300, true);
    hash += search(description, searcher3, "boxed2", 300, false);

    if (hash == 3) {
        // wont happen
        System.out.println("hash=" + hash);
    }
    // NOTE(review): only the base reader and the directory are closed explicitly;
    // uninv and boxedReader have no close() call of their own here.
    reader.close();
    dir.close();
}

From source file:BuildFST.java

License:Apache License

/**
 * Builds an FST from the command-line arguments and dumps it to standard output through
 * {@code Util.toDot}.
 * <p>
 * Each argument is either {@code input} or {@code input/output}; everything after the last
 * {@code '/'} is the output. When every supplied output parses as a {@code long}, the FST
 * uses {@code PositiveIntOutputs} (a negative output is rejected); otherwise it uses
 * {@code ByteSequenceOutputs} and each output is stored as a {@code BytesRef}.
 *
 * @param args the inputs, optionally suffixed with {@code /output}
 * @throws IOException declared for the FST building and printing calls
 * @throws RuntimeException if all outputs are numeric and at least one is negative
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
public static void main(String[] args) throws IOException {

    // First pass: decide whether all outputs are numeric, and whether any is negative.
    boolean allNumeric = true;
    boolean sawNegative = false;
    for (String arg : args) {
        final int slash = arg.lastIndexOf('/');
        if (slash == -1) {
            continue;
        }
        try {
            if (Long.parseLong(arg.substring(slash + 1)) < 0) {
                sawNegative = true;
            }
        } catch (NumberFormatException nfe) {
            // One non-numeric output switches the whole FST to byte-sequence outputs.
            allNumeric = false;
            break;
        }
    }

    final Outputs outputs;
    if (!allNumeric) {
        outputs = ByteSequenceOutputs.getSingleton();
    } else if (sawNegative) {
        throw new RuntimeException("can only handle numeric outputs >= 0");
    } else {
        outputs = PositiveIntOutputs.getSingleton();
    }

    // Second pass: turn every argument into an (input, output) pair.
    final Pair<?>[] pairs = new Pair[args.length];
    int slot = 0;
    for (String arg : args) {
        final int slash = arg.lastIndexOf('/');
        final String input;
        final Object output;
        if (slash == -1) {
            input = arg;
            output = outputs.getNoOutput();
        } else {
            input = arg.substring(0, slash);
            final String outputText = arg.substring(slash + 1);
            if (allNumeric) {
                output = Long.parseLong(outputText);
            } else {
                output = new BytesRef(outputText);
            }
        }
        pairs[slot++] = new Pair(new BytesRef(input), output);
    }
    // Sort the pairs before feeding them to the builder.
    Arrays.sort(pairs);

    final FST<?> fst;
    if (allNumeric) {
        Builder<Long> longBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
        for (Pair entry : pairs) {
            IntsRefBuilder scratch = new IntsRefBuilder();
            Util.toIntsRef(entry.input, scratch);
            longBuilder.add(scratch.get(), (Long) entry.output);
        }
        fst = longBuilder.finish();
    } else {
        Builder<BytesRef> bytesBuilder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, outputs);
        for (Pair entry : pairs) {
            IntsRefBuilder scratch = new IntsRefBuilder();
            Util.toIntsRef(entry.input, scratch);
            bytesBuilder.add(scratch.get(), (BytesRef) entry.output);
        }
        fst = bytesBuilder.finish();
    }

    final PrintWriter dotOut = new PrintWriter(System.out);
    Util.toDot(fst, dotOut, true, true);
}

From source file:IndexTaxis.java

License:Apache License

/**
 * Adds the Lucene field(s) for one named raw value to {@code doc}.
 * <p>
 * The field name selects the representation:
 * <ul>
 *   <li>id/flag-like names: {@code StringField} plus {@code SortedSetDocValuesField};</li>
 *   <li>{@code vendor_name}: {@code TextField};</li>
 *   <li>date-time names: parsed as {@code long}, {@code LongPoint} plus doc values;</li>
 *   <li>{@code passenger_count}: parsed as {@code int}, {@code IntPoint} plus doc values;</li>
 *   <li>distance/coordinate/amount names: parsed as {@code double}, {@code DoublePoint} plus
 *       doc values of the sortable-long encoding. An unparsable value prints a warning and
 *       the field is skipped.</li>
 * </ul>
 *
 * @param doc       document receiving the new fields
 * @param fieldName name of the field; must be one of the names handled below
 * @param rawValue  textual value to index
 * @throws NumberFormatException if a date-time or passenger-count value is not a valid number
 * @throws AssertionError if {@code fieldName} is not recognised
 */
static void addOneField(Document doc, String fieldName, String rawValue) {
    // nocommit: a dedicated "pick_up_lat" special case (DoublePoint plus
    // SortedNumericDocValuesField) used to sit here, commented out; that field is
    // handled by the generic double branch below.
    switch (fieldName) {
    case "vendor_id":
    case "cab_color":
    case "payment_type":
    case "trip_type":
    case "rate_code":
    case "store_and_fwd_flag":
        doc.add(new StringField(fieldName, rawValue, Field.Store.NO));
        doc.add(new SortedSetDocValuesField(fieldName, new BytesRef(rawValue)));
        return;

    case "vendor_name":
        doc.add(new TextField(fieldName, rawValue, Field.Store.NO));
        return;

    case "pick_up_date_time":
    case "drop_off_date_time": {
        final long longValue = Long.parseLong(rawValue);
        doc.add(new LongPoint(fieldName, longValue));
        doc.add(new SortedNumericDocValuesField(fieldName, longValue));
        return;
    }

    case "passenger_count": {
        final int intValue = Integer.parseInt(rawValue);
        doc.add(new IntPoint(fieldName, intValue));
        doc.add(new SortedNumericDocValuesField(fieldName, intValue));
        return;
    }

    case "trip_distance":
    case "pick_up_lat":
    case "pick_up_lon":
    case "drop_off_lat":
    case "drop_off_lon":
    case "fare_amount":
    case "surcharge":
    case "mta_tax":
    case "extra":
    case "ehail_fee":
    case "improvement_surcharge":
    case "tip_amount":
    case "tolls_amount":
    case "total_amount": {
        final double doubleValue;
        try {
            doubleValue = Double.parseDouble(rawValue);
        } catch (NumberFormatException nfe) {
            // Bad doubles are reported and skipped rather than failing the whole document.
            System.out.println(
                    "WARNING: failed to parse \"" + rawValue + "\" as double for field \"" + fieldName + "\"");
            return;
        }
        doc.add(new DoublePoint(fieldName, doubleValue));
        doc.add(new SortedNumericDocValuesField(fieldName, NumericUtils.doubleToSortableLong(doubleValue)));
        return;
    }

    default:
        throw new AssertionError("failed to handle field \"" + fieldName + "\"");
    }
}

From source file:KNearestNeighborClassifier.java

License:Apache License

/**
 * Turns search hits into one {@link ClassificationResult} per class found in them.
 * <p>
 * For every hit that has a value in the class field, the method counts the hit for that
 * class and adds {@code hit.score / maxScore} to the class's boost. Each class then gets the
 * score {@code (count * (boost / count)) / k}. If fewer than {@code k} hits carried a class,
 * every score is rescaled by {@code k / numberOfClassifiedHits}.
 *
 * @param topDocs the search results as a {@link TopDocs} object
 * @return a {@link List} of {@link ClassificationResult}, one for each class seen in the hits
 * @throws IOException if it's not possible to get the stored value of class field
 */
protected List<ClassificationResult<BytesRef>> buildListFromTopDocs(TopDocs topDocs) throws IOException {
    final Map<BytesRef, Integer> hitsPerClass = new HashMap<>();
    // Boost per class, based on the scores of that class's hits relative to the best score.
    final Map<BytesRef, Double> boostPerClass = new HashMap<>();
    final float bestScore = topDocs.getMaxScore();

    for (ScoreDoc hit : topDocs.scoreDocs) {
        final IndexableField classField = indexSearcher.doc(hit.doc).getField(classFieldName);
        if (classField == null) {
            // Hits without a class value contribute nothing.
            continue;
        }
        final BytesRef classLabel = new BytesRef(classField.stringValue());

        final Integer seenSoFar = hitsPerClass.get(classLabel);
        hitsPerClass.put(classLabel, seenSoFar == null ? 1 : seenSoFar + 1);

        final Double boostSoFar = boostPerClass.get(classLabel);
        final double hitBoost = hit.score / bestScore;
        boostPerClass.put(classLabel, boostSoFar == null ? hitBoost : boostSoFar + hitBoost);
    }

    final List<ClassificationResult<BytesRef>> uncorrected = new ArrayList<>();
    int classifiedHits = 0;
    for (Map.Entry<BytesRef, Integer> perClass : hitsPerClass.entrySet()) {
        final BytesRef classLabel = perClass.getKey();
        final int classHits = perClass.getValue();
        // Original author's note: the boost is normalized to be 0<b<1.
        final double meanBoost = boostPerClass.get(classLabel) / classHits;
        uncorrected.add(new ClassificationResult<>(classLabel.clone(), (classHits * meanBoost) / (double) k));
        classifiedHits += classHits;
    }

    if (classifiedHits < k) {
        // Correction: fewer than k hits had a class, so scale the scores up accordingly.
        final List<ClassificationResult<BytesRef>> corrected = new ArrayList<>();
        for (ClassificationResult<BytesRef> result : uncorrected) {
            corrected.add(new ClassificationResult<>(result.getAssignedClass(),
                    result.getScore() * k / (double) classifiedHits));
        }
        return corrected;
    }
    return uncorrected;
}

From source file:at.ac.univie.mminf.luceneSKOS.analysis.AbstractMeSHFilter.java

License:Apache License

/**
 * Replaces the current term (attributes) with term (attributes) from the
 * stack/* w  w w  .ja va  2s  .c  om*/
 * 
 * @throws IOException
 */
protected void processTermOnStack() throws IOException {
    ExpandedTerm expandedTerm = termStack.pop();

    String term = expandedTerm.getTerm();

    SKOSType termType = expandedTerm.getTermType();

    String sTerm = "";

    try {
        sTerm = analyze(analyzer, term, new CharsRef()).toString();
    } catch (IllegalArgumentException e) {
        // skip this term
        return;
    }

    /*
     * copies the values of all attribute implementations from this state into
     * the implementations of the target stream
     */
    restoreState(current);

    /*
     * Adds the expanded term to the term buffer
     */
    termAtt.setEmpty().append(sTerm);

    /*
     * set position increment to zero to put multiple terms into the same
     * position
     */
    posIncrAtt.setPositionIncrement(0);

    /*
     * sets the type of the expanded term (pref, alt, broader, narrower, etc.)
     */
    skosAtt.setSkosType(termType);

    /*
     * converts the SKOS Attribute to a payload, which is propagated to the
     * index
     */
    byte[] bytes = PayloadHelper.encodeInt(skosAtt.getSkosType().ordinal());
    payloadAtt.setPayload(new BytesRef(bytes));
}

From source file:at.ac.univie.mminf.luceneSKOS.analysis.SNOMEDFilter.java

License:Apache License

/**
 * Replaces the current term (attributes) with term (attributes) from the stack.
 * <p>
 * Pops one expanded term, runs it through {@code analyze}, restores the captured state
 * {@code current}, and then overwrites the term text, position increment, SKOS type and
 * payload. If {@code analyze} throws {@link IllegalArgumentException}, the popped term is
 * dropped and no attribute is changed.
 *
 * @throws IOException declared for the analysis step
 */
protected void processTermOnStack() throws IOException {
    ExpandedTerm expandedTerm = termStack.pop();

    String term = expandedTerm.getTerm();

    SKOSType termType = expandedTerm.getTermType();

    final String sTerm;

    try {
        sTerm = analyze(analyzer, term, new CharsRef()).toString();
    } catch (IllegalArgumentException e) {
        // skip this term
        return;
    }

    /*
     * copies the values of all attribute implementations from this state
     * into the implementations of the target stream
     */
    restoreState(current);

    /*
     * Adds the expanded term to the term buffer
     */
    termAtt.setEmpty().append(sTerm);

    /*
     * set position increment to zero to put multiple terms into the same
     * position
     */
    posIncrAtt.setPositionIncrement(0);

    /*
     * sets the type of the expanded term (pref, alt, broader, narrower,
     * etc.)
     */
    skosAtt.setSkosType(termType);

    /*
     * converts the SKOS Attribute to a payload, which is propagated to the
     * index (the payload is the int-encoded ordinal of the SKOS type)
     */
    byte[] bytes = PayloadHelper.encodeInt(skosAtt.getSkosType().ordinal());
    payloadAtt.setPayload(new BytesRef(bytes));
}

From source file:cc.pp.analyzer.ik.query.IKQueryExpressionParser.java

License:Apache License

/**
 * TermRangeQuery/*from  ww w .  j  ava2 s.  c  om*/
 * @param elements
 * @return
 */
private TermRangeQuery toTermRangeQuery(Element fieldNameEle, LinkedList<Element> elements) {

    boolean includeFirst = false;
    boolean includeLast = false;
    String firstValue = null;
    String lastValue = null;
    //?[{
    Element first = elements.getFirst();
    if ('[' == first.type) {
        includeFirst = true;
    } else if ('{' == first.type) {
        includeFirst = false;
    } else {
        throw new IllegalStateException("?");
    }
    //??]}
    Element last = elements.getLast();
    if (']' == last.type) {
        includeLast = true;
    } else if ('}' == last.type) {
        includeLast = false;
    } else {
        throw new IllegalStateException("?, RangeQuery??");
    }
    if (elements.size() < 4 || elements.size() > 5) {
        throw new IllegalStateException("?, RangeQuery ");
    }
    //
    Element e2 = elements.get(1);
    if ('\'' == e2.type) {
        firstValue = e2.toString();
        //
        Element e3 = elements.get(2);
        if (',' != e3.type) {
            throw new IllegalStateException("?, RangeQuery?");
        }
        //
        Element e4 = elements.get(3);
        if ('\'' == e4.type) {
            lastValue = e4.toString();
        } else if (e4 != last) {
            throw new IllegalStateException("?RangeQuery?");
        }
    } else if (',' == e2.type) {
        firstValue = null;
        //
        Element e3 = elements.get(2);
        if ('\'' == e3.type) {
            lastValue = e3.toString();
        } else {
            throw new IllegalStateException("?RangeQuery?");
        }

    } else {
        throw new IllegalStateException("?, RangeQuery?");
    }

    /**
     * lucene4.6?
     */
    TermRangeQuery query = null;
    try {
        query = new TermRangeQuery(fieldNameEle.toString(), new BytesRef(firstValue.getBytes("UTF8")),
                new BytesRef(lastValue.getBytes("UTF8")), includeFirst, includeLast);
    } catch (UnsupportedEncodingException e) {
        throw new IllegalStateException("?, ??");
    }
    return query;
}

From source file:cn.codepub.redis.directory.Main.java

License:Apache License

/**
 * Creates a sample document with fifteen fields, {@code key1} to {@code key15}.
 * <p>
 * Several fields are derived from {@code i} ({@code "key" + i}, and {@code i} times 100000
 * or 50000 as int, float, long and double); the remaining ones hold random alphabetic
 * strings or random numbers. Despite its name the method only builds the document; it does
 * not add it to anything.
 *
 * @param i seed value for the derived fields
 * @return the newly created document
 */
private static Document addDocument(int i) {
    // Widened copies of i so each product is computed in the field's own numeric type.
    final float asFloat = i;
    final long asLong = i;
    final double asDouble = i;

    final Document doc = new Document();
    doc.add(new StringField("key1", "key" + i, Field.Store.YES));
    doc.add(new IntField("key2", i * 100000, Field.Store.YES));
    doc.add(new FloatField("key3", asFloat * 100000, Field.Store.YES));
    doc.add(new LongField("key4", asLong * 100000, Field.Store.YES));
    doc.add(new DoubleField("key5", asDouble * 100000, Field.Store.YES));

    // Randomly generated content, stored fields first and then doc-values-only fields.
    doc.add(new TextField("key6", RandomStringUtils.randomAlphabetic(10), Field.Store.YES));
    doc.add(new StringField("key7", RandomStringUtils.randomAlphabetic(5), Field.Store.YES));
    doc.add(new BinaryDocValuesField("key8", new BytesRef(RandomStringUtils.randomAlphabetic(5))));
    doc.add(new DoubleDocValuesField("key9", RandomUtils.nextDouble(0, 1000)));
    doc.add(new FloatDocValuesField("key10", RandomUtils.nextFloat(0, 1000)));

    doc.add(new LongField("key11", asLong * 50000, Field.Store.YES));
    doc.add(new IntField("key12", i * 50000, Field.Store.YES));
    doc.add(new FloatField("key13", asFloat * 50000, Field.Store.YES));
    doc.add(new DoubleField("key14", asDouble * 50000, Field.Store.YES));
    doc.add(new StringField("key15", RandomStringUtils.randomAlphabetic(6), Field.Store.YES));
    return doc;
}

From source file:com.b2international.index.lucene.BooleanIndexField.java

License:Apache License

/**
 * Converts a boolean value to its {@link BytesRef} form by first turning it into text with
 * {@code convertToString} and then wrapping that text.
 *
 * @param value the boolean to convert
 * @return a new {@link BytesRef} built from the textual form of {@code value}
 */
@Override
protected BytesRef toBytesRef(Boolean value) {
    final CharSequence text = convertToString(value);
    return new BytesRef(text);
}