Example usage for the org.apache.lucene.util.BytesRef constructor BytesRef(CharSequence)

List of usage examples for the org.apache.lucene.util.BytesRef constructor BytesRef(CharSequence)

Introduction

On this page you can find example usages of the org.apache.lucene.util.BytesRef constructor BytesRef(CharSequence).

Prototype

public BytesRef(CharSequence text) 

Source Link

Document

Initialize the byte[] from the UTF8 bytes for the provided String.

Usage

From source file:SearcherTest.java

/**
 * TermRangeQuery /* w  w w. j  a  v  a2s . c o m*/
 * TermRangeQuery???
 * ?ASC??ASC?
 * ?ASC??
 * ASC??TermRangeQuery
 * ?NumericRangeQuery
 * ?????
 *
 * @throws Exception
 */
@Test
public void testTermRangeQuery() throws Exception {
    String searchField = "contents";
    String q = "1000001----1000002";
    String lowerTermString = "1000001";
    String upperTermString = "1000003";
    /**
     * field  
     * lowerterm -
     *upperterm -?
     *includelower -lowerterm
     *includeupper -upperterm
     *https://yq.aliyun.com/articles/45353
     */
    Query query = new TermRangeQuery(searchField, new BytesRef(lowerTermString), new BytesRef(upperTermString),
            true, true);
    TopDocs hits = is.search(query, 10);
    System.out.println("? '" + q + "'" + hits.totalHits + "");
    for (ScoreDoc scoreDoc : hits.scoreDocs) {
        Document doc = is.doc(scoreDoc.doc);
        System.out.println(doc.get("fullPath"));
    }
}

From source file:DVBench.java

License:Apache License

/**
 * Builds a fresh index of 400000 documents under {@code /data/indices/dvbench}
 * and runs the {@code search} helper against four representations of the same
 * per-document long value:
 * <ul>
 *   <li>"dv" - a {@link NumericDocValuesField}, searched directly;</li>
 *   <li>"inv" - an indexed {@link LongField}, searched through an
 *       {@link UninvertingReader} mapped as {@code Type.LONG};</li>
 *   <li>"boxed" and "boxed2" - {@link BinaryDocValuesField}s filled by the
 *       {@code box} helper and searched through a {@code BinaryAsVLongReader};
 *       "boxed2" additionally has its length forced to {@code (bpv + 7) / 8} bytes.</li>
 * </ul>
 *
 * @param bpv presumably "bits per value": random values come from
 *            {@code r.nextLong(bpv)}, and 64 is special-cased to use
 *            {@code Long.MIN_VALUE}/{@code Long.MAX_VALUE} as extremes
 * @throws Exception if indexing or searching fails
 */
static void doBench(int bpv) throws Exception {
    File file = new File("/data/indices/dvbench");
    file.mkdirs(); // create the index directory (and parents) if missing; the result is not checked
    Directory dir = FSDirectory.open(file);
    IndexWriterConfig config = new IndexWriterConfig(null);
    // CREATE mode: start from an empty index on every run.
    config.setOpenMode(OpenMode.CREATE);
    config.setMergeScheduler(new SerialMergeScheduler());
    config.setMergePolicy(new LogDocMergePolicy());
    config.setMaxBufferedDocs(25000);
    IndexWriter writer = new IndexWriter(dir, config);

    MyRandom r = new MyRandom();
    int numdocs = 400000;
    // A single Document and its Field instances are reused for every added
    // document; only the field values are changed inside the loop.
    Document doc = new Document();
    Field dv = new NumericDocValuesField("dv", 0);
    Field inv = new LongField("inv", 0, Field.Store.NO);
    Field boxed = new BinaryDocValuesField("boxed", new BytesRef(8));
    Field boxed2 = new BinaryDocValuesField("boxed2", new BytesRef(8));

    doc.add(dv);
    doc.add(inv);
    doc.add(boxed);
    doc.add(boxed2);
    for (int i = 0; i < numdocs; i++) {
        // defeat blockpackedwriter
        // Every 8192 documents the minimum and then the maximum value for
        // this bpv are written explicitly; all other documents get a random value.
        final long value;
        if (i % 8192 == 0) {
            value = bpv == 64 ? Long.MIN_VALUE : 0;
        } else if (i % 8192 == 1) {
            value = bpv == 64 ? Long.MAX_VALUE : (1L << bpv) - 1;
        } else {
            value = r.nextLong(bpv);
        }
        dv.setLongValue(value);
        inv.setLongValue(value);
        // box(...) is defined elsewhere; presumably it encodes value into the
        // field's BytesRef in place - confirm against its definition.
        box(value, boxed.binaryValue());
        box(value, boxed2.binaryValue());
        boxed2.binaryValue().length = (bpv + 7) / 8; // fixed length
        writer.addDocument(doc);
    }

    writer.close();

    // run dv search tests
    String description = "dv (bpv=" + bpv + ")";
    DirectoryReader reader = DirectoryReader.open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setQueryCache(null); // don't bench the cache

    // Results of every search(...) call are accumulated into hash and
    // inspected at the end of the method.
    int hash = 0;
    // warmup
    hash += search(description, searcher, "dv", 300, true);
    hash += search(description, searcher, "dv", 300, false);

    // Uninverting
    Map<String, UninvertingReader.Type> mapping = Collections.singletonMap("inv", UninvertingReader.Type.LONG);
    DirectoryReader uninv = UninvertingReader.wrap(reader, mapping);
    IndexSearcher searcher2 = new IndexSearcher(uninv);
    searcher2.setQueryCache(null); // don't bench the cache

    description = "fc (bpv=" + bpv + ")";
    // warmup
    hash += search(description, searcher2, "inv", 300, true);
    hash += search(description, searcher2, "inv", 300, false);

    // Boxed inside binary
    DirectoryReader boxedReader = new BinaryAsVLongReader(reader);
    IndexSearcher searcher3 = new IndexSearcher(boxedReader);
    searcher3.setQueryCache(null); // don't bench the cache
    description = "boxed (bpv=" + bpv + ")";
    // warmup
    hash += search(description, searcher3, "boxed", 300, true);
    hash += search(description, searcher3, "boxed", 300, false);

    description = "boxed fixed-length (bpv=" + bpv + ")";
    // warmup
    hash += search(description, searcher3, "boxed2", 300, true);
    hash += search(description, searcher3, "boxed2", 300, false);

    // NOTE(review): presumably this exists only so the accumulated results are
    // observably used and the searches cannot be optimized away - confirm.
    if (hash == 3) {
        // wont happen
        System.out.println("hash=" + hash);
    }
    // Only the base reader and the directory are closed here; the wrapping
    // readers (uninv, boxedReader) are not closed explicitly.
    reader.close();
    dir.close();
}

From source file:BuildFST.java

License:Apache License

/**
 * Builds an FST from the command-line arguments and writes it to standard
 * output through {@code Util.toDot}.
 *
 * <p>Each argument is either {@code input} or {@code input/output}, split at
 * the last {@code '/'}. When every supplied output parses as a {@code long},
 * numeric outputs are used and a negative one is rejected; as soon as one
 * output fails to parse, all outputs are treated as byte sequences instead.
 * Arguments without an output part receive the "no output" value of the
 * chosen outputs implementation.
 *
 * @param args the FST entries, one per argument
 * @throws IOException declared by the signature for the FST building and writing calls
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
public static void main(String[] args) throws IOException {

    // Pass 1: decide between numeric and byte-sequence outputs. Scanning stops
    // at the first output that is not a valid long.
    boolean allNumeric = true;
    boolean sawNegative = false;
    for (String arg : args) {
        int slash = arg.lastIndexOf('/');
        if (slash == -1) {
            continue;
        }
        try {
            sawNegative |= Long.parseLong(arg.substring(slash + 1)) < 0;
        } catch (NumberFormatException nfe) {
            allNumeric = false;
            break;
        }
    }

    if (allNumeric && sawNegative) {
        throw new RuntimeException("can only handle numeric outputs >= 0");
    }
    Outputs outputs;
    if (allNumeric) {
        outputs = PositiveIntOutputs.getSingleton();
    } else {
        outputs = ByteSequenceOutputs.getSingleton();
    }

    // Pass 2: turn every argument into an (input, output) pair.
    Pair<?>[] inputs = new Pair[args.length];
    for (int idx = 0; idx < args.length; idx++) {
        String arg = args[idx];
        int slash = arg.lastIndexOf('/');
        String input;
        Object output;
        if (slash == -1) {
            input = arg;
            output = outputs.getNoOutput();
        } else {
            input = arg.substring(0, slash);
            String outputText = arg.substring(slash + 1);
            if (allNumeric) {
                output = Long.parseLong(outputText);
            } else {
                output = new BytesRef(outputText);
            }
        }
        inputs[idx] = new Pair(new BytesRef(input), output);
    }
    // The pairs are sorted before being fed to the builder.
    Arrays.sort(inputs);

    // Pass 3: feed the sorted pairs to a builder typed for the chosen outputs.
    FST<?> fst;
    if (allNumeric) {
        Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
        for (Pair pair : inputs) {
            IntsRefBuilder scratch = new IntsRefBuilder();
            Util.toIntsRef(pair.input, scratch);
            builder.add(scratch.get(), (Long) pair.output);
        }
        fst = builder.finish();
    } else {
        Builder<BytesRef> builder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, outputs);
        for (Pair pair : inputs) {
            IntsRefBuilder scratch = new IntsRefBuilder();
            Util.toIntsRef(pair.input, scratch);
            builder.add(scratch.get(), (BytesRef) pair.output);
        }
        fst = builder.finish();
    }

    PrintWriter dotOut = new PrintWriter(System.out);
    Util.toDot(fst, dotOut, true, true);
}

From source file:IndexTaxis.java

License:Apache License

/**
 * Adds the Lucene field(s) for one named column value to {@code doc}.
 *
 * <ul>
 *   <li>Identifier-like columns become a {@link StringField} plus a
 *       {@link SortedSetDocValuesField}.</li>
 *   <li>{@code vendor_name} becomes a {@link TextField}.</li>
 *   <li>Date-time columns are parsed as {@code long}, {@code passenger_count}
 *       as {@code int}; each becomes a point field plus a
 *       {@link SortedNumericDocValuesField}.</li>
 *   <li>The remaining known columns are parsed as {@code double}; a value that
 *       fails to parse is reported on standard output and skipped.</li>
 * </ul>
 *
 * @param doc       the document to add fields to
 * @param fieldName the column name; must be one of the names handled below
 * @param rawValue  the column value as text
 * @throws AssertionError        if {@code fieldName} is not a known column
 * @throws NumberFormatException if a long or int column holds unparsable text
 */
static void addOneField(Document doc, String fieldName, String rawValue) {
    // nocommit: a disabled earlier variant indexed "pick_up_lat" on its own as
    // DoublePoint + SortedNumericDocValuesField; it is covered by the shared
    // double case below.
    switch (fieldName) {
    case "vendor_id":
    case "cab_color":
    case "payment_type":
    case "trip_type":
    case "rate_code":
    case "store_and_fwd_flag": {
        doc.add(new StringField(fieldName, rawValue, Field.Store.NO));
        doc.add(new SortedSetDocValuesField(fieldName, new BytesRef(rawValue)));
        break;
    }
    case "vendor_name": {
        doc.add(new TextField(fieldName, rawValue, Field.Store.NO));
        break;
    }
    case "pick_up_date_time":
    case "drop_off_date_time": {
        long asLong = Long.parseLong(rawValue);
        doc.add(new LongPoint(fieldName, asLong));
        doc.add(new SortedNumericDocValuesField(fieldName, asLong));
        break;
    }
    case "passenger_count": {
        int asInt = Integer.parseInt(rawValue);
        doc.add(new IntPoint(fieldName, asInt));
        doc.add(new SortedNumericDocValuesField(fieldName, asInt));
        break;
    }
    case "trip_distance":
    case "pick_up_lat":
    case "pick_up_lon":
    case "drop_off_lat":
    case "drop_off_lon":
    case "fare_amount":
    case "surcharge":
    case "mta_tax":
    case "extra":
    case "ehail_fee":
    case "improvement_surcharge":
    case "tip_amount":
    case "tolls_amount":
    case "total_amount": {
        final double asDouble;
        try {
            asDouble = Double.parseDouble(rawValue);
        } catch (NumberFormatException nfe) {
            // Unparsable doubles are tolerated: warn and leave the field out.
            System.out.println(
                    "WARNING: failed to parse \"" + rawValue + "\" as double for field \"" + fieldName + "\"");
            return;
        }
        doc.add(new DoublePoint(fieldName, asDouble));
        // Doc values store longs, so the double is converted to its sortable long form.
        doc.add(new SortedNumericDocValuesField(fieldName, NumericUtils.doubleToSortableLong(asDouble)));
        break;
    }
    default:
        throw new AssertionError("failed to handle field \"" + fieldName + "\"");
    }
}

From source file:KNearestNeighborClassifier.java

License:Apache License

/**
 * Turns search results into per-class classification results.
 *
 * <p>For every hit that has a value in the class field, the class is counted
 * and the hit's score relative to the best score is added to that class's
 * boost. Each class then scores {@code (count * averageBoost) / k}. When
 * fewer than {@code k} hits carried a class, every score is rescaled by
 * {@code k / numberOfClassifiedHits}.
 *
 * @param topDocs the search results as a {@link TopDocs} object
 * @return a {@link List} of {@link ClassificationResult}, one for each class found in the hits
 * @throws IOException if it's not possible to get the stored value of class field
 */
protected List<ClassificationResult<BytesRef>> buildListFromTopDocs(TopDocs topDocs) throws IOException {
    Map<BytesRef, Integer> hitsPerClass = new HashMap<>();
    // Sum, per class, of each hit's score divided by the best score.
    Map<BytesRef, Double> boostPerClass = new HashMap<>();
    float bestScore = topDocs.getMaxScore();

    for (ScoreDoc hit : topDocs.scoreDocs) {
        IndexableField classField = indexSearcher.doc(hit.doc).getField(classFieldName);
        if (classField == null) {
            // Hits without a class value do not take part in the vote.
            continue;
        }
        BytesRef assignedClass = new BytesRef(classField.stringValue());

        Integer seenSoFar = hitsPerClass.get(assignedClass);
        hitsPerClass.put(assignedClass, seenSoFar == null ? 1 : seenSoFar + 1);

        double relativeScore = hit.score / bestScore;
        Double boostSoFar = boostPerClass.get(assignedClass);
        boostPerClass.put(assignedClass, boostSoFar == null ? relativeScore : boostSoFar + relativeScore);
    }

    List<ClassificationResult<BytesRef>> scored = new ArrayList<>();
    int classifiedHits = 0;
    for (Map.Entry<BytesRef, Integer> perClass : hitsPerClass.entrySet()) {
        BytesRef assignedClass = perClass.getKey();
        Integer hitCount = perClass.getValue();
        // Average relative score of the class's hits.
        Double averageBoost = boostPerClass.get(assignedClass) / hitCount;
        scored.add(new ClassificationResult<>(assignedClass.clone(), (hitCount * averageBoost) / (double) k));
        classifiedHits += hitCount;
    }

    if (classifiedHits >= k) {
        return scored;
    }

    // Correction: fewer than k hits carried a class, so rescale the scores.
    List<ClassificationResult<BytesRef>> corrected = new ArrayList<>();
    for (ClassificationResult<BytesRef> result : scored) {
        corrected.add(new ClassificationResult<>(result.getAssignedClass(),
                result.getScore() * k / (double) classifiedHits));
    }
    return corrected;
}

From source file:at.ac.univie.mminf.luceneSKOS.analysis.AbstractMeSHFilter.java

License:Apache License

/**
 * Replaces the current term (attributes) with term (attributes) from the
 * stack/* w  w w  .ja va  2s  .c  om*/
 * 
 * @throws IOException
 */
protected void processTermOnStack() throws IOException {
    ExpandedTerm expandedTerm = termStack.pop();

    String term = expandedTerm.getTerm();

    SKOSType termType = expandedTerm.getTermType();

    String sTerm = "";

    try {
        sTerm = analyze(analyzer, term, new CharsRef()).toString();
    } catch (IllegalArgumentException e) {
        // skip this term
        return;
    }

    /*
     * copies the values of all attribute implementations from this state into
     * the implementations of the target stream
     */
    restoreState(current);

    /*
     * Adds the expanded term to the term buffer
     */
    termAtt.setEmpty().append(sTerm);

    /*
     * set position increment to zero to put multiple terms into the same
     * position
     */
    posIncrAtt.setPositionIncrement(0);

    /*
     * sets the type of the expanded term (pref, alt, broader, narrower, etc.)
     */
    skosAtt.setSkosType(termType);

    /*
     * converts the SKOS Attribute to a payload, which is propagated to the
     * index
     */
    byte[] bytes = PayloadHelper.encodeInt(skosAtt.getSkosType().ordinal());
    payloadAtt.setPayload(new BytesRef(bytes));
}

From source file:at.ac.univie.mminf.luceneSKOS.analysis.SNOMEDFilter.java

License:Apache License

/**
 * Replaces the current term (attributes) with term (attributes) from the
 * stack.
 *
 * <p>Pops the next expanded term, runs it through {@code analyze}, restores
 * the captured attribute state and then overwrites the term text, position
 * increment, SKOS type and payload with the values for the expanded term.
 * A term that {@code analyze} rejects with an
 * {@link IllegalArgumentException} is skipped without touching any attribute.
 *
 * @throws IOException declared by the signature (NOTE(review): presumably
 *                     propagated from {@code analyze} - confirm)
 */
protected void processTermOnStack() throws IOException {
    ExpandedTerm expandedTerm = termStack.pop();

    String term = expandedTerm.getTerm();

    SKOSType termType = expandedTerm.getTermType();

    final String sTerm;

    try {
        sTerm = analyze(analyzer, term, new CharsRef()).toString();
    } catch (IllegalArgumentException ignored) {
        // skip this term
        return;
    }

    /*
     * copies the values of all attribute implementations from this state
     * into the implementations of the target stream
     */
    restoreState(current);

    /*
     * Adds the expanded term to the term buffer
     */
    termAtt.setEmpty().append(sTerm);

    /*
     * set position increment to zero to put multiple terms into the same
     * position
     */
    posIncrAtt.setPositionIncrement(0);

    /*
     * sets the type of the expanded term (pref, alt, broader, narrower,
     * etc.)
     */
    skosAtt.setSkosType(termType);

    /*
     * converts the SKOS Attribute to a payload, which is propagated to the
     * index; the payload is the ordinal of the SKOS type encoded as an int
     */
    byte[] bytes = PayloadHelper.encodeInt(skosAtt.getSkosType().ordinal());
    payloadAtt.setPayload(new BytesRef(bytes));
}

From source file:cc.pp.analyzer.ik.query.IKQueryExpressionParser.java

License:Apache License

/**
 * TermRangeQuery/*from  ww w .  j  ava2 s.  c  om*/
 * @param elements
 * @return
 */
private TermRangeQuery toTermRangeQuery(Element fieldNameEle, LinkedList<Element> elements) {

    boolean includeFirst = false;
    boolean includeLast = false;
    String firstValue = null;
    String lastValue = null;
    //?[{
    Element first = elements.getFirst();
    if ('[' == first.type) {
        includeFirst = true;
    } else if ('{' == first.type) {
        includeFirst = false;
    } else {
        throw new IllegalStateException("?");
    }
    //??]}
    Element last = elements.getLast();
    if (']' == last.type) {
        includeLast = true;
    } else if ('}' == last.type) {
        includeLast = false;
    } else {
        throw new IllegalStateException("?, RangeQuery??");
    }
    if (elements.size() < 4 || elements.size() > 5) {
        throw new IllegalStateException("?, RangeQuery ");
    }
    //
    Element e2 = elements.get(1);
    if ('\'' == e2.type) {
        firstValue = e2.toString();
        //
        Element e3 = elements.get(2);
        if (',' != e3.type) {
            throw new IllegalStateException("?, RangeQuery?");
        }
        //
        Element e4 = elements.get(3);
        if ('\'' == e4.type) {
            lastValue = e4.toString();
        } else if (e4 != last) {
            throw new IllegalStateException("?RangeQuery?");
        }
    } else if (',' == e2.type) {
        firstValue = null;
        //
        Element e3 = elements.get(2);
        if ('\'' == e3.type) {
            lastValue = e3.toString();
        } else {
            throw new IllegalStateException("?RangeQuery?");
        }

    } else {
        throw new IllegalStateException("?, RangeQuery?");
    }

    /**
     * lucene4.6?
     */
    TermRangeQuery query = null;
    try {
        query = new TermRangeQuery(fieldNameEle.toString(), new BytesRef(firstValue.getBytes("UTF8")),
                new BytesRef(lastValue.getBytes("UTF8")), includeFirst, includeLast);
    } catch (UnsupportedEncodingException e) {
        throw new IllegalStateException("?, ??");
    }
    return query;
}

From source file:cn.codepub.redis.directory.Main.java

License:Apache License

/**
 * Creates a sample document with fifteen fields, {@code key1} through
 * {@code key15}: values derived from {@code i} in several numeric types,
 * random alphabetic strings, and random doc-values numbers.
 *
 * @param i seed number the deterministic field values are derived from
 * @return the populated document
 */
private static Document addDocument(int i) {
    final Field.Store stored = Field.Store.YES;
    Document doc = new Document();

    // key1..key5: the seed itself and i * 100000 in each numeric type.
    doc.add(new StringField("key1", "key" + i, stored));
    doc.add(new IntField("key2", i * 100000, stored));
    doc.add(new FloatField("key3", (float) i * 100000, stored));
    doc.add(new LongField("key4", (long) i * 100000, stored));
    doc.add(new DoubleField("key5", (double) i * 100000, stored));

    // key6..key10: random text and random doc values.
    doc.add(new TextField("key6", RandomStringUtils.randomAlphabetic(10), stored));
    doc.add(new StringField("key7", RandomStringUtils.randomAlphabetic(5), stored));
    doc.add(new BinaryDocValuesField("key8", new BytesRef(RandomStringUtils.randomAlphabetic(5))));
    doc.add(new DoubleDocValuesField("key9", RandomUtils.nextDouble(0, 1000)));
    doc.add(new FloatDocValuesField("key10", RandomUtils.nextFloat(0, 1000)));

    // key11..key14: i * 50000 in each numeric type.
    doc.add(new LongField("key11", (long) i * 50000, stored));
    doc.add(new IntField("key12", i * 50000, stored));
    doc.add(new FloatField("key13", (float) i * 50000, stored));
    doc.add(new DoubleField("key14", (double) i * 50000, stored));

    // key15: one more random string.
    doc.add(new StringField("key15", RandomStringUtils.randomAlphabetic(6), stored));
    return doc;
}

From source file:com.b2international.index.lucene.BooleanIndexField.java

License:Apache License

/**
 * Converts a boolean value to a {@link BytesRef} built from the string form
 * produced by {@code convertToString}.
 *
 * @param value the boolean to convert
 * @return a {@link BytesRef} built from the value's string form
 */
@Override
protected BytesRef toBytesRef(Boolean value) {
    final String text = convertToString(value);
    return new BytesRef(text);
}