Example usage for org.apache.lucene.util.BytesRef#BytesRef(CharSequence)

Introduction

On this page you can find example usage for the org.apache.lucene.util.BytesRef constructor BytesRef(CharSequence).

Prototype

public BytesRef(CharSequence text) 

Document

Initialize the byte[] from the UTF-8 bytes for the provided String.
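
A minimal, self-contained sketch of the constructor on its own (assuming only lucene-core on the classpath; the class name BytesRefDemo is ours for illustration):

import org.apache.lucene.util.BytesRef;

public class BytesRefDemo {
    public static void main(String[] args) {
        // Encodes the String into its UTF-8 byte representation.
        BytesRef ref = new BytesRef("café");

        // length is the number of UTF-8 bytes; offset is where they start in ref.bytes.
        System.out.println(ref.length + " bytes at offset " + ref.offset); // 5 bytes at offset 0

        // utf8ToString() decodes the bytes back into a String.
        System.out.println(ref.utf8ToString()); // café
    }
}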

Usage

From source file:com.hrhih.index.suggest.Input.java

License:Apache License

public Input(String term, long v) {
    this(new BytesRef(term), v, null, false, null, false);
}

From source file:com.hrhih.index.suggest.Input.java

License:Apache License

public Input(String term, int v, BytesRef payload, Set<BytesRef> contexts) {
    this(new BytesRef(term), v, payload, true, contexts, true);
}

From source file:com.ibm.watson.developer_cloud.professor_languo.ingestion.indexing.RetrieveAndRankIndexerTest.java

License:Open Source License

/**
 * Check whether the deserialized candidate answers are exactly the same as the candidate answers
 * we expect. Since the query is "question Title: what is right", the expected candidate answers
 * should be the entire corpus.
 * 
 * @throws IngestionException
 */
private void compare_indexed_records_to_corpus() throws IngestionException {
    indexdCorpus = corpusBuilder.getUniqueThreadSetFromBinFiles();

    // Check that the size of the corpus is the same as the size of the
    // indexed documents
    assertTrue("Wrong number of documents indexed", indexedRecords.size() == indexdCorpus.size());

    // Check that the indexed document in the corpus is in the index
    File serFile = new File(corpusBuilder.getUniqueThreadDirPath()).listFiles()[0];
    StackExchangeThread thread = StackExchangeThreadSerializer.deserializeThreadFromBinFile(serFile.getPath());

    final Document luceneDoc = new LuceneDocumentMapper().createDocument(thread);
    SolrInputDocument recordDoc = indexedRecords.get(0);

    for (IndexableField field : luceneDoc.getFields()) {
        BytesRef bin = luceneDoc.getBinaryValue(field.name());

        // Check that indexed fields (title and id) are indexed correctly
        if (field.name().equals(IndexDocumentFieldName.THREAD_TITLE.toString())
                || field.name().equals(IndexDocumentFieldName.THREAD_POST_ID.toString())) {

            String value = luceneDoc.get(field.name());
            assertEquals(value, recordDoc.getFieldValue(field.name()).toString());
        }

        // Check that indexed serialized field is indexed correctly
        if (bin != null) {
            BytesRef recordbin = new BytesRef((byte[]) recordDoc.getFieldValue(field.name()));
            assertEquals(bin, recordbin);
        }

    }
}

From source file:com.lorelib.analyzer.query.IKQueryExpressionParser.java

License:Apache License

/**
 * Build a TermRangeQuery from the parsed range-expression elements.
 * @param fieldNameEle the element holding the field name
 * @param elements the remaining elements of the range expression
 * @return the corresponding TermRangeQuery
 */
private TermRangeQuery toTermRangeQuery(Element fieldNameEle, LinkedList<Element> elements) {

    boolean includeFirst = false;
    boolean includeLast = false;
    String firstValue = null;
    String lastValue = null;
    // Check whether the first element is '[' (inclusive) or '{' (exclusive)
    Element first = elements.getFirst();
    if ('[' == first.type) {
        includeFirst = true;
    } else if ('{' == first.type) {
        includeFirst = false;
    } else {
        throw new IllegalStateException("Expression error: a RangeQuery must start with '[' or '{'");
    }
    // Check whether the last element is ']' (inclusive) or '}' (exclusive)
    Element last = elements.getLast();
    if (']' == last.type) {
        includeLast = true;
    } else if ('}' == last.type) {
        includeLast = false;
    } else {
        throw new IllegalStateException("Expression error: a RangeQuery must end with ']' or '}'");
    }
    if (elements.size() < 4 || elements.size() > 5) {
        throw new IllegalStateException("Expression error: a RangeQuery must have 4 or 5 elements");
    }
    // Read the lower bound value
    Element e2 = elements.get(1);
    if ('\'' == e2.type) {
        firstValue = e2.toString();
        // The next element must be the ',' separator
        Element e3 = elements.get(2);
        if (',' != e3.type) {
            throw new IllegalStateException("Expression error: RangeQuery is missing the ',' separator");
        }
        // Read the upper bound value
        Element e4 = elements.get(3);
        if ('\'' == e4.type) {
            lastValue = e4.toString();
        } else if (e4 != last) {
            throw new IllegalStateException("Expression error: malformed RangeQuery");
        }
    } else if (',' == e2.type) {
        firstValue = null;
        // Lower bound is open; read the upper bound value
        Element e3 = elements.get(2);
        if ('\'' == e3.type) {
            lastValue = e3.toString();
        } else {
            throw new IllegalStateException("Expression error: malformed RangeQuery");
        }

    } else {
        throw new IllegalStateException("Expression error: malformed RangeQuery");
    }

    // BytesRef(CharSequence) does not accept null, so guard the open-ended bounds,
    // which TermRangeQuery represents with a null term.
    return new TermRangeQuery(fieldNameEle.toString(),
            firstValue == null ? null : new BytesRef(firstValue),
            lastValue == null ? null : new BytesRef(lastValue),
            includeFirst, includeLast);
}

From source file:com.lucure.core.codec.CompressingStoredFieldsReader.java

License:Apache License

private CompressingStoredFieldsReader(CompressingStoredFieldsReader reader) {
    this.version = reader.version;
    this.fieldInfos = reader.fieldInfos;
    this.fieldsStream = reader.fieldsStream.clone();
    this.indexReader = reader.indexReader.clone();
    this.maxPointer = reader.maxPointer;
    this.chunkSize = reader.chunkSize;
    this.packedIntsVersion = reader.packedIntsVersion;
    this.compressionMode = reader.compressionMode;
    this.decompressor = reader.decompressor.clone();
    this.numDocs = reader.numDocs;
    // BytesRef(int capacity): allocate an empty buffer sized like the source reader's
    this.bytes = new BytesRef(reader.bytes.bytes.length);
    this.closed = false;
}

From source file:com.mathworks.xzheng.advsearching.MultiSearcherTest.java

License:Apache License

public void testMulti() throws Exception {

    MultiReader multiReader = new MultiReader(searchers[0].getIndexReader(), searchers[1].getIndexReader());
    IndexSearcher searcher = new IndexSearcher(multiReader);

    TermRangeQuery query = new TermRangeQuery("animal", // #3
            new BytesRef("h"), // #3
            new BytesRef("t"), // #3
            true, true);// #3

    TopDocs hits = searcher.search(query, 10);
    assertEquals("tarantula not included", 12, hits.totalHits);
}

From source file:com.mathworks.xzheng.extsearch.payloads.BulletinPayloadsFilter.java

License:Apache License

BulletinPayloadsFilter(TokenStream in, float warningBoost) {
    super(in);
    payloadAttr = addAttribute(PayloadAttribute.class);
    termAtt = addAttribute(CharTermAttribute.class);
    boostPayload = new BytesRef(PayloadHelper.encodeFloat(warningBoost));
}

From source file:com.mathworks.xzheng.searching.TermRangeQueryTest.java

License:Apache License

public void testTermRangeQuery() throws Exception {
    Directory dir = TestUtil.getBookIndexDirectory();
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));
    TermRangeQuery query = new TermRangeQuery("title2", new BytesRef("d"), new BytesRef("j"), true, true);

    TopDocs matches = searcher.search(query, 100);
    /*
    for(int i=0;i<matches.totalHits;i++) {
      System.out.println("match " + i + ": " + searcher.doc(matches.scoreDocs[i].doc).get("title2"));
    }
    */
    assertEquals(3, matches.totalHits);

    dir.close();
}

From source file:com.mathworks.xzheng.tools.ChainedFilterTest.java

License:Apache License

public void setUp() throws Exception {
    directory = new RAMDirectory();

    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46,
            new WhitespaceAnalyzer(Version.LUCENE_46));

    IndexWriter writer = new IndexWriter(directory, config);

    Calendar cal = Calendar.getInstance();
    cal.set(2009, 1, 1, 0, 0); // A

    for (int i = 0; i < MAX; i++) {
        Document doc = new Document();
        doc.add(new Field("key", "" + (i + 1), Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field("owner", (i < MAX / 2) ? "bob" : "sue", Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field("date", DateTools.timeToString(cal.getTimeInMillis(), DateTools.Resolution.DAY),
                Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.addDocument(doc);

        cal.add(Calendar.DATE, 1);
    }

    writer.close();

    searcher = new IndexSearcher(DirectoryReader.open(directory));

    BooleanQuery bq = new BooleanQuery(); // B
    bq.add(new TermQuery(new Term("owner", "bob")), // B
            BooleanClause.Occur.SHOULD); // B
    bq.add(new TermQuery(new Term("owner", "sue")), // B
            BooleanClause.Occur.SHOULD); // B
    query = bq;

    cal.set(2099, 1, 1, 0, 0);
    dateFilter = TermRangeFilter.Less("date", // C
            new BytesRef(DateTools.timeToString( // C
                    cal.getTimeInMillis(), // C
                    DateTools.Resolution.DAY)));// C

    bobFilter = new CachingWrapperFilter( // D
            new QueryWrapperFilter( // D
                    new TermQuery(new Term("owner", "bob")))); // D

    sueFilter = new CachingWrapperFilter( // E
            new QueryWrapperFilter( // E
                    new TermQuery(new Term("owner", "sue")))); // E
}

From source file:com.meizu.nlp.classification.BooleanPerceptronClassifier.java

License:Apache License

/**
 * {@inheritDoc}
 */
@Override
public ClassificationResult<Boolean> assignClass(String text) throws IOException {
    if (textTerms == null) {
        throw new IOException("You must first call Classifier#train");
    }
    Long output = 0L;
    try (TokenStream tokenStream = analyzer.tokenStream(textFieldName, text)) {
        CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            String s = charTermAttribute.toString();
            Long d = Util.get(fst, new BytesRef(s));
            if (d != null) {
                output += d;
            }
        }
        tokenStream.end();
    }

    return new ClassificationResult<>(output >= threshold, output.doubleValue());
}