Example usage for org.apache.lucene.util BytesRef BytesRef

List of usage examples for org.apache.lucene.util BytesRef BytesRef

Introduction

On this page you can find usage examples for the BytesRef constructor of org.apache.lucene.util.BytesRef.

Prototype

public BytesRef(CharSequence text) 

Source Link

Document

Initialize the byte[] from the UTF8 bytes for the provided String.

Usage

From source file:de.jetsli.lumeo.RawLucene.java

License:Apache License

/**
 * Looks up the document whose {@code UID} field term equals the given user id.
 *
 * <p>The lookup is wrapped in a {@code SearchExecutor} and handed to {@code searchSomething};
 * presumably that helper returns whatever the executor returns (verify against its definition).
 *
 * @param uId the user id to search for
 * @return the matching document, or {@code null} when the executor finds no match
 * @throws IllegalStateException (from the executor) if more than one document carries the id
 */
public Document findByUserId(final String uId) {
    return searchSomething(new SearchExecutor<Document>() {

        @Override
        public Document execute(final IndexSearcher searcher) throws IOException {
            //TODO -MH  search subreaders - share common subreader code in findByID?
            final BytesRef uidBytes = new BytesRef(uId);
            final TermQuery uidQuery = new TermQuery(new Term(UID, uidBytes));

            // Only a single hit is requested; totalHits is still checked so that
            // duplicate ids are detected instead of silently picking one.
            final TopDocs hits = searcher.search(uidQuery, 1);
            if (hits.totalHits > 1) {
                throw new IllegalStateException("Document with " + UID + "=" + uId + " not the only one");
            }
            if (hits.totalHits != 1) {
                return null;
            }
            return searcher.document(hits.scoreDocs[0].doc, null);
        }
    });
}

From source file:de.jetsli.lumeo.util.Mapping.java

License:Apache License

/**
 * Converts a field value into the {@code BytesRef} representation used for that value.
 *
 * <p>Supported value types are {@code String}, {@code Integer}, {@code Long}, {@code Double}
 * and {@code Date}. Strings are lower-cased first when the analyzer registered for the field
 * is {@code KEYWORD_ANALYZER_LC}. Dates are rendered with
 * {@code DateTools.timeToString} at minute resolution.
 *
 * @param fieldName name of the field the value belongs to; only consulted for strings
 * @param o         the value to convert
 * @return the bytes for the value
 * @throws UnsupportedOperationException if {@code o} is {@code null} or of an unsupported type
 */
public BytesRef toBytes(String fieldName, Object o) {
    if (o instanceof String) {
        final String text = (String) o;
        if (getAnalyzerFor(fieldName) == KEYWORD_ANALYZER_LC) {
            // Locale.ROOT keeps the produced term independent of the JVM default locale
            // (e.g. under a Turkish locale "I" would otherwise lower-case to a dotless i).
            return new BytesRef(text.toLowerCase(java.util.Locale.ROOT));
        }
        return new BytesRef(text);
    }
    if (o instanceof Integer) {
        return LuceneHelper.newRefFromInt((Integer) o);
    }
    if (o instanceof Long) {
        return LuceneHelper.newRefFromLong((Long) o);
    }
    if (o instanceof Double) {
        return LuceneHelper.newRefFromDouble((Double) o);
    }
    if (o instanceof Date) {
        final long millis = ((Date) o).getTime();
        return new BytesRef(DateTools.timeToString(millis, DateTools.Resolution.MINUTE));
    }
    throw new UnsupportedOperationException("Couldn't find bytesRef usage for object  " + o);
}

From source file:de.qaware.chronix.lucene.client.add.LuceneAddingService.java

License:Apache License

/**
 * Adds a string or byte[] field value to the given Lucene document.
 * <p>
 * A {@code String} value is added as a {@code Field} of type {@code TextField.TYPE_STORED};
 * a {@code byte[]} value is wrapped in a {@code BytesRef} and added as a {@code StoredField}.
 * Values of any other type (including {@code null}) are ignored.
 *
 * @param document   the lucene document to add the field to
 * @param fieldName  the field name
 * @param fieldValue the field value
 */
private static void handleStringsAndBytes(Document document, String fieldName, Object fieldValue) {
    if (fieldValue instanceof byte[]) {
        final BytesRef rawBytes = new BytesRef((byte[]) fieldValue);
        document.add(new StoredField(fieldName, rawBytes));
        return;
    }
    if (fieldValue instanceof String) {
        final String text = (String) fieldValue;
        document.add(new Field(fieldName, text, TextField.TYPE_STORED));
    }
}

From source file:de.unihildesheim.iw.lucene.analyzer.EnglishAnalyzerTest.java

License:Open Source License

/**
 * Checks that an {@code EnglishAnalyzer} created with its no-argument constructor returns all
 * four terms of the query {@code "foo bar baz bam"} unchanged.
 *
 * @throws Exception if creating or consuming the token stream fails
 */
@SuppressWarnings({ "resource", "ObjectAllocationInLoop", "ImplicitNumericConversion" })
@Test
public void testTokenStream_noStopwords() throws Exception {
    final String query = "foo bar baz bam";
    final Analyzer analyzer = new EnglishAnalyzer();
    final BytesRefArray result = new BytesRefArray(Counter.newCounter(false));

    try (TokenStream stream = analyzer.tokenStream(null, query)) {
        // Look the attribute up once instead of on every token.
        final CharTermAttribute termAttribute = stream.getAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            final BytesRef term = new BytesRef(termAttribute);
            if (term.length > 0) {
                result.append(term);
            }
        }
        // The TokenStream workflow expects end() after the last token and before close().
        stream.end();
    }

    Assert.assertEquals("Not all terms returned.", 4L, result.size());

    final BytesRefIterator bri = result.iterator();
    BytesRef term;
    while ((term = bri.next()) != null) {
        final String text = term.utf8ToString();
        Assert.assertTrue("Unknown term found.",
                "foo".equals(text) || "bar".equals(text) || "baz".equals(text) || "bam".equals(text));
    }
}

From source file:de.unihildesheim.iw.lucene.analyzer.EnglishAnalyzerTest.java

License:Open Source License

/**
 * Checks that an {@code EnglishAnalyzer} constructed with a {@code CharArraySet} containing
 * {@code "foo"} and {@code "bar"} returns only {@code "baz"} and {@code "bam"} for the query
 * {@code "foo bar baz bam"}.
 *
 * @throws Exception if creating or consuming the token stream fails
 */
@SuppressWarnings({ "resource", "ObjectAllocationInLoop", "ImplicitNumericConversion" })
@Test
public void testTokenStream() throws Exception {
    final CharArraySet csa = new CharArraySet(Arrays.asList("foo", "bar"), true);
    final String query = "foo bar baz bam";
    final Analyzer analyzer = new EnglishAnalyzer(csa);
    final BytesRefArray result = new BytesRefArray(Counter.newCounter(false));

    try (TokenStream stream = analyzer.tokenStream(null, query)) {
        // Look the attribute up once instead of on every token.
        final CharTermAttribute termAttribute = stream.getAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            final BytesRef term = new BytesRef(termAttribute);
            if (term.length > 0) {
                result.append(term);
            }
        }
        // The TokenStream workflow expects end() after the last token and before close().
        stream.end();
    }

    Assert.assertEquals("Not all terms returned.", 2L, result.size());

    final BytesRefIterator bri = result.iterator();
    BytesRef term;
    while ((term = bri.next()) != null) {
        final String text = term.utf8ToString();
        Assert.assertTrue("Unknown term found.", "baz".equals(text) || "bam".equals(text));
    }
}

From source file:de.unihildesheim.iw.lucene.analyzer.FrenchAnalyzerTest.java

License:Open Source License

/**
 * Checks that a {@code FrenchAnalyzer} created with its no-argument constructor returns all
 * four terms of the query {@code "foo bar baz bam"} unchanged.
 *
 * @throws Exception if creating or consuming the token stream fails
 */
@SuppressWarnings({ "resource", "ObjectAllocationInLoop", "ImplicitNumericConversion" })
@Test
public void testTokenStream_noStopwords() throws Exception {
    final String query = "foo bar baz bam";
    final Analyzer analyzer = new FrenchAnalyzer();
    final BytesRefArray result = new BytesRefArray(Counter.newCounter(false));

    try (TokenStream stream = analyzer.tokenStream(null, query)) {
        // Look the attribute up once instead of on every token.
        final CharTermAttribute termAttribute = stream.getAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            final BytesRef term = new BytesRef(termAttribute);
            if (term.length > 0) {
                result.append(term);
            }
        }
        // The TokenStream workflow expects end() after the last token and before close().
        stream.end();
    }

    Assert.assertEquals("Not all terms returned.", 4L, result.size());

    final BytesRefIterator bri = result.iterator();
    BytesRef term;
    while ((term = bri.next()) != null) {
        final String text = term.utf8ToString();
        Assert.assertTrue("Unknown term found.",
                "foo".equals(text) || "bar".equals(text) || "baz".equals(text) || "bam".equals(text));
    }
}

From source file:de.unihildesheim.iw.lucene.analyzer.FrenchAnalyzerTest.java

License:Open Source License

/**
 * Checks that a {@code FrenchAnalyzer} constructed with a {@code CharArraySet} containing
 * {@code "foo"} and {@code "bar"} returns only {@code "baz"} and {@code "bam"} for the query
 * {@code "foo bar baz bam"}.
 *
 * @throws Exception if creating or consuming the token stream fails
 */
@SuppressWarnings({ "resource", "ObjectAllocationInLoop", "ImplicitNumericConversion" })
@Test
public void testTokenStream() throws Exception {
    final CharArraySet csa = new CharArraySet(Arrays.asList("foo", "bar"), true);
    final String query = "foo bar baz bam";
    final Analyzer analyzer = new FrenchAnalyzer(csa);
    final BytesRefArray result = new BytesRefArray(Counter.newCounter(false));

    try (TokenStream stream = analyzer.tokenStream(null, query)) {
        // Look the attribute up once instead of on every token.
        final CharTermAttribute termAttribute = stream.getAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            final BytesRef term = new BytesRef(termAttribute);
            if (term.length > 0) {
                result.append(term);
            }
        }
        // The TokenStream workflow expects end() after the last token and before close().
        stream.end();
    }

    Assert.assertEquals("Not all terms returned.", 2L, result.size());

    final BytesRefIterator bri = result.iterator();
    BytesRef term;
    while ((term = bri.next()) != null) {
        final String text = term.utf8ToString();
        Assert.assertTrue("Unknown term found.", "baz".equals(text) || "bam".equals(text));
    }
}

From source file:de.unihildesheim.iw.lucene.analyzer.FrenchAnalyzerTest.java

License:Open Source License

/**
 * Checks elision handling of a {@code FrenchAnalyzer} constructed with a {@code CharArraySet}
 * containing {@code "foo"} and {@code "bar"}.
 *
 * <p>The query is {@code "foo bar baz bam "} followed by one {@code <elision>'bim} token for
 * every entry of {@code FrenchAnalyzer.DEFAULT_ELISIONS}. The test expects two terms plus one
 * per elision entry, and every returned term to be {@code "baz"}, {@code "bam"} or
 * {@code "bim"}.
 *
 * @throws Exception if creating or consuming the token stream fails
 */
@SuppressWarnings({ "resource", "ObjectAllocationInLoop", "ImplicitNumericConversion" })
@Test
public void testTokenStream_elisions() throws Exception {
    final CharArraySet csa = new CharArraySet(Arrays.asList("foo", "bar"), true);
    final StringBuilder query = new StringBuilder("foo bar baz bam ");
    // add all elisions to the query
    for (final String s : FrenchAnalyzer.DEFAULT_ELISIONS) {
        query.append(s).append("\'bim ");
    }
    final Analyzer analyzer = new FrenchAnalyzer(csa);
    final BytesRefArray result = new BytesRefArray(Counter.newCounter(false));

    try (TokenStream stream = analyzer.tokenStream(null, query.toString())) {
        // Look the attribute up once instead of on every token.
        final CharTermAttribute termAttribute = stream.getAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            final BytesRef term = new BytesRef(termAttribute);
            if (term.length > 0) {
                result.append(term);
            }
        }
        // The TokenStream workflow expects end() after the last token and before close().
        stream.end();
    }

    Assert.assertEquals("Not all terms returned.", 2L + FrenchAnalyzer.DEFAULT_ELISIONS.length, result.size());

    final BytesRefIterator bri = result.iterator();
    BytesRef term;
    while ((term = bri.next()) != null) {
        final String text = term.utf8ToString();
        Assert.assertTrue("Unknown term found.",
                "baz".equals(text) || "bam".equals(text) ||
                // elisions should be removed from this
                        "bim".equals(text));
    }
}

From source file:de.unihildesheim.iw.lucene.analyzer.GermanAnalyzerTest.java

License:Open Source License

/**
 * Checks that a {@code GermanAnalyzer} created with its no-argument constructor returns all
 * four terms of the query {@code "foo bar baz bam"} unchanged.
 *
 * @throws Exception if creating or consuming the token stream fails
 */
@SuppressWarnings({ "resource", "ObjectAllocationInLoop", "ImplicitNumericConversion" })
@Test
public void testTokenStream_noStopwords() throws Exception {
    final String query = "foo bar baz bam";
    final Analyzer analyzer = new GermanAnalyzer();
    final BytesRefArray result = new BytesRefArray(Counter.newCounter(false));

    try (TokenStream stream = analyzer.tokenStream(null, query)) {
        // Look the attribute up once instead of on every token.
        final CharTermAttribute termAttribute = stream.getAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            final BytesRef term = new BytesRef(termAttribute);
            if (term.length > 0) {
                result.append(term);
            }
        }
        // The TokenStream workflow expects end() after the last token and before close().
        stream.end();
    }

    Assert.assertEquals("Not all terms returned.", 4L, result.size());

    final BytesRefIterator bri = result.iterator();
    BytesRef term;
    while ((term = bri.next()) != null) {
        final String text = term.utf8ToString();
        Assert.assertTrue("Unknown term found.",
                "foo".equals(text) || "bar".equals(text) || "baz".equals(text) || "bam".equals(text));
    }
}

From source file:de.unihildesheim.iw.lucene.analyzer.GermanAnalyzerTest.java

License:Open Source License

/**
 * Checks that a {@code GermanAnalyzer} constructed with a {@code CharArraySet} containing
 * {@code "foo"} and {@code "bar"} returns only {@code "baz"} and {@code "bam"} for the query
 * {@code "foo bar baz bam"}.
 *
 * @throws Exception if creating or consuming the token stream fails
 */
@SuppressWarnings({ "resource", "ObjectAllocationInLoop", "ImplicitNumericConversion" })
@Test
public void testTokenStream() throws Exception {
    final CharArraySet csa = new CharArraySet(Arrays.asList("foo", "bar"), true);
    final String query = "foo bar baz bam";
    final Analyzer analyzer = new GermanAnalyzer(csa);
    final BytesRefArray result = new BytesRefArray(Counter.newCounter(false));

    try (TokenStream stream = analyzer.tokenStream(null, query)) {
        // Look the attribute up once instead of on every token.
        final CharTermAttribute termAttribute = stream.getAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            final BytesRef term = new BytesRef(termAttribute);
            if (term.length > 0) {
                result.append(term);
            }
        }
        // The TokenStream workflow expects end() after the last token and before close().
        stream.end();
    }

    Assert.assertEquals("Not all terms returned.", 2L, result.size());

    final BytesRefIterator bri = result.iterator();
    BytesRef term;
    while ((term = bri.next()) != null) {
        final String text = term.utf8ToString();
        Assert.assertTrue("Unknown term found.", "baz".equals(text) || "bam".equals(text));
    }
}