List of usage examples for the org.apache.lucene.util.BytesRef constructor BytesRef
public BytesRef(CharSequence text)
From source file:de.jetsli.lumeo.RawLucene.java
License:Apache License
public Document findByUserId(final String uId) { return searchSomething(new SearchExecutor<Document>() { @Override//w w w.j a va 2 s . c o m public Document execute(final IndexSearcher searcher) throws IOException { final BytesRef bytes = new BytesRef(uId); Document doc = null; //IndexReaderContext trc = searcher.getTopReaderContext(); //trc.children(); //TODO -MH search subreaders - share common subreader code in findByID? //Hopefully Lucene should bail after collecting our result of 1 TopDocs results = searcher.search(new TermQuery(new Term(UID, bytes)), 1); if (results.totalHits > 1) { throw new IllegalStateException("Document with " + UID + "=" + uId + " not the only one"); } if (results.totalHits == 1) { doc = searcher.document(results.scoreDocs[0].doc, null); } // new MyGather(searcher.getIndexReader()) { // // @Override protected boolean runLeaf(int base, AtomicReader leaf) throws IOException { // DocsEnum docs = leaf.termDocsEnum(leaf.getLiveDocs(), UID, bytes, false); // if (docs == null) // return true; // // int docID = docs.nextDoc(); // if (docID == DocsEnum.NO_MORE_DOCS) // return true; // // if (docs.nextDoc() != DocsEnum.NO_MORE_DOCS) // throw new IllegalStateException("Document with " + UID + "=" + uId + " not the only one"); // // doc = searcher.doc(base + docID); // return false; // } // }.run(); return doc; } }); }
From source file:de.jetsli.lumeo.util.Mapping.java
License:Apache License
/**
 * Converts a field value into a {@link BytesRef}.
 * <ul>
 * <li>{@code String}: used as-is, or lower-cased first when the analyzer configured for
 * the field is {@code KEYWORD_ANALYZER_LC}</li>
 * <li>{@code Integer}, {@code Long}, {@code Double}: delegated to the matching
 * {@code LuceneHelper.newRefFrom*} method</li>
 * <li>{@code Date}: formatted via {@code DateTools.timeToString} at minute resolution</li>
 * </ul>
 *
 * @param fieldName name of the field the value belongs to; only consulted for strings
 * @param o         the value to convert
 * @return the converted value
 * @throws UnsupportedOperationException if {@code o} is none of the supported types
 *                                       (this includes {@code null})
 */
public BytesRef toBytes(String fieldName, Object o) {
    if (o instanceof String) {
        String text = (String) o;
        if (getAnalyzerFor(fieldName) == KEYWORD_ANALYZER_LC) {
            // Locale.ROOT keeps the result independent of the JVM default locale;
            // the no-arg toLowerCase() would e.g. map "I" to a dotless i under tr_TR.
            text = text.toLowerCase(java.util.Locale.ROOT);
        }
        return new BytesRef(text);
    } else if (o instanceof Integer) {
        return LuceneHelper.newRefFromInt((Integer) o);
    } else if (o instanceof Long) {
        return LuceneHelper.newRefFromLong((Long) o);
    } else if (o instanceof Double) {
        return LuceneHelper.newRefFromDouble((Double) o);
    } else if (o instanceof Date) {
        return new BytesRef(DateTools.timeToString(((Date) o).getTime(), DateTools.Resolution.MINUTE));
    } else {
        throw new UnsupportedOperationException("Couldn't find bytesRef usage for object " + o);
    }
}
From source file:de.qaware.chronix.lucene.client.add.LuceneAddingService.java
License:Apache License
/**
 * Adds a field value to the Lucene document if it is a string or a byte[].
 * Values of any other type are silently ignored.
 * <p>
 * A string is added as a {@link Field} using {@link TextField#TYPE_STORED}; a byte[] is
 * wrapped in a {@link BytesRef} and added as a {@link StoredField}.
 *
 * @param document   the Lucene document the field is added to
 * @param fieldName  the field name
 * @param fieldValue the field value
 */
private static void handleStringsAndBytes(Document document, String fieldName, Object fieldValue) {
    if (fieldValue instanceof byte[]) {
        final BytesRef payload = new BytesRef((byte[]) fieldValue);
        document.add(new StoredField(fieldName, payload));
        return;
    }
    if (fieldValue instanceof String) {
        final String text = fieldValue.toString();
        document.add(new Field(fieldName, text, TextField.TYPE_STORED));
    }
}
From source file:de.unihildesheim.iw.lucene.analyzer.EnglishAnalyzerTest.java
License:Open Source License
@SuppressWarnings({ "resource", "ObjectAllocationInLoop", "ImplicitNumericConversion" }) @Test//www .j av a 2 s. com public void testTokenStream_noStopwords() throws Exception { final String query = "foo bar baz bam"; final Analyzer analyzer = new EnglishAnalyzer(); final BytesRefArray result = new BytesRefArray(Counter.newCounter(false)); try (TokenStream stream = analyzer.tokenStream(null, query)) { stream.reset(); while (stream.incrementToken()) { final BytesRef term = new BytesRef(stream.getAttribute(CharTermAttribute.class)); if (term.length > 0) { result.append(term); } } } Assert.assertEquals("Not all terms returned.", 4L, result.size()); final BytesRefIterator bri = result.iterator(); BytesRef term; while ((term = bri.next()) != null) { Assert.assertTrue("Unknown term found.", "foo".equals(term.utf8ToString()) || "bar".equals(term.utf8ToString()) || "baz".equals(term.utf8ToString()) || "bam".equals(term.utf8ToString())); } }
From source file:de.unihildesheim.iw.lucene.analyzer.EnglishAnalyzerTest.java
License:Open Source License
@SuppressWarnings({ "resource", "ObjectAllocationInLoop", "ImplicitNumericConversion" }) @Test//from w w w .jav a2 s .c om public void testTokenStream() throws Exception { final CharArraySet csa = new CharArraySet(Arrays.asList("foo", "bar"), true); final String query = "foo bar baz bam"; final Analyzer analyzer = new EnglishAnalyzer(csa); final BytesRefArray result = new BytesRefArray(Counter.newCounter(false)); try (TokenStream stream = analyzer.tokenStream(null, query)) { stream.reset(); while (stream.incrementToken()) { final BytesRef term = new BytesRef(stream.getAttribute(CharTermAttribute.class)); if (term.length > 0) { result.append(term); } } } Assert.assertEquals("Not all terms returned.", 2L, result.size()); final BytesRefIterator bri = result.iterator(); BytesRef term; while ((term = bri.next()) != null) { Assert.assertTrue("Unknown term found.", "baz".equals(term.utf8ToString()) || "bam".equals(term.utf8ToString())); } }
From source file:de.unihildesheim.iw.lucene.analyzer.FrenchAnalyzerTest.java
License:Open Source License
/**
 * Runs the query through a {@code FrenchAnalyzer} created with the no-arg constructor
 * and expects all four query terms to be returned.
 */
@SuppressWarnings({ "resource", "ObjectAllocationInLoop", "ImplicitNumericConversion" })
@Test
public void testTokenStream_noStopwords() throws Exception {
    final String query = "foo bar baz bam";
    final Analyzer analyzer = new FrenchAnalyzer();
    final BytesRefArray result = new BytesRefArray(Counter.newCounter(false));

    try (TokenStream stream = analyzer.tokenStream(null, query)) {
        // The attribute instance is fixed for the stream, so look it up once.
        final CharTermAttribute termAttribute = stream.getAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            final BytesRef term = new BytesRef(termAttribute);
            if (term.length > 0) {
                result.append(term);
            }
        }
        // TokenStream consumer workflow: end() must follow the last incrementToken() and precede close().
        stream.end();
    }

    Assert.assertEquals("Not all terms returned.", 4L, result.size());

    final BytesRefIterator bri = result.iterator();
    BytesRef term;
    while ((term = bri.next()) != null) {
        final String text = term.utf8ToString();
        Assert.assertTrue("Unknown term found.",
                "foo".equals(text) || "bar".equals(text) || "baz".equals(text) || "bam".equals(text));
    }
}
From source file:de.unihildesheim.iw.lucene.analyzer.FrenchAnalyzerTest.java
License:Open Source License
@SuppressWarnings({ "resource", "ObjectAllocationInLoop", "ImplicitNumericConversion" }) @Test// w w w . j a va2 s .co m public void testTokenStream() throws Exception { final CharArraySet csa = new CharArraySet(Arrays.asList("foo", "bar"), true); final String query = "foo bar baz bam"; final Analyzer analyzer = new FrenchAnalyzer(csa); final BytesRefArray result = new BytesRefArray(Counter.newCounter(false)); try (TokenStream stream = analyzer.tokenStream(null, query)) { stream.reset(); while (stream.incrementToken()) { final BytesRef term = new BytesRef(stream.getAttribute(CharTermAttribute.class)); if (term.length > 0) { result.append(term); } } } Assert.assertEquals("Not all terms returned.", 2L, result.size()); final BytesRefIterator bri = result.iterator(); BytesRef term; while ((term = bri.next()) != null) { Assert.assertTrue("Unknown term found.", "baz".equals(term.utf8ToString()) || "bam".equals(term.utf8ToString())); } }
From source file:de.unihildesheim.iw.lucene.analyzer.FrenchAnalyzerTest.java
License:Open Source License
@SuppressWarnings({ "resource", "ObjectAllocationInLoop", "ImplicitNumericConversion" }) @Test//from w ww .j a v a2s . c om public void testTokenStream_elisions() throws Exception { final CharArraySet csa = new CharArraySet(Arrays.asList("foo", "bar"), true); final StringBuilder query = new StringBuilder("foo bar baz bam "); // add all elisions to the query for (final String s : FrenchAnalyzer.DEFAULT_ELISIONS) { query.append(s).append("\'bim "); } final Analyzer analyzer = new FrenchAnalyzer(csa); final BytesRefArray result = new BytesRefArray(Counter.newCounter(false)); try (TokenStream stream = analyzer.tokenStream(null, query.toString())) { stream.reset(); while (stream.incrementToken()) { final BytesRef term = new BytesRef(stream.getAttribute(CharTermAttribute.class)); if (term.length > 0) { result.append(term); } } } Assert.assertEquals("Not all terms returned.", 2L + FrenchAnalyzer.DEFAULT_ELISIONS.length, result.size()); final BytesRefIterator bri = result.iterator(); BytesRef term; while ((term = bri.next()) != null) { Assert.assertTrue("Unknown term found.", "baz".equals(term.utf8ToString()) || "bam".equals(term.utf8ToString()) || // elisions should be removed from this "bim".equals(term.utf8ToString())); } }
From source file:de.unihildesheim.iw.lucene.analyzer.GermanAnalyzerTest.java
License:Open Source License
/**
 * Runs the query through a {@code GermanAnalyzer} created with the no-arg constructor
 * and expects all four query terms to be returned.
 */
@SuppressWarnings({ "resource", "ObjectAllocationInLoop", "ImplicitNumericConversion" })
@Test
public void testTokenStream_noStopwords() throws Exception {
    final String query = "foo bar baz bam";
    final Analyzer analyzer = new GermanAnalyzer();
    final BytesRefArray result = new BytesRefArray(Counter.newCounter(false));

    try (TokenStream stream = analyzer.tokenStream(null, query)) {
        // The attribute instance is fixed for the stream, so look it up once.
        final CharTermAttribute termAttribute = stream.getAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            final BytesRef term = new BytesRef(termAttribute);
            if (term.length > 0) {
                result.append(term);
            }
        }
        // TokenStream consumer workflow: end() must follow the last incrementToken() and precede close().
        stream.end();
    }

    Assert.assertEquals("Not all terms returned.", 4L, result.size());

    final BytesRefIterator bri = result.iterator();
    BytesRef term;
    while ((term = bri.next()) != null) {
        final String text = term.utf8ToString();
        Assert.assertTrue("Unknown term found.",
                "foo".equals(text) || "bar".equals(text) || "baz".equals(text) || "bam".equals(text));
    }
}
From source file:de.unihildesheim.iw.lucene.analyzer.GermanAnalyzerTest.java
License:Open Source License
@SuppressWarnings({ "resource", "ObjectAllocationInLoop", "ImplicitNumericConversion" }) @Test//from ww w . j a va2s .c om public void testTokenStream() throws Exception { final CharArraySet csa = new CharArraySet(Arrays.asList("foo", "bar"), true); final String query = "foo bar baz bam"; final Analyzer analyzer = new GermanAnalyzer(csa); final BytesRefArray result = new BytesRefArray(Counter.newCounter(false)); try (TokenStream stream = analyzer.tokenStream(null, query)) { stream.reset(); while (stream.incrementToken()) { final BytesRef term = new BytesRef(stream.getAttribute(CharTermAttribute.class)); if (term.length > 0) { result.append(term); } } } Assert.assertEquals("Not all terms returned.", 2L, result.size()); final BytesRefIterator bri = result.iterator(); BytesRef term; while ((term = bri.next()) != null) { Assert.assertTrue("Unknown term found.", "baz".equals(term.utf8ToString()) || "bam".equals(term.utf8ToString())); } }