List of usage examples for org.apache.lucene.util.BytesRef.utf8ToString()
public String utf8ToString()
From source file:com.romeikat.datamessie.core.processing.service.fulltext.query.QueryUtil.java
License:Open Source License
public List<String> getIndexTerms(final FullTextSession fullTextSession, final int luceneDocumentId, final Class<?> clazz, final String field) { final IndexReader indexReader = fullTextSession.getSearchFactory().getIndexReaderAccessor().open(clazz); try {//from w w w . j a v a 2 s. c o m final Terms terms = indexReader.getTermVector(luceneDocumentId, field); final List<String> termsList = Lists.newArrayListWithExpectedSize((int) terms.size()); final TermsEnum termsEnum = terms.iterator(); BytesRef text; while ((text = termsEnum.next()) != null) { final String term = text.utf8ToString(); termsList.add(term); } return termsList; } catch (final IOException e) { LOG.error("Could not determine index terms", e); return null; } }
From source file:com.rondhuit.w2v.lucene.LuceneIndexCorpus.java
License:Apache License
@Override public void learnVocab() throws IOException { super.learnVocab(); final String field = ((LuceneIndexConfig) config).getField(); final Terms terms = MultiFields.getTerms(reader, field); final BytesRef maxTerm = terms.getMax(); final BytesRef minTerm = terms.getMin(); Query q = new TermRangeQuery(field, minTerm, maxTerm, true, true); IndexSearcher searcher = new IndexSearcher(reader); topDocs = searcher.search(q, Integer.MAX_VALUE); TermsEnum termsEnum = null;// ww w . java2 s .co m termsEnum = terms.iterator(termsEnum); termsEnum.seekCeil(new BytesRef()); BytesRef term = termsEnum.term(); while (term != null) { int p = addWordToVocab(term.utf8ToString()); vocab[p].setCn((int) termsEnum.totalTermFreq()); term = termsEnum.next(); } }
From source file:com.shaie.annots.AnnotationSearchExample.java
License:Apache License
/**
 * Writes a header line naming the field to standard output, followed by every term the
 * reader holds for that field, one term per line.
 *
 * @param reader reader to take the terms from
 * @param field name of the field whose terms are printed
 * @throws IOException if the terms cannot be read
 */
static void printFieldTerms(LeafReader reader, String field) throws IOException {
    System.out.println("Terms for field: " + field);
    final TermsEnum termsEnum = reader.terms(field).iterator(null);
    for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
        System.out.println(" " + term.utf8ToString());
    }
}
From source file:com.shaie.PhraseVsSpanQuery.java
License:Apache License
@SuppressWarnings("resource") public static void main(String[] args) throws Exception { final Directory dir = new RAMDirectory(); final IndexWriterConfig conf = new IndexWriterConfig(new WhitespaceAnalyzer()); final IndexWriter writer = new IndexWriter(dir, conf); final Document doc = new Document(); doc.add(new TextField("f", new TokenStream() { final PositionIncrementAttribute pos = addAttribute(PositionIncrementAttribute.class); final CharTermAttribute term = addAttribute(CharTermAttribute.class); boolean first = true, done = false; @Override/*from w w w.ja va 2s. c om*/ public boolean incrementToken() throws IOException { if (done) { return false; } if (first) { term.setEmpty().append("a"); pos.setPositionIncrement(1); first = false; } else { term.setEmpty().append("b"); pos.setPositionIncrement(0); done = true; } return true; } })); writer.addDocument(doc); writer.close(); final DirectoryReader reader = DirectoryReader.open(dir); final IndexSearcher searcher = new IndexSearcher(reader); final LeafReader ar = reader.leaves().get(0).reader(); final TermsEnum te = ar.terms("f").iterator(); BytesRef scratch = new BytesRef(); while ((scratch = te.next()) != null) { System.out.println(scratch.utf8ToString()); final PostingsEnum dape = ar.postings(new Term("f", scratch.utf8ToString())); System.out.println(" doc=" + dape.nextDoc() + ", pos=" + dape.nextPosition()); } System.out.println(); // try a phrase query with a slop final PhraseQuery pqNoSlop = buildPhraseQuery(0); System.out.println("searching for \"a b\"; num results = " + searcher.search(pqNoSlop, 10).totalHits); final PhraseQuery pqSlop1 = buildPhraseQuery(1); System.out.println("searching for \"a b\"~1; num results = " + searcher.search(pqSlop1, 10).totalHits); final PhraseQuery pqSlop3 = buildPhraseQuery(3); System.out.println("searching for \"a b\"~3; num results = " + searcher.search(pqSlop3, 10).totalHits); final SpanNearQuery snqUnOrdered = new SpanNearQuery( new SpanQuery[] { new SpanTermQuery(new 
Term("f", "a")), new SpanTermQuery(new Term("f", "b")) }, 1, false); System.out.println("searching for SpanNearUnordered('a', 'b'), slop=1; num results = " + searcher.search(snqUnOrdered, 10).totalHits); final SpanNearQuery snqOrdered = new SpanNearQuery( new SpanQuery[] { new SpanTermQuery(new Term("f", "a")), new SpanTermQuery(new Term("f", "b")) }, 1, true); System.out.println("searching for SpanNearOrdered('a', 'b'), slop=1; num results = " + searcher.search(snqOrdered, 10).totalHits); reader.close(); }
From source file:com.shaie.suggest.ContextSuggestDemo.java
License:Apache License
/**
 * Asks the suggester for completions of {@code "qu"} restricted to a single context and
 * prints a header line, each result on its own line, and a trailing blank line.
 *
 * @param context the only context passed to the lookup; also shown in the header as UTF-8 text
 * @throws IOException if the lookup fails
 */
public void lookupWithContext(BytesRef context) throws IOException {
    final String header = "Running lookup() with context [" + context.utf8ToString() + "]:";
    System.out.println(header);

    final List<LookupResult> results =
            suggester.lookup("qu", Collections.singleton(context), 10, true, true);
    for (final LookupResult result : results) {
        System.out.println(result);
    }

    System.out.println();
}
From source file:com.shaie.utils.IndexUtils.java
License:Apache License
/** Prints the terms indexed under the given fields. */ public static void printFieldTerms(LeafReader reader, String... fields) throws IOException { for (final String field : fields) { System.out.println(format("Terms for field [%s]:", field)); final TermsEnum te = reader.terms(field).iterator(); BytesRef scratch; while ((scratch = te.next()) != null) { System.out.println(format(" %s", scratch.utf8ToString())); }//from w w w .ja v a 2 s. com } }
From source file:com.shaie.utils.IndexUtils.java
License:Apache License
/** Prints the terms indexed under the given fields with full postings information. */ public static void printFieldTermsWithInfo(LeafReader reader, String... fields) throws IOException { for (final String field : fields) { System.out.println(format("Terms for field [%s], with positional info:", field)); final TermsEnum te = reader.terms(field).iterator(); BytesRef scratch; PostingsEnum postings = null;//from w w w .j a v a 2 s . c o m while ((scratch = te.next()) != null) { System.out.println(format(" %s", scratch.utf8ToString())); postings = te.postings(postings, PostingsEnum.ALL); for (postings.nextDoc(); postings.docID() != DocIdSetIterator.NO_MORE_DOCS; postings.nextDoc()) { final Map<Integer, BytesRef> positions = Maps.newTreeMap(); boolean addedPayload = false; for (int i = 0; i < postings.freq(); i++) { final int pos = postings.nextPosition(); final BytesRef payload = postings.getPayload(); if (payload != null) { positions.put(pos, BytesRef.deepCopyOf(payload)); addedPayload = true; } else { positions.put(pos, null); } } if (addedPayload) { System.out.println( format(" doc=%d, freq=%d", postings.docID(), postings.freq(), positions)); for (final Entry<Integer, BytesRef> e : positions.entrySet()) { System.out.println(format(" pos=%d, payload=%s", e.getKey(), e.getValue())); } } else { System.out.println(format(" doc=%d, freq=%d, pos=%s", postings.docID(), postings.freq(), positions.keySet())); } } } } }
From source file:com.stratio.cassandra.index.ClusteringKeyMapper.java
License:Apache License
/** * Returns the raw clustering key contained in the specified Lucene field value. * * @param bytesRef The {@link BytesRef} containing the raw clustering key to be get. * @return The raw clustering key contained in the specified Lucene field value. */// w w w. j ava2 s.c o m public final CellName clusteringKey(BytesRef bytesRef) { String string = bytesRef.utf8ToString(); ByteBuffer bb = ByteBufferUtils.fromString(string); return cellNameType.cellFromByteBuffer(bb); }
From source file:com.stratio.cassandra.index.TokenMapperGeneric.java
License:Apache License
/** * Returns the Cassandra {@link Token} represented by the specified Lucene {@link BytesRef}. * * @param bytesRef A Lucene {@link BytesRef} representation of a Cassandra {@link Token}. * @return The Cassandra {@link Token} represented by the specified Lucene {@link BytesRef}. *///from w ww . j a v a 2 s .com Token token(BytesRef bytesRef) { String string = bytesRef.utf8ToString(); ByteBuffer bb = ByteBufferUtils.fromString(string); return factory.fromByteArray(bb); }
From source file:com.tuplejump.stargate.lucene.LuceneUtils.java
License:Apache License
public static String stringDocValue(BinaryDocValues rowKeyValues, int docId) throws IOException { BytesRef ref = new BytesRef(); rowKeyValues.get(docId, ref);// w w w. j a va 2 s . c o m return ref.utf8ToString(); }