Example usage for org.apache.lucene.util BytesRef utf8ToString

List of usage examples for org.apache.lucene.util BytesRef utf8ToString

Introduction

On this page you can find example usages of org.apache.lucene.util.BytesRef.utf8ToString().

Prototype

public String utf8ToString() 

Source Link

Document

Interprets the stored bytes as UTF-8 and returns the resulting string.

Usage

From source file:com.romeikat.datamessie.core.processing.service.fulltext.query.QueryUtil.java

License:Open Source License

/**
 * Collects the terms stored in the term vector of one field of one indexed document.
 *
 * @param fullTextSession session used to open (and afterwards close) an index reader for {@code clazz}
 * @param luceneDocumentId Lucene document id whose term vector is read
 * @param clazz indexed type whose index is opened
 * @param field name of the field whose term vector is read
 * @return the terms as strings in enumeration order; an empty list when the document has no term
 *         vector for the field; {@code null} when reading the index failed with an {@link IOException}
 */
public List<String> getIndexTerms(final FullTextSession fullTextSession, final int luceneDocumentId,
        final Class<?> clazz, final String field) {
    final IndexReader indexReader = fullTextSession.getSearchFactory().getIndexReaderAccessor().open(clazz);
    try {
        final Terms terms = indexReader.getTermVector(luceneDocumentId, field);
        if (terms == null) {
            // No term vector was stored for this document/field.
            return Lists.newArrayList();
        }

        // Terms.size() reports -1 when the number of terms is unknown; only presize on a real count.
        final long expectedSize = terms.size();
        final List<String> termsList;
        if (expectedSize >= 0 && expectedSize <= Integer.MAX_VALUE) {
            termsList = Lists.newArrayListWithExpectedSize((int) expectedSize);
        } else {
            termsList = Lists.newArrayList();
        }

        final TermsEnum termsEnum = terms.iterator();
        BytesRef text;
        while ((text = termsEnum.next()) != null) {
            termsList.add(text.utf8ToString());
        }

        return termsList;
    } catch (final IOException e) {
        LOG.error("Could not determine index terms", e);
        return null;
    } finally {
        // Readers obtained from the accessor must be handed back to it, otherwise they leak.
        fullTextSession.getSearchFactory().getIndexReaderAccessor().close(indexReader);
    }
}

From source file:com.rondhuit.w2v.lucene.LuceneIndexCorpus.java

License:Apache License

/**
 * Builds the vocabulary from every term indexed under the configured field.
 *
 * <p>After delegating to the superclass, this runs a range query spanning the field's smallest
 * to largest term (both inclusive) and stores the result in {@code topDocs}. It then walks all
 * terms of the field, adding each one to the vocabulary with its total term frequency as count.
 *
 * @throws IOException if reading the index fails
 * @throws IllegalStateException if the configured field has no indexed terms
 */
@Override
public void learnVocab() throws IOException {
    super.learnVocab();

    final String field = ((LuceneIndexConfig) config).getField();
    final Terms terms = MultiFields.getTerms(reader, field);
    if (terms == null) {
        // MultiFields.getTerms yields null for a field that is not indexed; fail with a clear message.
        throw new IllegalStateException("No terms indexed for field: " + field);
    }
    final BytesRef maxTerm = terms.getMax();
    final BytesRef minTerm = terms.getMin();
    final Query q = new TermRangeQuery(field, minTerm, maxTerm, true, true);
    final IndexSearcher searcher = new IndexSearcher(reader);
    topDocs = searcher.search(q, Integer.MAX_VALUE);

    final TermsEnum termsEnum = terms.iterator(null);

    // Seeking to the empty term positions the enum on the first term, if there is one.
    // On END the enum is unpositioned and term() must not be called.
    if (termsEnum.seekCeil(new BytesRef()) == TermsEnum.SeekStatus.END) {
        return;
    }
    for (BytesRef term = termsEnum.term(); term != null; term = termsEnum.next()) {
        final int p = addWordToVocab(term.utf8ToString());
        // NOTE(review): the long frequency is narrowed to int as before; confirm counts fit.
        vocab[p].setCn((int) termsEnum.totalTermFreq());
    }
}

From source file:com.shaie.annots.AnnotationSearchExample.java

License:Apache License

/**
 * Prints the terms indexed under the given field to standard output, one per line.
 *
 * <p>A header line naming the field is always printed. If the reader has no terms for the
 * field, nothing else is printed.
 *
 * @param reader reader whose terms are listed
 * @param field name of the field to list
 * @throws IOException if reading the index fails
 */
static void printFieldTerms(LeafReader reader, String field) throws IOException {
    System.out.println("Terms for field: " + field);
    // terms(field) is null for a field that does not exist; avoid the NullPointerException.
    if (reader.terms(field) == null) {
        return;
    }
    TermsEnum te = reader.terms(field).iterator(null);
    BytesRef scratch;
    while ((scratch = te.next()) != null) {
        System.out.println("  " + scratch.utf8ToString());
    }
}

From source file:com.shaie.PhraseVsSpanQuery.java

License:Apache License

/**
 * Demo comparing phrase queries and span-near queries on two terms at the same position.
 *
 * <p>Indexes a single document whose field {@code f} contains the tokens {@code a} and
 * {@code b}, the second with a position increment of 0. It then prints every term with its
 * first document and position, followed by the hit counts of several phrase and span-near
 * queries.
 *
 * @param args ignored
 * @throws Exception if indexing or searching fails
 */
@SuppressWarnings("resource")
public static void main(String[] args) throws Exception {
    final Directory dir = new RAMDirectory();
    final IndexWriterConfig conf = new IndexWriterConfig(new WhitespaceAnalyzer());
    final IndexWriter writer = new IndexWriter(dir, conf);

    final Document doc = new Document();
    doc.add(new TextField("f", new TokenStream() {
        final PositionIncrementAttribute pos = addAttribute(PositionIncrementAttribute.class);
        final CharTermAttribute term = addAttribute(CharTermAttribute.class);
        boolean first = true, done = false;

        /** Emits "a" (increment 1), then "b" (increment 0, i.e. same position), then ends. */
        @Override
        public boolean incrementToken() throws IOException {
            if (done) {
                return false;
            }
            if (first) {
                term.setEmpty().append("a");
                pos.setPositionIncrement(1);
                first = false;
            } else {
                term.setEmpty().append("b");
                pos.setPositionIncrement(0);
                done = true;
            }
            return true;
        }
    }));
    writer.addDocument(doc);
    writer.close();

    // try-with-resources closes the reader even if one of the searches throws.
    try (DirectoryReader reader = DirectoryReader.open(dir)) {
        final IndexSearcher searcher = new IndexSearcher(reader);
        final LeafReader ar = reader.leaves().get(0).reader();
        final TermsEnum te = ar.terms("f").iterator();
        BytesRef scratch;
        while ((scratch = te.next()) != null) {
            final String text = scratch.utf8ToString();
            System.out.println(text);
            // postings(Term) alone only requests frequencies; positions must be requested
            // explicitly for nextPosition() to return meaningful values.
            final PostingsEnum dape = ar.postings(new Term("f", text), PostingsEnum.POSITIONS);
            System.out.println("  doc=" + dape.nextDoc() + ", pos=" + dape.nextPosition());
        }

        System.out.println();

        // phrase queries with increasing slop (0, 1, 3)
        final PhraseQuery pqNoSlop = buildPhraseQuery(0);
        System.out.println("searching for \"a b\"; num results = " + searcher.search(pqNoSlop, 10).totalHits);

        final PhraseQuery pqSlop1 = buildPhraseQuery(1);
        System.out.println("searching for \"a b\"~1; num results = " + searcher.search(pqSlop1, 10).totalHits);

        final PhraseQuery pqSlop3 = buildPhraseQuery(3);
        System.out.println("searching for \"a b\"~3; num results = " + searcher.search(pqSlop3, 10).totalHits);

        // span-near queries over the same two terms, unordered and ordered, slop 1
        final SpanNearQuery snqUnOrdered = new SpanNearQuery(
                new SpanQuery[] { new SpanTermQuery(new Term("f", "a")), new SpanTermQuery(new Term("f", "b")) }, 1,
                false);
        System.out.println("searching for SpanNearUnordered('a', 'b'), slop=1; num results = "
                + searcher.search(snqUnOrdered, 10).totalHits);

        final SpanNearQuery snqOrdered = new SpanNearQuery(
                new SpanQuery[] { new SpanTermQuery(new Term("f", "a")), new SpanTermQuery(new Term("f", "b")) }, 1,
                true);
        System.out.println("searching for SpanNearOrdered('a', 'b'), slop=1; num results = "
                + searcher.search(snqOrdered, 10).totalHits);
    }
}

From source file:com.shaie.suggest.ContextSuggestDemo.java

License:Apache License

/**
 * Runs a suggester lookup for the prefix {@code "qu"} restricted to a single context and
 * prints every result to standard output, followed by an empty line.
 *
 * @param context the context to restrict the lookup to; also shown (UTF-8 decoded) in the header
 * @throws IOException if the lookup fails
 */
public void lookupWithContext(BytesRef context) throws IOException {
    final String contextText = context.utf8ToString();
    System.out.println("Running lookup() with context [" + contextText + "]:");

    for (final LookupResult hit : suggester.lookup("qu", Collections.singleton(context), 10, true, true)) {
        System.out.println(hit);
    }

    System.out.println();
}

From source file:com.shaie.utils.IndexUtils.java

License:Apache License

/**
 * Prints the terms indexed under each of the given fields to standard output.
 *
 * <p>For every field a header line is printed, followed by one line per term. A field for
 * which the reader has no terms produces only the header.
 *
 * @param reader reader whose terms are listed
 * @param fields names of the fields to list, processed in the given order
 * @throws IOException if reading the index fails
 */
public static void printFieldTerms(LeafReader reader, String... fields) throws IOException {
    for (final String field : fields) {
        System.out.println(format("Terms for field [%s]:", field));
        // terms(field) is null for a field that does not exist; skip it instead of failing.
        if (reader.terms(field) == null) {
            continue;
        }
        final TermsEnum te = reader.terms(field).iterator();
        BytesRef scratch;
        while ((scratch = te.next()) != null) {
            System.out.println(format("  %s", scratch.utf8ToString()));
        }
    }
}

From source file:com.shaie.utils.IndexUtils.java

License:Apache License

/**
 * Prints the terms indexed under each of the given fields together with their postings.
 *
 * <p>For every term, each document is listed with its frequency. When at least one position in
 * the document carries a payload, every position is printed on its own line with its payload;
 * otherwise the positions are printed inline as a set. A field for which the reader has no
 * terms produces only its header line.
 *
 * @param reader reader whose terms and postings are listed
 * @param fields names of the fields to list, processed in the given order
 * @throws IOException if reading the index fails
 */
public static void printFieldTermsWithInfo(LeafReader reader, String... fields) throws IOException {
    for (final String field : fields) {
        System.out.println(format("Terms for field [%s], with positional info:", field));
        // terms(field) is null for a field that does not exist; skip it instead of failing.
        if (reader.terms(field) == null) {
            continue;
        }
        final TermsEnum te = reader.terms(field).iterator();
        BytesRef scratch;
        PostingsEnum postings = null; // passed back in so the enum can be reused across terms
        while ((scratch = te.next()) != null) {
            System.out.println(format("  %s", scratch.utf8ToString()));
            postings = te.postings(postings, PostingsEnum.ALL);
            for (postings.nextDoc(); postings.docID() != DocIdSetIterator.NO_MORE_DOCS; postings.nextDoc()) {
                // Sorted by position; the value is the payload at that position, or null.
                final Map<Integer, BytesRef> positions = Maps.newTreeMap();
                boolean addedPayload = false;
                for (int i = 0; i < postings.freq(); i++) {
                    final int pos = postings.nextPosition();
                    final BytesRef payload = postings.getPayload();
                    if (payload != null) {
                        // Copy the payload before the next nextPosition() call.
                        positions.put(pos, BytesRef.deepCopyOf(payload));
                        addedPayload = true;
                    } else {
                        positions.put(pos, null);
                    }
                }
                if (addedPayload) {
                    // The format string has two placeholders, so only two arguments are passed.
                    System.out.println(format("    doc=%d, freq=%d", postings.docID(), postings.freq()));
                    for (final Entry<Integer, BytesRef> e : positions.entrySet()) {
                        System.out.println(format("      pos=%d, payload=%s", e.getKey(), e.getValue()));
                    }
                } else {
                    System.out.println(format("    doc=%d, freq=%d, pos=%s", postings.docID(), postings.freq(),
                            positions.keySet()));
                }
            }
        }
    }
}

From source file:com.stratio.cassandra.index.ClusteringKeyMapper.java

License:Apache License

/**
 * Returns the raw clustering key contained in the specified Lucene field value.
 *
 * <p>The bytes are decoded as a UTF-8 string, turned into a {@link ByteBuffer} by
 * {@code ByteBufferUtils.fromString} and handed to the cell name type.
 *
 * @param bytesRef The {@link BytesRef} containing the raw clustering key to extract.
 * @return The raw clustering key contained in the specified Lucene field value.
 */
public final CellName clusteringKey(BytesRef bytesRef) {
    final ByteBuffer rawKey = ByteBufferUtils.fromString(bytesRef.utf8ToString());
    return cellNameType.cellFromByteBuffer(rawKey);
}

From source file:com.stratio.cassandra.index.TokenMapperGeneric.java

License:Apache License

/**
 * Returns the Cassandra {@link Token} represented by the specified Lucene {@link BytesRef}.
 *
 * <p>The bytes are decoded as a UTF-8 string, turned into a {@link ByteBuffer} by
 * {@code ByteBufferUtils.fromString} and passed to the token factory.
 *
 * @param bytesRef A Lucene {@link BytesRef} representation of a Cassandra {@link Token}.
 * @return The Cassandra {@link Token} represented by the specified Lucene {@link BytesRef}.
 */
Token token(BytesRef bytesRef) {
    final ByteBuffer serializedToken = ByteBufferUtils.fromString(bytesRef.utf8ToString());
    return factory.fromByteArray(serializedToken);
}

From source file:com.tuplejump.stargate.lucene.LuceneUtils.java

License:Apache License

/**
 * Reads the binary doc value of one document and decodes it as a UTF-8 string.
 *
 * @param rowKeyValues doc values to read from
 * @param docId id of the document whose value is read
 * @return the document's value decoded via {@link BytesRef#utf8ToString()}
 * @throws IOException declared for callers; not thrown by the statements visible here
 */
public static String stringDocValue(BinaryDocValues rowKeyValues, int docId) throws IOException {
    final BytesRef value = new BytesRef();
    // get(...) fills the supplied BytesRef with the document's bytes.
    rowKeyValues.get(docId, value);
    return value.utf8ToString();
}