Example usage for org.apache.lucene.analysis.tokenattributes TermToBytesRefAttribute getBytesRef

List of usage examples for org.apache.lucene.analysis.tokenattributes TermToBytesRefAttribute getBytesRef

Introduction

In this page you can find the example usage for org.apache.lucene.analysis.tokenattributes TermToBytesRefAttribute getBytesRef.

Prototype

public BytesRef getBytesRef();

Source Link

Document

Retrieve this attribute's BytesRef.

Usage

From source file: biospectra.classify.Classifier.java

License: Apache License

/**
 * Adds one optional (SHOULD) exact-term clause to {@code builder} for every
 * token produced by {@code stream}.
 *
 * @param builder   receives the generated TermQuery clauses
 * @param field     index field the terms are created against
 * @param stream    token source; assumes the caller has already reset() it — TODO confirm
 * @param termAtt   term attribute of {@code stream}
 * @param offsetAtt offset attribute of {@code stream} (unused here; kept for a uniform signature)
 * @throws IOException if advancing the token stream fails
 */
private void createNaiveKmerQueryClauses(BooleanQuery.Builder builder, String field, CachingTokenFilter stream,
        TermToBytesRefAttribute termAtt, OffsetAttribute offsetAtt) throws IOException {
    while (stream.incrementToken()) {
        // Snapshot the bytes: the attribute's BytesRef is reused between tokens.
        BytesRef termBytes = BytesRef.deepCopyOf(termAtt.getBytesRef());
        builder.add(new TermQuery(new Term(field, termBytes)), BooleanClause.Occur.SHOULD);
    }
}

From source file: biospectra.classify.Classifier.java

License: Apache License

/**
 * Adds one optional (SHOULD) PhraseQuery clause for every pair of
 * consecutive tokens (a sliding window of size two), chaining each token to
 * its successor. The phrase slop is derived from the gap between the two
 * tokens' start offsets.
 *
 * @param builder   receives the generated phrase clauses
 * @param field     index field the terms are created against
 * @param stream    token source; assumes the caller has already reset() it — TODO confirm
 * @param termAtt   term attribute of {@code stream}
 * @param offsetAtt offset attribute of {@code stream}
 * @throws IOException if advancing the token stream fails
 */
private void createChainProximityQueryClauses(BooleanQuery.Builder builder, String field,
        CachingTokenFilter stream, TermToBytesRefAttribute termAtt, OffsetAttribute offsetAtt)
        throws IOException {
    // Sliding window over the last two tokens. Java array elements default to
    // null/0, so no explicit initialization loop is needed.
    Term[] termArr = new Term[2];
    long[] offsetArr = new long[2];

    while (stream.incrementToken()) {
        // Deep copy: the attribute's BytesRef is reused between tokens.
        Term t = new Term(field, BytesRef.deepCopyOf(termAtt.getBytesRef()));
        if (termArr[0] == null) {
            termArr[0] = t;
            offsetArr[0] = offsetAtt.startOffset();
        } else if (termArr[1] == null) {
            termArr[1] = t;
            offsetArr[1] = offsetAtt.startOffset();
        } else {
            // Shift the window left and append the new token.
            termArr[0] = termArr[1];
            offsetArr[0] = offsetArr[1];
            termArr[1] = t;
            offsetArr[1] = offsetAtt.startOffset();
        }

        if (termArr[0] != null && termArr[1] != null) {
            long offsetDiff = offsetArr[1] - offsetArr[0];
            if (offsetDiff > 0) {
                PhraseQuery.Builder pq = new PhraseQuery.Builder();

                // Allow the two terms to be up to offsetDiff + 1 positions apart.
                pq.setSlop((int) (offsetDiff) + 1);
                pq.add(termArr[0]);
                pq.add(termArr[1]);

                builder.add(pq.build(), BooleanClause.Occur.SHOULD);
            }
        }
    }
}

From source file: biospectra.classify.Classifier.java

License: Apache License

/**
 * Adds one optional (SHOULD) PhraseQuery clause for every non-overlapping
 * pair of tokens (tokens 0+1, 2+3, ...). The phrase slop is derived from the
 * gap between the pair's start offsets. A trailing unpaired token is added
 * as a plain TermQuery clause instead.
 *
 * @param builder   receives the generated clauses
 * @param field     index field the terms are created against
 * @param stream    token source; assumes the caller has already reset() it — TODO confirm
 * @param termAtt   term attribute of {@code stream}
 * @param offsetAtt offset attribute of {@code stream}
 * @throws IOException if advancing the token stream fails
 */
private void createPairedProximityQueryClauses(BooleanQuery.Builder builder, String field,
        CachingTokenFilter stream, TermToBytesRefAttribute termAtt, OffsetAttribute offsetAtt)
        throws IOException {
    // Holds the current (possibly incomplete) pair. Java array elements
    // default to null/0, so no explicit initialization loop is needed.
    Term[] termArr = new Term[2];
    long[] offsetArr = new long[2];

    int count = 0;
    while (stream.incrementToken()) {
        // Deep copy: the attribute's BytesRef is reused between tokens.
        Term t = new Term(field, BytesRef.deepCopyOf(termAtt.getBytesRef()));
        if (count % 2 == 0) {
            // Even-indexed token: start a new pair.
            termArr[0] = t;
            offsetArr[0] = offsetAtt.startOffset();
        } else {
            // Odd-indexed token: complete the pair and emit a phrase clause.
            termArr[1] = t;
            offsetArr[1] = offsetAtt.startOffset();

            long offsetDiff = offsetArr[1] - offsetArr[0];
            if (offsetDiff > 0) {
                PhraseQuery.Builder pq = new PhraseQuery.Builder();

                // Allow the two terms to be up to offsetDiff + 1 positions apart.
                pq.setSlop((int) (offsetDiff) + 1);
                pq.add(termArr[0]);
                pq.add(termArr[1]);

                builder.add(pq.build(), BooleanClause.Occur.SHOULD);
            }

            termArr[0] = null;
            termArr[1] = null;
        }

        count++;
    }

    // Odd token count: emit the leftover token as a plain term clause.
    if (termArr[0] != null) {
        builder.add(new TermQuery(termArr[0]), BooleanClause.Occur.SHOULD);
        termArr[0] = null;
    }
}

From source file: com.stratio.cassandra.index.query.Condition.java

License: Apache License

/**
 * Analyzes {@code value} with the column mapper's analyzer and returns the
 * single resulting term as a String, or {@code null} if analysis produced
 * no tokens at all.
 *
 * @param field        field name passed to the analyzer
 * @param value        raw text to analyze
 * @param columnMapper supplies the analyzer to use
 * @return the analyzed term, or {@code null} when the analyzer emitted nothing
 * @throws IllegalArgumentException if analysis yields more than one token
 * @throws RuntimeException wrapping any IOException raised during analysis
 */
protected String analyze(String field, String value, ColumnMapper<?> columnMapper) {
    TokenStream source = null;
    try {
        Analyzer analyzer = columnMapper.analyzer();
        source = analyzer.tokenStream(field, value);
        source.reset();

        // Older Lucene attribute pattern: obtain the BytesRef once, then
        // fillBytesRef() after each incrementToken() populates it with the
        // current term's bytes.
        TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
        BytesRef bytes = termAtt.getBytesRef();

        if (!source.incrementToken()) {
            // Analyzer dropped the whole input (e.g. a stopword).
            return null;
        }
        termAtt.fillBytesRef();
        if (source.incrementToken()) {
            throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + value);
        }
        source.end();
        // Deep copy: the BytesRef instance is owned and reused by the stream.
        return BytesRef.deepCopyOf(bytes).utf8ToString();
    } catch (IOException e) {
        throw new RuntimeException("Error analyzing multiTerm term: " + value, e);
    } finally {
        IOUtils.closeWhileHandlingException(source);
    }
}

From source file: com.tuplejump.stargate.lucene.query.Condition.java

License: Apache License

/**
 * Analyzes {@code value} with the given analyzer and returns the single
 * resulting term as a String, or {@code null} if analysis produced no
 * tokens at all.
 *
 * @param field    field name passed to the analyzer
 * @param value    raw text to analyze
 * @param analyzer analyzer used to tokenize the value
 * @return the analyzed term, or {@code null} when the analyzer emitted nothing
 * @throws IllegalArgumentException if analysis yields more than one token
 * @throws RuntimeException wrapping any IOException raised during analysis
 */
protected String analyze(String field, String value, Analyzer analyzer) {
    TokenStream source = null;
    try {
        source = analyzer.tokenStream(field, value);
        source.reset();

        // Older Lucene attribute pattern: obtain the BytesRef once, then
        // fillBytesRef() after each incrementToken() populates it with the
        // current term's bytes.
        TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
        BytesRef bytes = termAtt.getBytesRef();

        if (!source.incrementToken()) {
            // Analyzer dropped the whole input (e.g. a stopword).
            return null;
        }
        termAtt.fillBytesRef();
        if (source.incrementToken()) {
            throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + value);
        }
        source.end();
        // Deep copy: the BytesRef instance is owned and reused by the stream.
        return BytesRef.deepCopyOf(bytes).utf8ToString();
    } catch (IOException e) {
        throw new RuntimeException("Error analyzing multiTerm term: " + value, e);
    } finally {
        IOUtils.closeWhileHandlingException(source);
    }
}

From source file: jp.scaleout.elasticsearch.plugins.queryparser.classic.QueryParserBase.java

License: Apache License

/**
 * Analyzes {@code part} with the given analyzer (falling back to
 * {@code getAnalyzer()} when {@code analyzerIn} is null) and returns a deep
 * copy of the single term it produces.
 *
 * @param field      field name passed to the analyzer
 * @param part       raw text to analyze
 * @param analyzerIn analyzer to use, or null for the parser's default
 * @return a deep copy of the single analyzed term
 * @throws IllegalArgumentException if analysis yields zero or more than one token
 * @throws RuntimeException wrapping any IOException raised during analysis
 */
protected BytesRef analyzeMultitermTerm(String field, String part, Analyzer analyzerIn) {
    if (analyzerIn == null)
        analyzerIn = getAnalyzer();

    try (TokenStream source = analyzerIn.tokenStream(field, part)) {
        source.reset();

        TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
        BytesRef bytes = termAtt.getBytesRef();

        if (!source.incrementToken())
            throw new IllegalArgumentException("analyzer returned no terms for multiTerm term: " + part);
        if (source.incrementToken())
            throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + part);
        source.end();
        // Deep copy: the BytesRef instance is owned and reused by the stream.
        return BytesRef.deepCopyOf(bytes);
    } catch (IOException e) {
        throw new RuntimeException("Error analyzing multiTerm term: " + part, e);
    }
}

From source file: lia.chapter4.AnalyzerUtils.java

License: Apache License

/**
 * Prints every token remaining in the stream to stdout as "[term] ".
 * Assumes the caller has already reset() the stream and will end()/close()
 * it afterwards — TODO confirm against callers.
 *
 * @param stream token source to drain
 * @throws IOException if advancing the token stream fails
 */
public static void displayTokens(TokenStream stream) throws IOException {

    TermToBytesRefAttribute term = stream.addAttribute(TermToBytesRefAttribute.class);
    while (stream.incrementToken()) {
        System.out.print("[" + term.getBytesRef().utf8ToString() + "] ");
    }
}

From source file: lia.chapter4.AnalyzerUtils.java

License: Apache License

/**
 * Returns the current term of the source as a UTF-8 String, read through
 * its {@link TermToBytesRefAttribute}.
 *
 * @param source attribute source positioned on a token
 * @return the current term text
 */
public static String getTerm(AttributeSource source) {
    return source.addAttribute(TermToBytesRefAttribute.class).getBytesRef().utf8ToString();
}

From source file: lia.chapter4.AnalyzerUtils.java

License: Apache License

/**
 * Tokenizes {@code text} with the given analyzer and prints each token
 * grouped by position: tokens at the same position share a line, and each
 * new position starts a fresh "N: " line.
 *
 * @param analyzer analyzer used to tokenize the text
 * @param text     raw text to analyze
 * @throws IOException if tokenization fails
 */
public static void displayTokensWithPositions(Analyzer analyzer, String text) throws IOException {

    // try-with-resources: the original leaked the stream (never closed).
    try (TokenStream stream = analyzer.tokenStream("contents", new StringReader(text))) {
        TermToBytesRefAttribute term = stream.addAttribute(TermToBytesRefAttribute.class);
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);

        // The TokenStream contract requires reset() before incrementToken();
        // the original omitted it.
        stream.reset();

        int position = 0;
        while (stream.incrementToken()) {
            int increment = posIncr.getPositionIncrement();
            if (increment > 0) {
                position = position + increment;
                System.out.println();
                System.out.print(position + ": ");
            }

            System.out.print("[" + term.getBytesRef().utf8ToString() + "] ");
        }
        stream.end();
    }
    System.out.println();
}

From source file: lia.chapter4.AnalyzerUtils.java

License: Apache License

/**
 * Asserts that analyzing {@code input} yields exactly the tokens in
 * {@code output}, in order, and nothing more.
 *
 * @param analyzer analyzer under test
 * @param input    raw text to analyze
 * @param output   expected token texts, in order
 * @throws Exception on I/O failure or assertion error
 */
public static void assertAnalyzesTo(Analyzer analyzer, String input, String[] output) throws Exception {
    // try-with-resources replaces the original's bare close() so the stream
    // is released even when an assertion fails.
    try (TokenStream stream = analyzer.tokenStream("field", new StringReader(input))) {
        TermToBytesRefAttribute termAttr = stream.addAttribute(TermToBytesRefAttribute.class);

        // The TokenStream contract requires reset() before incrementToken();
        // the original omitted it.
        stream.reset();

        for (String expected : output) {
            Assert.assertTrue(stream.incrementToken());
            Assert.assertEquals(expected, termAttr.getBytesRef().utf8ToString());
        }
        Assert.assertFalse(stream.incrementToken());
        stream.end();
    }
}