Example usage for org.apache.lucene.analysis Token setPayload

List of usage examples for org.apache.lucene.analysis Token setPayload

Introduction

On this page you can find example usages of org.apache.lucene.analysis Token setPayload.

Prototype

@Override
public void setPayload(BytesRef payload) 

Source Link

Usage

From source file:au.edu.unimelb.csse.analyser.FastStringParser.java

License:Apache License

/**
 * Fills the reusable token with the next term and its packed payload.
 * Returns {@code null} when no sentence is loaded or all tokens are consumed.
 */
public Token next(Token token) {
    if (currentPos == 0)
        return null;
    if (tokenPos <= currentPos) {
        // Term text is the [start, end) slice recorded for this token index.
        final int start = textPositions[2 * tokenPos];
        final int end = textPositions[2 * tokenPos + 1];
        token.setTermBuffer(sentence, start, end - start);
        // Pack the 32-bit payload value into 4 bytes.
        // NOTE(review): byte order is bits 16-23, 24-31, 8-15, 0-7 — unusual
        // but looks deliberate; confirm against the payload decoder.
        final int v = payloads[tokenPos];
        final byte[] data = new byte[] { (byte) ((v >>> 16) & 255), (byte) ((v >>> 24) & 255),
                (byte) ((v >>> 8) & 255), (byte) (v & 255) };
        final Payload p = new Payload();
        p.setData(data);
        token.setPayload(p);
        tokenPos++;
        return token;
    }
    return null;
}

From source file:au.edu.unimelb.csse.analyser.JsonSentenceParser.java

License:Apache License

/**
 * Produces the next token from the JSON sentence: the term text comes from the
 * name matcher (with embedded escaped quotes un-escaped), and the payload is
 * built from a four-part underscore-separated position index.
 *
 * @param token reusable token to fill
 * @return the filled token, or {@code null} when no further name/index pair exists
 * @throws OverflowException if any index component exceeds one unsigned byte,
 *         or the compressed payload does not fit its buffer
 */
public Token next(Token token) {
    boolean nameFound = nameMatcher.find();
    boolean indexFound = indexMatcher.find();
    if (nameFound && indexFound) {
        final int nstart = nameMatcher.start();
        final int nend = nameMatcher.end();
        // Only take the substring-and-replace path when the matched name
        // actually contains an escaped quote.
        final int indexOfEscapedQuotes = jsonSentence.indexOf("\\\"", nstart + BEFORE_CONST);
        if (indexOfEscapedQuotes != -1 && indexOfEscapedQuotes < nend - AFTER_CONST) {
            String str = jsonSentence.substring(nstart + BEFORE_CONST, nend - AFTER_CONST);
            str = str.replace("\\\"", "\"");
            token.setTermBuffer(str);
        } else {
            token.setTermBuffer(jsonSentence, nstart + BEFORE_CONST,
                    nameMatcher.end() - AFTER_CONST - nstart - BEFORE_CONST);
        }
        // The position index is four underscore-separated ints; each must fit
        // in one unsigned byte of the payload.
        String index = jsonSentence.substring(indexMatcher.start() + BEFORE_CONST,
                indexMatcher.end() - AFTER_CONST);
        String[] split = index.split("_");
        for (int i = 0; i < 4; i++) {
            intbuffer[i] = Integer.parseInt(split[i]);
            if (intbuffer[i] > 255) {
                throw new OverflowException("Exceeded payload size for element " + i + " = " + intbuffer[i]);
            }
            buffer[i] = (byte) (intbuffer[i] & 255);
        }
        if (compressPayload) {
            byte[] bytes = new byte[8];
            try {
                token.setPayload(getVarDiffPayload(bytes));
            } catch (ArrayIndexOutOfBoundsException e) {
                // Compressed form overflowed the 8-byte buffer.
                throw new OverflowException("Exceeded payload size for element ");
            }
        } else {
            // Clone so the payload does not alias the reusable buffer.
            token.setPayload(new Payload(buffer.clone()));
        }
        return token;
    }
    return null;
}

From source file:au.edu.unimelb.csse.analyser.String2NodesParser.java

License:Apache License

/**
 * Emits one token per parsed node, copying the node's name and payload into
 * the reusable token. Returns {@code null} once all nodes have been emitted.
 */
public Token next(final Token reusableToken) throws IOException {
    assert reusableToken != null;
    nodesPosition++;
    if (nodesPosition >= nodes.size()) {
        return null;
    }
    reusableToken.clear();
    final Node current = nodes.get(nodesPosition);
    reusableToken.setTermBuffer(current.name);
    reusableToken.setPayload(current.getPayload());
    return reusableToken;
}

From source file:magoffin.matt.lucene.KeyFilter.java

License:Open Source License

@Override
public Token next() throws IOException {
    final Token source = next(new Token());
    if (source == null) {
        return null;
    }
    // Deep-copy the payload so the reusable token's data isn't shared.
    final Payload payload = source.getPayload();
    if (payload != null) {
        source.setPayload((Payload) payload.clone());
    }
    // Truncate the key to the configured maximum length.
    String key = source.term();
    if (key.length() > maxLength) {
        key = key.substring(0, maxLength);
    }
    final char[] chars = key.toCharArray();
    // NOTE(review): the returned token carries neither the cloned payload nor
    // the original end offset — confirm this is intended.
    return new Token(chars, 0, chars.length, source.startOffset(), source.startOffset() + chars.length);
}

From source file:magoffin.matt.lucene.LuceneSearchService.java

License:Open Source License

/**
 * Add a series of non-required TermQuery objects to a BooleanQuery, 
 * from tokenizing a string with the Analyzer used by the index type.
 * @param rootQuery the root boolean query
 * @param query the query to tokenize/*from  w  w  w .  j  a  v a2  s  .  c om*/
 * @param field the field this query is searching
 * @param type the index type
 */
/**
 * Add a series of non-required TermQuery objects to a BooleanQuery,
 * from tokenizing a string with the Analyzer used by the index type.
 *
 * @param rootQuery the root boolean query
 * @param query the query to tokenize
 * @param field the field this query is searching
 * @param type the index type
 * @throws RuntimeException if tokenization fails with an IOException
 */
@Override
public void addTokenizedTermQuery(BooleanQuery rootQuery, String query, String field, String type) {
    StringReader reader = new StringReader(query);
    IndexData data = getIndexData(type);
    TokenStream stream = data.plugin.getAnalyzer().tokenStream(field, reader);
    try {
        Token t = new Token();
        while (true) {
            Token token = stream.next(t);
            if (token == null) {
                break;
            }
            // Clone so the payload does not alias the reusable token's buffer.
            Payload p = token.getPayload();
            if (p != null) {
                token.setPayload((Payload) p.clone());
            }
            Query q = new TermQuery(new Term(field, token.term()));
            rootQuery.add(q, Occur.SHOULD);
        }
    } catch (IOException e) {
        throw new RuntimeException("Unable to tokenize query string", e);
    } finally {
        // Fix: the stream was previously never closed (resource leak).
        try {
            stream.close();
        } catch (IOException ignored) {
            // Best-effort close; the queries have already been added.
        }
    }
}

From source file:magoffin.matt.lucene.LuceneSearchService.java

License:Open Source License

/**
 * Add a series of non-required FuzzyQuery objects to a BooleanQuery, 
 * from tokenizing a string with the Analyzer used by the index type.
 * @param rootQuery the root boolean query
 * @param query the query to tokenize//from  w w  w.  j  a  va  2  s .  co m
 * @param field the field this query is searching
 * @param type the index type
 */
/**
 * Add a series of non-required FuzzyQuery objects to a BooleanQuery,
 * from tokenizing a string with the Analyzer used by the index type.
 *
 * @param rootQuery the root boolean query
 * @param query the query to tokenize
 * @param field the field this query is searching
 * @param type the index type
 * @throws RuntimeException if tokenization fails with an IOException
 */
@Override
public void addTokenizedFuzzyQuery(BooleanQuery rootQuery, String query, String field, String type) {
    StringReader reader = new StringReader(query);
    IndexData data = getIndexData(type);
    TokenStream stream = data.plugin.getAnalyzer().tokenStream(field, reader);
    try {
        Token t = new Token();
        while (true) {
            Token token = stream.next(t);
            if (token == null) {
                break;
            }
            // Clone so the payload does not alias the reusable token's buffer.
            Payload p = token.getPayload();
            if (p != null) {
                token.setPayload((Payload) p.clone());
            }
            Query q = new FuzzyQuery(new Term(field, token.term()));
            rootQuery.add(q, Occur.SHOULD);
        }
    } catch (IOException e) {
        throw new RuntimeException("Unable to tokenize query string", e);
    } finally {
        // Fix: the stream was previously never closed (resource leak).
        try {
            stream.close();
        } catch (IOException ignored) {
            // Best-effort close; the queries have already been added.
        }
    }
}

From source file:org.apache.jackrabbit.core.query.lucene.SingletonTokenStream.java

License:Apache License

/**
 * Emits the single cached value exactly once, then signals end-of-stream by
 * returning {@code null} on every subsequent call.
 */
public Token next(Token reusableToken) throws IOException {
    if (value == null) {
        return null;
    }
    final String term = value;
    value = null; // mark this one-shot stream as exhausted
    reusableToken.clear();
    reusableToken.setTermBuffer(term);
    reusableToken.setPayload(payload);
    reusableToken.setStartOffset(0);
    reusableToken.setEndOffset(term.length());
    return reusableToken;
}

From source file:org.apache.solr.analysis.BufferedTokenStream.java

License:Apache License

/** old api emulation for back compat */
private Token readToken() throws IOException {
    if (!input.incrementToken()) {
        return null;
    } else {/* w  w  w.  ja v  a 2 s .c  om*/
        Token token = new Token();
        token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
        token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
        token.setType(typeAtt.type());
        token.setFlags(flagsAtt.getFlags());
        token.setPositionIncrement(posIncAtt.getPositionIncrement());
        token.setPayload(payloadAtt.getPayload());
        return token;
    }
}

From source file:org.apache.solr.handler.component.SpellCheckComponent.java

License:Apache License

/**
 * Tokenizes the query string with the given analyzer and materializes each
 * position as a standalone {@link Token} carrying term, offsets, type, flags,
 * payload, and position increment.
 */
private Collection<Token> getTokens(String q, Analyzer analyzer) throws IOException {
    assert analyzer != null;
    Collection<Token> tokens = new ArrayList<Token>();
    TokenStream stream = analyzer.tokenStream("", q);
    try {
        stream.reset();
        // TODO: support custom attributes
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
        TypeAttribute type = stream.addAttribute(TypeAttribute.class);
        FlagsAttribute flags = stream.addAttribute(FlagsAttribute.class);
        PayloadAttribute payload = stream.addAttribute(PayloadAttribute.class);
        PositionIncrementAttribute posInc = stream.addAttribute(PositionIncrementAttribute.class);

        while (stream.incrementToken()) {
            Token copy = new Token();
            copy.copyBuffer(term.buffer(), 0, term.length());
            copy.setOffset(offset.startOffset(), offset.endOffset());
            copy.setType(type.type());
            copy.setFlags(flags.getFlags());
            copy.setPayload(payload.getPayload());
            copy.setPositionIncrement(posInc.getPositionIncrement());
            tokens.add(copy);
        }
        stream.end();
        return tokens;
    } finally {
        // Close quietly even if iteration threw.
        IOUtils.closeWhileHandlingException(stream);
    }
}

From source file:org.apache.solr.spelling.SimpleQueryConverter.java

License:Apache License

/**
 * Whitespace-tokenizes the original query and returns the distinct tokens,
 * each carrying term, offsets, flags, payload, position increment, and type.
 *
 * @throws RuntimeException wrapping any IOException from tokenization
 */
@Override
public Collection<Token> convert(String origQuery) {
    Collection<Token> tokens = new HashSet<Token>();
    WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_40);

    TokenStream stream = null;
    try {
        stream = analyzer.tokenStream("", origQuery);
        // TODO: support custom attributes
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
        TypeAttribute type = stream.addAttribute(TypeAttribute.class);
        FlagsAttribute flags = stream.addAttribute(FlagsAttribute.class);
        PayloadAttribute payload = stream.addAttribute(PayloadAttribute.class);
        PositionIncrementAttribute posInc = stream.addAttribute(PositionIncrementAttribute.class);

        stream.reset();

        while (stream.incrementToken()) {
            Token copy = new Token();
            copy.copyBuffer(term.buffer(), 0, term.length());
            copy.setOffset(offset.startOffset(), offset.endOffset());
            copy.setFlags(flags.getFlags());
            copy.setPayload(payload.getPayload());
            copy.setPositionIncrement(posInc.getPositionIncrement());
            copy.setType(type.type());
            tokens.add(copy);
        }
        stream.end();
        return tokens;
    } catch (IOException e) {
        throw new RuntimeException(e);
    } finally {
        // Close quietly even if iteration threw.
        IOUtils.closeWhileHandlingException(stream);
    }
}