Usage examples for org.apache.lucene.analysis.Token#setPayload
@Override public void setPayload(BytesRef payload)
From source file:au.edu.unimelb.csse.analyser.FastStringParser.java
License:Apache License
/**
 * Returns the next token of the parsed sentence, or {@code null} when the
 * sentence is empty or exhausted. The reusable token's term buffer is set
 * from the precomputed text positions and a 4-byte payload is built from
 * the per-token payload int.
 */
public Token next(Token token) {
    if (currentPos == 0) {
        return null;
    }
    if (tokenPos > currentPos) {
        return null;
    }
    int start = textPositions[2 * tokenPos];
    int end = textPositions[2 * tokenPos + 1];
    token.setTermBuffer(sentence, start, end - start);
    // NOTE(review): byte order is bits 16-23, 24-31, 8-15, 0-7 — neither
    // big- nor little-endian; presumably matches the index's payload
    // encoding. Confirm before changing.
    int packed = payloads[tokenPos];
    byte[] bytes = new byte[4];
    bytes[0] = (byte) ((packed >>> 16) & 255);
    bytes[1] = (byte) ((packed >>> 24) & 255);
    bytes[2] = (byte) ((packed >>> 8) & 255);
    bytes[3] = (byte) (packed & 255);
    Payload payload = new Payload();
    payload.setData(bytes);
    token.setPayload(payload);
    tokenPos++;
    return token;
}
From source file:au.edu.unimelb.csse.analyser.JsonSentenceParser.java
License:Apache License
public Token next(Token token) { boolean nameFound = nameMatcher.find(); boolean indexFound = indexMatcher.find(); if (nameFound && indexFound) { final int nstart = nameMatcher.start(); final int nend = nameMatcher.end(); final int indexOfEscapedQuotes = jsonSentence.indexOf("\\\"", nstart + BEFORE_CONST); if (indexOfEscapedQuotes != -1 && indexOfEscapedQuotes < nend - AFTER_CONST) { String str = jsonSentence.substring(nstart + BEFORE_CONST, nend - AFTER_CONST); str = str.replace("\\\"", "\""); token.setTermBuffer(str);// w ww .j a v a 2 s . c o m } else { token.setTermBuffer(jsonSentence, nstart + BEFORE_CONST, nameMatcher.end() - AFTER_CONST - nstart - BEFORE_CONST); } String index = jsonSentence.substring(indexMatcher.start() + BEFORE_CONST, indexMatcher.end() - AFTER_CONST); String[] split = index.split("_"); for (int i = 0; i < 4; i++) { intbuffer[i] = Integer.parseInt(split[i]); if (intbuffer[i] > 255) { throw new OverflowException("Exceeded payload size for element " + i + " = " + intbuffer[i]); } buffer[i] = (byte) (intbuffer[i] & 255); } if (compressPayload) { byte[] bytes = new byte[8]; try { token.setPayload(getVarDiffPayload(bytes)); } catch (ArrayIndexOutOfBoundsException e) { throw new OverflowException("Exceeded payload size for element "); } } else { token.setPayload(new Payload(buffer.clone())); } // if (compressPayload) { // byte[] bytes = new byte[8]; // try { // token.setPayload(getVarDiffPayload(bytes)); // }catch (ArrayIndexOutOfBoundsException e) { // bytes = new byte[16]; // try { // token.setPayload(getVarDiffPayload(bytes)); // } catch(ArrayIndexOutOfBoundsException ee) { // throw new // OverflowException("Exceeded payload size for element "); // } // } // // } else { // for (int i = 0; i < 4; i++) { // if(intbuffer[i] > 255) { // throw new OverflowException("Exceeded payload size for element " // + i + " = " + intbuffer[i]); // } // buffer[i] = (byte) (intbuffer[i] & 255); // } // token.setPayload(new Payload(buffer.clone())); // } return 
token; } return null; }
From source file:au.edu.unimelb.csse.analyser.String2NodesParser.java
License:Apache License
/**
 * Advances to the next parsed node and fills the reusable token with its
 * name and payload. Returns {@code null} once all nodes are consumed.
 */
public Token next(final Token reusableToken) throws IOException {
    assert reusableToken != null;
    nodesPosition++;
    if (nodesPosition >= nodes.size()) {
        return null;
    }
    reusableToken.clear();
    Node node = nodes.get(nodesPosition);
    reusableToken.setTermBuffer(node.name);
    reusableToken.setPayload(node.getPayload());
    return reusableToken;
}
From source file:magoffin.matt.lucene.KeyFilter.java
License:Open Source License
@Override public Token next() throws IOException { final Token t = new Token(); Token token = next(t); if (token == null) { return null; }//ww w . j a v a 2s . c om Payload p = token.getPayload(); if (p != null) { token.setPayload((Payload) p.clone()); } String key = token.term(); if (key.length() > maxLength) { key = key.substring(0, maxLength); } char[] keyChars = key.toCharArray(); return new Token(keyChars, 0, keyChars.length, token.startOffset(), token.startOffset() + keyChars.length); }
From source file:magoffin.matt.lucene.LuceneSearchService.java
License:Open Source License
/** * Add a series of non-required TermQuery objects to a BooleanQuery, * from tokenizing a string with the Analyzer used by the index type. * @param rootQuery the root boolean query * @param query the query to tokenize/*from w w w . j a v a2 s . c om*/ * @param field the field this query is searching * @param type the index type */ @Override public void addTokenizedTermQuery(BooleanQuery rootQuery, String query, String field, String type) { StringReader reader = new StringReader(query); IndexData data = getIndexData(type); TokenStream stream = data.plugin.getAnalyzer().tokenStream(field, reader); try { Token t = new Token(); while (true) { Token token = stream.next(t); if (token == null) { break; } Payload p = token.getPayload(); if (p != null) { token.setPayload((Payload) p.clone()); } Query q = new TermQuery(new Term(field, token.term())); rootQuery.add(q, Occur.SHOULD); } } catch (IOException e) { throw new RuntimeException("Unable to tokenize query string", e); } }
From source file:magoffin.matt.lucene.LuceneSearchService.java
License:Open Source License
/**
 * Add a series of non-required FuzzyQuery objects to a BooleanQuery,
 * from tokenizing a string with the Analyzer used by the index type.
 *
 * @param rootQuery the root boolean query
 * @param query the query to tokenize
 * @param field the field this query is searching
 * @param type the index type
 */
@Override
public void addTokenizedFuzzyQuery(BooleanQuery rootQuery, String query, String field, String type) {
    IndexData data = getIndexData(type);
    TokenStream stream = data.plugin.getAnalyzer().tokenStream(field, new StringReader(query));
    try {
        Token reusable = new Token();
        for (Token token = stream.next(reusable); token != null; token = stream.next(reusable)) {
            Payload payload = token.getPayload();
            if (payload != null) {
                token.setPayload((Payload) payload.clone());
            }
            // Each fuzzy term is optional (SHOULD) so any match contributes.
            rootQuery.add(new FuzzyQuery(new Term(field, token.term())), Occur.SHOULD);
        }
    } catch (IOException e) {
        throw new RuntimeException("Unable to tokenize query string", e);
    }
}
From source file:org.apache.jackrabbit.core.query.lucene.SingletonTokenStream.java
License:Apache License
/**
 * {@inheritDoc}
 *
 * Emits the single stored value as one token (with its payload and offsets
 * spanning the whole value), then returns {@code null} on every subsequent
 * call — the value field is nulled after the first emission.
 */
public Token next(Token reusableToken) throws IOException {
    if (value == null) {
        return null;
    }
    reusableToken.clear();
    reusableToken.setTermBuffer(value);
    reusableToken.setPayload(payload);
    reusableToken.setStartOffset(0);
    reusableToken.setEndOffset(value.length());
    // Mark the singleton as consumed.
    value = null;
    return reusableToken;
}
From source file:org.apache.solr.analysis.BufferedTokenStream.java
License:Apache License
/**
 * Old-API emulation for back compat: pulls one token from the attribute-based
 * stream and materializes it as a {@link Token}, or returns {@code null} at
 * end of stream.
 */
private Token readToken() throws IOException {
    if (!input.incrementToken()) {
        return null;
    }
    Token token = new Token();
    // Copy every attribute the wrapped stream exposes onto the Token.
    token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
    token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
    token.setType(typeAtt.type());
    token.setFlags(flagsAtt.getFlags());
    token.setPositionIncrement(posIncAtt.getPositionIncrement());
    token.setPayload(payloadAtt.getPayload());
    return token;
}
From source file:org.apache.solr.handler.component.SpellCheckComponent.java
License:Apache License
private Collection<Token> getTokens(String q, Analyzer analyzer) throws IOException { Collection<Token> result = new ArrayList<Token>(); assert analyzer != null; TokenStream ts = analyzer.tokenStream("", q); try {/* www . j a v a2s .co m*/ ts.reset(); // TODO: support custom attributes CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class); FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class); PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class); PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class); while (ts.incrementToken()) { Token token = new Token(); token.copyBuffer(termAtt.buffer(), 0, termAtt.length()); token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset()); token.setType(typeAtt.type()); token.setFlags(flagsAtt.getFlags()); token.setPayload(payloadAtt.getPayload()); token.setPositionIncrement(posIncAtt.getPositionIncrement()); result.add(token); } ts.end(); return result; } finally { IOUtils.closeWhileHandlingException(ts); } }
From source file:org.apache.solr.spelling.SimpleQueryConverter.java
License:Apache License
@Override public Collection<Token> convert(String origQuery) { Collection<Token> result = new HashSet<Token>(); WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_40); TokenStream ts = null;/* ww w.ja v a2 s . c o m*/ try { ts = analyzer.tokenStream("", origQuery); // TODO: support custom attributes CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class); FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class); PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class); PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class); ts.reset(); while (ts.incrementToken()) { Token tok = new Token(); tok.copyBuffer(termAtt.buffer(), 0, termAtt.length()); tok.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset()); tok.setFlags(flagsAtt.getFlags()); tok.setPayload(payloadAtt.getPayload()); tok.setPositionIncrement(posIncAtt.getPositionIncrement()); tok.setType(typeAtt.type()); result.add(tok); } ts.end(); return result; } catch (IOException e) { throw new RuntimeException(e); } finally { IOUtils.closeWhileHandlingException(ts); } }