List of usage examples for org.apache.lucene.analysis Token copyBuffer
@Override
public final void copyBuffer(char[] buffer, int offset, int length)
From source file:com.zb.mmseg.analysis.TokenUtils.java
License:Open Source License
/** * @param input//from w w w . j a v a 2 s . c o m * @param reusableToken is null well new one auto. * @return null - if not next token or input is null. * @throws IOException */ public static Token nextToken(TokenStream input, Token reusableToken) throws IOException { if (input == null) { return null; } if (!input.incrementToken()) { return null; } CharTermAttribute termAtt = input.getAttribute(CharTermAttribute.class); OffsetAttribute offsetAtt = input.getAttribute(OffsetAttribute.class); TypeAttribute typeAtt = input.getAttribute(TypeAttribute.class); if (reusableToken == null) { reusableToken = new Token(); } reusableToken.clear(); if (termAtt != null) { // lucene 3.0 // reusableToken.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength()); // lucene 3.1 reusableToken.copyBuffer(termAtt.buffer(), 0, termAtt.length()); } if (offsetAtt != null) { // lucene 3.1 // reusableToken.setStartOffset(offsetAtt.startOffset()); // reusableToken.setEndOffset(offsetAtt.endOffset()); // lucene 4.0 reusableToken.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset()); } if (typeAtt != null) { reusableToken.setType(typeAtt.type()); } return reusableToken; }
From source file:eu.socialsensor.framework.client.lucene.TweetTokenizerImpl.java
License:Apache License
/**
 * Copies the current token text into the given Lucene token.
 *
 * @param t the token whose term buffer receives the characters
 */
final void getText(Token t) {
    // The text spans zzStartRead (inclusive) to zzMarkedPos (exclusive) in zzBuffer.
    final int textLength = zzMarkedPos - zzStartRead;
    t.copyBuffer(zzBuffer, zzStartRead, textLength);
}
From source file:org.apache.solr.analysis.BufferedTokenStream.java
License:Apache License
/** old api emulation for back compat */ private Token readToken() throws IOException { if (!input.incrementToken()) { return null; } else {//from ww w .j a v a 2s.c om Token token = new Token(); token.copyBuffer(termAtt.buffer(), 0, termAtt.length()); token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset()); token.setType(typeAtt.type()); token.setFlags(flagsAtt.getFlags()); token.setPositionIncrement(posIncAtt.getPositionIncrement()); token.setPayload(payloadAtt.getPayload()); return token; } }
From source file:org.apache.solr.analysis.SlowSynonymMap.java
License:Apache License
/** * Merge two lists of tokens, producing a single list with manipulated positionIncrements so that * the tokens end up at the same position. * * Example: [a b] merged with [c d] produces [a/b c/d] ('/' denotes tokens in the same position) * Example: [a,5 b,2] merged with [c d,4 e,4] produces [c a,5/d b,2 e,2] (a,n means a has posInc=n) * *///w ww .ja va 2s .c o m public static List<Token> mergeTokens(List<Token> lst1, List<Token> lst2) { ArrayList<Token> result = new ArrayList<Token>(); if (lst1 == null || lst2 == null) { if (lst2 != null) result.addAll(lst2); if (lst1 != null) result.addAll(lst1); return result; } int pos = 0; Iterator<Token> iter1 = lst1.iterator(); Iterator<Token> iter2 = lst2.iterator(); Token tok1 = iter1.hasNext() ? iter1.next() : null; Token tok2 = iter2.hasNext() ? iter2.next() : null; int pos1 = tok1 != null ? tok1.getPositionIncrement() : 0; int pos2 = tok2 != null ? tok2.getPositionIncrement() : 0; while (tok1 != null || tok2 != null) { while (tok1 != null && (pos1 <= pos2 || tok2 == null)) { Token tok = new Token(tok1.startOffset(), tok1.endOffset(), tok1.type()); tok.copyBuffer(tok1.buffer(), 0, tok1.length()); tok.setPositionIncrement(pos1 - pos); result.add(tok); pos = pos1; tok1 = iter1.hasNext() ? iter1.next() : null; pos1 += tok1 != null ? tok1.getPositionIncrement() : 0; } while (tok2 != null && (pos2 <= pos1 || tok1 == null)) { Token tok = new Token(tok2.startOffset(), tok2.endOffset(), tok2.type()); tok.copyBuffer(tok2.buffer(), 0, tok2.length()); tok.setPositionIncrement(pos2 - pos); result.add(tok); pos = pos2; tok2 = iter2.hasNext() ? iter2.next() : null; pos2 += tok2 != null ? tok2.getPositionIncrement() : 0; } } return result; }
From source file:org.apache.solr.handler.component.SpellCheckComponent.java
License:Apache License
private Collection<Token> getTokens(String q, Analyzer analyzer) throws IOException { Collection<Token> result = new ArrayList<Token>(); assert analyzer != null; TokenStream ts = analyzer.tokenStream("", q); try {// w w w . j a va2 s . co m ts.reset(); // TODO: support custom attributes CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class); FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class); PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class); PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class); while (ts.incrementToken()) { Token token = new Token(); token.copyBuffer(termAtt.buffer(), 0, termAtt.length()); token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset()); token.setType(typeAtt.type()); token.setFlags(flagsAtt.getFlags()); token.setPayload(payloadAtt.getPayload()); token.setPositionIncrement(posIncAtt.getPositionIncrement()); result.add(token); } ts.end(); return result; } finally { IOUtils.closeWhileHandlingException(ts); } }
From source file:org.apache.solr.spelling.SimpleQueryConverter.java
License:Apache License
@Override public Collection<Token> convert(String origQuery) { Collection<Token> result = new HashSet<Token>(); WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_40); TokenStream ts = null;/* www . j a va 2 s . co m*/ try { ts = analyzer.tokenStream("", origQuery); // TODO: support custom attributes CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class); FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class); PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class); PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class); ts.reset(); while (ts.incrementToken()) { Token tok = new Token(); tok.copyBuffer(termAtt.buffer(), 0, termAtt.length()); tok.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset()); tok.setFlags(flagsAtt.getFlags()); tok.setPayload(payloadAtt.getPayload()); tok.setPositionIncrement(posIncAtt.getPositionIncrement()); tok.setType(typeAtt.type()); result.add(tok); } ts.end(); return result; } catch (IOException e) { throw new RuntimeException(e); } finally { IOUtils.closeWhileHandlingException(ts); } }
From source file:org.apache.solr.spelling.SpellingQueryConverter.java
License:Apache License
protected void analyze(Collection<Token> result, String text, int offset, int flagsAttValue) throws IOException { TokenStream stream = analyzer.tokenStream("", text); // TODO: support custom attributes CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class); TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class); PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class); PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class); OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class); stream.reset();//from w ww . j av a 2s . c o m while (stream.incrementToken()) { Token token = new Token(); token.copyBuffer(termAtt.buffer(), 0, termAtt.length()); token.setOffset(offset + offsetAtt.startOffset(), offset + offsetAtt.endOffset()); token.setFlags(flagsAttValue); //overwriting any flags already set... token.setType(typeAtt.type()); token.setPayload(payloadAtt.getPayload()); token.setPositionIncrement(posIncAtt.getPositionIncrement()); result.add(token); } stream.end(); stream.close(); }
From source file:org.hibernate.search.util.AnalyzerUtils.java
License:Open Source License
public static Token[] tokensFromAnalysis(Analyzer analyzer, String field, String text) throws IOException { TokenStream stream = analyzer.tokenStream(field, new StringReader(text)); CharTermAttribute term = stream.addAttribute(CharTermAttribute.class); List<Token> tokenList = new ArrayList<Token>(); while (stream.incrementToken()) { Token token = new Token(); token.copyBuffer(term.buffer(), 0, term.length()); tokenList.add(token);// w w w . j a v a2 s . c o m } return tokenList.toArray(new Token[tokenList.size()]); }