List of usage examples for org.apache.lucene.analysis Token setOffset
@Override public void setOffset(int startOffset, int endOffset)
From source file:com.zb.mmseg.analysis.TokenUtils.java
License:Open Source License
/** * @param input/* www . ja v a2 s.co m*/ * @param reusableToken is null well new one auto. * @return null - if not next token or input is null. * @throws IOException */ public static Token nextToken(TokenStream input, Token reusableToken) throws IOException { if (input == null) { return null; } if (!input.incrementToken()) { return null; } CharTermAttribute termAtt = input.getAttribute(CharTermAttribute.class); OffsetAttribute offsetAtt = input.getAttribute(OffsetAttribute.class); TypeAttribute typeAtt = input.getAttribute(TypeAttribute.class); if (reusableToken == null) { reusableToken = new Token(); } reusableToken.clear(); if (termAtt != null) { // lucene 3.0 // reusableToken.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength()); // lucene 3.1 reusableToken.copyBuffer(termAtt.buffer(), 0, termAtt.length()); } if (offsetAtt != null) { // lucene 3.1 // reusableToken.setStartOffset(offsetAtt.startOffset()); // reusableToken.setEndOffset(offsetAtt.endOffset()); // lucene 4.0 reusableToken.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset()); } if (typeAtt != null) { reusableToken.setType(typeAtt.type()); } return reusableToken; }
From source file:it.cnr.ilc.lc.clavius.search.ClaviusTokenGroup.java
/**
 * Records one token (taken from the enclosing group's offset/term attributes)
 * into this group, updating the group's overall span, the matching span, and
 * the accumulated score.
 *
 * Reads instance state: {@code offsetAtt}, {@code termAtt}, {@code numTokens},
 * {@code tokens}, {@code scores}, {@code startOffset}/{@code endOffset},
 * {@code matchStartOffset}/{@code matchEndOffset}, {@code tot}.
 *
 * @param score the score of the current token; tokens beyond
 *              {@code MAX_NUM_TOKENS_PER_GROUP} are silently dropped
 */
void addToken(float score) {
    if (numTokens < MAX_NUM_TOKENS_PER_GROUP) {
        final int termStartOffset = offsetAtt.startOffset();
        final int termEndOffset = offsetAtt.endOffset();
        if (numTokens == 0) {
            // First token: the group span and the match span both start here.
            startOffset = matchStartOffset = termStartOffset;
            endOffset = matchEndOffset = termEndOffset;
            // NOTE(review): this adds score to tot even when score == 0,
            // unlike the else-branch below which only accumulates positive
            // scores — confirm the asymmetry is intentional.
            tot += score;
        } else {
            // Grow the overall group span to cover this token.
            startOffset = Math.min(startOffset, termStartOffset);
            endOffset = Math.max(endOffset, termEndOffset);
            if (score > 0) {
                if (tot == 0) {
                    // First scoring token: the match span starts fresh here.
                    matchStartOffset = termStartOffset;
                    matchEndOffset = termEndOffset;
                } else {
                    // Extend the match span to include this scoring token.
                    matchStartOffset = Math.min(matchStartOffset, termStartOffset);
                    matchEndOffset = Math.max(matchEndOffset, termEndOffset);
                }
                tot += score;
            }
        }
        // Materialize the current term/offsets into a standalone Token.
        Token token = new Token();
        token.setOffset(termStartOffset, termEndOffset);
        token.setEmpty().append(termAtt);
        tokens[numTokens] = token;
        scores[numTokens] = score;
        numTokens++;
    }
}
From source file:jaligner.Sequence.java
License:Open Source License
/** * Constructor//from ww w .ja v a 2 s. c o m * * @param sequence */ public Sequence(String sequence, Analyzer analyzer, int max_length) throws IOException { super(); this.sequence = sequence; TokenStream stream = analyzer.tokenStream("contents", new StringReader(sequence)); Token.TokenAttributeFactory tokenAttributeFactory = new Token.TokenAttributeFactory( stream.getAttributeFactory()); Vector<Token> tokenVector = new Vector<Token>(); while (stream.incrementToken() && tokenVector.size() < max_length) { // Token token = new Token(); // Token token = (Token) stream.getAttribute(CharTermAttribute.class); Token token = (Token) tokenAttributeFactory.createAttributeInstance(Token.class); CharTermAttribute charTerm = stream.getAttribute(CharTermAttribute.class); OffsetAttribute offset = stream.getAttribute(OffsetAttribute.class); // PayloadAttribute payload = stream.getAttribute(PayloadAttribute.class); // FlagsAttribute flags = stream.getAttribute(FlagsAttribute.class); // public Token reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, String newType) { token.reinit(charTerm.buffer(), 0, charTerm.length(), offset.startOffset(), offset.endOffset()); token.setOffset(offset.startOffset(), offset.endOffset()); // token.setPayload(payload.getPayload()); // token.setFlags(flags.getFlags()); if (stream.hasAttribute(PositionIncrementAttribute.class)) { PositionIncrementAttribute positionIncrement = stream .getAttribute(PositionIncrementAttribute.class); token.setPositionIncrement(positionIncrement.getPositionIncrement()); } if (stream.hasAttribute(TypeAttribute.class)) { TypeAttribute type = stream.getAttribute(TypeAttribute.class); token.setType(type.type()); } tokenVector.add(token); } stream.end(); stream.close(); this.tokens = tokenVector.toArray(new Token[tokenVector.size()]); }
From source file:org.apache.solr.analysis.BufferedTokenStream.java
License:Apache License
/** old api emulation for back compat */ private Token readToken() throws IOException { if (!input.incrementToken()) { return null; } else {//w ww .j a v a 2s . c o m Token token = new Token(); token.copyBuffer(termAtt.buffer(), 0, termAtt.length()); token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset()); token.setType(typeAtt.type()); token.setFlags(flagsAtt.getFlags()); token.setPositionIncrement(posIncAtt.getPositionIncrement()); token.setPayload(payloadAtt.getPayload()); return token; } }
From source file:org.apache.solr.handler.component.SpellCheckComponent.java
License:Apache License
private Collection<Token> getTokens(String q, Analyzer analyzer) throws IOException { Collection<Token> result = new ArrayList<Token>(); assert analyzer != null; TokenStream ts = analyzer.tokenStream("", q); try {//from ww w. j av a2 s . c o m ts.reset(); // TODO: support custom attributes CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class); FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class); PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class); PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class); while (ts.incrementToken()) { Token token = new Token(); token.copyBuffer(termAtt.buffer(), 0, termAtt.length()); token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset()); token.setType(typeAtt.type()); token.setFlags(flagsAtt.getFlags()); token.setPayload(payloadAtt.getPayload()); token.setPositionIncrement(posIncAtt.getPositionIncrement()); result.add(token); } ts.end(); return result; } finally { IOUtils.closeWhileHandlingException(ts); } }
From source file:org.apache.solr.spelling.SimpleQueryConverter.java
License:Apache License
@Override public Collection<Token> convert(String origQuery) { Collection<Token> result = new HashSet<Token>(); WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_40); TokenStream ts = null;// w w w. ja v a2 s.c om try { ts = analyzer.tokenStream("", origQuery); // TODO: support custom attributes CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class); FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class); PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class); PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class); ts.reset(); while (ts.incrementToken()) { Token tok = new Token(); tok.copyBuffer(termAtt.buffer(), 0, termAtt.length()); tok.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset()); tok.setFlags(flagsAtt.getFlags()); tok.setPayload(payloadAtt.getPayload()); tok.setPositionIncrement(posIncAtt.getPositionIncrement()); tok.setType(typeAtt.type()); result.add(tok); } ts.end(); return result; } catch (IOException e) { throw new RuntimeException(e); } finally { IOUtils.closeWhileHandlingException(ts); } }
From source file:org.apache.solr.spelling.SpellingQueryConverter.java
License:Apache License
protected void analyze(Collection<Token> result, String text, int offset, int flagsAttValue) throws IOException { TokenStream stream = analyzer.tokenStream("", text); // TODO: support custom attributes CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class); TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class); PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class); PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class); OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class); stream.reset();// w w w . j ava 2 s. c o m while (stream.incrementToken()) { Token token = new Token(); token.copyBuffer(termAtt.buffer(), 0, termAtt.length()); token.setOffset(offset + offsetAtt.startOffset(), offset + offsetAtt.endOffset()); token.setFlags(flagsAttValue); //overwriting any flags already set... token.setType(typeAtt.type()); token.setPayload(payloadAtt.getPayload()); token.setPositionIncrement(posIncAtt.getPositionIncrement()); result.add(token); } stream.end(); stream.close(); }