List of usage examples for org.apache.lucene.analysis.tokenattributes CharTermAttribute copyBuffer
public void copyBuffer(char[] buffer, int offset, int length);
From source file:com.bizosys.unstructured.HSearchTokenizerImpl.java
License:Apache License
/**
 * Fills the given {@link CharTermAttribute} with the current token text
 * taken from the scanner buffer.
 *
 * @param t attribute to receive the token text
 */
public final void getText(CharTermAttribute t) {
    final int tokenLength = zzMarkedPos - zzStartRead;
    t.copyBuffer(zzBuffer, zzStartRead, tokenLength);
}
From source file:com.mathworks.xzheng.analysis.AnalyzerUtils.java
License:Apache License
/**
 * Replaces the term text on {@code source}'s {@link CharTermAttribute}
 * with the characters of {@code term}, adding the attribute if absent.
 *
 * @param source attribute source to update
 * @param term   new term text
 */
public static void setTerm(AttributeSource source, String term) {
    final char[] chars = term.toCharArray();
    CharTermAttribute attr = source.addAttribute(CharTermAttribute.class);
    attr.copyBuffer(chars, 0, chars.length);
}
From source file:com.zimbra.cs.index.analysis.UniversalLexer.java
License:Open Source License
/**
 * Copies the current scanner token into the given term attribute.
 *
 * @param t attribute to receive the token text
 */
void getTerm(CharTermAttribute t) {
    final int len = zzMarkedPos - zzStartRead;
    t.copyBuffer(zzBuffer, zzStartRead, len);
}
From source file:com.zimbra.cs.index.analysis.UniversalLexer.java
License:Open Source License
/**
 * Copies {@code len} characters of the current scanner token, starting
 * {@code offset} characters past the token start, into {@code t}.
 *
 * @param t      attribute to receive the text
 * @param offset offset from the token start, in chars
 * @param len    number of chars to copy
 */
void getTerm(CharTermAttribute t, int offset, int len) {
    final int start = zzStartRead + offset;
    t.copyBuffer(zzBuffer, start, len);
}
From source file:eu.socialsensor.framework.client.lucene.TweetTokenizerImpl.java
License:Apache License
/**
 * Fills the given {@link CharTermAttribute} with the current token text
 * from the scanner buffer.
 *
 * @param t attribute to receive the token text
 */
final void getText(CharTermAttribute t) {
    final int tokenLength = zzMarkedPos - zzStartRead;
    t.copyBuffer(zzBuffer, zzStartRead, tokenLength);
}
From source file:nl.inl.blacklab.filter.AbstractSynonymFilter.java
License:Apache License
/**
 * Sets the term text of {@code source}'s {@link CharTermAttribute} to
 * {@code term}, adding the attribute if it is not present yet.
 *
 * @param source attribute source to update
 * @param term   new term text
 */
static void setTerm(AttributeSource source, String term) {
    final char[] chars = term.toCharArray();
    CharTermAttribute attr = source.addAttribute(CharTermAttribute.class);
    attr.copyBuffer(chars, 0, chars.length);
}
From source file:org.apache.solr.analysis.SlowSynonymFilter.java
License:Apache License
/**
 * Produces the next token, expanding synonyms as they are matched.
 *
 * Buffered replacement tokens (from a previous match) are drained first.
 * Otherwise the next input token is read and looked up in the synonym map;
 * on a match, the replacement tokens are generated — optionally interleaved
 * with the original matched tokens — with position increments adjusted so
 * the merged stream keeps consistent positions.
 *
 * @return true if a token was produced, false at end of stream
 * @throws IOException if reading from the input fails
 */
@Override
public boolean incrementToken() throws IOException {
    while (true) {
        // if there are any generated tokens, return them... don't try any
        // matches against them, as we specifically don't want recursion.
        if (replacement != null && replacement.hasNext()) {
            copy(this, replacement.next());
            return true;
        }

        // common case fast-path of first token not matching anything
        AttributeSource firstTok = nextTok();
        if (firstTok == null)
            return false;
        CharTermAttribute termAtt = firstTok.addAttribute(CharTermAttribute.class);
        SlowSynonymMap result = map.submap != null
                ? map.submap.get(termAtt.buffer(), 0, termAtt.length())
                : null;
        if (result == null) {
            copy(this, firstTok);
            return true;
        }

        // fast-path failed, clone ourselves if needed
        if (firstTok == this)
            firstTok = cloneAttributes();
        // OK, we matched a token, so find the longest match.
        matched = new LinkedList<AttributeSource>();
        result = match(result);
        if (result == null) {
            // no match, simply return the first token read.
            copy(this, firstTok);
            return true;
        }

        // reuse, or create new one each time?
        ArrayList<AttributeSource> generated = new ArrayList<AttributeSource>(
                result.synonyms.length + matched.size() + 1);

        //
        // there was a match... let's generate the new tokens, merging
        // in the matched tokens (position increments need adjusting)
        //
        AttributeSource lastTok = matched.isEmpty() ? firstTok : matched.getLast();
        boolean includeOrig = result.includeOrig();
        AttributeSource origTok = includeOrig ? firstTok : null;
        PositionIncrementAttribute firstPosIncAtt = firstTok.addAttribute(PositionIncrementAttribute.class);
        int origPos = firstPosIncAtt.getPositionIncrement(); // position of origTok in the original stream
        int repPos = 0; // curr position in replacement token stream
        int pos = 0; // current position in merged token stream

        for (int i = 0; i < result.synonyms.length; i++) {
            Token repTok = result.synonyms[i];
            // clone so each generated token carries the original's attributes
            AttributeSource newTok = firstTok.cloneAttributes();
            CharTermAttribute newTermAtt = newTok.addAttribute(CharTermAttribute.class);
            OffsetAttribute newOffsetAtt = newTok.addAttribute(OffsetAttribute.class);
            PositionIncrementAttribute newPosIncAtt = newTok.addAttribute(PositionIncrementAttribute.class);

            // end offset spans to the last matched token
            OffsetAttribute lastOffsetAtt = lastTok.addAttribute(OffsetAttribute.class);
            newOffsetAtt.setOffset(newOffsetAtt.startOffset(), lastOffsetAtt.endOffset());
            newTermAtt.copyBuffer(repTok.buffer(), 0, repTok.length());
            repPos += repTok.getPositionIncrement();
            if (i == 0)
                repPos = origPos; // make position of first token equal to original

            // if necessary, insert original tokens and adjust position increment
            while (origTok != null && origPos <= repPos) {
                PositionIncrementAttribute origPosInc = origTok.addAttribute(PositionIncrementAttribute.class);
                origPosInc.setPositionIncrement(origPos - pos);
                generated.add(origTok);
                pos += origPosInc.getPositionIncrement();
                origTok = matched.isEmpty() ? null : matched.removeFirst();
                if (origTok != null) {
                    origPosInc = origTok.addAttribute(PositionIncrementAttribute.class);
                    origPos += origPosInc.getPositionIncrement();
                }
            }

            newPosIncAtt.setPositionIncrement(repPos - pos);
            generated.add(newTok);
            pos += newPosIncAtt.getPositionIncrement();
        }

        // finish up any leftover original tokens
        while (origTok != null) {
            PositionIncrementAttribute origPosInc = origTok.addAttribute(PositionIncrementAttribute.class);
            origPosInc.setPositionIncrement(origPos - pos);
            generated.add(origTok);
            pos += origPosInc.getPositionIncrement();
            origTok = matched.isEmpty() ? null : matched.removeFirst();
            if (origTok != null) {
                origPosInc = origTok.addAttribute(PositionIncrementAttribute.class);
                origPos += origPosInc.getPositionIncrement();
            }
        }

        // what if we replaced a longer sequence with a shorter one?
        // a/0 b/5 => foo/0
        // should I re-create the gap on the next buffered token?
        replacement = generated.iterator();

        // Now return to the top of the loop to read and return the first
        // generated token.. The reason this is done is that we may have generated
        // nothing at all, and may need to continue with more matching logic.
    }
}
From source file:org.apache.solr.schema.SimplePreAnalyzedParser.java
License:Apache License
private static AttributeSource.State createState(AttributeSource a, Tok state, int tokenEnd) { a.clearAttributes();/*from w w w .java2 s . c o m*/ CharTermAttribute termAtt = a.addAttribute(CharTermAttribute.class); char[] tokChars = state.token.toString().toCharArray(); termAtt.copyBuffer(tokChars, 0, tokChars.length); int tokenStart = tokenEnd - state.token.length(); for (Entry<String, String> e : state.attr.entrySet()) { String k = e.getKey(); if (k.equals("i")) { // position increment int incr = Integer.parseInt(e.getValue()); PositionIncrementAttribute posIncr = a.addAttribute(PositionIncrementAttribute.class); posIncr.setPositionIncrement(incr); } else if (k.equals("s")) { tokenStart = Integer.parseInt(e.getValue()); } else if (k.equals("e")) { tokenEnd = Integer.parseInt(e.getValue()); } else if (k.equals("y")) { TypeAttribute type = a.addAttribute(TypeAttribute.class); type.setType(e.getValue()); } else if (k.equals("f")) { FlagsAttribute flags = a.addAttribute(FlagsAttribute.class); int f = Integer.parseInt(e.getValue(), 16); flags.setFlags(f); } else if (k.equals("p")) { PayloadAttribute p = a.addAttribute(PayloadAttribute.class); byte[] data = hexToBytes(e.getValue()); if (data != null && data.length > 0) { p.setPayload(new BytesRef(data)); } } else { // unknown attribute } } // handle offset attr OffsetAttribute offset = a.addAttribute(OffsetAttribute.class); offset.setOffset(tokenStart, tokenEnd); State resState = a.captureState(); a.clearAttributes(); return resState; }
From source file:org.nlp.lucene.patch.UAX29URLEmailTokenizerImpl.java
License:Apache License
/**
 * Fills the given {@link CharTermAttribute} with the current token text
 * from the scanner buffer.
 *
 * @param t attribute to receive the token text
 */
public final void getText(CharTermAttribute t) {
    final int tokenLength = zzMarkedPos - zzStartRead;
    t.copyBuffer(zzBuffer, zzStartRead, tokenLength);
}
From source file:org.projectforge.lucene.StandardTokenizerImpl.java
License:Apache License
/**
 * Fills the given {@link CharTermAttribute} with the current token text
 * from the scanner buffer.
 *
 * @param t attribute to receive the token text
 */
public final void getText(final CharTermAttribute t) {
    final int tokenLength = zzMarkedPos - zzStartRead;
    t.copyBuffer(zzBuffer, zzStartRead, tokenLength);
}