Example usage for org.apache.lucene.analysis.tokenattributes CharTermAttribute copyBuffer

List of usage examples for org.apache.lucene.analysis.tokenattributes CharTermAttribute copyBuffer

Introduction

On this page you can find example usages of org.apache.lucene.analysis.tokenattributes CharTermAttribute copyBuffer.

Prototype

public void copyBuffer(char[] buffer, int offset, int length);

Source Link

Document

Copies the contents of buffer, starting at offset for length characters, into the termBuffer array.

Usage

From source file:com.bizosys.unstructured.HSearchTokenizerImpl.java

License:Apache License

/**
 * Copies the text of the current token into the supplied attribute.
 *
 * @param t the attribute whose term buffer is overwritten with the
 *          characters of the token currently matched by the lexer
 */
public final void getText(CharTermAttribute t) {
    final int tokenLength = zzMarkedPos - zzStartRead;
    t.copyBuffer(zzBuffer, zzStartRead, tokenLength);
}

From source file:com.mathworks.xzheng.analysis.AnalyzerUtils.java

License:Apache License

/**
 * Replaces the term text of {@code source}'s {@link CharTermAttribute}
 * with the given string, adding the attribute if it is not present yet.
 *
 * @param source the attribute source to modify
 * @param term   the new term text
 */
public static void setTerm(AttributeSource source, String term) {
    final char[] chars = term.toCharArray();
    source.addAttribute(CharTermAttribute.class).copyBuffer(chars, 0, chars.length);
}

From source file:com.zimbra.cs.index.analysis.UniversalLexer.java

License:Open Source License

/**
 * Fills {@code t} with the text of the token the lexer is currently positioned on.
 *
 * @param t the attribute receiving the current token text
 */
void getTerm(CharTermAttribute t) {
    final int length = zzMarkedPos - zzStartRead;
    t.copyBuffer(zzBuffer, zzStartRead, length);
}

From source file:com.zimbra.cs.index.analysis.UniversalLexer.java

License:Open Source License

/**
 * Fills {@code t} with a sub-range of the current token's text.
 *
 * @param t      the attribute receiving the characters
 * @param offset offset relative to the start of the current token
 * @param len    number of characters to copy
 */
void getTerm(CharTermAttribute t, int offset, int len) {
    final int start = zzStartRead + offset;
    t.copyBuffer(zzBuffer, start, len);
}

From source file:eu.socialsensor.framework.client.lucene.TweetTokenizerImpl.java

License:Apache License

/**
 * Copies the currently matched token text into the given term attribute.
 *
 * @param t attribute whose buffer is replaced by the current token's characters
 */
final void getText(CharTermAttribute t) {
    final int count = zzMarkedPos - zzStartRead;
    t.copyBuffer(zzBuffer, zzStartRead, count);
}

From source file:nl.inl.blacklab.filter.AbstractSynonymFilter.java

License:Apache License

/**
 * Sets the term text on {@code source}'s {@link CharTermAttribute},
 * adding that attribute to the source if necessary.
 *
 * @param source attribute source whose term is replaced
 * @param term   replacement term text
 */
static void setTerm(AttributeSource source, String term) {
    final char[] termChars = term.toCharArray();
    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
    termAtt.copyBuffer(termChars, 0, termChars.length);
}

From source file:org.apache.solr.analysis.SlowSynonymFilter.java

License:Apache License

/**
 * Produces the next token of the synonym-expanded stream.
 *
 * <p>Emits any tokens previously generated by a synonym match first; otherwise
 * reads the next input token, looks it up in the synonym map, and — on a match —
 * builds the merged replacement/original token sequence (adjusting position
 * increments so both streams interleave at their original positions) before
 * looping back to emit the first generated token.
 *
 * @return {@code true} if a token was produced, {@code false} at end of stream
 * @throws IOException if reading from the underlying stream fails
 */
@Override
public boolean incrementToken() throws IOException {
    while (true) {
        // if there are any generated tokens, return them... don't try any
        // matches against them, as we specifically don't want recursion.
        if (replacement != null && replacement.hasNext()) {
            copy(this, replacement.next());
            return true;
        }

        // common case fast-path of first token not matching anything
        AttributeSource firstTok = nextTok();
        if (firstTok == null)
            return false;
        CharTermAttribute termAtt = firstTok.addAttribute(CharTermAttribute.class);
        SlowSynonymMap result = map.submap != null ? map.submap.get(termAtt.buffer(), 0, termAtt.length())
                : null;
        if (result == null) {
            copy(this, firstTok);
            return true;
        }

        // fast-path failed, clone ourselves if needed
        // (nextTok() may hand back `this`; cloning keeps firstTok stable while
        // we keep reading tokens to find the longest match)
        if (firstTok == this)
            firstTok = cloneAttributes();
        // OK, we matched a token, so find the longest match.

        matched = new LinkedList<AttributeSource>();

        result = match(result);

        if (result == null) {
            // no match, simply return the first token read.
            copy(this, firstTok);
            return true;
        }

        // reuse, or create new one each time?
        ArrayList<AttributeSource> generated = new ArrayList<AttributeSource>(
                result.synonyms.length + matched.size() + 1);

        //
        // there was a match... let's generate the new tokens, merging
        // in the matched tokens (position increments need adjusting)
        //
        AttributeSource lastTok = matched.isEmpty() ? firstTok : matched.getLast();
        boolean includeOrig = result.includeOrig();

        AttributeSource origTok = includeOrig ? firstTok : null;
        PositionIncrementAttribute firstPosIncAtt = firstTok.addAttribute(PositionIncrementAttribute.class);
        int origPos = firstPosIncAtt.getPositionIncrement(); // position of origTok in the original stream
        int repPos = 0; // curr position in replacement token stream
        int pos = 0; // current position in merged token stream

        for (int i = 0; i < result.synonyms.length; i++) {
            Token repTok = result.synonyms[i];
            // clone so each generated token starts from the matched token's attributes
            AttributeSource newTok = firstTok.cloneAttributes();
            CharTermAttribute newTermAtt = newTok.addAttribute(CharTermAttribute.class);
            OffsetAttribute newOffsetAtt = newTok.addAttribute(OffsetAttribute.class);
            PositionIncrementAttribute newPosIncAtt = newTok.addAttribute(PositionIncrementAttribute.class);

            OffsetAttribute lastOffsetAtt = lastTok.addAttribute(OffsetAttribute.class);

            // replacement token spans from the first matched token's start to the last's end
            newOffsetAtt.setOffset(newOffsetAtt.startOffset(), lastOffsetAtt.endOffset());
            newTermAtt.copyBuffer(repTok.buffer(), 0, repTok.length());
            repPos += repTok.getPositionIncrement();
            if (i == 0)
                repPos = origPos; // make position of first token equal to original

            // if necessary, insert original tokens and adjust position increment
            while (origTok != null && origPos <= repPos) {
                PositionIncrementAttribute origPosInc = origTok.addAttribute(PositionIncrementAttribute.class);
                origPosInc.setPositionIncrement(origPos - pos);
                generated.add(origTok);
                pos += origPosInc.getPositionIncrement();
                origTok = matched.isEmpty() ? null : matched.removeFirst();
                if (origTok != null) {
                    origPosInc = origTok.addAttribute(PositionIncrementAttribute.class);
                    origPos += origPosInc.getPositionIncrement();
                }
            }

            newPosIncAtt.setPositionIncrement(repPos - pos);
            generated.add(newTok);
            pos += newPosIncAtt.getPositionIncrement();
        }

        // finish up any leftover original tokens
        while (origTok != null) {
            PositionIncrementAttribute origPosInc = origTok.addAttribute(PositionIncrementAttribute.class);
            origPosInc.setPositionIncrement(origPos - pos);
            generated.add(origTok);
            pos += origPosInc.getPositionIncrement();
            origTok = matched.isEmpty() ? null : matched.removeFirst();
            if (origTok != null) {
                origPosInc = origTok.addAttribute(PositionIncrementAttribute.class);
                origPos += origPosInc.getPositionIncrement();
            }
        }

        // what if we replaced a longer sequence with a shorter one?
        // a/0 b/5 =>  foo/0
        // should I re-create the gap on the next buffered token?

        replacement = generated.iterator();
        // Now return to the top of the loop to read and return the first
        // generated token.. The reason this is done is that we may have generated
        // nothing at all, and may need to continue with more matching logic.
    }
}

From source file:org.apache.solr.schema.SimplePreAnalyzedParser.java

License:Apache License

/**
 * Captures an {@link AttributeSource.State} for one parsed token.
 *
 * <p>Loads the token text into the source's term attribute, then applies each
 * single-letter attribute key from {@code state.attr}: {@code i} = position
 * increment, {@code s}/{@code e} = explicit start/end offsets, {@code y} = type,
 * {@code f} = flags (hex), {@code p} = payload (hex bytes). Unrecognized keys
 * are ignored. When no explicit start offset is given, the start defaults to
 * {@code tokenEnd} minus the token length.
 *
 * @param a        the attribute source used as scratch space (cleared on entry and exit)
 * @param state    parsed token text plus its attribute key/value map
 * @param tokenEnd end offset of the token, unless overridden by an {@code e} entry
 * @return the captured state holding all populated attributes
 */
private static AttributeSource.State createState(AttributeSource a, Tok state, int tokenEnd) {
    a.clearAttributes();
    CharTermAttribute term = a.addAttribute(CharTermAttribute.class);
    char[] chars = state.token.toString().toCharArray();
    term.copyBuffer(chars, 0, chars.length);
    // default start offset; may be overridden by an explicit "s" entry below
    int tokenStart = tokenEnd - state.token.length();
    for (Entry<String, String> e : state.attr.entrySet()) {
        switch (e.getKey()) {
        case "i":
            // position increment
            PositionIncrementAttribute posIncr = a.addAttribute(PositionIncrementAttribute.class);
            posIncr.setPositionIncrement(Integer.parseInt(e.getValue()));
            break;
        case "s":
            tokenStart = Integer.parseInt(e.getValue());
            break;
        case "e":
            tokenEnd = Integer.parseInt(e.getValue());
            break;
        case "y":
            a.addAttribute(TypeAttribute.class).setType(e.getValue());
            break;
        case "f":
            // flags are encoded in hexadecimal
            a.addAttribute(FlagsAttribute.class).setFlags(Integer.parseInt(e.getValue(), 16));
            break;
        case "p":
            // attribute is added even when the payload decodes to nothing,
            // matching the captured-state shape of the original parser
            PayloadAttribute payload = a.addAttribute(PayloadAttribute.class);
            byte[] data = hexToBytes(e.getValue());
            if (data != null && data.length > 0) {
                payload.setPayload(new BytesRef(data));
            }
            break;
        default:
            // unknown attribute key: silently skipped
            break;
        }
    }
    // handle offset attr
    OffsetAttribute offset = a.addAttribute(OffsetAttribute.class);
    offset.setOffset(tokenStart, tokenEnd);
    State captured = a.captureState();
    a.clearAttributes();
    return captured;
}

From source file:org.nlp.lucene.patch.UAX29URLEmailTokenizerImpl.java

License:Apache License

/**
 * Writes the lexer's current token text into the supplied term attribute.
 *
 * @param t the attribute whose buffer is replaced with the current token
 */
public final void getText(CharTermAttribute t) {
    final int matchedLength = zzMarkedPos - zzStartRead;
    t.copyBuffer(zzBuffer, zzStartRead, matchedLength);
}

From source file:org.projectforge.lucene.StandardTokenizerImpl.java

License:Apache License

/**
 * Transfers the current token's characters into {@code t}.
 *
 * @param t attribute to fill with the token text currently matched by the lexer
 */
public final void getText(final CharTermAttribute t) {
    final int span = zzMarkedPos - zzStartRead;
    t.copyBuffer(zzBuffer, zzStartRead, span);
}