Example usage for org.apache.lucene.util AttributeSource cloneAttributes

List of usage examples for org.apache.lucene.util AttributeSource cloneAttributes

Introduction

In this page you can find the example usage for org.apache.lucene.util AttributeSource cloneAttributes.

Prototype

public final AttributeSource cloneAttributes() 

Source Link

Document

Performs a clone of all AttributeImpl instances returned in a new AttributeSource instance.

Usage

From source file: org.apache.solr.analysis.SlowSynonymFilter.java

License: Apache License

/**
 * Produces the next token of the synonym-expanded stream.
 *
 * <p>Works in three phases inside an endless loop: (1) drain any previously
 * generated replacement tokens; (2) read the next input token and, if it cannot
 * start a synonym rule, pass it through unchanged (the common fast path);
 * (3) otherwise find the longest rule match, build the merged list of synonym
 * and (optionally) original tokens with adjusted position increments, store it
 * in {@code replacement}, and loop back to phase (1) to emit the first one.
 * Looping rather than returning directly handles the case where nothing at all
 * was generated.
 *
 * @return {@code true} if a token was produced, {@code false} at end of input
 * @throws IOException if reading from the underlying stream fails
 */
@Override
public boolean incrementToken() throws IOException {
    while (true) {
        // if there are any generated tokens, return them... don't try any
        // matches against them, as we specifically don't want recursion.
        if (replacement != null && replacement.hasNext()) {
            // copy(dst, src) presumably restores the saved attribute state into
            // this stream's attributes — defined elsewhere in this class; verify.
            copy(this, replacement.next());
            return true;
        }

        // common case fast-path of first token not matching anything
        AttributeSource firstTok = nextTok(); // next buffered/input token, or null at EOF
        if (firstTok == null)
            return false;
        CharTermAttribute termAtt = firstTok.addAttribute(CharTermAttribute.class);
        // First-level lookup: a null submap hit means no synonym rule can start
        // with this term, so the token passes through untouched.
        SlowSynonymMap result = map.submap != null ? map.submap.get(termAtt.buffer(), 0, termAtt.length())
                : null;
        if (result == null) {
            copy(this, firstTok);
            return true;
        }

        // fast-path failed, clone ourselves if needed
        // nextTok() may have handed back `this`; snapshot the attribute state so
        // that reading further tokens doesn't overwrite the saved first token.
        if (firstTok == this)
            firstTok = cloneAttributes();
        // OK, we matched a token, so find the longest match.

        // Tokens consumed while extending the match beyond firstTok.
        matched = new LinkedList<AttributeSource>();

        // match() extends the rule match as far as possible, filling `matched`;
        // returns null if no complete rule matched — defined elsewhere; verify.
        result = match(result);

        if (result == null) {
            // no match, simply return the first token read.
            copy(this, firstTok);
            return true;
        }

        // reuse, or create new one each time?
        ArrayList<AttributeSource> generated = new ArrayList<AttributeSource>(
                result.synonyms.length + matched.size() + 1);

        //
        // there was a match... let's generate the new tokens, merging
        // in the matched tokens (position increments need adjusting)
        //
        AttributeSource lastTok = matched.isEmpty() ? firstTok : matched.getLast();
        boolean includeOrig = result.includeOrig();

        AttributeSource origTok = includeOrig ? firstTok : null;
        PositionIncrementAttribute firstPosIncAtt = firstTok.addAttribute(PositionIncrementAttribute.class);
        int origPos = firstPosIncAtt.getPositionIncrement(); // position of origTok in the original stream
        int repPos = 0; // curr position in replacement token stream
        int pos = 0; // current position in merged token stream

        for (int i = 0; i < result.synonyms.length; i++) {
            Token repTok = result.synonyms[i];
            // Clone so each generated token carries the matched span's offsets
            // and other attributes, then overwrite term/offset/posInc below.
            AttributeSource newTok = firstTok.cloneAttributes();
            CharTermAttribute newTermAtt = newTok.addAttribute(CharTermAttribute.class);
            OffsetAttribute newOffsetAtt = newTok.addAttribute(OffsetAttribute.class);
            PositionIncrementAttribute newPosIncAtt = newTok.addAttribute(PositionIncrementAttribute.class);

            OffsetAttribute lastOffsetAtt = lastTok.addAttribute(OffsetAttribute.class);

            // Span the whole matched region: start of first token, end of last.
            newOffsetAtt.setOffset(newOffsetAtt.startOffset(), lastOffsetAtt.endOffset());
            newTermAtt.copyBuffer(repTok.buffer(), 0, repTok.length());
            repPos += repTok.getPositionIncrement();
            if (i == 0)
                repPos = origPos; // make position of first token equal to original

            // if necessary, insert original tokens and adjust position increment
            while (origTok != null && origPos <= repPos) {
                PositionIncrementAttribute origPosInc = origTok.addAttribute(PositionIncrementAttribute.class);
                origPosInc.setPositionIncrement(origPos - pos);
                generated.add(origTok);
                pos += origPosInc.getPositionIncrement();
                origTok = matched.isEmpty() ? null : matched.removeFirst();
                if (origTok != null) {
                    origPosInc = origTok.addAttribute(PositionIncrementAttribute.class);
                    origPos += origPosInc.getPositionIncrement();
                }
            }

            // Increment is relative to the last token already emitted (`pos`).
            newPosIncAtt.setPositionIncrement(repPos - pos);
            generated.add(newTok);
            pos += newPosIncAtt.getPositionIncrement();
        }

        // finish up any leftover original tokens
        while (origTok != null) {
            PositionIncrementAttribute origPosInc = origTok.addAttribute(PositionIncrementAttribute.class);
            origPosInc.setPositionIncrement(origPos - pos);
            generated.add(origTok);
            pos += origPosInc.getPositionIncrement();
            origTok = matched.isEmpty() ? null : matched.removeFirst();
            if (origTok != null) {
                origPosInc = origTok.addAttribute(PositionIncrementAttribute.class);
                origPos += origPosInc.getPositionIncrement();
            }
        }

        // what if we replaced a longer sequence with a shorter one?
        // a/0 b/5 =>  foo/0
        // should I re-create the gap on the next buffered token?

        replacement = generated.iterator();
        // Now return to the top of the loop to read and return the first
        // generated token.. The reason this is done is that we may have generated
        // nothing at all, and may need to continue with more matching logic.
    }
}