Example usage for org.apache.lucene.util AttributeSource getAttribute

List of usage examples for org.apache.lucene.util AttributeSource getAttribute

Introduction

In this page you can find the example usage for org.apache.lucene.util AttributeSource getAttribute.

Prototype

public final <T extends Attribute> T getAttribute(Class<T> attClass) 

Source Link

Document

Returns the instance of the passed in Attribute contained in this AttributeSource

The caller must pass in a Class<? extends Attribute> value; the method throws an IllegalArgumentException if the attribute is not contained in this AttributeSource.

Usage

From source file:at.ac.tuwien.ifs.lupu.LangDetFilter.java

@Override
public boolean incrementToken() throws IOException {
    try {
        // 1) Drain any token that was previously set aside before touching the buffer.
        if (!setAsside.isEmpty()) {
            restoreToken(setAsside.poll());
            return true;
        }

        // 2) Refill the buffer from the input stream unless it was frozen on a prior call
        //    (or the original stream has already been exhausted).
        if (!frozenBuffer) {
            thereWillBeMore = originalStreamFinished ? false : updateBuffer();
        }

        // 3) Updating the buffer may itself have set new tokens aside; release them
        //    before processing the buffer, and freeze the buffer so it is not advanced
        //    again until the set-aside tokens have all been emitted.
        if (!setAsside.isEmpty()) {
            restoreToken(setAsside.poll());
            frozenBuffer = true;
            return true;
        }

        if (thereWillBeMore) {
            frozenBuffer = false;
            // Emit the token at the middle of the buffer as the current token.
            int middle = buffer.size / 2;
            restoreToken(buffer.inputTokens.get(middle));
            // Assign it the language based on the surrounding buffer context.
            assignLanguage();
        } else {
            // No more elements in the original stream; start popping from the list.
            if (setAsside.isEmpty()) {
                return false; // that's it - we're done with the list as well
            }
            // Read from the list. The listed tokens already carry the right language
            // tags, so nothing else needs updating here.
            restoreToken(setAsside.poll());
        }

        return true;
    } catch (LangDetectException ex) {
        Logger.getLogger(LangDetFilter.class.getName()).log(Level.SEVERE, null, ex);
        // Swallow the detection failure and return true so the stream continues
        // with the next token instead of aborting analysis.
        return true;
    }
}

/**
 * Copies type, term text, position increment, position length and offsets from
 * {@code token} into this stream's own attributes, making it the current token.
 */
private void restoreToken(AttributeSource token) {
    this.typeAttribute.setType(token.getAttribute(TypeAttribute.class).type());
    this.termAttribute.setEmpty();
    this.termAttribute.append(token.getAttribute(CharTermAttribute.class));
    this.posIncAtt.setPositionIncrement(
            token.getAttribute(PositionIncrementAttribute.class).getPositionIncrement());
    this.posLenAtt
            .setPositionLength(token.getAttribute(PositionLengthAttribute.class).getPositionLength());
    OffsetAttribute offset = token.getAttribute(OffsetAttribute.class);
    this.offsetAtt.setOffset(offset.startOffset(), offset.endOffset());
}

From source file:com.shaie.annots.ColorsSinkFilter.java

License:Apache License

@Override
public boolean accept(AttributeSource source) {
    // Lazily bind to the source's attributes on the first call.
    if (termAtt == null) {
        termAtt = source.getAttribute(CharTermAttribute.class);
        posIncrAtt = source.addAttribute(PositionIncrementAttribute.class);
        annotSpanAtt = source.addAttribute(AnnotationSpanAttribute.class);
    }

    // NOTE: the state of the input AttributeSource is not cloned before
    // calling this method and is therefore shared with other consumers of
    // that source. We avoid modifying any existing attributes, and instead
    // add a dedicated span attribute that is passed on to the TokenFilter
    // which consumes the color terms.

    // Track the absolute position of the current token in the text.
    absTextPos += posIncrAtt.getPositionIncrement();

    if (!COLORS.contains(termAtt.buffer(), 0, termAtt.length())) {
        return false; // not a color term
    }
    annotSpanAtt.setSpan(absTextPos, 1);
    return true;
}

From source file:org.apache.solr.analysis.SlowSynonymFilter.java

License:Apache License

/**
 * Recursively attempts to extend the current synonym match with upcoming
 * tokens from the stream, returning the deepest trie node whose synonyms
 * apply (longest match wins).
 *
 * Tokens consumed for a successful longer match are remembered in
 * {@code matched} (prepended, so they stay in stream order); tokens that do
 * not extend the match are pushed back onto the input via {@code pushTok}.
 *
 * @param map the current node of the synonym trie to match against
 * @return the matched node whose {@code synonyms} should be applied, or
 *         {@code null} if no synonym entry matches at this position
 * @throws IOException if reading the next token from the input fails
 */
private SlowSynonymMap match(SlowSynonymMap map) throws IOException {
    SlowSynonymMap result = null;

    if (map.submap != null) {
        AttributeSource tok = nextTok();
        if (tok != null) {
            // clone ourselves, so the pushed-back/remembered token is a stable
            // snapshot rather than this stream's live (mutable) attribute state.
            if (tok == this)
                tok = cloneAttributes();
            // check for positionIncrement!=1?  if>1, should not match, if==0, check multiple at this level?
            CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class);
            SlowSynonymMap subMap = map.submap.get(termAtt.buffer(), 0, termAtt.length());

            if (subMap != null) {
                // recurse: try to extend the match with further tokens
                result = match(subMap);
            }

            if (result != null) {
                // a longer sequence matched downstream; keep the consumed token
                matched.addFirst(tok);
            } else {
                // push back unmatched token
                pushTok(tok);
            }
        }
    }

    // if no longer sequence matched, so if this node has synonyms, it's the match.
    if (result == null && map.synonyms != null) {
        result = map;
    }

    return result;
}

From source file:org.apache.solr.handler.AnalysisRequestHandlerBase.java

License:Apache License

/**
 * Analyzes the given value using the given Analyzer.
 *
 * For a {@link TokenizerChain} the value is run through every stage
 * (char filters, tokenizer, each token filter) individually so the output
 * of each stage can be reported; any other analyzer is applied in one shot.
 *
 * @param value   the field value to analyze
 * @param context the {@link AnalysisContext analysis context}
 *
 * @return NamedList containing the tokens produced by analyzing the given value
 */
protected NamedList<? extends Object> analyzeValue(String value, AnalysisContext context) {

    Analyzer analyzer = context.getAnalyzer();

    if (analyzer instanceof TokenizerChain) {
        TokenizerChain chain = (TokenizerChain) analyzer;
        CharFilterFactory[] charFilterFactories = chain.getCharFilterFactories();
        TokenizerFactory tokenizerFactory = chain.getTokenizerFactory();
        TokenFilterFactory[] tokenFilterFactories = chain.getTokenFilterFactories();

        NamedList<Object> stages = new NamedList<Object>();

        // Run the value through each char filter in turn, recording every
        // intermediate text under the filter's class name.
        if (charFilterFactories != null) {
            String text = value;
            for (CharFilterFactory charFilterFactory : charFilterFactories) {
                Reader filtered = charFilterFactory.create(new StringReader(text));
                text = writeCharStream(stages, filtered);
            }
        }

        // Tokenize the value and record the tokenizer stage.
        TokenStream stream = tokenizerFactory.create(chain.initReader(null, new StringReader(value)));
        List<AttributeSource> tokens = analyzeTokenStream(stream);
        stages.add(stream.getClass().getName(), convertTokensToNamedLists(tokens, context));

        ListBasedTokenStream replayStream = new ListBasedTokenStream(tokens);

        // Apply each token filter to the previous stage's tokens, recording
        // each stage's output. Freezing marks the tokens' current stage so
        // position tracking survives across filters.
        for (TokenFilterFactory tokenFilterFactory : tokenFilterFactories) {
            for (final AttributeSource tok : tokens) {
                tok.getAttribute(TokenTrackingAttribute.class).freezeStage();
            }
            stream = tokenFilterFactory.create(replayStream);
            tokens = analyzeTokenStream(stream);
            stages.add(stream.getClass().getName(), convertTokensToNamedLists(tokens, context));
            replayStream = new ListBasedTokenStream(tokens);
        }

        return stages;
    }

    // Not a TokenizerChain: apply the analyzer as a whole.
    TokenStream stream = null;
    try {
        stream = analyzer.tokenStream(context.getFieldName(), value);
        NamedList<List<NamedList>> result = new NamedList<List<NamedList>>();
        result.add(stream.getClass().getName(),
                convertTokensToNamedLists(analyzeTokenStream(stream), context));
        return result;
    } catch (IOException e) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
    } finally {
        IOUtils.closeWhileHandlingException(stream);
    }
}

From source file:org.apache.solr.handler.AnalysisRequestHandlerBase.java

License:Apache License

/**
 * Converts the list of Tokens to a list of NamedLists representing the tokens.
 *
 * Tokens are sorted by their tracked absolute positions before conversion.
 * Each resulting NamedList carries the readable text, the raw bytes, a
 * match flag when the term is in the requested match set, and every other
 * attribute exposed through reflection.
 *
 * @param tokenList Tokens to convert
 * @param context   The analysis context
 *
 * @return List of NamedLists containing the relevant information taken from the tokens
 */
private List<NamedList> convertTokensToNamedLists(final List<AttributeSource> tokenList,
        AnalysisContext context) {
    final List<NamedList> tokensNamedLists = new ArrayList<NamedList>();
    final FieldType fieldType = context.getFieldType();
    final AttributeSource[] tokens = tokenList.toArray(new AttributeSource[tokenList.size()]);

    // sort the tokens by absolute position (as recorded by TokenTrackingAttribute)
    ArrayUtil.timSort(tokens, new Comparator<AttributeSource>() {
        @Override
        public int compare(AttributeSource a, AttributeSource b) {
            return arrayCompare(a.getAttribute(TokenTrackingAttribute.class).getPositions(),
                    b.getAttribute(TokenTrackingAttribute.class).getPositions());
        }

        // Lexicographic comparison of two position arrays.
        private int arrayCompare(int[] a, int[] b) {
            int p = 0;
            final int stop = Math.min(a.length, b.length);
            while (p < stop) {
                int diff = a[p] - b[p];
                if (diff != 0)
                    return diff;
                p++;
            }
            // One is a prefix of the other, or, they are equal:
            return a.length - b.length;
        }
    });

    for (int i = 0; i < tokens.length; i++) {
        AttributeSource token = tokens[i];
        final NamedList<Object> tokenNamedList = new SimpleOrderedMap<Object>();
        final TermToBytesRefAttribute termAtt = token.getAttribute(TermToBytesRefAttribute.class);
        BytesRef rawBytes = termAtt.getBytesRef();
        // fillBytesRef() populates the (shared) BytesRef obtained above with
        // this token's term bytes; it must run before rawBytes is read.
        termAtt.fillBytesRef();
        final String text = fieldType.indexedToReadable(rawBytes, new CharsRef(rawBytes.length)).toString();
        tokenNamedList.add("text", text);

        // Report the raw char-term form too, but only when it differs from
        // the readable indexed form.
        if (token.hasAttribute(CharTermAttribute.class)) {
            final String rawText = token.getAttribute(CharTermAttribute.class).toString();
            if (!rawText.equals(text)) {
                tokenNamedList.add("raw_text", rawText);
            }
        }

        tokenNamedList.add("raw_bytes", rawBytes.toString());

        if (context.getTermsToMatch().contains(rawBytes)) {
            tokenNamedList.add("match", true);
        }

        // Export all remaining attributes via reflection.
        token.reflectWith(new AttributeReflector() {
            @Override
            public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
                // leave out position and bytes term (already reported above)
                if (TermToBytesRefAttribute.class.isAssignableFrom(attClass))
                    return;
                if (CharTermAttribute.class.isAssignableFrom(attClass))
                    return;
                if (PositionIncrementAttribute.class.isAssignableFrom(attClass))
                    return;

                String k = attClass.getName() + '#' + key;

                // map keys for "standard attributes":
                if (ATTRIBUTE_MAPPING.containsKey(k)) {
                    k = ATTRIBUTE_MAPPING.get(k);
                }

                // BytesRef values are reported in their string form.
                if (value instanceof BytesRef) {
                    final BytesRef p = (BytesRef) value;
                    value = p.toString();
                }

                tokenNamedList.add(k, value);
            }
        });

        tokensNamedLists.add(tokenNamedList);
    }

    return tokensNamedLists;
}