List of usage examples for org.apache.lucene.util AttributeSource getAttribute
public final <T extends Attribute> T getAttribute(Class<T> attClass)
The caller must pass in a `Class<? extends Attribute>` token identifying the attribute to retrieve.
From source file:at.ac.tuwien.ifs.lupu.LangDetFilter.java
@Override public boolean incrementToken() throws IOException { try {//from w w w .j a v a2 s.co m //LOG.log(Level.INFO, "LTF.incrementToken "); if (!setAsside.isEmpty()) { AttributeSource token = setAsside.poll(); this.typeAttribute.setType(token.getAttribute(TypeAttribute.class).type()); this.termAttribute.setEmpty(); this.termAttribute.append(token.getAttribute(CharTermAttribute.class)); this.posIncAtt.setPositionIncrement( token.getAttribute(PositionIncrementAttribute.class).getPositionIncrement()); this.posLenAtt .setPositionLength(token.getAttribute(PositionLengthAttribute.class).getPositionLength()); this.offsetAtt.setOffset(token.getAttribute(OffsetAttribute.class).startOffset(), token.getAttribute(OffsetAttribute.class).endOffset()); return true; } //update first the buffer by reading from the input stream, if we haven't previously marked it as finished if (!frozenBuffer) { thereWillBeMore = originalStreamFinished ? false : updateBuffer(); } //in case updating the buffer generated new elements set asside - we need to release them before we process the buffer if (!setAsside.isEmpty()) { AttributeSource token = setAsside.poll(); this.typeAttribute.setType(token.getAttribute(TypeAttribute.class).type()); this.termAttribute.setEmpty(); this.termAttribute.append(token.getAttribute(CharTermAttribute.class)); this.posIncAtt.setPositionIncrement( token.getAttribute(PositionIncrementAttribute.class).getPositionIncrement()); this.posLenAtt .setPositionLength(token.getAttribute(PositionLengthAttribute.class).getPositionLength()); this.offsetAtt.setOffset(token.getAttribute(OffsetAttribute.class).startOffset(), token.getAttribute(OffsetAttribute.class).endOffset()); frozenBuffer = true; return true; } if (thereWillBeMore) { frozenBuffer = false; // make the current token the one at the middle of the buffer int middle = buffer.size / 2; this.typeAttribute.setType(buffer.inputTokens.get(middle).getAttribute(TypeAttribute.class).type()); this.termAttribute.setEmpty(); 
this.termAttribute.append(buffer.inputTokens.get(middle).getAttribute(CharTermAttribute.class)); this.posIncAtt.setPositionIncrement(buffer.inputTokens.get(middle) .getAttribute(PositionIncrementAttribute.class).getPositionIncrement()); this.posLenAtt.setPositionLength(buffer.inputTokens.get(middle) .getAttribute(PositionLengthAttribute.class).getPositionLength()); this.offsetAtt.setOffset( buffer.inputTokens.get(middle).getAttribute(OffsetAttribute.class).startOffset(), buffer.inputTokens.get(middle).getAttribute(OffsetAttribute.class).endOffset()); //assing it the language based on the buffer assignLanguage(); } else { //no more elements in the original stream, start popping from list. if (setAsside.isEmpty()) { return false;//that's it - we're done with the list as well. } else { //read from the list AttributeSource token = setAsside.poll(); this.typeAttribute.setType(token.getAttribute(TypeAttribute.class).type()); this.termAttribute.setEmpty(); this.termAttribute.append(token.getAttribute(CharTermAttribute.class)); this.posIncAtt.setPositionIncrement( token.getAttribute(PositionIncrementAttribute.class).getPositionIncrement()); this.posLenAtt.setPositionLength( token.getAttribute(PositionLengthAttribute.class).getPositionLength()); this.offsetAtt.setOffset(token.getAttribute(OffsetAttribute.class).startOffset(), token.getAttribute(OffsetAttribute.class).endOffset()); //the stack already has the right language tags, so no need to update anything now - just make the current element the one from the stack } } return true; } catch (LangDetectException ex) { Logger.getLogger(LangDetFilter.class.getName()).log(Level.SEVERE, null, ex); //return true to allow it to continue to the next token return true; } }
From source file:com.shaie.annots.ColorsSinkFilter.java
License:Apache License
@Override public boolean accept(AttributeSource source) { if (termAtt == null) { termAtt = source.getAttribute(CharTermAttribute.class); posIncrAtt = source.addAttribute(PositionIncrementAttribute.class); annotSpanAtt = source.addAttribute(AnnotationSpanAttribute.class); }/* ww w . jav a 2 s .c om*/ // NOTE: the state of the input AttributeSource is not cloned before // calling this method and thus shared with other consumers of that // source. Therefore we avoid modifying any existing attributes, and add // on the stream a special attribute that will be passed on to the // TokenFilter which consumes the color terms. absTextPos += posIncrAtt.getPositionIncrement(); // adjust the absolute position in the text boolean isColor = COLORS.contains(termAtt.buffer(), 0, termAtt.length()); if (isColor) { // System.out.println("found color: " + termAtt + ", pos=" + absTextPos); annotSpanAtt.setSpan(absTextPos, 1); } return isColor; }
From source file:org.apache.solr.analysis.SlowSynonymFilter.java
License:Apache License
private SlowSynonymMap match(SlowSynonymMap map) throws IOException { SlowSynonymMap result = null;// w w w . j ava2s . com if (map.submap != null) { AttributeSource tok = nextTok(); if (tok != null) { // clone ourselves. if (tok == this) tok = cloneAttributes(); // check for positionIncrement!=1? if>1, should not match, if==0, check multiple at this level? CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class); SlowSynonymMap subMap = map.submap.get(termAtt.buffer(), 0, termAtt.length()); if (subMap != null) { // recurse result = match(subMap); } if (result != null) { matched.addFirst(tok); } else { // push back unmatched token pushTok(tok); } } } // if no longer sequence matched, so if this node has synonyms, it's the match. if (result == null && map.synonyms != null) { result = map; } return result; }
From source file:org.apache.solr.handler.AnalysisRequestHandlerBase.java
License:Apache License
/**
 * Analyzes the given value using the analyzer from the given context and
 * reports the tokens produced at every stage of the analysis chain.
 *
 * @param value   the value to analyze
 * @param context the {@link AnalysisContext analysis context}
 * @return NamedList containing the tokens produced by analyzing the given value
 */
protected NamedList<? extends Object> analyzeValue(String value, AnalysisContext context) {
    Analyzer analyzer = context.getAnalyzer();

    // A plain (non-chain) analyzer is reported as a single stage.
    if (!(analyzer instanceof TokenizerChain)) {
        TokenStream stream = null;
        try {
            stream = analyzer.tokenStream(context.getFieldName(), value);
            NamedList<List<NamedList>> result = new NamedList<List<NamedList>>();
            result.add(stream.getClass().getName(),
                    convertTokensToNamedLists(analyzeTokenStream(stream), context));
            return result;
        } catch (IOException e) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
        } finally {
            IOUtils.closeWhileHandlingException(stream);
        }
    }

    TokenizerChain chain = (TokenizerChain) analyzer;
    CharFilterFactory[] charFilterFactories = chain.getCharFilterFactories();
    TokenizerFactory tokenizerFactory = chain.getTokenizerFactory();
    TokenFilterFactory[] tokenFilterFactories = chain.getTokenFilterFactories();

    NamedList<Object> stages = new NamedList<Object>();

    // Run each char filter over the raw value in turn, recording the text
    // each one yields.
    if (charFilterFactories != null) {
        String text = value;
        for (CharFilterFactory charFilterFactory : charFilterFactories) {
            Reader reader = new StringReader(text);
            reader = charFilterFactory.create(reader);
            text = writeCharStream(stages, reader);
        }
    }

    // Tokenize the original value and record the tokenizer's output.
    TokenStream stream = tokenizerFactory.create(chain.initReader(null, new StringReader(value)));
    List<AttributeSource> tokens = analyzeTokenStream(stream);
    stages.add(stream.getClass().getName(), convertTokensToNamedLists(tokens, context));

    // Replay the tokens through each filter, recording every stage's output.
    ListBasedTokenStream replayStream = new ListBasedTokenStream(tokens);
    for (TokenFilterFactory tokenFilterFactory : tokenFilterFactories) {
        // Freeze each token's stage marker before the next filter runs.
        for (final AttributeSource token : tokens) {
            token.getAttribute(TokenTrackingAttribute.class).freezeStage();
        }
        stream = tokenFilterFactory.create(replayStream);
        tokens = analyzeTokenStream(stream);
        stages.add(stream.getClass().getName(), convertTokensToNamedLists(tokens, context));
        replayStream = new ListBasedTokenStream(tokens);
    }
    return stages;
}
From source file:org.apache.solr.handler.AnalysisRequestHandlerBase.java
License:Apache License
/** * Converts the list of Tokens to a list of NamedLists representing the tokens. * * @param tokenList Tokens to convert/*from ww w.ja v a 2 s.c o m*/ * @param context The analysis context * * @return List of NamedLists containing the relevant information taken from the tokens */ private List<NamedList> convertTokensToNamedLists(final List<AttributeSource> tokenList, AnalysisContext context) { final List<NamedList> tokensNamedLists = new ArrayList<NamedList>(); final FieldType fieldType = context.getFieldType(); final AttributeSource[] tokens = tokenList.toArray(new AttributeSource[tokenList.size()]); // sort the tokens by absoulte position ArrayUtil.timSort(tokens, new Comparator<AttributeSource>() { @Override public int compare(AttributeSource a, AttributeSource b) { return arrayCompare(a.getAttribute(TokenTrackingAttribute.class).getPositions(), b.getAttribute(TokenTrackingAttribute.class).getPositions()); } private int arrayCompare(int[] a, int[] b) { int p = 0; final int stop = Math.min(a.length, b.length); while (p < stop) { int diff = a[p] - b[p]; if (diff != 0) return diff; p++; } // One is a prefix of the other, or, they are equal: return a.length - b.length; } }); for (int i = 0; i < tokens.length; i++) { AttributeSource token = tokens[i]; final NamedList<Object> tokenNamedList = new SimpleOrderedMap<Object>(); final TermToBytesRefAttribute termAtt = token.getAttribute(TermToBytesRefAttribute.class); BytesRef rawBytes = termAtt.getBytesRef(); termAtt.fillBytesRef(); final String text = fieldType.indexedToReadable(rawBytes, new CharsRef(rawBytes.length)).toString(); tokenNamedList.add("text", text); if (token.hasAttribute(CharTermAttribute.class)) { final String rawText = token.getAttribute(CharTermAttribute.class).toString(); if (!rawText.equals(text)) { tokenNamedList.add("raw_text", rawText); } } tokenNamedList.add("raw_bytes", rawBytes.toString()); if (context.getTermsToMatch().contains(rawBytes)) { tokenNamedList.add("match", true); } 
token.reflectWith(new AttributeReflector() { @Override public void reflect(Class<? extends Attribute> attClass, String key, Object value) { // leave out position and bytes term if (TermToBytesRefAttribute.class.isAssignableFrom(attClass)) return; if (CharTermAttribute.class.isAssignableFrom(attClass)) return; if (PositionIncrementAttribute.class.isAssignableFrom(attClass)) return; String k = attClass.getName() + '#' + key; // map keys for "standard attributes": if (ATTRIBUTE_MAPPING.containsKey(k)) { k = ATTRIBUTE_MAPPING.get(k); } if (value instanceof BytesRef) { final BytesRef p = (BytesRef) value; value = p.toString(); } tokenNamedList.add(k, value); } }); tokensNamedLists.add(tokenNamedList); } return tokensNamedLists; }