List of usage examples for org.apache.lucene.analysis.TokenStream.hasAttribute
public final boolean hasAttribute(Class<? extends Attribute> attClass)
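hasAttribute reports whether the stream's AttributeSource already contains an instance of the given attribute class. It is typically used to probe for optional attributes before calling getAttribute, which throws IllegalArgumentException when the requested attribute is absent. Below is a minimal sketch of that probe-then-get pattern; the field name "body", the sample text, and the use of StandardAnalyzer are illustrative assumptions, and exact imports and constructors vary slightly between Lucene versions:

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;

public class HasAttributeExample {
    public static void main(String[] args) throws IOException {
        Analyzer analyzer = new StandardAnalyzer();
        try (TokenStream ts = analyzer.tokenStream("body", "hasAttribute checks the attribute source")) {
            // Probe before getAttribute(): getAttribute() throws IllegalArgumentException
            // when the requested attribute is not present on the stream.
            if (!ts.hasAttribute(CharTermAttribute.class)) {
                return; // e.g. a binary or empty token stream
            }
            CharTermAttribute term = ts.getAttribute(CharTermAttribute.class);
            // TypeAttribute is optional here, so only fetch it if the stream carries it.
            TypeAttribute type = ts.hasAttribute(TypeAttribute.class)
                    ? ts.getAttribute(TypeAttribute.class)
                    : null;
            ts.reset();
            while (ts.incrementToken()) {
                System.out.println(term.toString() + (type != null ? " [" + type.type() + "]" : ""));
            }
            ts.end();
        }
    }
}

The examples below follow the same pattern against different analyzers and attribute types.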
From source file: org.alfresco.solr.query.Solr4QueryParser.java
License: Open Source License
/**
 * @param first
 * @param field
 * @return SpanOrQuery
 * @throws IOException
 */
private SpanQuery buildSpanOrQuery(String first, FieldInstance field) throws IOException {
    ArrayList<SpanQuery> spanOrQueryParts = new ArrayList<SpanQuery>();
    PackedTokenAttributeImpl nextToken;
    TokenStream source = null;
    try {
        source = getAnalyzer().tokenStream(field.getField(), new StringReader(first));
        source.reset();
        while (source.incrementToken()) {
            CharTermAttribute cta = source.getAttribute(CharTermAttribute.class);
            OffsetAttribute offsetAtt = source.getAttribute(OffsetAttribute.class);
            TypeAttribute typeAtt = null;
            if (source.hasAttribute(TypeAttribute.class)) {
                typeAtt = source.getAttribute(TypeAttribute.class);
            }
            PositionIncrementAttribute posIncAtt = null;
            if (source.hasAttribute(PositionIncrementAttribute.class)) {
                posIncAtt = source.getAttribute(PositionIncrementAttribute.class);
            }
            nextToken = new PackedTokenAttributeImpl();
            nextToken.setEmpty().copyBuffer(cta.buffer(), 0, cta.length());
            nextToken.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
            if (typeAtt != null) {
                nextToken.setType(typeAtt.type());
            }
            if (posIncAtt != null) {
                nextToken.setPositionIncrement(posIncAtt.getPositionIncrement());
            }
            SpanQuery termQuery = new SpanTermQuery(new Term(field.getField(), nextToken.toString()));
            spanOrQueryParts.add(termQuery);
        }
    } finally {
        try {
            if (source != null) {
                source.close();
            }
        } catch (IOException e) {
            // ignore
        }
    }
    if (spanOrQueryParts.size() == 1) {
        return spanOrQueryParts.get(0);
    } else {
        return new SpanOrQuery(spanOrQueryParts.toArray(new SpanQuery[] {}));
    }
}
From source file: org.apache.solr.handler.component.WordCloudComponent.java
License: Apache License
@Override
public void process(ResponseBuilder rb) throws IOException {
    SolrQueryRequest req = rb.req;
    SolrParams params = req.getParams();
    if (!params.getBool(COMPONENT_NAME, true)) {
        return;
    }

    String wcFields = null;
    if ((wcFields = params.get("wordcloud.fl", null)) == null) {
        return;
    }

    Set<String> flds = new HashSet<String>(StrUtils.splitSmart(wcFields, ','));
    DocList ids = rb.getResults().docList;

    SolrIndexSearcher searcher = rb.req.getSearcher();
    IndexSchema schema = rb.req.getCore().getLatestSchema();

    final Analyzer analyzer = rb.req.getCore().getLatestSchema().getAnalyzer();
    final HashMap<String, FieldType> fieldsToLoad = new HashMap<String, FieldType>();

    CharTermAttribute termAtt;
    Map<String, Map<String, Integer>> tokens = new HashMap<String, Map<String, Integer>>();

    for (String f : flds) {
        SchemaField field = schema.getFieldOrNull(f);
        if (field == null || !field.stored()) {
            continue; // ignore this field
        }
        fieldsToLoad.put(f, field.getType());
        tokens.put(f, new HashMap<String, Integer>());
    }

    DocIterator iterator = ids.iterator();
    String w;
    Integer v;
    int sz = ids.size();
    for (int i = 0; i < sz; i++) {
        int id = iterator.nextDoc();

        Document doc = searcher.doc(id, fieldsToLoad.keySet());
        for (Entry<String, FieldType> en : fieldsToLoad.entrySet()) {
            Map<String, Integer> toks = tokens.get(en.getKey());
            String[] vals = doc.getValues(en.getKey());
            FieldType fType = en.getValue();

            if (vals != null) {
                for (String s : vals) {
                    TokenStream buffer = analyzer.tokenStream(en.getKey(),
                            new StringReader(fType.indexedToReadable(s)));

                    if (!buffer.hasAttribute(CharTermAttribute.class)) {
                        continue; // empty stream
                    }

                    termAtt = buffer.getAttribute(CharTermAttribute.class);
                    buffer.reset();

                    while (buffer.incrementToken()) {
                        w = termAtt.toString();
                        v = toks.get(w);
                        if (v == null)
                            v = 0;
                        toks.put(w, ++v);
                    }

                    buffer.close();
                }
            }
        }
    }

    // TODO: filter out the tokens (use some sort of a range 0.1-0.9 by frequency)

    AtomicReader reader = searcher.getAtomicReader();
    BytesRef term;
    int df;
    String f;
    Map<String, Map<String, Double>> docFreqs = new HashMap<String, Map<String, Double>>();
    for (Entry<String, Map<String, Integer>> field : tokens.entrySet()) {
        HashMap<String, Double> idfs = new HashMap<String, Double>();
        f = field.getKey();
        docFreqs.put(f, idfs);
        int N = reader.getDocCount(f);

        for (Entry<String, Integer> token : field.getValue().entrySet()) {
            w = token.getKey();
            df = reader.docFreq(new Term(f, new BytesRef(w)));
            if (df != 0) {
                idfs.put(w, Math.log10(N / df));
            }
        }
    }

    HashMap<String, Object> ret = new HashMap<String, Object>();
    for (String fi : fieldsToLoad.keySet()) {
        HashMap<String, Object> va = new HashMap<String, Object>();
        va.put("tf", tokens.get(fi));
        va.put("idf", docFreqs.get(fi));
        ret.put(fi, va);
    }
    rb.rsp.add("wordcloud", ret);
}
From source file: org.apache.solr.schema.JsonPreAnalyzedParser.java
License: Apache License
@Override
public String toFormattedString(Field f) throws IOException {
    Map<String, Object> map = new LinkedHashMap<String, Object>();
    map.put(VERSION_KEY, VERSION);
    if (f.fieldType().stored()) {
        String stringValue = f.stringValue();
        if (stringValue != null) {
            map.put(STRING_KEY, stringValue);
        }
        BytesRef binaryValue = f.binaryValue();
        if (binaryValue != null) {
            map.put(BINARY_KEY,
                    Base64.byteArrayToBase64(binaryValue.bytes, binaryValue.offset, binaryValue.length));
        }
    }
    TokenStream ts = f.tokenStreamValue();
    if (ts != null) {
        List<Map<String, Object>> tokens = new LinkedList<Map<String, Object>>();
        while (ts.incrementToken()) {
            Iterator<Class<? extends Attribute>> it = ts.getAttributeClassesIterator();
            String cTerm = null;
            String tTerm = null;
            Map<String, Object> tok = new TreeMap<String, Object>();
            while (it.hasNext()) {
                Class<? extends Attribute> cl = it.next();
                if (!ts.hasAttribute(cl)) {
                    continue;
                }
                Attribute att = ts.getAttribute(cl);
                if (cl.isAssignableFrom(CharTermAttribute.class)) {
                    CharTermAttribute catt = (CharTermAttribute) att;
                    cTerm = new String(catt.buffer(), 0, catt.length());
                } else if (cl.isAssignableFrom(TermToBytesRefAttribute.class)) {
                    TermToBytesRefAttribute tatt = (TermToBytesRefAttribute) att;
                    tTerm = tatt.getBytesRef().utf8ToString();
                } else {
                    if (cl.isAssignableFrom(FlagsAttribute.class)) {
                        tok.put(FLAGS_KEY, Integer.toHexString(((FlagsAttribute) att).getFlags()));
                    } else if (cl.isAssignableFrom(OffsetAttribute.class)) {
                        tok.put(OFFSET_START_KEY, ((OffsetAttribute) att).startOffset());
                        tok.put(OFFSET_END_KEY, ((OffsetAttribute) att).endOffset());
                    } else if (cl.isAssignableFrom(PayloadAttribute.class)) {
                        BytesRef p = ((PayloadAttribute) att).getPayload();
                        if (p != null && p.length > 0) {
                            tok.put(PAYLOAD_KEY, Base64.byteArrayToBase64(p.bytes, p.offset, p.length));
                        }
                    } else if (cl.isAssignableFrom(PositionIncrementAttribute.class)) {
                        tok.put(POSINCR_KEY, ((PositionIncrementAttribute) att).getPositionIncrement());
                    } else if (cl.isAssignableFrom(TypeAttribute.class)) {
                        tok.put(TYPE_KEY, ((TypeAttribute) att).type());
                    } else {
                        tok.put(cl.getName(), att.toString());
                    }
                }
            }
            String term = null;
            if (cTerm != null) {
                term = cTerm;
            } else {
                term = tTerm;
            }
            if (term != null && term.length() > 0) {
                tok.put(TOKEN_KEY, term);
            }
            tokens.add(tok);
        }
        map.put(TOKENS_KEY, tokens);
    }
    return JSONUtil.toJSON(map, -1);
}
From source file: org.apache.solr.schema.SimplePreAnalyzedParser.java
License: Apache License
@Override
public String toFormattedString(Field f) throws IOException {
    StringBuilder sb = new StringBuilder();
    sb.append(VERSION + " ");
    if (f.fieldType().stored()) {
        String s = f.stringValue();
        if (s != null) {
            // encode the equals sign
            s = s.replaceAll("=", "\\\\=");
            sb.append('=');
            sb.append(s);
            sb.append('=');
        }
    }
    TokenStream ts = f.tokenStreamValue();
    if (ts != null) {
        StringBuilder tok = new StringBuilder();
        boolean next = false;
        while (ts.incrementToken()) {
            if (next) {
                sb.append(' ');
            } else {
                next = true;
            }
            tok.setLength(0);
            Iterator<Class<? extends Attribute>> it = ts.getAttributeClassesIterator();
            String cTerm = null;
            String tTerm = null;
            while (it.hasNext()) {
                Class<? extends Attribute> cl = it.next();
                if (!ts.hasAttribute(cl)) {
                    continue;
                }
                Attribute att = ts.getAttribute(cl);
                if (cl.isAssignableFrom(CharTermAttribute.class)) {
                    CharTermAttribute catt = (CharTermAttribute) att;
                    cTerm = escape(catt.buffer(), catt.length());
                } else if (cl.isAssignableFrom(TermToBytesRefAttribute.class)) {
                    TermToBytesRefAttribute tatt = (TermToBytesRefAttribute) att;
                    char[] tTermChars = tatt.getBytesRef().utf8ToString().toCharArray();
                    tTerm = escape(tTermChars, tTermChars.length);
                } else {
                    if (tok.length() > 0)
                        tok.append(',');
                    if (cl.isAssignableFrom(FlagsAttribute.class)) {
                        tok.append("f=" + Integer.toHexString(((FlagsAttribute) att).getFlags()));
                    } else if (cl.isAssignableFrom(OffsetAttribute.class)) {
                        tok.append("s=" + ((OffsetAttribute) att).startOffset() + ",e="
                                + ((OffsetAttribute) att).endOffset());
                    } else if (cl.isAssignableFrom(PayloadAttribute.class)) {
                        BytesRef p = ((PayloadAttribute) att).getPayload();
                        if (p != null && p.length > 0) {
                            tok.append("p=" + bytesToHex(p.bytes, p.offset, p.length));
                        } else if (tok.length() > 0) {
                            tok.setLength(tok.length() - 1); // remove the last comma
                        }
                    } else if (cl.isAssignableFrom(PositionIncrementAttribute.class)) {
                        tok.append("i=" + ((PositionIncrementAttribute) att).getPositionIncrement());
                    } else if (cl.isAssignableFrom(TypeAttribute.class)) {
                        tok.append("y=" + escape(((TypeAttribute) att).type()));
                    } else {
                        tok.append(cl.getName() + "=" + escape(att.toString()));
                    }
                }
            }
            String term = null;
            if (cTerm != null) {
                term = cTerm;
            } else {
                term = tTerm;
            }
            if (term != null && term.length() > 0) {
                if (tok.length() > 0) {
                    tok.insert(0, term + ",");
                } else {
                    tok.insert(0, term);
                }
            }
            sb.append(tok);
        }
    }
    return sb.toString();
}
From source file: org.elasticsearch.analysis.common.ShingleTokenFilterTests.java
License: Apache License
public void testPreConfiguredShingleFilterDisableGraphAttribute() throws Exception {
    ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(
            Settings.builder()
                    .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
                    .put("index.analysis.filter.my_ascii_folding.type", "asciifolding")
                    .build(),
            new CommonAnalysisPlugin());
    TokenFilterFactory tokenFilter = analysis.tokenFilter.get("shingle");

    Tokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader("this is a test"));
    TokenStream tokenStream = tokenFilter.create(tokenizer);
    assertTrue(tokenStream.hasAttribute(DisableGraphAttribute.class));
}
From source file: org.elasticsearch.search.highlight.PlainHighlighter.java
License: Apache License
public HighlightField highlight(HighlighterContext highlighterContext) {
    SearchContextHighlight.Field field = highlighterContext.field;
    SearchContext context = highlighterContext.context;
    FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
    FieldMapper<?> mapper = highlighterContext.mapper;

    Encoder encoder = field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML
            : HighlightUtils.Encoders.DEFAULT;

    if (!hitContext.cache().containsKey(CACHE_KEY)) {
        Map<FieldMapper<?>, org.apache.lucene.search.highlight.Highlighter> mappers = Maps.newHashMap();
        hitContext.cache().put(CACHE_KEY, mappers);
    }
    @SuppressWarnings("unchecked")
    Map<FieldMapper<?>, org.apache.lucene.search.highlight.Highlighter> cache =
            (Map<FieldMapper<?>, org.apache.lucene.search.highlight.Highlighter>) hitContext.cache().get(CACHE_KEY);

    org.apache.lucene.search.highlight.Highlighter entry = cache.get(mapper);
    if (entry == null) {
        Query query = highlighterContext.query.originalQuery();
        QueryScorer queryScorer = new CustomQueryScorer(query,
                field.fieldOptions().requireFieldMatch() ? mapper.names().indexName() : null);
        queryScorer.setExpandMultiTermQuery(true);
        Fragmenter fragmenter;
        if (field.fieldOptions().numberOfFragments() == 0) {
            fragmenter = new NullFragmenter();
        } else if (field.fieldOptions().fragmenter() == null) {
            fragmenter = new SimpleSpanFragmenter(queryScorer, field.fieldOptions().fragmentCharSize());
        } else if ("simple".equals(field.fieldOptions().fragmenter())) {
            fragmenter = new SimpleFragmenter(field.fieldOptions().fragmentCharSize());
        } else if ("span".equals(field.fieldOptions().fragmenter())) {
            fragmenter = new SimpleSpanFragmenter(queryScorer, field.fieldOptions().fragmentCharSize());
        } else {
            throw new ElasticsearchIllegalArgumentException("unknown fragmenter option ["
                    + field.fieldOptions().fragmenter() + "] for the field [" + highlighterContext.fieldName + "]");
        }
        Formatter formatter = new SimpleHTMLFormatter(field.fieldOptions().preTags()[0],
                field.fieldOptions().postTags()[0]);

        entry = new org.apache.lucene.search.highlight.Highlighter(formatter, encoder, queryScorer);
        entry.setTextFragmenter(fragmenter);
        // always highlight across all data
        entry.setMaxDocCharsToAnalyze(Integer.MAX_VALUE);

        cache.put(mapper, entry);
    }

    // a HACK to make highlighter do highlighting, even though its using the single frag list builder
    int numberOfFragments = field.fieldOptions().numberOfFragments() == 0 ? 1
            : field.fieldOptions().numberOfFragments();
    ArrayList<TextFragment> fragsList = new ArrayList<TextFragment>();
    List<Object> textsToHighlight;

    try {
        textsToHighlight = HighlightUtils.loadFieldValues(field, mapper, context, hitContext);

        for (Object textToHighlight : textsToHighlight) {
            String text = textToHighlight.toString();
            Analyzer analyzer = context.mapperService().documentMapper(hitContext.hit().type()).mappers()
                    .indexAnalyzer();
            TokenStream tokenStream = analyzer.tokenStream(mapper.names().indexName(), text);
            if (!tokenStream.hasAttribute(CharTermAttribute.class)
                    || !tokenStream.hasAttribute(OffsetAttribute.class)) {
                // can't perform highlighting if the stream has no terms (binary token stream) or no offsets
                continue;
            }
            TextFragment[] bestTextFragments = entry.getBestTextFragments(tokenStream, text, false,
                    numberOfFragments);
            for (TextFragment bestTextFragment : bestTextFragments) {
                if (bestTextFragment != null && bestTextFragment.getScore() > 0) {
                    fragsList.add(bestTextFragment);
                }
            }
        }
    } catch (Exception e) {
        throw new FetchPhaseExecutionException(context,
                "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
    }
    if (field.fieldOptions().scoreOrdered()) {
        CollectionUtil.introSort(fragsList, new Comparator<TextFragment>() {
            public int compare(TextFragment o1, TextFragment o2) {
                return Math.round(o2.getScore() - o1.getScore());
            }
        });
    }
    String[] fragments;
    // number_of_fragments is set to 0 but we have a multivalued field
    if (field.fieldOptions().numberOfFragments() == 0 && textsToHighlight.size() > 1 && fragsList.size() > 0) {
        fragments = new String[fragsList.size()];
        for (int i = 0; i < fragsList.size(); i++) {
            fragments[i] = fragsList.get(i).toString();
        }
    } else {
        // refine numberOfFragments if needed
        numberOfFragments = fragsList.size() < numberOfFragments ? fragsList.size() : numberOfFragments;
        fragments = new String[numberOfFragments];
        for (int i = 0; i < fragments.length; i++) {
            fragments[i] = fragsList.get(i).toString();
        }
    }

    if (fragments.length > 0) {
        return new HighlightField(highlighterContext.fieldName, StringText.convertFromStringArray(fragments));
    }

    int noMatchSize = highlighterContext.field.fieldOptions().noMatchSize();
    if (noMatchSize > 0 && textsToHighlight.size() > 0) {
        // Pull an excerpt from the beginning of the string but make sure to split the string on a term boundary.
        String fieldContents = textsToHighlight.get(0).toString();
        Analyzer analyzer = context.mapperService().documentMapper(hitContext.hit().type()).mappers()
                .indexAnalyzer();
        int end;
        try {
            end = findGoodEndForNoHighlightExcerpt(noMatchSize,
                    analyzer.tokenStream(mapper.names().indexName(), fieldContents));
        } catch (Exception e) {
            throw new FetchPhaseExecutionException(context,
                    "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
        }
        if (end > 0) {
            return new HighlightField(highlighterContext.fieldName,
                    new Text[] { new StringText(fieldContents.substring(0, end)) });
        }
    }
    return null;
}
From source file: org.elasticsearch.search.highlight.PlainHighlighter.java
License: Apache License
private static int findGoodEndForNoHighlightExcerpt(int noMatchSize, TokenStream tokenStream) throws IOException {
    try {
        if (!tokenStream.hasAttribute(OffsetAttribute.class)) {
            // Can't split on term boundaries without offsets
            return -1;
        }
        int end = -1;
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            OffsetAttribute attr = tokenStream.getAttribute(OffsetAttribute.class);
            if (attr.endOffset() >= noMatchSize) {
                // Jump to the end of this token if it wouldn't put us past the boundary
                if (attr.endOffset() == noMatchSize) {
                    end = noMatchSize;
                }
                return end;
            }
            end = attr.endOffset();
        }
        // We've exhausted the token stream so we should just highlight everything.
        return end;
    } finally {
        tokenStream.end();
        tokenStream.close();
    }
}
From source file: org.fastcatsearch.ir.index.SearchIndexWriter.java
License: Apache License
private void indexValue(int docNo, int i, Object value, boolean isIgnoreCase, int positionIncrementGap)
        throws IOException, IRException {
    if (value == null) {
        return;
    }

    char[] fieldValue = value.toString().toCharArray();
    TokenStream tokenStream = indexAnalyzerList[i].tokenStream(indexId, new CharArrayReader(fieldValue),
            indexingAnalyzerOption);
    tokenStream.reset();

    CharsRefTermAttribute termAttribute = null;
    PositionIncrementAttribute positionAttribute = null;
    StopwordAttribute stopwordAttribute = null;
    AdditionalTermAttribute additionalTermAttribute = null;
    CharTermAttribute charTermAttribute = null;

    // Look up only the attributes this token stream actually provides.
    if (tokenStream.hasAttribute(CharsRefTermAttribute.class)) {
        termAttribute = tokenStream.getAttribute(CharsRefTermAttribute.class);
    }
    if (tokenStream.hasAttribute(PositionIncrementAttribute.class)) {
        positionAttribute = tokenStream.getAttribute(PositionIncrementAttribute.class);
    }
    if (tokenStream.hasAttribute(AdditionalTermAttribute.class)) {
        additionalTermAttribute = tokenStream.getAttribute(AdditionalTermAttribute.class);
    }
    // Stopword attribute, if the analyzer marks stopwords.
    if (tokenStream.hasAttribute(StopwordAttribute.class)) {
        stopwordAttribute = tokenStream.getAttribute(StopwordAttribute.class);
    }
    if (tokenStream.hasAttribute(CharTermAttribute.class)) {
        charTermAttribute = tokenStream.getAttribute(CharTermAttribute.class);
    }

    int lastPosition = 0;

    while (tokenStream.incrementToken()) {
        CharVector key = null;
        if (termAttribute != null) {
            CharsRef charRef = termAttribute.charsRef();
            char[] buffer = new char[charRef.length()];
            System.arraycopy(charRef.chars, charRef.offset, buffer, 0, charRef.length);
            key = new CharVector(buffer, 0, buffer.length);
        } else {
            key = new CharVector(charTermAttribute.buffer(), 0, charTermAttribute.length());
        }

        int position = -1;
        if (positionAttribute != null) {
            position = positionAttribute.getPositionIncrement() + positionIncrementGap;
            lastPosition = position;
        }
        // logger.debug("FIELD#{}: {} >> {} ({})", indexId, key, docNo, position);

        if (stopwordAttribute != null && stopwordAttribute.isStopword()) {
            // ignore stopwords
        } else {
            memoryPosting.add(key, docNo, position);
        }

        // if(synonymAttribute != null) {
        //     CharVector[] synonym = synonymAttribute.getSynonym();
        //     if(synonym != null) {
        //         for(CharVector token : synonym) {
        //             memoryPosting.add(token, docNo, position);
        //         }
        //     }
        // }

        if (additionalTermAttribute != null && additionalTermAttribute.size() > 0) {
            Iterator<String> iter = additionalTermAttribute.iterateAdditionalTerms();
            while (iter.hasNext()) {
                CharVector token = new CharVector(iter.next().toCharArray());
                memoryPosting.add(token, docNo, lastPosition);
            }
        }
    }
}
From source file: org.fastcatsearch.plugin.analysis.RunAnalyzer.java
public static void main(String[] args) throws IOException {
    if (args.length != 3) {
        printUsage();
        System.exit(0);
    }

    File pluginDir = new File(args[0]);
    String pluginClassName = args[1];
    String analyzerId = args[2];
    RunAnalyzer runAnalyzer = new RunAnalyzer(pluginDir, pluginClassName);
    AnalyzerPool analyzerPool = runAnalyzer.getAnalyzerPool(analyzerId);
    Analyzer analyzer = null;

    try {
        analyzer = analyzerPool.getFromPool();
        // Read input from the console line by line.
        Scanner sc = new Scanner(System.in);
        System.out.println("==================================");
        System.out.println(" Fastcat analyzer");
        System.out.println(" Enter 'quit' for exit program. ");
        System.out.println("==================================");
        System.out.print("Input String: ");

        while (sc.hasNextLine()) {
            String str = sc.nextLine();
            if (str.equalsIgnoreCase("quit")) {
                break;
            }
            try {
                char[] value = str.toCharArray();
                TokenStream tokenStream = analyzer.tokenStream("", new CharArrayReader(value),
                        new AnalyzerOption());
                tokenStream.reset();

                CharsRefTermAttribute termAttribute = null;
                if (tokenStream.hasAttribute(CharsRefTermAttribute.class)) {
                    termAttribute = tokenStream.getAttribute(CharsRefTermAttribute.class);
                }
                SynonymAttribute synonymAttribute = null;
                if (tokenStream.hasAttribute(SynonymAttribute.class)) {
                    synonymAttribute = tokenStream.getAttribute(SynonymAttribute.class);
                }
                AdditionalTermAttribute additionalTermAttribute = null;
                if (tokenStream.hasAttribute(AdditionalTermAttribute.class)) {
                    additionalTermAttribute = tokenStream.getAttribute(AdditionalTermAttribute.class);
                }
                StopwordAttribute stopwordAttribute = null;
                if (tokenStream.hasAttribute(StopwordAttribute.class)) {
                    stopwordAttribute = tokenStream.getAttribute(StopwordAttribute.class);
                }
                CharTermAttribute charTermAttribute = tokenStream.getAttribute(CharTermAttribute.class);

                while (tokenStream.incrementToken()) {
                    String word = "";
                    // Prefer CharsRefTermAttribute when the analyzer provides it.
                    if (termAttribute != null) {
                        word = termAttribute.toString();
                    } else {
                        // Otherwise fall back to CharTermAttribute.
                        word = charTermAttribute.toString();
                    }

                    // Skip stopwords.
                    if (stopwordAttribute.isStopword()) {
                        continue;
                    }

                    // Print the extracted term.
                    System.out.print(">> ");
                    System.out.println(word);

                    // Print synonyms, if any.
                    if (synonymAttribute != null) {
                        List synonyms = synonymAttribute.getSynonyms();
                        if (synonyms != null) {
                            for (Object synonymObj : synonyms) {
                                if (synonymObj instanceof CharVector) {
                                    CharVector synonym = (CharVector) synonymObj;
                                    System.out.print("S> ");
                                    System.out.println(synonym);
                                } else if (synonymObj instanceof List) {
                                    List synonymList = (List) synonymObj;
                                    for (Object synonym : synonymList) {
                                        System.out.print("S> ");
                                        System.out.println(synonym);
                                    }
                                }
                            }
                        }
                    }

                    // Print additional terms (e.g. decompounded sub-terms), if any.
                    if (additionalTermAttribute != null && additionalTermAttribute.size() > 0) {
                        Iterator<String> termIter = additionalTermAttribute.iterateAdditionalTerms();
                        while (termIter.hasNext()) {
                            String token = termIter.next();
                            System.out.print("A> ");
                            System.out.println(token);
                        }
                    }
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
            System.out.print("Input String: ");
        }
    } finally {
        if (analyzer != null) {
            analyzerPool.releaseToPool(analyzer);
        }
    }
    System.out.print("Bye!");
}
From source file: org.sc.probro.lucene.BiothesaurusSearcher.java
License: Apache License
public String[] tokenize(String input) {
    ArrayList<String> tokens = new ArrayList<String>();
    try {
        TokenStream stream = analyzer.tokenStream(null, new StringReader(input));
        TermAttribute termattr = (TermAttribute) stream.getAttribute(TermAttribute.class);
        //stream = new LowerCaseFilter(stream);

        stream.reset();
        while (stream.incrementToken()) {
            if (stream.hasAttribute(TermAttribute.class)) {
                String term = termattr.term();
                tokens.add(term);
            }
        }
        stream.end();
        stream.close();
    } catch (IllegalArgumentException e) {
        System.err.println(String.format("Phrase: \"%s\"", input));
        e.printStackTrace(System.err);
    } catch (IOException e) {
        System.err.println(String.format("Phrase: \"%s\"", input));
        e.printStackTrace();
    }
    return tokens.toArray(new String[0]);
}
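The example above uses the older TermAttribute API, which was deprecated in Lucene 3.x and removed in 4.0. On current Lucene versions the same helper would probe for CharTermAttribute instead. A minimal sketch under that assumption (the class name Tokenizer4x is hypothetical, and the analyzer is passed in rather than held as a field as in the original):

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class Tokenizer4x {
    public static String[] tokenize(Analyzer analyzer, String input) {
        ArrayList<String> tokens = new ArrayList<String>();
        try (TokenStream stream = analyzer.tokenStream(null, new StringReader(input))) {
            // CharTermAttribute replaces TermAttribute on Lucene 4.0 and later.
            if (stream.hasAttribute(CharTermAttribute.class)) {
                CharTermAttribute termattr = stream.getAttribute(CharTermAttribute.class);
                stream.reset();
                while (stream.incrementToken()) {
                    tokens.add(termattr.toString());
                }
                stream.end();
            }
        } catch (IllegalArgumentException | IOException e) {
            System.err.println(String.format("Phrase: \"%s\"", input));
            e.printStackTrace(System.err);
        }
        return tokens.toArray(new String[0]);
    }
}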