Example usage for org.apache.lucene.analysis TokenStream hasAttribute

List of usage examples for org.apache.lucene.analysis TokenStream hasAttribute

Introduction

On this page you can find example usage for org.apache.lucene.analysis TokenStream hasAttribute.

Prototype

public final boolean hasAttribute(Class<? extends Attribute> attClass) 

Document

The caller must pass in a Class<? extends Attribute> value; the method returns true if, and only if, this TokenStream's attribute source contains an instance of the given attribute class.
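
Below is a minimal, self-contained sketch of the check-then-get pattern that the usage examples on this page rely on: guard getAttribute with hasAttribute for attributes the analyzer may not provide. The StandardAnalyzer, the field name "body", and the sample text are illustrative assumptions and are not taken from any of the listings below.

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

public class HasAttributeExample {
    public static void main(String[] args) throws IOException {
        Analyzer analyzer = new StandardAnalyzer();
        try (TokenStream ts = analyzer.tokenStream("body", "quick brown fox")) {
            // getAttribute throws IllegalArgumentException when the attribute is
            // missing, so check hasAttribute first for optional attributes.
            OffsetAttribute offsetAtt = ts.hasAttribute(OffsetAttribute.class)
                    ? ts.getAttribute(OffsetAttribute.class)
                    : null;
            CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
                if (offsetAtt != null) {
                    System.out.println(termAtt.toString() + " ["
                            + offsetAtt.startOffset() + "," + offsetAtt.endOffset() + "]");
                } else {
                    System.out.println(termAtt.toString());
                }
            }
            ts.end();
        }
        analyzer.close();
    }
}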

Usage

From source file:org.alfresco.solr.query.Solr4QueryParser.java

License:Open Source License

/**
 * @param first
 * @param field
 * @return SpanOrQuery
 * @throws IOException
 */
private SpanQuery buildSpanOrQuery(String first, FieldInstance field) throws IOException {
    ArrayList<SpanQuery> spanOrQueryParts = new ArrayList<SpanQuery>();

    PackedTokenAttributeImpl nextToken;
    TokenStream source = null;

    try {
        source = getAnalyzer().tokenStream(field.getField(), new StringReader(first));
        source.reset();
        while (source.incrementToken()) {
            CharTermAttribute cta = source.getAttribute(CharTermAttribute.class);
            OffsetAttribute offsetAtt = source.getAttribute(OffsetAttribute.class);
            TypeAttribute typeAtt = null;
            if (source.hasAttribute(TypeAttribute.class)) {
                typeAtt = source.getAttribute(TypeAttribute.class);
            }
            PositionIncrementAttribute posIncAtt = null;
            if (source.hasAttribute(PositionIncrementAttribute.class)) {
                posIncAtt = source.getAttribute(PositionIncrementAttribute.class);
            }
            nextToken = new PackedTokenAttributeImpl();
            nextToken.setEmpty().copyBuffer(cta.buffer(), 0, cta.length());
            nextToken.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
            if (typeAtt != null) {
                nextToken.setType(typeAtt.type());
            }
            if (posIncAtt != null) {
                nextToken.setPositionIncrement(posIncAtt.getPositionIncrement());
            }

            SpanQuery termQuery = new SpanTermQuery(new Term(field.getField(), nextToken.toString()));
            spanOrQueryParts.add(termQuery);
        }
    } finally {
        try {
            if (source != null) {
                source.close();
            }
        } catch (IOException e) {
            // ignore
        }
    }

    if (spanOrQueryParts.size() == 1) {
        return spanOrQueryParts.get(0);
    } else {
        return new SpanOrQuery(spanOrQueryParts.toArray(new SpanQuery[] {}));
    }
}

From source file:org.apache.solr.handler.component.WordCloudComponent.java

License:Apache License

@Override
public void process(ResponseBuilder rb) throws IOException {
    SolrQueryRequest req = rb.req;
    SolrParams params = req.getParams();
    if (!params.getBool(COMPONENT_NAME, true)) {
        return;
    }

    String wcFields = null;
    if ((wcFields = params.get("wordcloud.fl", null)) == null) {
        return;
    }

    Set<String> flds = new HashSet<String>(StrUtils.splitSmart(wcFields, ','));
    DocList ids = rb.getResults().docList;

    SolrIndexSearcher searcher = rb.req.getSearcher();
    IndexSchema schema = rb.req.getCore().getLatestSchema();

    final Analyzer analyzer = rb.req.getCore().getLatestSchema().getAnalyzer();
    final HashMap<String, FieldType> fieldsToLoad = new HashMap<String, FieldType>();

    CharTermAttribute termAtt;
    Map<String, Map<String, Integer>> tokens = new HashMap<String, Map<String, Integer>>();

    for (String f : flds) {
        SchemaField field = schema.getFieldOrNull(f);
        if (field == null || !field.stored()) {
            continue; // ignore this field
        }
        fieldsToLoad.put(f, field.getType());
        tokens.put(f, new HashMap<String, Integer>());
    }

    DocIterator iterator = ids.iterator();
    String w;
    Integer v;
    int sz = ids.size();
    for (int i = 0; i < sz; i++) {
        int id = iterator.nextDoc();
        Document doc = searcher.doc(id, fieldsToLoad.keySet());
        for (Entry<String, FieldType> en : fieldsToLoad.entrySet()) {
            Map<String, Integer> toks = tokens.get(en.getKey());
            String[] vals = doc.getValues(en.getKey());
            FieldType fType = en.getValue();

            if (vals != null) {
                for (String s : vals) {
                    TokenStream buffer = analyzer.tokenStream(en.getKey(),
                            new StringReader(fType.indexedToReadable(s)));

                    if (!buffer.hasAttribute(CharTermAttribute.class)) {
                        continue; // empty stream
                    }

                    termAtt = buffer.getAttribute(CharTermAttribute.class);
                    buffer.reset();

                    while (buffer.incrementToken()) {
                        w = termAtt.toString();
                        v = toks.get(w);
                        if (v == null)
                            v = 0;
                        toks.put(w, ++v);
                    }

                    buffer.close();
                }
            }
        }
    }

    // TODO: filter out the tokens (use some sort of a range 0.1-0.9 by frequency)

    AtomicReader reader = searcher.getAtomicReader();
    BytesRef term;
    int df;
    String f;

    Map<String, Map<String, Double>> docFreqs = new HashMap<String, Map<String, Double>>();
    for (Entry<String, Map<String, Integer>> field : tokens.entrySet()) {
        HashMap<String, Double> idfs = new HashMap<String, Double>();
        f = field.getKey();
        docFreqs.put(f, idfs);
        int N = reader.getDocCount(f);

        for (Entry<String, Integer> token : field.getValue().entrySet()) {
            w = token.getKey();
            df = reader.docFreq(new Term(f, new BytesRef(w)));
            if (df != 0) {
                idfs.put(w, Math.log10((double) N / df));
            }
        }
    }

    HashMap<String, Object> ret = new HashMap<String, Object>();
    for (String fi : fieldsToLoad.keySet()) {
        HashMap<String, Object> va = new HashMap<String, Object>();
        va.put("tf", tokens.get(fi));
        va.put("idf", docFreqs.get(fi));
        ret.put(fi, va);
    }
    rb.rsp.add("wordcloud", ret);

}

From source file:org.apache.solr.schema.JsonPreAnalyzedParser.java

License:Apache License

@Override
public String toFormattedString(Field f) throws IOException {
    Map<String, Object> map = new LinkedHashMap<String, Object>();
    map.put(VERSION_KEY, VERSION);
    if (f.fieldType().stored()) {
        String stringValue = f.stringValue();
        if (stringValue != null) {
            map.put(STRING_KEY, stringValue);
        }
        BytesRef binaryValue = f.binaryValue();
        if (binaryValue != null) {
            map.put(BINARY_KEY,
                    Base64.byteArrayToBase64(binaryValue.bytes, binaryValue.offset, binaryValue.length));
        }
    }
    TokenStream ts = f.tokenStreamValue();
    if (ts != null) {
        List<Map<String, Object>> tokens = new LinkedList<Map<String, Object>>();
        while (ts.incrementToken()) {
            Iterator<Class<? extends Attribute>> it = ts.getAttributeClassesIterator();
            String cTerm = null;
            String tTerm = null;
            Map<String, Object> tok = new TreeMap<String, Object>();
            while (it.hasNext()) {
                Class<? extends Attribute> cl = it.next();
                if (!ts.hasAttribute(cl)) {
                    continue;
                }
                Attribute att = ts.getAttribute(cl);
                if (cl.isAssignableFrom(CharTermAttribute.class)) {
                    CharTermAttribute catt = (CharTermAttribute) att;
                    cTerm = new String(catt.buffer(), 0, catt.length());
                } else if (cl.isAssignableFrom(TermToBytesRefAttribute.class)) {
                    TermToBytesRefAttribute tatt = (TermToBytesRefAttribute) att;
                    tTerm = tatt.getBytesRef().utf8ToString();
                } else {
                    if (cl.isAssignableFrom(FlagsAttribute.class)) {
                        tok.put(FLAGS_KEY, Integer.toHexString(((FlagsAttribute) att).getFlags()));
                    } else if (cl.isAssignableFrom(OffsetAttribute.class)) {
                        tok.put(OFFSET_START_KEY, ((OffsetAttribute) att).startOffset());
                        tok.put(OFFSET_END_KEY, ((OffsetAttribute) att).endOffset());
                    } else if (cl.isAssignableFrom(PayloadAttribute.class)) {
                        BytesRef p = ((PayloadAttribute) att).getPayload();
                        if (p != null && p.length > 0) {
                            tok.put(PAYLOAD_KEY, Base64.byteArrayToBase64(p.bytes, p.offset, p.length));
                        }
                    } else if (cl.isAssignableFrom(PositionIncrementAttribute.class)) {
                        tok.put(POSINCR_KEY, ((PositionIncrementAttribute) att).getPositionIncrement());
                    } else if (cl.isAssignableFrom(TypeAttribute.class)) {
                        tok.put(TYPE_KEY, ((TypeAttribute) att).type());
                    } else {
                        tok.put(cl.getName(), att.toString());
                    }
                }
            }
            String term = null;
            if (cTerm != null) {
                term = cTerm;
            } else {
                term = tTerm;
            }
            if (term != null && term.length() > 0) {
                tok.put(TOKEN_KEY, term);
            }
            tokens.add(tok);
        }
        map.put(TOKENS_KEY, tokens);
    }
    return JSONUtil.toJSON(map, -1);
}

From source file:org.apache.solr.schema.SimplePreAnalyzedParser.java

License:Apache License

@Override
public String toFormattedString(Field f) throws IOException {
    StringBuilder sb = new StringBuilder();
    sb.append(VERSION + " ");
    if (f.fieldType().stored()) {
        String s = f.stringValue();
        if (s != null) {
            // encode the equals sign
            s = s.replaceAll("=", "\\\\=");
            sb.append('=');
            sb.append(s);
            sb.append('=');
        }
    }
    TokenStream ts = f.tokenStreamValue();
    if (ts != null) {
        StringBuilder tok = new StringBuilder();
        boolean next = false;
        while (ts.incrementToken()) {
            if (next) {
                sb.append(' ');
            } else {
                next = true;
            }
            tok.setLength(0);
            Iterator<Class<? extends Attribute>> it = ts.getAttributeClassesIterator();
            String cTerm = null;
            String tTerm = null;
            while (it.hasNext()) {
                Class<? extends Attribute> cl = it.next();
                if (!ts.hasAttribute(cl)) {
                    continue;
                }
                Attribute att = ts.getAttribute(cl);
                if (cl.isAssignableFrom(CharTermAttribute.class)) {
                    CharTermAttribute catt = (CharTermAttribute) att;
                    cTerm = escape(catt.buffer(), catt.length());
                } else if (cl.isAssignableFrom(TermToBytesRefAttribute.class)) {
                    TermToBytesRefAttribute tatt = (TermToBytesRefAttribute) att;
                    char[] tTermChars = tatt.getBytesRef().utf8ToString().toCharArray();
                    tTerm = escape(tTermChars, tTermChars.length);
                } else {
                    if (tok.length() > 0)
                        tok.append(',');
                    if (cl.isAssignableFrom(FlagsAttribute.class)) {
                        tok.append("f=" + Integer.toHexString(((FlagsAttribute) att).getFlags()));
                    } else if (cl.isAssignableFrom(OffsetAttribute.class)) {
                        tok.append("s=" + ((OffsetAttribute) att).startOffset() + ",e="
                                + ((OffsetAttribute) att).endOffset());
                    } else if (cl.isAssignableFrom(PayloadAttribute.class)) {
                        BytesRef p = ((PayloadAttribute) att).getPayload();
                        if (p != null && p.length > 0) {
                            tok.append("p=" + bytesToHex(p.bytes, p.offset, p.length));
                        } else if (tok.length() > 0) {
                            tok.setLength(tok.length() - 1); // remove the last comma
                        }
                    } else if (cl.isAssignableFrom(PositionIncrementAttribute.class)) {
                        tok.append("i=" + ((PositionIncrementAttribute) att).getPositionIncrement());
                    } else if (cl.isAssignableFrom(TypeAttribute.class)) {
                        tok.append("y=" + escape(((TypeAttribute) att).type()));
                    } else {

                        tok.append(cl.getName() + "=" + escape(att.toString()));
                    }
                }
            }
            String term = null;
            if (cTerm != null) {
                term = cTerm;
            } else {
                term = tTerm;
            }
            if (term != null && term.length() > 0) {
                if (tok.length() > 0) {
                    tok.insert(0, term + ",");
                } else {
                    tok.insert(0, term);
                }
            }
            sb.append(tok);
        }
    }
    return sb.toString();
}

From source file:org.elasticsearch.analysis.common.ShingleTokenFilterTests.java

License:Apache License

public void testPreConfiguredShingleFilterDisableGraphAttribute() throws Exception {
    ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(
            Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
                    .put("index.analysis.filter.my_ascii_folding.type", "asciifolding").build(),
            new CommonAnalysisPlugin());
    TokenFilterFactory tokenFilter = analysis.tokenFilter.get("shingle");
    Tokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader("this is a test"));
    TokenStream tokenStream = tokenFilter.create(tokenizer);
    assertTrue(tokenStream.hasAttribute(DisableGraphAttribute.class));
}

From source file:org.elasticsearch.search.highlight.PlainHighlighter.java

License:Apache License

public HighlightField highlight(HighlighterContext highlighterContext) {
    SearchContextHighlight.Field field = highlighterContext.field;
    SearchContext context = highlighterContext.context;
    FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
    FieldMapper<?> mapper = highlighterContext.mapper;

    Encoder encoder = field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML
            : HighlightUtils.Encoders.DEFAULT;

    if (!hitContext.cache().containsKey(CACHE_KEY)) {
        Map<FieldMapper<?>, org.apache.lucene.search.highlight.Highlighter> mappers = Maps.newHashMap();
        hitContext.cache().put(CACHE_KEY, mappers);
    }
    @SuppressWarnings("unchecked")
    Map<FieldMapper<?>, org.apache.lucene.search.highlight.Highlighter> cache = (Map<FieldMapper<?>, org.apache.lucene.search.highlight.Highlighter>) hitContext
            .cache().get(CACHE_KEY);

    org.apache.lucene.search.highlight.Highlighter entry = cache.get(mapper);
    if (entry == null) {
        Query query = highlighterContext.query.originalQuery();
        QueryScorer queryScorer = new CustomQueryScorer(query,
                field.fieldOptions().requireFieldMatch() ? mapper.names().indexName() : null);
        queryScorer.setExpandMultiTermQuery(true);
        Fragmenter fragmenter;
        if (field.fieldOptions().numberOfFragments() == 0) {
            fragmenter = new NullFragmenter();
        } else if (field.fieldOptions().fragmenter() == null) {
            fragmenter = new SimpleSpanFragmenter(queryScorer, field.fieldOptions().fragmentCharSize());
        } else if ("simple".equals(field.fieldOptions().fragmenter())) {
            fragmenter = new SimpleFragmenter(field.fieldOptions().fragmentCharSize());
        } else if ("span".equals(field.fieldOptions().fragmenter())) {
            fragmenter = new SimpleSpanFragmenter(queryScorer, field.fieldOptions().fragmentCharSize());
        } else {
            throw new ElasticsearchIllegalArgumentException(
                    "unknown fragmenter option [" + field.fieldOptions().fragmenter() + "] for the field ["
                            + highlighterContext.fieldName + "]");
        }
        Formatter formatter = new SimpleHTMLFormatter(field.fieldOptions().preTags()[0],
                field.fieldOptions().postTags()[0]);

        entry = new org.apache.lucene.search.highlight.Highlighter(formatter, encoder, queryScorer);
        entry.setTextFragmenter(fragmenter);
        // always highlight across all data
        entry.setMaxDocCharsToAnalyze(Integer.MAX_VALUE);

        cache.put(mapper, entry);
    }

    // a HACK to make highlighter do highlighting, even though its using the single frag list builder
    int numberOfFragments = field.fieldOptions().numberOfFragments() == 0 ? 1
            : field.fieldOptions().numberOfFragments();
    ArrayList<TextFragment> fragsList = new ArrayList<TextFragment>();
    List<Object> textsToHighlight;

    try {
        textsToHighlight = HighlightUtils.loadFieldValues(field, mapper, context, hitContext);

        for (Object textToHighlight : textsToHighlight) {
            String text = textToHighlight.toString();
            Analyzer analyzer = context.mapperService().documentMapper(hitContext.hit().type()).mappers()
                    .indexAnalyzer();
            TokenStream tokenStream = analyzer.tokenStream(mapper.names().indexName(), text);
            if (!tokenStream.hasAttribute(CharTermAttribute.class)
                    || !tokenStream.hasAttribute(OffsetAttribute.class)) {
                // can't perform highlighting if the stream has no terms (binary token stream) or no offsets
                continue;
            }
            TextFragment[] bestTextFragments = entry.getBestTextFragments(tokenStream, text, false,
                    numberOfFragments);
            for (TextFragment bestTextFragment : bestTextFragments) {
                if (bestTextFragment != null && bestTextFragment.getScore() > 0) {
                    fragsList.add(bestTextFragment);
                }
            }
        }
    } catch (Exception e) {
        throw new FetchPhaseExecutionException(context,
                "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
    }
    if (field.fieldOptions().scoreOrdered()) {
        CollectionUtil.introSort(fragsList, new Comparator<TextFragment>() {
            public int compare(TextFragment o1, TextFragment o2) {
                return Math.round(o2.getScore() - o1.getScore());
            }
        });
    }
    String[] fragments;
    // number_of_fragments is set to 0 but we have a multivalued field
    if (field.fieldOptions().numberOfFragments() == 0 && textsToHighlight.size() > 1 && fragsList.size() > 0) {
        fragments = new String[fragsList.size()];
        for (int i = 0; i < fragsList.size(); i++) {
            fragments[i] = fragsList.get(i).toString();
        }
    } else {
        // refine numberOfFragments if needed
        numberOfFragments = fragsList.size() < numberOfFragments ? fragsList.size() : numberOfFragments;
        fragments = new String[numberOfFragments];
        for (int i = 0; i < fragments.length; i++) {
            fragments[i] = fragsList.get(i).toString();
        }
    }

    if (fragments.length > 0) {
        return new HighlightField(highlighterContext.fieldName, StringText.convertFromStringArray(fragments));
    }

    int noMatchSize = highlighterContext.field.fieldOptions().noMatchSize();
    if (noMatchSize > 0 && textsToHighlight.size() > 0) {
        // Pull an excerpt from the beginning of the string but make sure to split the string on a term boundary.
        String fieldContents = textsToHighlight.get(0).toString();
        Analyzer analyzer = context.mapperService().documentMapper(hitContext.hit().type()).mappers()
                .indexAnalyzer();
        int end;
        try {
            end = findGoodEndForNoHighlightExcerpt(noMatchSize,
                    analyzer.tokenStream(mapper.names().indexName(), fieldContents));
        } catch (Exception e) {
            throw new FetchPhaseExecutionException(context,
                    "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
        }
        if (end > 0) {
            return new HighlightField(highlighterContext.fieldName,
                    new Text[] { new StringText(fieldContents.substring(0, end)) });
        }
    }
    return null;
}

From source file:org.elasticsearch.search.highlight.PlainHighlighter.java

License:Apache License

private static int findGoodEndForNoHighlightExcerpt(int noMatchSize, TokenStream tokenStream)
        throws IOException {
    try {
        if (!tokenStream.hasAttribute(OffsetAttribute.class)) {
            // Can't split on term boundaries without offsets
            return -1;
        }
        int end = -1;
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            OffsetAttribute attr = tokenStream.getAttribute(OffsetAttribute.class);
            if (attr.endOffset() >= noMatchSize) {
                // Jump to the end of this token if it wouldn't put us past the boundary
                if (attr.endOffset() == noMatchSize) {
                    end = noMatchSize;
                }
                return end;
            }
            end = attr.endOffset();
        }
        // We've exhausted the token stream so we should just highlight everything.
        return end;
    } finally {
        tokenStream.end();
        tokenStream.close();
    }
}

From source file:org.fastcatsearch.ir.index.SearchIndexWriter.java

License:Apache License

private void indexValue(int docNo, int i, Object value, boolean isIgnoreCase, int positionIncrementGap)
        throws IOException, IRException {
    if (value == null) {
        return;
    }
    char[] fieldValue = value.toString().toCharArray();
    TokenStream tokenStream = indexAnalyzerList[i].tokenStream(indexId, new CharArrayReader(fieldValue),
            indexingAnalyzerOption);
    tokenStream.reset();
    CharsRefTermAttribute termAttribute = null;
    PositionIncrementAttribute positionAttribute = null;
    StopwordAttribute stopwordAttribute = null;
    AdditionalTermAttribute additionalTermAttribute = null;
    CharTermAttribute charTermAttribute = null;
    // Look up the optional attributes this token stream exposes.

    if (tokenStream.hasAttribute(CharsRefTermAttribute.class)) {
        termAttribute = tokenStream.getAttribute(CharsRefTermAttribute.class);
    }
    if (tokenStream.hasAttribute(PositionIncrementAttribute.class)) {
        positionAttribute = tokenStream.getAttribute(PositionIncrementAttribute.class);
    }
    if (tokenStream.hasAttribute(AdditionalTermAttribute.class)) {
        additionalTermAttribute = tokenStream.getAttribute(AdditionalTermAttribute.class);
    }

    // Stopword attribute, present only if the analyzer marks stopwords.
    if (tokenStream.hasAttribute(StopwordAttribute.class)) {
        stopwordAttribute = tokenStream.getAttribute(StopwordAttribute.class);
    }
    if (tokenStream.hasAttribute(CharTermAttribute.class)) {
        charTermAttribute = tokenStream.getAttribute(CharTermAttribute.class);
    }

    int lastPosition = 0;

    while (tokenStream.incrementToken()) {
        CharVector key = null;
        if (termAttribute != null) {
            CharsRef charRef = termAttribute.charsRef();
            char[] buffer = new char[charRef.length()];
            System.arraycopy(charRef.chars, charRef.offset, buffer, 0, charRef.length);
            key = new CharVector(buffer, 0, buffer.length);
        } else {
            key = new CharVector(charTermAttribute.buffer(), 0, charTermAttribute.length());
        }

        int position = -1;
        if (positionAttribute != null) {
            position = positionAttribute.getPositionIncrement() + positionIncrementGap;
            lastPosition = position;
        }
        //         logger.debug("FIELD#{}: {} >> {} ({})", indexId, key, docNo, position);
        if (stopwordAttribute != null && stopwordAttribute.isStopword()) {
            //ignore
        } else {
            memoryPosting.add(key, docNo, position);
        }
        //         if(synonymAttribute != null) {
        //            CharVector[] synonym = synonymAttribute.getSynonym();
        //            if(synonym != null) {
        //               for(CharVector token : synonym) {
        //                  memoryPosting.add(token, docNo, position);
        //               }
        //            }
        //         }
        if (additionalTermAttribute != null && additionalTermAttribute.size() > 0) {
            Iterator<String> iter = additionalTermAttribute.iterateAdditionalTerms();
            while (iter.hasNext()) {
                CharVector token = new CharVector(iter.next().toCharArray());
                memoryPosting.add(token, docNo, lastPosition);
            }
        }
    }
}

From source file:org.fastcatsearch.plugin.analysis.RunAnalyzer.java

public static void main(String[] args) throws IOException {
    if (args.length != 3) {
        printUsage();
        System.exit(0);
    }

    File pluginDir = new File(args[0]);
    String pluginClassName = args[1];
    String analyzerId = args[2];
    RunAnalyzer runAnalyzer = new RunAnalyzer(pluginDir, pluginClassName);
    AnalyzerPool analyzerPool = runAnalyzer.getAnalyzerPool(analyzerId);
    Analyzer analyzer = null;

    try {
        analyzer = analyzerPool.getFromPool();
        // The analyzer instance is borrowed from the pool and returned in the finally block.

        Scanner sc = new Scanner(System.in);
        System.out.println("==================================");
        System.out.println(" Fastcat analyzer");
        System.out.println(" Enter 'quit' for exit program. ");
        System.out.println("==================================");
        System.out.print("Input String: ");
        while (sc.hasNextLine()) {
            String str = sc.nextLine();
            if (str.equalsIgnoreCase("quit")) {
                break;
            }
            try {
                char[] value = str.toCharArray();
                TokenStream tokenStream = analyzer.tokenStream("", new CharArrayReader(value),
                        new AnalyzerOption());
                tokenStream.reset();

                CharsRefTermAttribute termAttribute = null;
                if (tokenStream.hasAttribute(CharsRefTermAttribute.class)) {
                    termAttribute = tokenStream.getAttribute(CharsRefTermAttribute.class);
                }
                SynonymAttribute synonymAttribute = null;
                if (tokenStream.hasAttribute(SynonymAttribute.class)) {
                    synonymAttribute = tokenStream.getAttribute(SynonymAttribute.class);
                }
                AdditionalTermAttribute additionalTermAttribute = null;
                if (tokenStream.hasAttribute(AdditionalTermAttribute.class)) {
                    additionalTermAttribute = tokenStream.getAttribute(AdditionalTermAttribute.class);
                }

                StopwordAttribute stopwordAttribute = null;
                if (tokenStream.hasAttribute(StopwordAttribute.class)) {
                    stopwordAttribute = tokenStream.getAttribute(StopwordAttribute.class);
                }

                CharTermAttribute charTermAttribute = tokenStream.getAttribute(CharTermAttribute.class);

                while (tokenStream.incrementToken()) {
                    String word = "";
                    // Prefer CharsRefTermAttribute when the analyzer provides it.
                    if (termAttribute != null) {
                        word = termAttribute.toString();
                    } else {
                        // Otherwise fall back to CharTermAttribute.
                        word = charTermAttribute.toString();
                    }

                    // Skip terms flagged as stopwords.
                    if (stopwordAttribute != null && stopwordAttribute.isStopword()) {
                        continue;
                    }

                    // Print the analyzed term.
                    System.out.print(">> ");
                    System.out.println(word);

                    // Print synonyms for this term, if any.
                    if (synonymAttribute != null) {
                        List synonyms = synonymAttribute.getSynonyms();
                        if (synonyms != null) {
                            for (Object synonymObj : synonyms) {
                                if (synonymObj instanceof CharVector) {
                                    CharVector synonym = (CharVector) synonymObj;
                                    System.out.print("S> ");
                                    System.out.println(synonym);
                                } else if (synonymObj instanceof List) {
                                    List synonymList = (List) synonymObj;
                                    for (Object synonym : synonymList) {
                                        System.out.print("S> ");
                                        System.out.println(synonym);
                                    }
                                }
                            }
                        }
                    }

                    // Print any additional terms emitted by the analyzer.
                    if (additionalTermAttribute != null && additionalTermAttribute.size() > 0) {
                        Iterator<String> termIter = additionalTermAttribute.iterateAdditionalTerms();
                        while (termIter.hasNext()) {
                            String token = termIter.next();
                            System.out.print("A> ");
                            System.out.println(token);
                        }
                    }
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
            System.out.print("Input String: ");
        }
    } finally {
        if (analyzer != null) {
            analyzerPool.releaseToPool(analyzer);
        }
    }
    System.out.print("Bye!");
}

From source file:org.sc.probro.lucene.BiothesaurusSearcher.java

License:Apache License

public String[] tokenize(String input) {
    ArrayList<String> tokens = new ArrayList<String>();
    try {
        TokenStream stream = analyzer.tokenStream(null, new StringReader(input));
        TermAttribute termattr = (TermAttribute) stream.getAttribute(TermAttribute.class);
        //stream = new LowerCaseFilter(stream);

        stream.reset();

        while (stream.incrementToken()) {
            if (stream.hasAttribute(TermAttribute.class)) {
                String term = termattr.term();
                tokens.add(term);
            }
        }

        stream.end();
        stream.close();

    } catch (IllegalArgumentException e) {
        System.err.println(String.format("Phrase: \"%s\"", input));
        e.printStackTrace(System.err);
    } catch (IOException e) {
        System.err.println(String.format("Phrase: \"%s\"", input));
        e.printStackTrace();
    }

    return tokens.toArray(new String[0]);
}