Example usage for org.apache.lucene.analysis TokenStream reset

List of usage examples for org.apache.lucene.analysis TokenStream reset

Introduction

In this page you can find the example usage for org.apache.lucene.analysis TokenStream reset.

Prototype

public void reset() throws IOException 

Source Link

Document

This method is called by a consumer before it begins consumption via {@code incrementToken()}.

Usage

From source file:org.exist.indexing.lucene.XMLToQuery.java

License:Open Source License

/**
 * Builds a {@link SpanNearQuery} from a query XML element.
 * <p>
 * If the element has no child elements, its text is analyzed and each token
 * becomes a {@link SpanTermQuery} clause; otherwise the child elements are
 * parsed recursively into span sub-queries.
 *
 * @param field    the index field the query targets
 * @param node     the XML element describing the near query; an optional
 *                 {@code ordered="yes"} attribute requests in-order matching
 * @param analyzer the analyzer used to tokenize the element's text content
 * @return the assembled span-near query
 * @throws XPathException if tokenizing the phrase fails
 */
private SpanQuery nearQuery(String field, Element node, Analyzer analyzer) throws XPathException {
    int slop = getSlop(node);
    if (slop < 0)
        slop = 0;
    boolean inOrder = true;
    if (node.hasAttribute("ordered"))
        inOrder = node.getAttribute("ordered").equals("yes");

    if (!hasElementContent(node)) {
        String qstr = getText(node);
        List<SpanTermQuery> list = new ArrayList<>(8);
        // try-with-resources guarantees the stream is closed even if
        // reset()/incrementToken() throws (the old code leaked it on error).
        try (TokenStream stream = analyzer.tokenStream(field, new StringReader(qstr))) {
            CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                list.add(new SpanTermQuery(new Term(field, termAttr.toString())));
            }
            stream.end();
        } catch (IOException e) {
            // Preserve the cause, matching getTerm()'s error handling.
            throw new XPathException("Error while parsing phrase query: " + qstr, e);
        }
        return new SpanNearQuery(list.toArray(new SpanTermQuery[list.size()]), slop, inOrder);
    }
    SpanQuery[] children = parseSpanChildren(field, node, analyzer);
    return new SpanNearQuery(children, slop, inOrder);
}

From source file:org.exist.indexing.lucene.XMLToQuery.java

License:Open Source License

/**
 * Analyzes {@code text} for {@code field} and returns the first token
 * produced, or {@code null} if the analyzer emits no tokens.
 *
 * @param field    the index field whose analyzer configuration applies
 * @param text     the raw text to analyze
 * @param analyzer the analyzer to run
 * @return the first token's term text, or {@code null} if none
 * @throws XPathException if the analyzer fails with an I/O error
 */
private String getTerm(String field, String text, Analyzer analyzer) throws XPathException {
    String term = null;
    // try-with-resources closes the stream even when reset()/incrementToken()
    // throws; the previous version leaked the stream on that path.
    try (TokenStream stream = analyzer.tokenStream(field, new StringReader(text))) {
        CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        if (stream.incrementToken()) {
            term = termAttr.toString();
        }
        stream.end();
        return term;
    } catch (IOException e) {
        throw new XPathException("Lucene index error while creating query: " + e.getMessage(), e);
    }
}

From source file:org.exist.indexing.range.RangeIndexWorker.java

License:Open Source License

/**
 * Analyzes {@code data} for the given field/qname and returns the first
 * token as a {@link BytesRef}, or the raw (possibly lower-cased) text when
 * no analyzer is configured.
 *
 * @param field the field name used to look up the analyzer
 * @param qname the qualified element/attribute name, also used for lookup
 * @param data  the raw text to analyze
 * @param docs  the document set whose index configuration applies
 * @return the first analyzed token, or {@code null} if the analyzer
 *         produced no tokens; the raw text wrapped in a BytesRef when no
 *         analyzer applies
 * @throws XPathException wrapping any analyzer I/O error
 */
protected BytesRef analyzeContent(String field, QName qname, String data, DocumentSet docs)
        throws XPathException {
    final Analyzer analyzer = getAnalyzer(qname, field, docs);
    if (!isCaseSensitive(qname, field, docs)) {
        // NOTE(review): default-locale toLowerCase() — behavior varies by JVM
        // locale (e.g. Turkish dotless i); presumably intentional to match how
        // the index was built — confirm before changing.
        data = data.toLowerCase();
    }
    if (analyzer == null) {
        // No analyzer configured: index/query the value verbatim.
        return new BytesRef(data);
    }
    try {
        TokenStream stream = analyzer.tokenStream(field, new StringReader(data));
        TermToBytesRefAttribute termAttr = stream.addAttribute(TermToBytesRefAttribute.class);
        BytesRef token = null;
        try {
            stream.reset();
            // Only the first token is used; any further tokens are ignored.
            if (stream.incrementToken()) {
                termAttr.fillBytesRef();
                // NOTE(review): getBytesRef() returns the attribute's shared
                // buffer, which escapes stream.close() below — assumes callers
                // consume it before the analyzer reuses the stream; verify.
                token = termAttr.getBytesRef();
            }
            stream.end();
        } finally {
            stream.close();
        }
        return token;
    } catch (IOException e) {
        throw new XPathException("Error analyzing the query string: " + e.getMessage(), e);
    }
}

From source file:org.fastcatsearch.ir.index.SearchIndexWriter.java

License:Apache License

/**
 * Tokenizes one field value and adds every resulting term to the in-memory
 * posting list for the given document.
 *
 * @param docNo                the document number the terms belong to
 * @param i                    index of the analyzer to use in {@code indexAnalyzerList}
 * @param value                the field value; {@code null} is silently skipped
 * @param isIgnoreCase         currently unused by this method (kept for interface
 *                             compatibility with callers)
 * @param positionIncrementGap extra position gap added to each token's increment
 * @throws IOException if the analyzer fails
 * @throws IRException on indexing errors from downstream code
 */
private void indexValue(int docNo, int i, Object value, boolean isIgnoreCase, int positionIncrementGap)
        throws IOException, IRException {
    if (value == null) {
        return;
    }
    char[] fieldValue = value.toString().toCharArray();
    TokenStream tokenStream = indexAnalyzerList[i].tokenStream(indexId, new CharArrayReader(fieldValue),
            indexingAnalyzerOption);
    // Ensure the stream is always ended and closed; the previous version
    // leaked it (no end()/close() at all).
    try {
        tokenStream.reset();

        // Optional attributes: each is fetched only if the analyzer chain
        // actually provides it, and checked for null before use below.
        CharsRefTermAttribute termAttribute = null;
        PositionIncrementAttribute positionAttribute = null;
        StopwordAttribute stopwordAttribute = null;
        AdditionalTermAttribute additionalTermAttribute = null;
        CharTermAttribute charTermAttribute = null;

        if (tokenStream.hasAttribute(CharsRefTermAttribute.class)) {
            termAttribute = tokenStream.getAttribute(CharsRefTermAttribute.class);
        }
        if (tokenStream.hasAttribute(PositionIncrementAttribute.class)) {
            positionAttribute = tokenStream.getAttribute(PositionIncrementAttribute.class);
        }
        if (tokenStream.hasAttribute(AdditionalTermAttribute.class)) {
            additionalTermAttribute = tokenStream.getAttribute(AdditionalTermAttribute.class);
        }
        if (tokenStream.hasAttribute(StopwordAttribute.class)) {
            stopwordAttribute = tokenStream.getAttribute(StopwordAttribute.class);
        }
        if (tokenStream.hasAttribute(CharTermAttribute.class)) {
            charTermAttribute = tokenStream.getAttribute(CharTermAttribute.class);
        }

        int lastPosition = 0;

        while (tokenStream.incrementToken()) {
            CharVector key = null;
            if (termAttribute != null) {
                // Copy out of the attribute's shared CharsRef buffer, which is
                // reused on the next incrementToken() call.
                CharsRef charRef = termAttribute.charsRef();
                char[] buffer = new char[charRef.length()];
                System.arraycopy(charRef.chars, charRef.offset, buffer, 0, charRef.length);
                key = new CharVector(buffer, 0, buffer.length);
            } else {
                key = new CharVector(charTermAttribute.buffer(), 0, charTermAttribute.length());
            }

            int position = -1;
            if (positionAttribute != null) {
                // NOTE(review): this is the per-token increment plus the gap,
                // not a cumulative position — confirm that is what the posting
                // list expects.
                position = positionAttribute.getPositionIncrement() + positionIncrementGap;
                lastPosition = position;
            }
            if (stopwordAttribute != null && stopwordAttribute.isStopword()) {
                // Stopwords are skipped entirely.
            } else {
                memoryPosting.add(key, docNo, position);
            }
            // Additional terms (e.g. sub-tokens) are posted at the last seen position.
            if (additionalTermAttribute != null && additionalTermAttribute.size() > 0) {
                Iterator<String> iter = additionalTermAttribute.iterateAdditionalTerms();
                while (iter.hasNext()) {
                    CharVector token = new CharVector(iter.next().toCharArray());
                    memoryPosting.add(token, docNo, lastPosition);
                }
            }
        }
        tokenStream.end();
    } finally {
        tokenStream.close();
    }
}

From source file:org.fastcatsearch.plugin.analysis.RunAnalyzer.java

/**
 * Interactive console driver: reads lines from stdin, runs them through the
 * configured analyzer plugin, and prints tokens ({@code >>}), synonyms
 * ({@code S>}) and additional terms ({@code A>}) until "quit" is entered.
 *
 * @param args {@code [pluginDir, pluginClassName, analyzerId]}
 * @throws IOException if the plugin cannot be loaded
 */
public static void main(String[] args) throws IOException {
    if (args.length != 3) {
        printUsage();
        System.exit(0);
    }

    File pluginDir = new File(args[0]);
    String pluginClassName = args[1];
    String analyzerId = args[2];
    RunAnalyzer runAnalyzer = new RunAnalyzer(pluginDir, pluginClassName);
    AnalyzerPool analyzerPool = runAnalyzer.getAnalyzerPool(analyzerId);
    Analyzer analyzer = null;

    try {
        analyzer = analyzerPool.getFromPool();

        Scanner sc = new Scanner(System.in);
        System.out.println("==================================");
        System.out.println(" Fastcat analyzer");
        System.out.println(" Enter 'quit' for exit program. ");
        System.out.println("==================================");
        System.out.print("Input String: ");
        while (sc.hasNextLine()) {
            String str = sc.nextLine();
            if (str.equalsIgnoreCase("quit")) {
                break;
            }
            try {
                char[] value = str.toCharArray();
                TokenStream tokenStream = analyzer.tokenStream("", new CharArrayReader(value),
                        new AnalyzerOption());
                // Always end/close the stream; the old code never closed it.
                try {
                    tokenStream.reset();

                    // Optional attributes — present only if this analyzer
                    // chain provides them, hence the null checks below.
                    CharsRefTermAttribute termAttribute = null;
                    if (tokenStream.hasAttribute(CharsRefTermAttribute.class)) {
                        termAttribute = tokenStream.getAttribute(CharsRefTermAttribute.class);
                    }
                    SynonymAttribute synonymAttribute = null;
                    if (tokenStream.hasAttribute(SynonymAttribute.class)) {
                        synonymAttribute = tokenStream.getAttribute(SynonymAttribute.class);
                    }
                    AdditionalTermAttribute additionalTermAttribute = null;
                    if (tokenStream.hasAttribute(AdditionalTermAttribute.class)) {
                        additionalTermAttribute = tokenStream.getAttribute(AdditionalTermAttribute.class);
                    }
                    StopwordAttribute stopwordAttribute = null;
                    if (tokenStream.hasAttribute(StopwordAttribute.class)) {
                        stopwordAttribute = tokenStream.getAttribute(StopwordAttribute.class);
                    }

                    // Assumed always present; getAttribute throws if it is not.
                    CharTermAttribute charTermAttribute = tokenStream.getAttribute(CharTermAttribute.class);

                    while (tokenStream.incrementToken()) {
                        String word = "";
                        // Prefer CharsRefTermAttribute when available, else
                        // fall back to the standard CharTermAttribute.
                        if (termAttribute != null) {
                            word = termAttribute.toString();
                        } else {
                            word = charTermAttribute.toString();
                        }

                        // BUG FIX: guard against analyzers without a
                        // StopwordAttribute (was an unconditional call → NPE).
                        if (stopwordAttribute != null && stopwordAttribute.isStopword()) {
                            continue;
                        }

                        System.out.print(">> ");
                        System.out.println(word);

                        // Print synonyms, which may be single terms or nested lists.
                        if (synonymAttribute != null) {
                            List synonyms = synonymAttribute.getSynonyms();
                            if (synonyms != null) {
                                for (Object synonymObj : synonyms) {
                                    if (synonymObj instanceof CharVector) {
                                        CharVector synonym = (CharVector) synonymObj;
                                        System.out.print("S> ");
                                        System.out.println(synonym);
                                    } else if (synonymObj instanceof List) {
                                        List synonymList = (List) synonymObj;
                                        for (Object synonym : synonymList) {
                                            System.out.print("S> ");
                                            System.out.println(synonym);
                                        }
                                    }
                                }
                            }
                        }

                        // Print additional terms emitted alongside this token.
                        if (additionalTermAttribute != null && additionalTermAttribute.size() > 0) {
                            Iterator<String> termIter = additionalTermAttribute.iterateAdditionalTerms();
                            while (termIter.hasNext()) {
                                String token = termIter.next();
                                System.out.print("A> ");
                                // BUG FIX: print the additional term itself,
                                // not the base word.
                                System.out.println(token);
                            }
                        }
                    }
                    tokenStream.end();
                } finally {
                    tokenStream.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
            System.out.print("Input String: ");
        }
    } finally {
        if (analyzer != null) {
            analyzerPool.releaseToPool(analyzer);
        }
    }
    System.out.print("Bye!");
}

From source file:org.gbif.namefinder.analysis.sciname.SciNameAnalyzerTest.java

License:Apache License

/**
 * Creates a reset token stream over {@code input} using a fresh
 * {@link SciNameAnalyzer}. The stream is ready for
 * {@code incrementToken()}; consuming and closing it is the caller's
 * responsibility.
 *
 * @param input the character source to tokenize
 * @return a reset, unconsumed token stream
 * @throws IOException if resetting the stream fails
 */
private TokenStream getTokens(Reader input) throws IOException {
    final SciNameAnalyzer analyzer = new SciNameAnalyzer();
    final TokenStream stream = analyzer.tokenStream(null, input);
    stream.reset();
    return stream;
}

From source file:org.genemania.completion.lucene.GeneCompletionProvider.java

License:Open Source License

/**
 * Resolves a gene symbol to its node id by building a phrase query from the
 * analyzed symbol and searching the gene index.
 *
 * @param symbol the gene symbol to look up
 * @return the first matching node id, or {@code null} if there is no match
 *         or an I/O error occurred (errors are logged)
 */
public Long getNodeId(String symbol) {
    try {
        TokenStream tokens = analyze(symbol);
        PhraseQuery query = new PhraseQuery();
        // Close the stream even if reset()/incrementToken() throws; the
        // previous version leaked it on that path.
        try {
            tokens.reset();
            while (tokens.incrementToken()) {
                TermAttribute term = tokens.getAttribute(TermAttribute.class);
                query.add(new Term(GeneIndexBuilder.GENE_FIELD, term.term()));
            }
            tokens.end();
        } finally {
            tokens.close();
        }

        final Set<Long> nodes = new HashSet<Long>();
        searcher.search(query, new AbstractCollector() {
            @Override
            public void handleHit(int id) {
                try {
                    Document document = searcher.doc(id);
                    nodes.add(Long.parseLong(document.get(GeneIndexBuilder.NODE_ID_FIELD)));
                } catch (IOException e) {
                    log(e);
                }
            }
        });
        // Multiple hits are possible; arbitrarily return the first.
        if (nodes.size() > 0) {
            return nodes.iterator().next();
        }
    } catch (IOException e) {
        log(e);
    }
    return null;
}

From source file:org.genemania.data.classification.lucene.LuceneGeneClassifier.java

License:Open Source License

/**
 * Classifies a gene symbol by searching the index with a phrase query built
 * from the analyzed symbol; each hit's organism id is reported to
 * {@code handler}.
 *
 * @param symbol  the gene symbol to classify
 * @param handler receives (symbol, organismId) for every matching document
 * @throws ApplicationException wrapping any I/O error from analysis or search
 */
public void classify(final String symbol, final IGeneClassificationHandler handler)
        throws ApplicationException {
    try {
        TokenStream tokens = analyze(symbol);
        PhraseQuery query = new PhraseQuery();
        // Close the stream even if reset()/incrementToken() throws; the
        // previous version leaked it on that path.
        try {
            tokens.reset();
            while (tokens.incrementToken()) {
                TermAttribute term = tokens.getAttribute(TermAttribute.class);
                query.add(new Term(LuceneMediator.GENE_SYMBOL, term.term()));
            }
            tokens.end();
        } finally {
            tokens.close();
        }

        searcher.search(query, new AbstractCollector() {
            @Override
            public void handleHit(int doc) {
                try {
                    Document document = searcher.doc(doc);
                    long organismId = Long.parseLong(document.get(LuceneMediator.GENE_ORGANISM_ID));
                    handler.handleClassification(symbol, organismId);
                } catch (IOException e) {
                    log(e);
                }
            }
        });
    } catch (IOException e) {
        throw new ApplicationException(e);
    }
}

From source file:org.genemania.mediator.lucene.LuceneMediator.java

License:Open Source License

/**
 * Builds a {@link PhraseQuery} over {@code field} from the analyzed tokens
 * of {@code phrase}, in token order.
 *
 * @param field  the field each term targets
 * @param phrase the raw phrase to analyze
 * @return the assembled phrase query (empty if the analyzer emits no tokens)
 * @throws IOException if analysis fails
 */
protected PhraseQuery createPhraseQuery(String field, String phrase) throws IOException {
    TokenStream stream = analyze(phrase);
    PhraseQuery query = new PhraseQuery();
    // Close the stream even if reset()/incrementToken() throws; the previous
    // version leaked it on that path.
    try {
        stream.reset();
        while (stream.incrementToken()) {
            TermAttribute term = stream.getAttribute(TermAttribute.class);
            query.add(new Term(field, term.term()));
        }
        stream.end();
    } finally {
        stream.close();
    }
    return query;
}

From source file:org.gridkit.coherence.search.lucene.CapturedTokenStream.java

License:Apache License

/**
 * Consumes {@code ts} and appends its captured token states to this stream.
 *
 * @param ts          the source token stream; must expose a
 *                    {@code PositionIncrementAttribute} (getAttribute throws
 *                    otherwise)
 * @param positionGap extra position increment applied to the first appended
 *                    token only
 * @param offsetShift amount added to every token's start/end offset; when 0,
 *                    offsets are left untouched and the offset attribute is
 *                    not required
 * @throws IOException if consuming the source stream fails
 */
public void append(TokenStream ts, int positionGap, int offsetShift) throws IOException {
    final PositionIncrementAttribute posIncr = ts.getAttribute(PositionIncrementAttribute.class);
    final OffsetAttribute offsets = (offsetShift != 0) ? ts.getAttribute(OffsetAttribute.class) : null;
    ts.reset();
    while (ts.incrementToken()) {
        if (positionGap != 0) {
            // Inject the gap into the first token, then fall back to the
            // stream's own increments.
            posIncr.setPositionIncrement(positionGap);
            positionGap = 0;
        }
        if (offsets != null) {
            offsets.setOffset(offsetShift + offsets.startOffset(), offsetShift + offsets.endOffset());
        }
        tokens.add(ts.captureState());
        lastPos += posIncr.getPositionIncrement();
    }
}