Example usage for org.apache.lucene.analysis.standard ClassicTokenizer ClassicTokenizer

Introduction

On this page you can find example usages of the ClassicTokenizer() constructor from org.apache.lucene.analysis.standard.ClassicTokenizer.

Prototype

public ClassicTokenizer() 

Document

Creates a new instance of the ClassicTokenizer.
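
The no-argument constructor produces a tokenizer that is not yet bound to any input: callers attach text with setReader(Reader) and then drive the usual TokenStream lifecycle (reset, incrementToken, end, close). A minimal standalone sketch, assuming Lucene 5.x or later (where the no-argument constructor is available):

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.standard.ClassicTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class ClassicTokenizerExample {
    public static void main(String[] args) throws IOException {
        ClassicTokenizer tokenizer = new ClassicTokenizer();
        // Attach the input after construction, then consume the token stream.
        tokenizer.setReader(new StringReader("The quick brown fox jumped over the lazy dog."));
        CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
        tokenizer.reset();
        while (tokenizer.incrementToken()) {
            System.out.println(term.toString());
        }
        tokenizer.end();
        tokenizer.close();
    }
}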

Usage

From source file:org.apache.nutch.scoring.similarity.util.LuceneAnalyzerUtil.java

License:Apache License

@Override
protected TokenStreamComponents createComponents(String fieldName) {
    Tokenizer source = new ClassicTokenizer();
    TokenStream filter = new LowerCaseFilter(source);
    if (stopSet != null) {
        filter = new StopFilter(filter, stopSet);
    }

    switch (stemFilterType) {
    case PORTERSTEM_FILTER:
        filter = new PorterStemFilter(filter);
        break;
    case ENGLISHMINIMALSTEM_FILTER:
        filter = new EnglishMinimalStemFilter(filter);
        break;
    default:
        break;
    }
    return new TokenStreamComponents(source, filter);
}
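
A custom Analyzer whose createComponents chains filters this way is consumed through the standard Analyzer.tokenStream API. A hedged sketch of the calling side (the analyzer variable stands in for an instance of LuceneAnalyzerUtil or a similar Analyzer; its constructor arguments are omitted, and the fragment is assumed to run inside a method that declares IOException):

try (TokenStream stream = analyzer.tokenStream("content", "The Quick Brown Foxes Jumped")) {
    CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
        // Terms arrive lower-cased, stop-word-filtered and, depending on stemFilterType, stemmed.
        System.out.println(term.toString());
    }
    stream.end();
}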

From source file:org.apache.nutch.scoring.similarity.util.LuceneTokenizer.java

License:Apache License

private TokenStream generateTokenStreamFromText(String content, TokenizerType tokenizerType) {
    Tokenizer tokenizer = null;
    switch (tokenizerType) {
    case CLASSIC:
        tokenizer = new ClassicTokenizer();
        break;

    case STANDARD:
    default:
        tokenizer = new StandardTokenizer();
    }

    tokenizer.setReader(new StringReader(content));

    tokenStream = tokenizer;

    return tokenStream;
}
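
The method hands back the bare tokenizer as the TokenStream, so the caller is still responsible for driving the stream lifecycle. A hypothetical helper (not part of the Nutch source; imports for List, ArrayList and the attribute classes assumed) that drains such a stream into a list of terms could look like this:

private static List<String> collectTokens(TokenStream stream) throws IOException {
    List<String> tokens = new ArrayList<>();
    CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
        tokens.add(term.toString());
    }
    stream.end();
    stream.close();
    return tokens;
}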

From source file:org.elasticsearch.analysis.common.ClassicTokenizerFactory.java

License:Apache License

@Override
public Tokenizer create() {
    ClassicTokenizer tokenizer = new ClassicTokenizer();
    tokenizer.setMaxTokenLength(maxTokenLength);
    return tokenizer;
}
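
setMaxTokenLength caps the length of tokens the scanner will emit; ClassicTokenizer skips any token longer than the limit, and the default is 255 characters in recent Lucene releases. A standalone fragment equivalent to what this factory produces, with an illustrative limit rather than the value Elasticsearch would resolve from its index settings:

ClassicTokenizer tokenizer = new ClassicTokenizer();
tokenizer.setMaxTokenLength(100); // illustrative value; tokens longer than this are skipped
tokenizer.setReader(new StringReader("text to analyse"));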

From source file:org.wltea.analyzer.sample.LuceneTokenizerDemo.java

License:Apache License

/**
 * Test ClassicTokenizer
 */
public void testCT() {
    Tokenizer tokenizer = new ClassicTokenizer();
    try {
        tokenizer.setReader(new StringReader(
                "?????IKAnalyer can analysis english text too"));
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    TokenStreamComponents tsc = new TokenStreamComponents(tokenizer);
    TokenStream ts = tsc.getTokenStream();
    OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    TypeAttribute type = ts.addAttribute(TypeAttribute.class);
    try {
        ts.reset();
        while (ts.incrementToken()) {
            System.out.println(term.toString() + "->" + offset.startOffset() + "-" + offset.endOffset() + "->"
                    + type.type());
        }
        ts.end();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
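
The same loop can be written with try-with-resources, which guarantees close() is reached, and with the caught IOException preserved as the cause; a sketch under the same imports as the demo above:

try (Tokenizer tokenizer = new ClassicTokenizer()) {
    tokenizer.setReader(new StringReader("ClassicTokenizer can analyse english text too"));
    CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
    OffsetAttribute offset = tokenizer.addAttribute(OffsetAttribute.class);
    tokenizer.reset();
    while (tokenizer.incrementToken()) {
        System.out.println(term + "->" + offset.startOffset() + "-" + offset.endOffset());
    }
    tokenizer.end();
} catch (IOException e) {
    throw new RuntimeException(e);
}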

From source file:uk.gov.nationalarchives.discovery.taxonomy.common.repository.lucene.analyzer.IAViewTextCasNoPuncAnalyser.java

License:Mozilla Public License

@Override
protected TokenStreamComponents createComponents(final String fieldName) {
    Tokenizer source = new ClassicTokenizer();

    TokenStream result = null;

    if (AnalyzerType.QUERY.equals(analyzerType)) {
        if (synonymFilterFactory != null) {
            result = this.synonymFilterFactory.create(source);
        } else {
            logger.warn(".createComponents: synonymFilter disabled");
        }
    }
    result = this.wordDelimiterFilterFactory.create(result == null ? source : result);

    result = new EnglishPossessiveFilter(result);

    result = new ASCIIFoldingFilter(result);

    return new TokenStreamComponents(source, result);
}

From source file:uk.gov.nationalarchives.discovery.taxonomy.common.repository.lucene.analyzer.IAViewTextGenAnalyser.java

License:Mozilla Public License

@Override
protected TokenStreamComponents createComponents(final String fieldName) {
    Tokenizer source = new ClassicTokenizer();

    TokenStream result = null;

    if (AnalyzerType.QUERY.equals(analyzerType)) {
        if (synonymFilterFactory != null) {
            result = this.synonymFilterFactory.create(source);
        } else {
            logger.warn(".createComponents: synonymFilter disabled");
        }
    }
    result = this.wordDelimiterFilterFactory.create(result == null ? source : result);

    result = new EnglishPossessiveFilter(result);

    result = new LowerCaseFilter(result);

    result = new ASCIIFoldingFilter(result);

    return new TokenStreamComponents(source, result);
}

From source file:uk.gov.nationalarchives.discovery.taxonomy.common.repository.lucene.analyzer.IAViewTextNoCasNoPuncAnalyser.java

License:Mozilla Public License

@Override
protected TokenStreamComponents createComponents(final String fieldName) {
    Tokenizer source = new ClassicTokenizer();

    TokenStream result = null;

    if (AnalyzerType.QUERY.equals(analyzerType)) {
        if (synonymFilterFactory != null) {
            result = this.synonymFilterFactory.create(source);
        } else {
            logger.warn(".createComponents: synonymFilter disabled");
        }
    }
    result = this.wordDelimiterFilterFactory.create(result == null ? source : result);

    result = new EnglishPossessiveFilter(result);

    result = new ASCIIFoldingFilter(result);

    result = new LowerCaseFilter(result);

    return new TokenStreamComponents(source, result);
}
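
The three National Archives analyzers above share the same skeleton and differ only in case handling: IAViewTextCasNoPuncAnalyser preserves case, IAViewTextGenAnalyser lower-cases before ASCII folding, and IAViewTextNoCasNoPuncAnalyser lower-cases after it. In each of them the optional synonym filter is applied only on the query side (AnalyzerType.QUERY), and the result == null ? source : result check lets the word-delimiter filter wrap either the raw ClassicTokenizer or the synonym-expanded stream.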