List of usage examples for the org.apache.lucene.analysis.standard.ClassicTokenizer constructor ClassicTokenizer()
public ClassicTokenizer()
From source file:org.apache.nutch.scoring.similarity.util.LuceneAnalyzerUtil.java
License:Apache License
@Override protected TokenStreamComponents createComponents(String fieldName) { Tokenizer source = new ClassicTokenizer(); TokenStream filter = new LowerCaseFilter(source); if (stopSet != null) { filter = new StopFilter(filter, stopSet); }//w w w.j av a 2s . co m switch (stemFilterType) { case PORTERSTEM_FILTER: filter = new PorterStemFilter(filter); break; case ENGLISHMINIMALSTEM_FILTER: filter = new EnglishMinimalStemFilter(filter); break; default: break; } return new TokenStreamComponents(source, filter); }
From source file:org.apache.nutch.scoring.similarity.util.LuceneTokenizer.java
License:Apache License
private TokenStream generateTokenStreamFromText(String content, TokenizerType tokenizerType) { Tokenizer tokenizer = null;//from www . ja va 2s . c om switch (tokenizerType) { case CLASSIC: tokenizer = new ClassicTokenizer(); break; case STANDARD: default: tokenizer = new StandardTokenizer(); } tokenizer.setReader(new StringReader(content)); tokenStream = tokenizer; return tokenStream; }
From source file:org.elasticsearch.analysis.common.ClassicTokenizerFactory.java
License:Apache License
/**
 * Creates a new {@code ClassicTokenizer} whose maximum token length is set to
 * this factory's {@code maxTokenLength}.
 *
 * @return the configured tokenizer
 */
@Override
public Tokenizer create() {
    final ClassicTokenizer classic = new ClassicTokenizer();
    classic.setMaxTokenLength(maxTokenLength);
    return classic;
}
From source file:org.wltea.analyzer.sample.LuceneTokenizerDemo.java
License:Apache License
/** * ?ClassTokenizer/*from w w w .j av a 2 s . com*/ */ public void testCT() { Tokenizer tokenizer = new ClassicTokenizer(); try { tokenizer.setReader(new StringReader( "?????IKAnalyer can analysis english text too")); } catch (IOException e) { throw new RuntimeException(); } TokenStreamComponents tsc = new TokenStreamComponents(tokenizer); TokenStream ts = tsc.getTokenStream(); OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class); CharTermAttribute term = ts.addAttribute(CharTermAttribute.class); TypeAttribute type = ts.addAttribute(TypeAttribute.class); try { ts.reset(); while (ts.incrementToken()) { System.out.println(term.toString() + "->" + offset.startOffset() + "-" + offset.endOffset() + "->" + type.type()); } ts.end(); } catch (IOException e) { throw new RuntimeException(); } }
From source file:uk.gov.nationalarchives.discovery.taxonomy.common.repository.lucene.analyzer.IAViewTextCasNoPuncAnalyser.java
License:Mozilla Public License
@Override protected TokenStreamComponents createComponents(final String fieldName) { Tokenizer source = new ClassicTokenizer(); TokenStream result = null;//from www. ja v a 2 s . com if (AnalyzerType.QUERY.equals(analyzerType)) { if (synonymFilterFactory != null) { result = this.synonymFilterFactory.create(source); } else { logger.warn(".createComponents: synonymFilter disabled"); } } result = this.wordDelimiterFilterFactory.create(result == null ? source : result); result = new EnglishPossessiveFilter(result); result = new ASCIIFoldingFilter(result); return new TokenStreamComponents(source, result); }
From source file:uk.gov.nationalarchives.discovery.taxonomy.common.repository.lucene.analyzer.IAViewTextGenAnalyser.java
License:Mozilla Public License
/**
 * Assembles the analysis chain: {@code ClassicTokenizer}, an optional
 * query-time synonym filter, the word-delimiter filter,
 * {@code EnglishPossessiveFilter}, {@code LowerCaseFilter} and finally
 * {@code ASCIIFoldingFilter}.
 *
 * @param fieldName name of the field being analysed; not used by this chain
 * @return the tokenizer together with the last filter of the chain
 */
@Override
protected TokenStreamComponents createComponents(final String fieldName) {
    final Tokenizer tokenizer = new ClassicTokenizer();

    // The synonym stage is only attempted for query analysis.
    TokenStream synonymStage = null;
    if (AnalyzerType.QUERY.equals(analyzerType)) {
        if (synonymFilterFactory == null) {
            logger.warn(".createComponents: synonymFilter disabled");
        } else {
            synonymStage = this.synonymFilterFactory.create(tokenizer);
        }
    }

    // Feed the word-delimiter filter from the synonym stage when present, else from the tokenizer.
    final TokenStream delimiterInput = (synonymStage != null) ? synonymStage : tokenizer;
    final TokenStream delimited = this.wordDelimiterFilterFactory.create(delimiterInput);
    final TokenStream chain =
            new ASCIIFoldingFilter(new LowerCaseFilter(new EnglishPossessiveFilter(delimited)));

    return new TokenStreamComponents(tokenizer, chain);
}
From source file:uk.gov.nationalarchives.discovery.taxonomy.common.repository.lucene.analyzer.IAViewTextNoCasNoPuncAnalyser.java
License:Mozilla Public License
@Override protected TokenStreamComponents createComponents(final String fieldName) { Tokenizer source = new ClassicTokenizer(); TokenStream result = null;// w w w . j a va 2 s .co m if (AnalyzerType.QUERY.equals(analyzerType)) { if (synonymFilterFactory != null) { result = this.synonymFilterFactory.create(source); } else { logger.warn(".createComponents: synonymFilter disabled"); } } result = this.wordDelimiterFilterFactory.create(result == null ? source : result); result = new EnglishPossessiveFilter(result); result = new ASCIIFoldingFilter(result); result = new LowerCaseFilter(result); return new TokenStreamComponents(source, result); }