Usage examples for the constructor of org.apache.lucene.analysis.tr.TurkishLowerCaseFilter
public TurkishLowerCaseFilter(TokenStream in)
From source file:com.hourglassapps.cpi_ii.stem.snowball.lucene.SnowballAnalyzer.java
License:Apache License
/** Constructs a {@link StandardTokenizer} filtered by a {@link StandardFilter}, a {@link LowerCaseFilter}, a {@link StopFilter}, and a {@link SnowballFilter} */ @Override/*from w w w. ja v a 2s. com*/ public TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer tokenizer = new StandardTokenizer(matchVersion, reader); TokenStream result = new StandardFilter(matchVersion, tokenizer); // remove the possessive 's for english stemmers if (matchVersion.onOrAfter(Version.LUCENE_3_1) && (name.equals("English") || name.equals("Porter") || name.equals("Lovins"))) result = new EnglishPossessiveFilter(result); // Use a special lowercase filter for turkish, the stemmer expects it. if (matchVersion.onOrAfter(Version.LUCENE_3_1) && name.equals("Turkish")) result = new TurkishLowerCaseFilter(result); else result = new LowerCaseFilter(matchVersion, result); if (stopSet != null) result = new StopFilter(matchVersion, result, stopSet); result = new SnowballFilter(result, name); return new TokenStreamComponents(tokenizer, result); }
From source file:org.apache.solr.analysis.TurkishLowerCaseFilterFactory.java
License:Apache License
/**
 * Wraps the given stream in a {@link TurkishLowerCaseFilter}.
 *
 * @param input the token stream to be filtered
 * @return a new {@link TurkishLowerCaseFilter} reading from {@code input}
 */
public TokenStream create(TokenStream input) {
    final TurkishLowerCaseFilter lowerCased = new TurkishLowerCaseFilter(input);
    return lowerCased;
}
From source file:org.codelibs.elasticsearch.index.analysis.SnowballAnalyzer.java
License:Apache License
/** Constructs a {StandardTokenizer} filtered by a {@link StandardFilter}, a {LowerCaseFilter}, a {StopFilter}, and a {SnowballFilter} *//*from w ww. j a v a2 s .com*/ @Override public TokenStreamComponents createComponents(String fieldName) { final Tokenizer tokenizer = new StandardTokenizer(); TokenStream result = tokenizer; // remove the possessive 's for english stemmers if (name.equals("English") || name.equals("Porter") || name.equals("Lovins")) { result = new EnglishPossessiveFilter(result); } // Use a special lowercase filter for turkish, the stemmer expects it. if (name.equals("Turkish")) { result = new TurkishLowerCaseFilter(result); } else { result = new LowerCaseFilter(result); } if (stopSet != null) { result = new StopFilter(result, stopSet); } result = new SnowballFilter(result, name); return new TokenStreamComponents(tokenizer, result); }
From source file:org.elasticsearch.analysis.common.LowerCaseTokenFilterFactory.java
License:Apache License
/**
 * Wraps the given stream in the lowercase filter matching the configured language.
 *
 * <p>With no language configured a plain {@link LowerCaseFilter} is used. The language
 * names "greek", "irish" and "turkish" (compared case-insensitively) select their
 * dedicated filters.
 *
 * @param tokenStream the token stream to be filtered
 * @return the lowercase filter wrapping {@code tokenStream}
 * @throws IllegalArgumentException if a language is configured but is not one of the
 *         supported names
 */
@Override
public TokenStream create(TokenStream tokenStream) {
    // No language configured: fall back to the generic filter.
    if (lang == null) {
        return new LowerCaseFilter(tokenStream);
    }
    if (lang.equalsIgnoreCase("greek")) {
        return new GreekLowerCaseFilter(tokenStream);
    }
    if (lang.equalsIgnoreCase("irish")) {
        return new IrishLowerCaseFilter(tokenStream);
    }
    if (lang.equalsIgnoreCase("turkish")) {
        return new TurkishLowerCaseFilter(tokenStream);
    }
    throw new IllegalArgumentException("language [" + lang + "] not support for lower case");
}
From source file:org.elasticsearch.analysis.common.SnowballAnalyzer.java
License:Apache License
/** Constructs a {@link StandardTokenizer} filtered by a {@link StandardFilter}, a {@link LowerCaseFilter}, a {@link StopFilter}, and a {@link SnowballFilter} */ @Override/*ww w . ja va 2 s. c om*/ public TokenStreamComponents createComponents(String fieldName) { final Tokenizer tokenizer = new StandardTokenizer(); TokenStream result = tokenizer; // remove the possessive 's for english stemmers if (name.equals("English") || name.equals("Porter") || name.equals("Lovins")) result = new EnglishPossessiveFilter(result); // Use a special lowercase filter for turkish, the stemmer expects it. if (name.equals("Turkish")) result = new TurkishLowerCaseFilter(result); else result = new LowerCaseFilter(result); if (stopSet != null) result = new StopFilter(result, stopSet); result = new SnowballFilter(result, name); return new TokenStreamComponents(tokenizer, result); }
From source file:org.elasticsearch.index.analysis.LowerCaseTokenFilterFactory.java
License:Apache License
/**
 * Wraps the given stream in the lowercase filter matching the configured language.
 *
 * <p>With no language configured a version-aware {@link LowerCaseFilter} is used. The
 * language names "greek" and "turkish" (compared case-insensitively) select their
 * dedicated filters.
 *
 * @param tokenStream the token stream to be filtered
 * @return the lowercase filter wrapping {@code tokenStream}
 * @throws ElasticsearchIllegalArgumentException if a language is configured but is not
 *         one of the supported names
 */
@Override
public TokenStream create(TokenStream tokenStream) {
    final TokenStream lowerCased;
    if (lang == null) {
        // No language configured: fall back to the generic filter.
        lowerCased = new LowerCaseFilter(version, tokenStream);
    } else if (lang.equalsIgnoreCase("greek")) {
        lowerCased = new GreekLowerCaseFilter(version, tokenStream);
    } else if (lang.equalsIgnoreCase("turkish")) {
        lowerCased = new TurkishLowerCaseFilter(tokenStream);
    } else {
        throw new ElasticsearchIllegalArgumentException("language [" + lang + "] not support for lower case");
    }
    return lowerCased;
}
From source file:org.elasticsearch.index.analysis.SnowballAnalyzer.java
License:Apache License
/** Constructs a {@link StandardTokenizer} filtered by a {@link StandardFilter}, a {@link LowerCaseFilter}, a {@link StopFilter}, and a {@link SnowballFilter} */ @Override/*from w w w . ja v a 2 s . c o m*/ public TokenStreamComponents createComponents(String fieldName) { final Tokenizer tokenizer; if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) { tokenizer = new StandardTokenizer(); } else { tokenizer = new StandardTokenizer40(); } TokenStream result = tokenizer; // remove the possessive 's for english stemmers if (name.equals("English") || name.equals("Porter") || name.equals("Lovins")) result = new EnglishPossessiveFilter(result); // Use a special lowercase filter for turkish, the stemmer expects it. if (name.equals("Turkish")) result = new TurkishLowerCaseFilter(result); else result = new LowerCaseFilter(result); if (stopSet != null) result = new StopFilter(result, stopSet); result = new SnowballFilter(result, name); return new TokenStreamComponents(tokenizer, result); }