List of usage examples for the org.apache.lucene.analysis.core.LetterTokenizer constructor LetterTokenizer()
public LetterTokenizer()
From source file:com.impetus.kundera.index.DocumentIndexer.java
License:Apache License
/**
 * Instantiates a new document indexer.
 *
 * <p>Assigns a fresh {@code StandardAnalyzer} to {@code analyzer} and a fresh
 * {@code LetterTokenizer} to {@code tokenizer}. The constructor takes no arguments.
 */
public DocumentIndexer() {
    this.analyzer = new StandardAnalyzer();
    tokenizer = new LetterTokenizer();
}
From source file:ntu.searchengine.MyCustomAnalyzer.java
/**
 * Builds the analysis chain: a {@code LetterTokenizer} followed by a stop-word filter and,
 * depending on the {@code Main} switches, lower-casing and Porter stemming.
 *
 * @param string the name passed in by the analyzer framework; not used by this implementation
 * @return the tokenizer paired with the last filter of the assembled chain
 */
@Override
protected TokenStreamComponents createComponents(String string) {
    Tokenizer source = new LetterTokenizer();

    // With stop words disabled, an empty set makes the stop filter remove nothing.
    CharArraySet stopWords = Main.useStopWords
            ? StandardAnalyzer.STOP_WORDS_SET
            : new CharArraySet(0, true);

    TokenStream filter = new StopFilter(source, stopWords);
    if (!Main.upperCaseSensitivity) {
        // Wrap the running chain rather than the raw tokenizer; wrapping `source` here
        // would discard the stop filter built above.
        filter = new LowerCaseFilter(filter);
    }
    if (Main.stemning) {
        filter = new PorterStemFilter(filter);
    }
    return new TokenStreamComponents(source, filter);
}
From source file:org.apache.jena.query.text.analyzer.ConfigurableAnalyzer.java
License:Apache License
private Tokenizer getTokenizer(String tokenizerName) { switch (tokenizerName) { case "KeywordTokenizer": return new KeywordTokenizer(); case "LetterTokenizer": return new LetterTokenizer(); case "StandardTokenizer": return new StandardTokenizer(); case "WhitespaceTokenizer": return new WhitespaceTokenizer(); default://from w ww . j av a 2s. c o m throw new TextIndexException("Unknown tokenizer : " + tokenizerName); } }
From source file:org.elasticsearch.analysis.common.LetterTokenizerFactory.java
License:Apache License
/**
 * Creates the tokenizer supplied by this factory.
 *
 * @return a newly constructed {@code LetterTokenizer}
 */
@Override
public Tokenizer create() {
    final Tokenizer letterTokenizer = new LetterTokenizer();
    return letterTokenizer;
}
From source file:uk.ac.ebi.biostudies.api.util.analyzer.LowercaseAnalyzer.java
License:Apache License
/**
 * Builds the analysis chain, which consists solely of a {@code LetterTokenizer}.
 *
 * @param fieldName name of the field being analyzed; not consulted by this implementation
 * @return components wrapping a fresh {@code LetterTokenizer} with no additional filters
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    // NOTE(review): the source file is named LowercaseAnalyzer, yet no lower-casing filter
    // is applied in this method — confirm that normalization happens elsewhere in the class.
    return new TokenStreamComponents(new LetterTokenizer());
}