List of usage examples for the org.apache.lucene.analysis.standard.ClassicFilter constructor
public ClassicFilter(TokenStream in)
From source file:lab_mri.CustomAnalyzer.java
/**
 * Builds the analysis chain for this custom analyzer: Wikipedia tokenization,
 * classic token cleanup, standard filtering, lowercasing, stop-word removal,
 * then Porter stemming.
 *
 * Fix: {@code LowerCaseFilter} now runs BEFORE {@code StopFilter} and
 * {@code PorterStemFilter}. The default English stop set is all-lowercase and
 * the Porter stemmer assumes lowercase input, so the original order
 * (stop/stem before lowercase) let capitalized stop words ("The", "And")
 * through and mis-stemmed capitalized terms.
 *
 * @param string the field name (unused by this analyzer)
 * @param reader the source text to tokenize
 * @return the tokenizer/filter pair consumed by Lucene
 */
@Override
protected TokenStreamComponents createComponents(String string, Reader reader) {
    CharArraySet stopWords = EnglishAnalyzer.getDefaultStopSet();
    Tokenizer tokenizer = new WikipediaTokenizer(reader);
    TokenStream filter = new ClassicFilter(tokenizer); // strip possessives / acronym dots
    filter = new StandardFilter(filter);
    filter = new LowerCaseFilter(filter);              // normalize case first
    filter = new StopFilter(filter, stopWords);        // lowercase stop set now matches
    filter = new PorterStemFilter(filter);             // stemmer expects lowercase input
    return new TokenStreamComponents(tokenizer, filter);
}
From source file:org.apache.solr.analysis.ClassicFilterFactory.java
License:Apache License
/**
 * Wraps the incoming stream in a {@link ClassicFilter}, which removes
 * trailing possessives and dots from acronyms produced by the tokenizer.
 *
 * @param input the upstream token stream
 * @return the classic-filtered stream
 */
public TokenFilter create(TokenStream input) {
    final ClassicFilter classicFilter = new ClassicFilter(input);
    return classicFilter;
}
From source file:org.elasticsearch.analysis.common.ClassicFilterFactory.java
License:Apache License
/**
 * Produces a {@link ClassicFilter} over the given stream; the filter strips
 * possessives and acronym dots from classic-tokenized terms.
 *
 * @param tokenStream the upstream token stream
 * @return the wrapped stream
 */
@Override
public TokenStream create(TokenStream tokenStream) {
    final TokenStream filtered = new ClassicFilter(tokenStream);
    return filtered;
}
From source file:org.geotoolkit.lucene.analysis.standard.ClassicAnalyzer.java
License:Apache License
/**
 * Assembles the classic analysis chain: a {@link ClassicTokenizer} capped at
 * {@code maxTokenLength}, then {@link ClassicFilter}, lowercasing, and
 * stop-word removal. The returned components re-apply the analyzer's current
 * {@code maxTokenLength} every time a new reader is attached, so later changes
 * to the analyzer's setting take effect on reuse.
 *
 * @param fieldName the field being analyzed (unused by this chain)
 * @return the tokenizer/filter pair consumed by Lucene
 */
@Override
protected TokenStreamComponents createComponents(final String fieldName) {
    final ClassicTokenizer source = new ClassicTokenizer();
    source.setMaxTokenLength(maxTokenLength);
    TokenStream chain = new LowerCaseFilter(new ClassicFilter(source));
    chain = new StopFilter(chain, stopwords);
    return new TokenStreamComponents(source, chain) {
        @Override
        protected void setReader(final Reader reader) {
            // Pick up any maxTokenLength change made since construction.
            source.setMaxTokenLength(ClassicAnalyzer.this.maxTokenLength);
            super.setReader(reader);
        }
    };
}
From source file:org.meresco.lucene.analysis.MerescoStandardAnalyzer.java
License:Open Source License
/**
 * Builds the pre-analysis chain over the given reader: classic tokenization,
 * then {@link ClassicFilter}, ASCII folding (diacritics folded to ASCII),
 * and lowercasing.
 *
 * @param reader the source text
 * @return the tokenizer/filter pair consumed by Lucene
 */
protected Analyzer.TokenStreamComponents pre_analyzer(Reader reader) {
    final ClassicTokenizer tokenizer = new ClassicTokenizer(reader);
    TokenStream stream = new ClassicFilter(tokenizer);
    stream = new ASCIIFoldingFilter(stream);
    stream = new LowerCaseFilter(stream);
    return new Analyzer.TokenStreamComponents(tokenizer, stream);
}
From source file:summarizer.KeywordsGuesser.java
License:Open Source License
/**
 * Extracts stemmed keywords from raw text, tallying how often each stem occurs.
 * Hyphens are temporarily rewritten to {@code "-0"} so the punctuation strip
 * preserves them, then restored before keywords are recorded. Tokens are
 * lowercased, classic-filtered, ASCII-folded, and stop-word-filtered before
 * stemming via {@code stemmize}.
 *
 * Fix: the {@link TokenStream} is now {@code end()}-ed and {@code close()}-d
 * in a try/finally; the original leaked the underlying tokenizer resources
 * and skipped the end-of-stream callback required by the TokenStream contract.
 *
 * @param input raw text to mine for keywords
 * @return keywords sorted by the {@code Keyword} natural ordering
 * @throws IOException if the token stream fails while reading
 */
public static List<Keyword> guessFromString(String input) throws IOException {
    // Protect real hyphens, strip remaining punctuation, drop common
    // English contractions ('t, 'd, 's, 'm, 've, 're, 'll).
    input = input.replaceAll("-+", "-0");
    input = input.replaceAll("[\\p{Punct}&&[^'-]]+", " ");
    input = input.replaceAll("(?:'(?:[tdsm]|[vr]e|ll))+\\b", "");

    TokenStream tokenStream = new ClassicTokenizer(LUCENE_VERSION, new StringReader(input));
    tokenStream = new LowerCaseFilter(LUCENE_VERSION, tokenStream);
    tokenStream = new ClassicFilter(tokenStream);
    tokenStream = new ASCIIFoldingFilter(tokenStream);
    tokenStream = new StopFilter(LUCENE_VERSION, tokenStream, EnglishAnalyzer.getDefaultStopSet());

    List<Keyword> keywords = new LinkedList<Keyword>();
    try {
        CharTermAttribute token = tokenStream.getAttribute(CharTermAttribute.class);
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            String term = token.toString();
            String stem = stemmize(term);
            if (stem != null) {
                // Restore protected hyphens before recording stem and term.
                Keyword keyword = find(keywords, new Keyword(stem.replaceAll("-0", "-")));
                keyword.add(term.replaceAll("-0", "-"));
            }
        }
        tokenStream.end();
    } finally {
        tokenStream.close(); // fix: original never released the stream
    }
    Collections.sort(keywords);
    return keywords;
}