Usage examples for the constructor org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilter(TokenStream):
public RemoveDuplicatesTokenFilter(TokenStream in)
From source file:at.ac.univie.mminf.luceneSKOS.analysis.MeSHAnalyzer.java
License:Apache License
@Override protected TokenStreamComponents createComponents(String fileName, Reader reader) { if (expansionType.equals(ExpansionType.URI)) { final KeywordTokenizer src = new KeywordTokenizer(reader); TokenStream tok = new MeSHURIFilter(src, skosEngine, new StandardAnalyzer(matchVersion), types); tok = new LowerCaseFilter(matchVersion, tok); return new TokenStreamComponents(src, tok); } else {/*from w ww . j a va 2 s . c o m*/ final StandardTokenizer src = new StandardTokenizer(matchVersion, reader); src.setMaxTokenLength(maxTokenLength); TokenStream tok = new StandardFilter(matchVersion, src); // prior to this we get the classic behavior, standardfilter does it for // us. tok = new MeSHLabelFilter(tok, skosEngine, new StandardAnalyzer(matchVersion), bufferSize, types); tok = new LowerCaseFilter(matchVersion, tok); tok = new StopFilter(matchVersion, tok, stopwords); tok = new RemoveDuplicatesTokenFilter(tok); return new TokenStreamComponents(src, tok) { @Override protected void setReader(final Reader reader) throws IOException { src.setMaxTokenLength(maxTokenLength); super.setReader(reader); } }; } }
From source file:at.ac.univie.mminf.luceneSKOS.analysis.SNOMEDAnalyzer.java
License:Apache License
@Override protected TokenStreamComponents createComponents(String fileName, Reader reader) { final StandardTokenizer src = new StandardTokenizer(matchVersion, reader); src.setMaxTokenLength(maxTokenLength); TokenStream tok = new StandardFilter(matchVersion, src); // prior to this we get the classic behavior, standardfilter does it for // us.//from ww w . j av a2s .c o m tok = new SNOMEDFilter(tok, skosEngine, new StandardAnalyzer(matchVersion), bufferSize, types); tok = new LowerCaseFilter(matchVersion, tok); tok = new StopFilter(matchVersion, tok, stopwords); tok = new RemoveDuplicatesTokenFilter(tok); return new TokenStreamComponents(src, tok) { @Override protected void setReader(final Reader reader) throws IOException { src.setMaxTokenLength(maxTokenLength); super.setReader(reader); } }; }
From source file:de.walware.statet.r.internal.core.rhelp.index.DefaultAnalyzer.java
License:Open Source License
@Override protected TokenStreamComponents createComponents(final String fieldName, Reader reader) { if (this.charFilterFactory != null) { reader = this.charFilterFactory.create(reader); }// w w w . j av a 2 s . c o m final Tokenizer source = new StandardTokenizer(reader); TokenStream result = source; result = new EnglishPossessiveFilter(getVersion(), result); result = new LowerCaseFilter(result); result = new StopFilter(result, this.stopwords); result = new KeywordRepeatFilter(result); result = new SnowballFilter(result, new EnglishStemmer()); result = new RemoveDuplicatesTokenFilter(result); return new TokenStreamComponents(source, result); }
From source file:org.apache.jackrabbit.oak.plugins.index.solr.configuration.DefaultAnalyzersConfigurationTest.java
License:Apache License
/**
 * Creates the analyzers under test. Each path-matching strategy gets an
 * indexing analyzer and a searching analyzer; most searching analyzers are
 * plain keyword tokenizers, while the indexing side rewrites paths with
 * reverse/replace filter chains or hierarchy tokenization.
 */
@Before
public void setUp() throws Exception {
    // Exact-path match: the whole path is one keyword token on both sides.
    this.exactPathAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer tokenizer = new KeywordTokenizer(reader);
            return new TokenStreamComponents(tokenizer);
        }
    };
    this.parentPathIndexingAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer tokenizer = new KeywordTokenizer(reader);
            return new TokenStreamComponents(tokenizer);
        }
    };
    // Parent-path search: strip the last path segment by reversing, deleting
    // the (now leading) segment, and reversing back.
    this.parentPathSearchingAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer tokenizer = new KeywordTokenizer(reader);
            TokenStream stream = new ReverseStringFilter(Version.LUCENE_47, tokenizer);
            stream = new PatternReplaceFilter(stream, Pattern.compile("[^\\/]+\\/"), "", false);
            stream = new ReverseStringFilter(Version.LUCENE_47, stream);
            return new TokenStreamComponents(tokenizer, stream);
        }
    };
    // Direct-children indexing: reverse, drop the leaf segment, keep only the
    // remainder after the first slash, then reverse back.
    this.directChildrenPathIndexingAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer tokenizer = new KeywordTokenizer(reader);
            TokenStream stream = new ReverseStringFilter(Version.LUCENE_47, tokenizer);
            stream = new LengthFilter(Version.LUCENE_47, stream, 2, Integer.MAX_VALUE);
            stream = new PatternReplaceFilter(stream, Pattern.compile("([^\\/]+)(\\/)"), "$2", false);
            stream = new PatternReplaceFilter(stream, Pattern.compile("(\\/)(.+)"), "$2", false);
            stream = new ReverseStringFilter(Version.LUCENE_47, stream);
            return new TokenStreamComponents(tokenizer, stream);
        }
    };
    this.directChildrenPathSearchingAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer tokenizer = new KeywordTokenizer(reader);
            return new TokenStreamComponents(tokenizer);
        }
    };
    // All-children indexing: emit every path prefix, capture the slash-rooted
    // groups, and drop duplicate tokens.
    this.allChildrenPathIndexingAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer tokenizer = new PathHierarchyTokenizer(reader);
            TokenStream stream = new PatternCaptureGroupTokenFilter(tokenizer, false, Pattern.compile("((\\/).*)"));
            stream = new RemoveDuplicatesTokenFilter(stream);
            return new TokenStreamComponents(tokenizer, stream);
        }
    };
    this.allChildrenPathSearchingAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer tokenizer = new KeywordTokenizer(reader);
            return new TokenStreamComponents(tokenizer);
        }
    };
}
From source file:org.elasticsearch.analysis.common.MultiplexerTokenFilterFactory.java
License:Apache License
@Override public TokenStream create(TokenStream tokenStream) { List<Function<TokenStream, TokenStream>> functions = new ArrayList<>(); for (TokenFilterFactory tff : filters) { functions.add(tff::create);// w ww. j a va2 s . co m } return new RemoveDuplicatesTokenFilter(new MultiplexTokenFilter(tokenStream, functions)); }
From source file:org.elasticsearch.analysis.common.RemoveDuplicatesTokenFilterFactory.java
License:Apache License
/**
 * Wraps the given stream so duplicate tokens at the same position are dropped.
 */
@Override
public TokenStream create(TokenStream tokenStream) {
    final TokenStream deduplicated = new RemoveDuplicatesTokenFilter(tokenStream);
    return deduplicated;
}
From source file:org.elasticsearch.index.analysis.RemoveDuplicatesTokenFilterFactory.java
License:Apache License
/**
 * Wraps the given stream so duplicate tokens at the same position are dropped.
 * Returns the concrete filter type rather than the {@code TokenStream} base.
 */
@Override
public RemoveDuplicatesTokenFilter create(TokenStream input) {
    final RemoveDuplicatesTokenFilter deduplicated = new RemoveDuplicatesTokenFilter(input);
    return deduplicated;
}
From source file:org.elasticsearch.index.analysis.skos.SKOSAnalyzer.java
License:Apache License
@Override protected TokenStreamComponents createComponents(String fileName, Reader reader) { if (expansionType.equals(ExpansionType.URI)) { final KeywordTokenizer src = new KeywordTokenizer(reader); TokenStream tok = new SKOSURIFilter(src, skosEngine, new StandardAnalyzer(matchVersion), types); tok = new LowerCaseFilter(matchVersion, tok); return new TokenStreamComponents(src, tok); } else {// w w w.j av a 2 s . co m final StandardTokenizer src = new StandardTokenizer(matchVersion, reader); src.setMaxTokenLength(maxTokenLength); TokenStream tok = new StandardFilter(matchVersion, src); // prior to this we get the classic behavior, standardfilter does it for // us. tok = new SKOSLabelFilter(tok, skosEngine, new StandardAnalyzer(matchVersion), bufferSize, types); tok = new LowerCaseFilter(matchVersion, tok); tok = new StopFilter(matchVersion, tok, stopwords); tok = new RemoveDuplicatesTokenFilter(tok); return new TokenStreamComponents(src, tok) { @Override protected boolean reset(final Reader reader) throws IOException { src.setMaxTokenLength(maxTokenLength); return super.reset(reader); } }; } }
From source file:org.meresco.lucene.analysis.MerescoDutchStemmingAnalyzer.java
License:Open Source License
@Override public TokenStream post_analyzer(String fieldName, TokenStream tok) { if (stemmingFields != null && stemmingFields.indexOf(fieldName) == -1) return tok; tok = new KeywordRepeatFilter(tok); // repeat every word as term and as keyword tok = new SnowballFilter(tok, new DutchStemmer()); // ignores keywords tok = new RemoveDuplicatesTokenFilter(tok); // removes one if keyword and term are still the same return tok;/*www . j a v a 2 s. c om*/ }
From source file:org.xbib.elasticsearch.index.analysis.skos.SKOSAnalyzer.java
License:Apache License
@Override protected TokenStreamComponents createComponents(String fileName, Reader reader) { if (expansionType.equals(ExpansionType.URI)) { final KeywordTokenizer src = new KeywordTokenizer(reader); TokenStream tok = new SKOSURIFilter(src, skosEngine, new StandardAnalyzer(), types); tok = new LowerCaseFilter(tok); return new TokenStreamComponents(src, tok); } else {// ww w . java 2 s . co m final StandardTokenizer src = new StandardTokenizer(reader); src.setMaxTokenLength(maxTokenLength); TokenStream tok = new StandardFilter(src); // prior to this we get the classic behavior, standardfilter does it for // us. tok = new SKOSLabelFilter(tok, skosEngine, new StandardAnalyzer(), bufferSize, types); tok = new LowerCaseFilter(tok); tok = new StopFilter(tok, stopwords); tok = new RemoveDuplicatesTokenFilter(tok); return new TokenStreamComponents(src, tok) { @Override protected void setReader(final Reader reader) throws IOException { src.setMaxTokenLength(maxTokenLength); super.setReader(reader); } }; } }