Example usage for org.apache.lucene.analysis.miscellaneous RemoveDuplicatesTokenFilter RemoveDuplicatesTokenFilter

List of usage examples for org.apache.lucene.analysis.miscellaneous RemoveDuplicatesTokenFilter RemoveDuplicatesTokenFilter

Introduction

In this page you can find the example usage for org.apache.lucene.analysis.miscellaneous RemoveDuplicatesTokenFilter RemoveDuplicatesTokenFilter.

Prototype

public RemoveDuplicatesTokenFilter(TokenStream in) 

Source Link

Document

Creates a new RemoveDuplicatesTokenFilter

Usage

From source file: at.ac.univie.mminf.luceneSKOS.analysis.MeSHAnalyzer.java

License: Apache License

@Override
protected TokenStreamComponents createComponents(String fileName, Reader reader) {
    if (!expansionType.equals(ExpansionType.URI)) {
        // Label expansion: standard tokenization with a configurable token
        // length cap, followed by MeSH label lookup and normalization.
        final StandardTokenizer source = new StandardTokenizer(matchVersion, reader);
        source.setMaxTokenLength(maxTokenLength);
        // StandardFilter preserves the classic post-tokenizer behavior.
        TokenStream stream = new StandardFilter(matchVersion, source);
        stream = new MeSHLabelFilter(stream, skosEngine, new StandardAnalyzer(matchVersion), bufferSize, types);
        stream = new LowerCaseFilter(matchVersion, stream);
        stream = new StopFilter(matchVersion, stream, stopwords);
        // Expansion may emit the same token twice at one position; drop dupes.
        stream = new RemoveDuplicatesTokenFilter(stream);
        return new TokenStreamComponents(source, stream) {
            @Override
            protected void setReader(final Reader reader) throws IOException {
                // Re-apply the token length cap whenever a new reader is set.
                source.setMaxTokenLength(maxTokenLength);
                super.setReader(reader);
            }
        };
    }
    // URI expansion: treat the entire input as a single keyword token.
    final KeywordTokenizer source = new KeywordTokenizer(reader);
    TokenStream stream = new MeSHURIFilter(source, skosEngine, new StandardAnalyzer(matchVersion), types);
    stream = new LowerCaseFilter(matchVersion, stream);
    return new TokenStreamComponents(source, stream);
}

From source file: at.ac.univie.mminf.luceneSKOS.analysis.SNOMEDAnalyzer.java

License: Apache License

@Override
protected TokenStreamComponents createComponents(String fileName, Reader reader) {
    // Standard tokenization with a configurable maximum token length.
    final StandardTokenizer source = new StandardTokenizer(matchVersion, reader);
    source.setMaxTokenLength(maxTokenLength);
    // StandardFilter preserves the classic post-tokenizer behavior.
    TokenStream stream = new StandardFilter(matchVersion, source);
    // Expand tokens with SNOMED concepts, then lowercase, drop stop words,
    // and remove duplicate tokens introduced by the expansion.
    stream = new SNOMEDFilter(stream, skosEngine, new StandardAnalyzer(matchVersion), bufferSize, types);
    stream = new LowerCaseFilter(matchVersion, stream);
    stream = new StopFilter(matchVersion, stream, stopwords);
    stream = new RemoveDuplicatesTokenFilter(stream);
    return new TokenStreamComponents(source, stream) {
        @Override
        protected void setReader(final Reader reader) throws IOException {
            // Re-apply the token length cap whenever a new reader is set.
            source.setMaxTokenLength(maxTokenLength);
            super.setReader(reader);
        }
    };
}

From source file: de.walware.statet.r.internal.core.rhelp.index.DefaultAnalyzer.java

License: Open Source License

@Override
protected TokenStreamComponents createComponents(final String fieldName, Reader reader) {
    // Optionally pre-process the raw character stream before tokenization.
    if (this.charFilterFactory != null) {
        reader = this.charFilterFactory.create(reader);
    }
    final Tokenizer tokenizer = new StandardTokenizer(reader);
    TokenStream stream = new EnglishPossessiveFilter(getVersion(), tokenizer);
    stream = new LowerCaseFilter(stream);
    stream = new StopFilter(stream, this.stopwords);
    // Emit each token twice (original marked as keyword + stemmable copy),
    // stem the non-keyword copy, then drop duplicates where stemming
    // produced no change.
    stream = new KeywordRepeatFilter(stream);
    stream = new SnowballFilter(stream, new EnglishStemmer());
    stream = new RemoveDuplicatesTokenFilter(stream);
    return new TokenStreamComponents(tokenizer, stream);
}

From source file: org.apache.jackrabbit.oak.plugins.index.solr.configuration.DefaultAnalyzersConfigurationTest.java

License: Apache License

@Before
public void setUp() throws Exception {
    // Exact path matching: the whole input becomes one keyword token.
    this.exactPathAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer tokenizer = new KeywordTokenizer(reader);
            return new TokenStreamComponents(tokenizer);
        }
    };
    // Parent path indexing: same plain keyword behavior.
    this.parentPathIndexingAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer tokenizer = new KeywordTokenizer(reader);
            return new TokenStreamComponents(tokenizer);
        }
    };
    // Parent path searching: reverse, strip the (now leading) last segment
    // with an anchored replace, then reverse back.
    this.parentPathSearchingAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer tokenizer = new KeywordTokenizer(reader);
            TokenStream stream = new ReverseStringFilter(Version.LUCENE_47, tokenizer);
            stream = new PatternReplaceFilter(stream, Pattern.compile("[^\\/]+\\/"), "", false);
            stream = new ReverseStringFilter(Version.LUCENE_47, stream);
            return new TokenStreamComponents(tokenizer, stream);
        }
    };

    // Direct children indexing: reverse the path, keep tokens of length >= 2,
    // rewrite via the two replace patterns, and reverse back.
    this.directChildrenPathIndexingAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer tokenizer = new KeywordTokenizer(reader);
            TokenStream stream = new ReverseStringFilter(Version.LUCENE_47, tokenizer);
            stream = new LengthFilter(Version.LUCENE_47, stream, 2, Integer.MAX_VALUE);
            stream = new PatternReplaceFilter(stream, Pattern.compile("([^\\/]+)(\\/)"), "$2", false);
            stream = new PatternReplaceFilter(stream, Pattern.compile("(\\/)(.+)"), "$2", false);
            stream = new ReverseStringFilter(Version.LUCENE_47, stream);
            return new TokenStreamComponents(tokenizer, stream);
        }
    };
    // Direct children searching: plain keyword tokenization.
    this.directChildrenPathSearchingAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer tokenizer = new KeywordTokenizer(reader);
            return new TokenStreamComponents(tokenizer);
        }
    };

    // All children indexing: hierarchy tokenization plus a capture-group
    // filter, with duplicate tokens removed afterwards.
    this.allChildrenPathIndexingAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer tokenizer = new PathHierarchyTokenizer(reader);
            TokenStream stream = new PatternCaptureGroupTokenFilter(tokenizer, false,
                    Pattern.compile("((\\/).*)"));
            stream = new RemoveDuplicatesTokenFilter(stream);
            return new TokenStreamComponents(tokenizer, stream);
        }
    };
    // All children searching: plain keyword tokenization.
    this.allChildrenPathSearchingAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer tokenizer = new KeywordTokenizer(reader);
            return new TokenStreamComponents(tokenizer);
        }
    };
}

From source file: org.elasticsearch.analysis.common.MultiplexerTokenFilterFactory.java

License: Apache License

@Override
public TokenStream create(TokenStream tokenStream) {
    // Build one stream-transforming function per configured sub-filter.
    // Presize the list: the number of functions is known up front.
    List<Function<TokenStream, TokenStream>> functions = new ArrayList<>(filters.size());
    for (TokenFilterFactory tff : filters) {
        functions.add(tff::create);
    }
    // Run the input through every sub-filter in parallel positions and
    // collapse duplicate tokens emitted at the same position.
    return new RemoveDuplicatesTokenFilter(new MultiplexTokenFilter(tokenStream, functions));
}

From source file: org.elasticsearch.analysis.common.RemoveDuplicatesTokenFilterFactory.java

License: Apache License

@Override
public TokenStream create(TokenStream tokenStream) {
    // Wrap the incoming stream so duplicate tokens at the same position
    // are dropped.
    TokenStream deduplicated = new RemoveDuplicatesTokenFilter(tokenStream);
    return deduplicated;
}

From source file: org.elasticsearch.index.analysis.RemoveDuplicatesTokenFilterFactory.java

License: Apache License

@Override
public RemoveDuplicatesTokenFilter create(TokenStream input) {
    // Factory method: dedupe tokens sharing text and position in the stream.
    return new RemoveDuplicatesTokenFilter(input);
}

From source file: org.elasticsearch.index.analysis.skos.SKOSAnalyzer.java

License: Apache License

// Builds the analysis chain for SKOS expansion. Two modes: URI expansion
// (whole input is one keyword token) and label expansion (standard
// tokenization plus SKOS label lookup).
@Override
protected TokenStreamComponents createComponents(String fileName, Reader reader) {
    if (expansionType.equals(ExpansionType.URI)) {
        // URI mode: keyword-tokenize, expand via SKOSURIFilter, lowercase.
        final KeywordTokenizer src = new KeywordTokenizer(reader);
        TokenStream tok = new SKOSURIFilter(src, skosEngine, new StandardAnalyzer(matchVersion), types);
        tok = new LowerCaseFilter(matchVersion, tok);
        return new TokenStreamComponents(src, tok);
    } else {
        // Label mode: standard tokenization with a configurable length cap.
        final StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
        src.setMaxTokenLength(maxTokenLength);
        TokenStream tok = new StandardFilter(matchVersion, src);
        // prior to this we get the classic behavior, standardfilter does it for
        // us.
        tok = new SKOSLabelFilter(tok, skosEngine, new StandardAnalyzer(matchVersion), bufferSize, types);
        tok = new LowerCaseFilter(matchVersion, tok);
        tok = new StopFilter(matchVersion, tok, stopwords);
        // Expansion can emit duplicate tokens at one position; remove them.
        tok = new RemoveDuplicatesTokenFilter(tok);
        return new TokenStreamComponents(src, tok) {
            // NOTE(review): overriding boolean reset(Reader) rather than
            // void setReader(Reader) — this matches an older Lucene
            // TokenStreamComponents API; confirm against the Lucene version
            // this module targets.
            @Override
            protected boolean reset(final Reader reader) throws IOException {
                // Re-apply the token length cap whenever the reader changes.
                src.setMaxTokenLength(maxTokenLength);
                return super.reset(reader);
            }
        };
    }
}

From source file: org.meresco.lucene.analysis.MerescoDutchStemmingAnalyzer.java

License: Open Source License

@Override
public TokenStream post_analyzer(String fieldName, TokenStream tok) {
    // Skip stemming entirely for fields that are not configured for it.
    // NOTE(review): indexOf(fieldName) == -1 is used as a membership test;
    // if stemmingFields is a String this is a substring check rather than
    // exact membership — confirm intent against the field declaration.
    if (stemmingFields != null && stemmingFields.indexOf(fieldName) == -1) {
        return tok;
    }
    // Duplicate each token (keyword copy + stemmable copy), stem only the
    // non-keyword copy, then drop duplicates where the stem is unchanged.
    tok = new KeywordRepeatFilter(tok);
    tok = new SnowballFilter(tok, new DutchStemmer());
    tok = new RemoveDuplicatesTokenFilter(tok);
    return tok;
}

From source file: org.xbib.elasticsearch.index.analysis.skos.SKOSAnalyzer.java

License: Apache License

@Override
protected TokenStreamComponents createComponents(String fileName, Reader reader) {
    if (!expansionType.equals(ExpansionType.URI)) {
        // Label expansion: standard tokenization with a configurable token
        // length cap, followed by SKOS label lookup and normalization.
        final StandardTokenizer source = new StandardTokenizer(reader);
        source.setMaxTokenLength(maxTokenLength);
        // StandardFilter preserves the classic post-tokenizer behavior.
        TokenStream stream = new StandardFilter(source);
        stream = new SKOSLabelFilter(stream, skosEngine, new StandardAnalyzer(), bufferSize, types);
        stream = new LowerCaseFilter(stream);
        stream = new StopFilter(stream, stopwords);
        // Expansion may emit the same token twice at one position; drop dupes.
        stream = new RemoveDuplicatesTokenFilter(stream);
        return new TokenStreamComponents(source, stream) {
            @Override
            protected void setReader(final Reader reader) throws IOException {
                // Re-apply the token length cap whenever a new reader is set.
                source.setMaxTokenLength(maxTokenLength);
                super.setReader(reader);
            }
        };
    }
    // URI expansion: treat the entire input as a single keyword token.
    final KeywordTokenizer source = new KeywordTokenizer(reader);
    TokenStream stream = new SKOSURIFilter(source, skosEngine, new StandardAnalyzer(), types);
    stream = new LowerCaseFilter(stream);
    return new TokenStreamComponents(source, stream);
}