Example usage for the org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilterFactory constructor

List of usage examples for the org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilterFactory constructor

Introduction

On this page you can find example usages of the org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilterFactory constructor.

Prototype

public RemoveDuplicatesTokenFilterFactory(Map<String, String> args) 

Source Link

Document

Creates a new RemoveDuplicatesTokenFilterFactory

Usage

From source file:com.basistech.IndexFiles.java

License:Open Source License

/**
 * Builds an {@link Analyzer} whose tokenizer comes from an {@code RLPTokenizerFactory}
 * configured with {@code lang=kor} and the lemma, compound-component and part-of-speech
 * posting options switched on.
 *
 * <p>When {@code filters} is {@code true}, each reader is first wrapped by the
 * {@code HTMLStripCharFilterFactory}, and the tokenizer output is passed, in order, through
 * the ICU folding, reversed-wildcard and remove-duplicates filter factories. When it is
 * {@code false}, the reader is used as-is and the tokenizer is the only component.
 *
 * @param filters whether to apply the char filter and the token filter chain
 * @return a new analyzer
 */
static Analyzer setupAnalyzer(final boolean filters) {
    Map<String, String> rlpArgs = Maps.newHashMap();
    rlpArgs.put("rlpContext", "rlp-context.xml");
    rlpArgs.put("lang", "kor");
    rlpArgs.put("postLemma", "true");
    rlpArgs.put("postCompoundComponents", "true");
    rlpArgs.put("postPartOfSpeech", "true");
    final RLPTokenizerFactory rlpTokenizers = new RLPTokenizerFactory(rlpArgs);

    // A single empty argument map is handed to every factory that takes no options.
    Map<String, String> noArgs = Maps.newHashMap();
    final HTMLStripCharFilterFactory htmlStrip = new HTMLStripCharFilterFactory(noArgs);
    final ICUFoldingFilterFactory icuFolding = new ICUFoldingFilterFactory(noArgs);
    final ReversedWildcardFilterFactory reversedWildcard = new ReversedWildcardFilterFactory(noArgs);
    final RemoveDuplicatesTokenFilterFactory dedupe = new RemoveDuplicatesTokenFilterFactory(noArgs);

    return new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            final Tokenizer tokenizer = rlpTokenizers.create(reader);
            if (!filters) {
                // Unfiltered mode: the tokenizer alone is the whole pipeline.
                return new TokenStreamComponents(tokenizer);
            }
            TokenStream folded = icuFolding.create(tokenizer);
            TokenStream reversed = reversedWildcard.create(folded);
            TokenStream deduplicated = dedupe.create(reversed);
            return new TokenStreamComponents(tokenizer, deduplicated);
        }

        @Override
        protected Reader initReader(String fieldName, Reader reader) {
            // The HTML-stripping char filter is applied only in filtered mode.
            return filters ? htmlStrip.create(reader) : reader;
        }
    };
}

From source file:org.apache.solr.analysis.ko.TestKoreanTokenizerFactory.java

License:Apache License

/**
 * Prepares the analysis factories used by the tests.
 *
 * <p>Every factory is constructed with one shared empty argument map, except the
 * {@code KoreanFilterFactory}, which receives its own four explicit options.
 *
 * @throws Exception declared to match the overridden {@code setUp()}, which is called first
 */
@Override
public void setUp() throws Exception {
    super.setUp();
    // initCore();

    // Options specific to the Korean filter.
    Map<String, String> koreanFilterOptions = new HashMap<>();
    koreanFilterOptions.put("hasOrigin", "true");
    koreanFilterOptions.put("hasCNoun", "true");
    koreanFilterOptions.put("bigrammable", "false");
    koreanFilterOptions.put("queryMode", "false");

    // Shared by all factories that are created without options.
    Map<String, String> noOptions = new HashMap<>();

    kt = new KoreanTokenizerFactory(noOptions);
    lc = new LowerCaseFilterFactory(noOptions);
    kf = new KoreanFilterFactory(koreanFilterOptions);
    hmf = new HanjaMappingFilterFactory(noOptions);
    kmf = new KeywordMarkerFilterFactory(noOptions);
    pdf = new PunctuationDelimitFilterFactory(noOptions);
    krf = new KeywordRepeatFilterFactory(noOptions);
    epf = new EnglishPossessiveFilterFactory(noOptions);
    psf = new PorterStemFilterFactory(noOptions);
    rdt = new RemoveDuplicatesTokenFilterFactory(noOptions);
}