Example usage for org.apache.lucene.analysis MockTokenFilter MockTokenFilter

List of usage examples for org.apache.lucene.analysis MockTokenFilter MockTokenFilter

Introduction

In this page you can find the example usage for org.apache.lucene.analysis MockTokenFilter MockTokenFilter.

Prototype

public MockTokenFilter(TokenStream input, CharacterRunAutomaton filter) 

Source Link

Document

Create a new MockTokenFilter.

Usage

From source file: org.apache.solr.analysis.MockTokenFilterFactory.java

License: Apache License

@Override
public MockTokenFilter create(TokenStream stream) {
    return new MockTokenFilter(stream, filter);
}

From source file: org.elasticsearch.action.admin.indices.TransportAnalyzeActionTests.java

License: Apache License

@Override
public void setUp() throws Exception {
    super.setUp();
    Settings settings = Settings.builder()
            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build();

    Settings indexSettings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID())
            .put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
            .put("index.analysis.analyzer.custom_analyzer.filter", "mock")
            .put("index.analysis.normalizer.my_normalizer.type", "custom")
            .putList("index.analysis.normalizer.my_normalizer.filter", "lowercase").build();
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
    environment = new Environment(settings);
    AnalysisPlugin plugin = new AnalysisPlugin() {
        class MockFactory extends AbstractTokenFilterFactory {
            MockFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
                super(indexSettings, name, settings);
            }//from   ww w .  j a  va  2  s  .  c  om

            @Override
            public TokenStream create(TokenStream tokenStream) {
                return new MockTokenFilter(tokenStream, MockTokenFilter.ENGLISH_STOPSET);
            }
        }

        class AppendCharFilterFactory extends AbstractCharFilterFactory {
            AppendCharFilterFactory(IndexSettings indexSettings, Environment environment, String name,
                    Settings settings) {
                super(indexSettings, name);
            }

            @Override
            public Reader create(Reader reader) {
                return new AppendCharFilter(reader, "bar");
            }
        }

        @Override
        public Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {
            return singletonMap("append", AppendCharFilterFactory::new);
        }

        @Override
        public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
            return singletonMap("mock", MockFactory::new);
        }

        @Override
        public List<PreConfiguredCharFilter> getPreConfiguredCharFilters() {
            return singletonList(PreConfiguredCharFilter.singleton("append_foo", false,
                    reader -> new AppendCharFilter(reader, "foo")));
        }
    };
    registry = new AnalysisModule(environment, singletonList(plugin)).getAnalysisRegistry();
    indexAnalyzers = registry.build(idxSettings);
}

From source file: org.tallison.lucene.queryparser.spans.TestSpanOnlyQueryParser.java

License: Apache License

@BeforeClass
public static void beforeClass() throws Exception {

    lcMultiTermAnalyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, true);
    noopMultiTermAnalyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
    noStopAnalyzer = new Analyzer() {
        @Override//from w ww  . j a  v a2  s . c  o m
        public TokenStream normalize(String fieldName, TokenStream in) {
            return new MockStandardTokenizerFilter(new LowerCaseFilter(in));
        }

        @Override
        public TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true);
            TokenFilter filter = new MockStandardTokenizerFilter(tokenizer);
            return new TokenStreamComponents(tokenizer, filter);
        }
    };

    stopAnalyzer = new Analyzer() {
        @Override
        public TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true);
            TokenFilter filter = new MockStandardTokenizerFilter(tokenizer);
            filter = new MockTokenFilter(filter, STOP_WORDS);
            return new TokenStreamComponents(tokenizer, filter);
        }

        @Override
        protected TokenStream normalize(String fieldName, TokenStream in) {
            return new LowerCaseFilter(in);
        }
    };

    directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory, newIndexWriterConfig(stopAnalyzer)
            .setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000)).setMergePolicy(newLogMergePolicy()));
    String[] docs = new String[] { "the quick brown fox ",
            "jumped over the lazy brown dog and the brown green cat", "quick green fox", "abcdefghijk",
            "over green lazy",
            // longish doc for recursion test
            "eheu fugaces postume postume labuntur anni nec " + "pietas moram rugis et instanti senectae "
                    + "adferet indomitaeque morti",
            // non-whitespace language
            "\u666E \u6797 \u65AF \u987F \u5927 \u5B66", "reg/exp", "/regex/", "fuzzy~2", "wil*card",
            "wil?card", "prefi*", "single'quote"

    };

    for (int i = 0; i < docs.length; i++) {
        Document doc = new Document();
        doc.add(newTextField(FIELD, docs[i], Field.Store.YES));
        writer.addDocument(doc);
    }
    reader = writer.getReader();
    searcher = newSearcher(reader);
    writer.close();
}

From source file: org.tallison.lucene.search.concordance.ConcordanceTestBase.java

License: Apache License

public static Analyzer getAnalyzer(final CharacterRunAutomaton stops, final int posIncGap,
        final int charOffsetGap) {

    Analyzer analyzer = new Analyzer() {

        @Override/*from  www.  j a  v  a2  s .  com*/
        public TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true);
            TokenFilter filter = new MockTokenFilter(tokenizer, stops);
            return new TokenStreamComponents(tokenizer, filter);
        }

        @Override
        public int getPositionIncrementGap(String fieldName) {
            return posIncGap;
        }

        @Override
        public int getOffsetGap(String fieldName) {
            return charOffsetGap;
        }
    };
    return analyzer;
}

From source file: org.tallison.lucene.search.concordance.ConcordanceTestBase.java

License: Apache License

public static Analyzer getBigramAnalyzer(final CharacterRunAutomaton stops, final int posIncGap,
        final int charOffsetGap, final boolean includeUnigrams) {
    return new Analyzer() {

        @Override/* w ww . ja va2s  . co m*/
        public TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true);
            TokenFilter filter = new MockTokenFilter(tokenizer, stops);
            filter = new MockBigramFilter(filter, includeUnigrams);
            return new TokenStreamComponents(tokenizer, filter);
        }

        @Override
        public int getPositionIncrementGap(String fieldName) {
            return posIncGap;
        }

        @Override
        public int getOffsetGap(String fieldName) {
            return charOffsetGap;
        }
    };

}