Usage examples for the constructor org.apache.lucene.analysis.MockTokenFilter#MockTokenFilter
public MockTokenFilter(TokenStream input, CharacterRunAutomaton filter)
From source file:org.apache.solr.analysis.MockTokenFilterFactory.java
License:Apache License
/**
 * Wraps the given stream in a {@link MockTokenFilter} driven by this factory's
 * configured {@code filter} automaton.
 *
 * @param stream the upstream token stream to filter
 * @return the wrapped stream
 */
@Override
public MockTokenFilter create(TokenStream stream) {
    return new MockTokenFilter(stream, filter);
}
From source file:org.elasticsearch.action.admin.indices.TransportAnalyzeActionTests.java
License:Apache License
@Override public void setUp() throws Exception { super.setUp(); Settings settings = Settings.builder() .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build(); Settings indexSettings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) .put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID()) .put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard") .put("index.analysis.analyzer.custom_analyzer.filter", "mock") .put("index.analysis.normalizer.my_normalizer.type", "custom") .putList("index.analysis.normalizer.my_normalizer.filter", "lowercase").build(); IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings); environment = new Environment(settings); AnalysisPlugin plugin = new AnalysisPlugin() { class MockFactory extends AbstractTokenFilterFactory { MockFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); }//from ww w . j a va 2 s . 
c om @Override public TokenStream create(TokenStream tokenStream) { return new MockTokenFilter(tokenStream, MockTokenFilter.ENGLISH_STOPSET); } } class AppendCharFilterFactory extends AbstractCharFilterFactory { AppendCharFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) { super(indexSettings, name); } @Override public Reader create(Reader reader) { return new AppendCharFilter(reader, "bar"); } } @Override public Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() { return singletonMap("append", AppendCharFilterFactory::new); } @Override public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() { return singletonMap("mock", MockFactory::new); } @Override public List<PreConfiguredCharFilter> getPreConfiguredCharFilters() { return singletonList(PreConfiguredCharFilter.singleton("append_foo", false, reader -> new AppendCharFilter(reader, "foo"))); } }; registry = new AnalysisModule(environment, singletonList(plugin)).getAnalysisRegistry(); indexAnalyzers = registry.build(idxSettings); }
From source file:org.tallison.lucene.queryparser.spans.TestSpanOnlyQueryParser.java
License:Apache License
@BeforeClass public static void beforeClass() throws Exception { lcMultiTermAnalyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, true); noopMultiTermAnalyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false); noStopAnalyzer = new Analyzer() { @Override//from w ww . j a v a2 s . c o m public TokenStream normalize(String fieldName, TokenStream in) { return new MockStandardTokenizerFilter(new LowerCaseFilter(in)); } @Override public TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true); TokenFilter filter = new MockStandardTokenizerFilter(tokenizer); return new TokenStreamComponents(tokenizer, filter); } }; stopAnalyzer = new Analyzer() { @Override public TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true); TokenFilter filter = new MockStandardTokenizerFilter(tokenizer); filter = new MockTokenFilter(filter, STOP_WORDS); return new TokenStreamComponents(tokenizer, filter); } @Override protected TokenStream normalize(String fieldName, TokenStream in) { return new LowerCaseFilter(in); } }; directory = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), directory, newIndexWriterConfig(stopAnalyzer) .setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000)).setMergePolicy(newLogMergePolicy())); String[] docs = new String[] { "the quick brown fox ", "jumped over the lazy brown dog and the brown green cat", "quick green fox", "abcdefghijk", "over green lazy", // longish doc for recursion test "eheu fugaces postume postume labuntur anni nec " + "pietas moram rugis et instanti senectae " + "adferet indomitaeque morti", // non-whitespace language "\u666E \u6797 \u65AF \u987F \u5927 \u5B66", "reg/exp", "/regex/", "fuzzy~2", "wil*card", "wil?card", "prefi*", "single'quote" }; for (int i = 0; i < docs.length; i++) { Document doc = new Document(); doc.add(newTextField(FIELD, docs[i], 
Field.Store.YES)); writer.addDocument(doc); } reader = writer.getReader(); searcher = newSearcher(reader); writer.close(); }
From source file:org.tallison.lucene.search.concordance.ConcordanceTestBase.java
License:Apache License
/**
 * Creates a whitespace-tokenizing test analyzer that removes tokens matching
 * {@code stops} and reports the given position-increment and offset gaps.
 *
 * @param stops         automaton defining the stop words to filter out
 * @param posIncGap     value returned from {@code getPositionIncrementGap}
 * @param charOffsetGap value returned from {@code getOffsetGap}
 * @return the configured analyzer
 */
public static Analyzer getAnalyzer(final CharacterRunAutomaton stops, final int posIncGap, final int charOffsetGap) {
    return new Analyzer() {
        @Override
        public TokenStreamComponents createComponents(String fieldName) {
            Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, true);
            return new TokenStreamComponents(source, new MockTokenFilter(source, stops));
        }

        @Override
        public int getPositionIncrementGap(String fieldName) {
            return posIncGap;
        }

        @Override
        public int getOffsetGap(String fieldName) {
            return charOffsetGap;
        }
    };
}
From source file:org.tallison.lucene.search.concordance.ConcordanceTestBase.java
License:Apache License
/**
 * Creates a whitespace-tokenizing test analyzer that removes stop words and
 * then emits bigrams (optionally alongside the unigrams), with configurable
 * position-increment and offset gaps.
 *
 * @param stops           automaton defining the stop words to filter out
 * @param posIncGap       value returned from {@code getPositionIncrementGap}
 * @param charOffsetGap   value returned from {@code getOffsetGap}
 * @param includeUnigrams whether the bigram filter also passes through unigrams
 * @return the configured analyzer
 */
public static Analyzer getBigramAnalyzer(final CharacterRunAutomaton stops, final int posIncGap,
        final int charOffsetGap, final boolean includeUnigrams) {
    return new Analyzer() {
        @Override
        public TokenStreamComponents createComponents(String fieldName) {
            Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, true);
            TokenFilter chain = new MockBigramFilter(new MockTokenFilter(source, stops), includeUnigrams);
            return new TokenStreamComponents(source, chain);
        }

        @Override
        public int getPositionIncrementGap(String fieldName) {
            return posIncGap;
        }

        @Override
        public int getOffsetGap(String fieldName) {
            return charOffsetGap;
        }
    };
}