List of usage examples for the org.apache.lucene.analysis.core.LowerCaseFilterFactory constructor:
public LowerCaseFilterFactory(Map<String, String> args)
From source file: org.aksw.palmetto.corpus.lucene.SimpleAnalyzer.java
License: Open Source License
/**
 * Builds the analyzer's factories: a pattern tokenizer, plus an optional
 * lower-case filter.
 *
 * @param lowerCase whether tokens should additionally be lower-cased;
 *                  when {@code false}, {@code lowerCaseFilterFactory} is left {@code null}
 */
public SimpleAnalyzer(boolean lowerCase) {
    // Tokenizer configuration: match PATTERN and emit capture group 0 (the whole match).
    Map<String, String> tokenizerArgs = new HashMap<String, String>();
    tokenizerArgs.put(PatternTokenizerFactory.PATTERN, PATTERN);
    tokenizerArgs.put(PatternTokenizerFactory.GROUP, "0");
    tokenizerArgs.put(AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM, version.name());
    tokenizerFactory = new PatternTokenizerFactory(tokenizerArgs);

    if (!lowerCase) {
        lowerCaseFilterFactory = null;
        return;
    }
    // A fresh map is built here because the first one was already handed to
    // the tokenizer factory above.
    Map<String, String> filterArgs = new HashMap<String, String>();
    filterArgs.put(AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM, version.name());
    lowerCaseFilterFactory = new LowerCaseFilterFactory(filterArgs);
}
From source file: org.apache.solr.analysis.ko.TestKoreanTokenizerFactory.java
License: Apache License
@Override public void setUp() throws Exception { super.setUp(); // initCore(); Map<String, String> args = new HashMap<>(); Map<String, String> kfArgs = new HashMap<>(); {// w w w . ja v a2 s. co m kfArgs.put("hasOrigin", "true"); kfArgs.put("hasCNoun", "true"); kfArgs.put("bigrammable", "false"); kfArgs.put("queryMode", "false"); } kt = new KoreanTokenizerFactory(args); lc = new LowerCaseFilterFactory(args); kf = new KoreanFilterFactory(kfArgs); hmf = new HanjaMappingFilterFactory(args); kmf = new KeywordMarkerFilterFactory(args); pdf = new PunctuationDelimitFilterFactory(args); krf = new KeywordRepeatFilterFactory(args); epf = new EnglishPossessiveFilterFactory(args); psf = new PorterStemFilterFactory(args); rdt = new RemoveDuplicatesTokenFilterFactory(args); }
From source file: org.apache.solr.analysis.TokenizerChainTest.java
License: Apache License
/**
 * Verifies that {@code TokenizerChain.normalize} runs the chain's
 * normalization-aware filters: lower-casing followed by ASCII folding
 * should turn {@code "FOOB\u00c4"} into {@code "fooba"}.
 */
@Test
public void testNormalization() throws Exception {
    String fieldName = "f";
    // Collections.emptyMap() replaces the raw Collections.EMPTY_MAP constant:
    // it is the same immutable instance, but type-safe (no unchecked-assignment
    // warning from assigning a raw Map to Map<String, String>).
    TokenFilterFactory[] filters = new TokenFilterFactory[2];
    filters[0] = new LowerCaseFilterFactory(Collections.emptyMap());
    filters[1] = new ASCIIFoldingFilterFactory(Collections.emptyMap());
    TokenizerChain tokenizerChain = new TokenizerChain(new MockTokenizerFactory(Collections.emptyMap()), filters);
    assertEquals(new BytesRef("fooba"), tokenizerChain.normalize(fieldName, "FOOB\u00c4"));
}