List of usage examples for the org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory constructor
public WordDelimiterFilterFactory(Map<String, String> args)
From source file:com.xiaomi.linden.lucene.analyzer.LindenWordDelimiterAnalyzer.java
License:Apache License
/**
 * Creates the analyzer and the {@link WordDelimiterFilterFactory} it delegates to.
 * <p>
 * The entries keyed by {@code SET_STOP_WORDS} and {@code TO_LOWER_CASE} are
 * analyzer-level switches: when present they are parsed with
 * {@link Boolean#parseBoolean(String)} and withheld from the factory. Every
 * other entry is passed to the {@link WordDelimiterFilterFactory} constructor.
 * The caller's map is never modified by this constructor.
 * <p>
 * Options understood by the word delimiter filter:
 * <ul>
 * <li>generateWordParts - causes parts of words to be generated:
 * {@code "PowerShot" => "Power" "Shot"}</li>
 * <li>generateNumberParts - causes number subwords to be generated:
 * {@code "500-42" => "500" "42"}</li>
 * <li>catenateWords - causes maximum runs of word parts to be catenated:
 * {@code "wi-fi" => "wifi"}</li>
 * <li>catenateNumbers - causes maximum runs of number parts to be catenated:
 * {@code "500-42" => "50042"}</li>
 * <li>catenateAll - causes all subword parts to be catenated:
 * {@code "wi-fi-4000" => "wifi4000"}</li>
 * <li>preserveOriginal - causes original words to be preserved and added to the
 * subword list (defaults to false):
 * {@code "500-42" => "500" "42" "500-42"}</li>
 * <li>splitOnCaseChange - if not set, causes case changes to be ignored
 * (subwords will only be generated given SUBWORD_DELIM tokens)</li>
 * <li>splitOnNumerics - if not set, causes numeric changes to be ignored
 * (subwords will only be generated given SUBWORD_DELIM tokens)</li>
 * <li>stemEnglishPossessive - causes trailing "'s" to be removed for each
 * subword: {@code "O'Neil's" => "O", "Neil"}</li>
 * </ul>
 *
 * @param params analyzer and filter options; must not be {@code null}
 */
public LindenWordDelimiterAnalyzer(Map<String, String> params) {
  // Work on a private copy so that stripping the analyzer-level keys (and
  // whatever the factory does with its argument map) cannot affect a map
  // the caller may reuse or may have made unmodifiable.
  Map<String, String> factoryArgs = new java.util.HashMap<String, String>(params);

  // NOTE(review): these keys are presumably removed because the factory
  // rejects arguments it does not recognize - confirm against Lucene.
  if (factoryArgs.containsKey(SET_STOP_WORDS)) {
    this.setStopWords = Boolean.parseBoolean(factoryArgs.remove(SET_STOP_WORDS));
  }
  if (factoryArgs.containsKey(TO_LOWER_CASE)) {
    this.toLowerCase = Boolean.parseBoolean(factoryArgs.remove(TO_LOWER_CASE));
  }

  factoryDefault = new WordDelimiterFilterFactory(factoryArgs);
}
From source file:org.apache.solr.analysis.TestWordDelimiterFilterFactory.java
License:Apache License
@Test public void testCustomTypes() throws Exception { String testText = "I borrowed $5,400.00 at 25% interest-rate"; ResourceLoader loader = new SolrResourceLoader("solr/collection1"); Map<String, String> args = new HashMap<String, String>(); args.put("generateWordParts", "1"); args.put("generateNumberParts", "1"); args.put("catenateWords", "1"); args.put("catenateNumbers", "1"); args.put("catenateAll", "0"); args.put("splitOnCaseChange", "1"); /* default behavior */ WordDelimiterFilterFactory factoryDefault = new WordDelimiterFilterFactory(args); factoryDefault.inform(loader);// w w w . j a va2s . c o m TokenStream ts = factoryDefault .create(new MockTokenizer(new StringReader(testText), MockTokenizer.WHITESPACE, false)); BaseTokenStreamTestCase.assertTokenStreamContents(ts, new String[] { "I", "borrowed", "5", "400", "00", "540000", "at", "25", "interest", "rate", "interestrate" }); ts = factoryDefault .create(new MockTokenizer(new StringReader("foo\u200Dbar"), MockTokenizer.WHITESPACE, false)); BaseTokenStreamTestCase.assertTokenStreamContents(ts, new String[] { "foo", "bar", "foobar" }); /* custom behavior */ args = new HashMap<String, String>(); // use a custom type mapping args.put("generateWordParts", "1"); args.put("generateNumberParts", "1"); args.put("catenateWords", "1"); args.put("catenateNumbers", "1"); args.put("catenateAll", "0"); args.put("splitOnCaseChange", "1"); args.put("types", "wdftypes.txt"); WordDelimiterFilterFactory factoryCustom = new WordDelimiterFilterFactory(args); factoryCustom.inform(loader); ts = factoryCustom.create(new MockTokenizer(new StringReader(testText), MockTokenizer.WHITESPACE, false)); BaseTokenStreamTestCase.assertTokenStreamContents(ts, new String[] { "I", "borrowed", "$5,400.00", "at", "25%", "interest", "rate", "interestrate" }); /* test custom behavior with a char > 0x7F, because we had to make a larger byte[] */ ts = factoryCustom .create(new MockTokenizer(new StringReader("foo\u200Dbar"), MockTokenizer.WHITESPACE, 
false)); BaseTokenStreamTestCase.assertTokenStreamContents(ts, new String[] { "foo\u200Dbar" }); }
From source file:uk.gov.nationalarchives.discovery.taxonomy.common.config.LuceneIAViewConfiguration.java
License:Mozilla Public License
/**
 * Builds the {@link WordDelimiterFilterFactory} bean.
 * <p>
 * The factory is configured with {@code preserveOriginal}, {@code generateWordParts}
 * and {@code catenateWords} enabled and with {@code luceneMatchVersion} set to the
 * {@code version} field, then informed with a {@link ClasspathResourceLoader}
 * rooted at this class.
 * <p>
 * An {@link IOException} raised while informing the factory is logged, not
 * rethrown; the factory instance is returned in either case.
 *
 * @return the configured factory
 */
@Bean
public WordDelimiterFilterFactory wordDelimiterFilterFactory() {
  Map<String, String> wordDelimiterFilterArgs = new HashMap<String, String>();
  wordDelimiterFilterArgs.put("preserveOriginal", "1");
  wordDelimiterFilterArgs.put("generateWordParts", "1");
  wordDelimiterFilterArgs.put("catenateWords", "1");
  wordDelimiterFilterArgs.put("luceneMatchVersion", version);

  WordDelimiterFilterFactory wordDelimiterFilterFactory =
      new WordDelimiterFilterFactory(wordDelimiterFilterArgs);

  try {
    ResourceLoader loader = new ClasspathResourceLoader(getClass());
    wordDelimiterFilterFactory.inform(loader);
  } catch (IOException e) {
    // Pass the exception itself as the trailing argument so the logger records
    // the stack trace and cause, not only the message text.
    logger.error(".wordDelimiterFilterFactory: an error occured while creating the Filter factory: {}",
        e.getMessage(), e);
  }
  return wordDelimiterFilterFactory;
}