Usage examples for org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory.inform(ResourceLoader)
@Override
public void inform(ResourceLoader loader) throws IOException
From source file:org.apache.solr.analysis.TestWordDelimiterFilterFactory.java
License:Apache License
@Test public void testCustomTypes() throws Exception { String testText = "I borrowed $5,400.00 at 25% interest-rate"; ResourceLoader loader = new SolrResourceLoader("solr/collection1"); Map<String, String> args = new HashMap<String, String>(); args.put("generateWordParts", "1"); args.put("generateNumberParts", "1"); args.put("catenateWords", "1"); args.put("catenateNumbers", "1"); args.put("catenateAll", "0"); args.put("splitOnCaseChange", "1"); /* default behavior */ WordDelimiterFilterFactory factoryDefault = new WordDelimiterFilterFactory(args); factoryDefault.inform(loader); TokenStream ts = factoryDefault//from w w w . ja v a 2s . c o m .create(new MockTokenizer(new StringReader(testText), MockTokenizer.WHITESPACE, false)); BaseTokenStreamTestCase.assertTokenStreamContents(ts, new String[] { "I", "borrowed", "5", "400", "00", "540000", "at", "25", "interest", "rate", "interestrate" }); ts = factoryDefault .create(new MockTokenizer(new StringReader("foo\u200Dbar"), MockTokenizer.WHITESPACE, false)); BaseTokenStreamTestCase.assertTokenStreamContents(ts, new String[] { "foo", "bar", "foobar" }); /* custom behavior */ args = new HashMap<String, String>(); // use a custom type mapping args.put("generateWordParts", "1"); args.put("generateNumberParts", "1"); args.put("catenateWords", "1"); args.put("catenateNumbers", "1"); args.put("catenateAll", "0"); args.put("splitOnCaseChange", "1"); args.put("types", "wdftypes.txt"); WordDelimiterFilterFactory factoryCustom = new WordDelimiterFilterFactory(args); factoryCustom.inform(loader); ts = factoryCustom.create(new MockTokenizer(new StringReader(testText), MockTokenizer.WHITESPACE, false)); BaseTokenStreamTestCase.assertTokenStreamContents(ts, new String[] { "I", "borrowed", "$5,400.00", "at", "25%", "interest", "rate", "interestrate" }); /* test custom behavior with a char > 0x7F, because we had to make a larger byte[] */ ts = factoryCustom .create(new MockTokenizer(new StringReader("foo\u200Dbar"), 
MockTokenizer.WHITESPACE, false)); BaseTokenStreamTestCase.assertTokenStreamContents(ts, new String[] { "foo\u200Dbar" }); }
From source file:uk.gov.nationalarchives.discovery.taxonomy.common.config.LuceneIAViewConfiguration.java
License:Mozilla Public License
/**
 * Creates the {@link WordDelimiterFilterFactory} bean.
 *
 * The factory is configured with {@code preserveOriginal}, {@code generateWordParts}
 * and {@code catenateWords} set to "1", plus the configured {@code luceneMatchVersion}.
 * It is then informed with a class-path based resource loader.
 *
 * If informing the factory fails with an {@link IOException}, the failure is logged
 * and the factory is still returned.
 *
 * @return the configured factory
 */
@Bean
public WordDelimiterFilterFactory wordDelimiterFilterFactory() {
    Map<String, String> wordDelimiterFilterArgs = new HashMap<String, String>();
    wordDelimiterFilterArgs.put("preserveOriginal", "1");
    wordDelimiterFilterArgs.put("generateWordParts", "1");
    wordDelimiterFilterArgs.put("catenateWords", "1");
    wordDelimiterFilterArgs.put("luceneMatchVersion", version);

    WordDelimiterFilterFactory wordDelimiterFilterFactory =
            new WordDelimiterFilterFactory(wordDelimiterFilterArgs);
    try {
        ResourceLoader loader = new ClasspathResourceLoader(getClass());
        wordDelimiterFilterFactory.inform(loader);
    } catch (IOException e) {
        // Pass the exception as the trailing argument so the logger records the
        // full stack trace and cause chain, not just the message text.
        logger.error(".wordDelimiterFilterFactory: an error occured while creating the Filter factory: {}",
                e.getMessage(), e);
    }
    return wordDelimiterFilterFactory;
}