List of usage examples for the org.apache.lucene.analysis.synonym.SynonymFilter constructor
public SynonymFilter(TokenStream input, SynonymMap synonyms, boolean ignoreCase)
From source file:brightsolid.solr.plugins.TestTargetPositionQuerySynonyms.java
License:Apache License
@Override public void setUp() throws Exception { super.setUp(); String testFile = "one, uno, un\n" + "two, dos, too\n" + "three, free, tres"; SolrSynonymParser parser = new SolrSynonymParser(true, true, new MockAnalyzer(random())); parser.parse(new StringReader(testFile)); final SynonymMap map = parser.build(); Analyzer analyzer = new Analyzer() { @Override//from w w w . ja v a2 s.co m protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true); return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, false)); } }; directory = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), directory, analyzer); Document doc = new Document(); FieldType newType = new FieldType(org.apache.lucene.document.TextField.TYPE_STORED); newType.setOmitNorms(true); Field field = newField("field", "", newType); field.fieldType().setOmitNorms(true); doc.add(field); field.setStringValue("one two three"); iw.addDocument(doc); field.setStringValue("two three one"); iw.addDocument(doc); field.setStringValue("three one two"); iw.addDocument(doc); reader = iw.getReader(); iw.close(); searcher = newSearcher(reader); }
From source file:com.bizosys.unstructured.CustomAnalyzerExample.java
License:Apache License
@Override public TokenStream tokenStream(String field, Reader reader) { Tokenizer tokenizer = new StandardTokenizer(Version.LUCENE_36, reader); TokenStream ts = new LowerCaseFilter(Version.LUCENE_36, tokenizer); ts = new PorterStemFilter(ts); Set<String> stopwords = new HashSet<String>(); stopwords.add("a"); stopwords.add("in"); ts = new StopFilter(Version.LUCENE_36, ts, stopwords); SynonymMap smap = null;//from w ww.j av a 2 s.com try { SynonymMap.Builder sb = new SynonymMap.Builder(true); String base1 = "abinash"; String syn1 = "abinasha"; String syn11 = "abinashak"; sb.add(new CharsRef(base1), new CharsRef(syn1), true); sb.add(new CharsRef(base1), new CharsRef(syn11), true); String base2 = "bangalor"; String syn2 = "bangaloru"; sb.add(new CharsRef(base2), new CharsRef(syn2), true); smap = sb.build(); } catch (IOException ex) { ex.printStackTrace(System.err); } ts = new SynonymFilter(ts, smap, true); return ts; }
From source file:com.bizosys.unstructured.StopwordAndSynonymAnalyzer.java
License:Apache License
@Override public TokenStream tokenStream(String field, Reader reader) { TokenStream ts = new HSearchTokenizer(Version.LUCENE_36, reader); ts = new LowerCaseFilter(Version.LUCENE_36, ts); SynonymMap smap = null;// www. j av a2s . c o m try { if (null != conceptWithPipeSeparatedSynonums) { SynonymMap.Builder sb = new SynonymMap.Builder(true); List<String> tempList = new ArrayList<String>(); for (String concept : conceptWithPipeSeparatedSynonums.keySet()) { tempList.clear(); LineReaderUtil.fastSplit(tempList, conceptWithPipeSeparatedSynonums.get(concept), this.conceptWordSeparator); for (String syn : tempList) { int synLen = (null == syn) ? 0 : syn.length(); if (synLen == 0) continue; sb.add(new CharsRef(syn), new CharsRef(concept), false); } } if (conceptWithPipeSeparatedSynonums.size() > 0) { smap = sb.build(); if (null != smap) ts = new SynonymFilter(ts, smap, true); } } if (isStopFilterEnabled) { int stopwordsT = (null == stopwords) ? 0 : stopwords.size(); if (stopwordsT > 0) { ts = new StopFilter(Version.LUCENE_36, ts, stopwords); } } if (isAccentFilterEnabled) ts = new ASCIIFoldingFilter(ts); if (isSnoballStemEnabled) ts = new SnowballFilter(ts, new EnglishStemmer()); return ts; } catch (IOException ex) { ex.printStackTrace(System.err); throw new NullPointerException(ex.toString()); } }
From source file:com.isotrol.impe3.lucene.PortalSpanishAnalyzer.java
License:Open Source License
/**
 * Delegates tokenization to {@code base} and, when {@code postSynonymMap} is set, wraps
 * the resulting stream in a {@code SynonymFilter}.
 *
 * @param fieldName name of the field being analyzed, passed through to {@code base}
 * @param reader    source of the text to tokenize
 * @return the base stream, with the synonym stage added when a map is configured
 */
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
    final TokenStream baseStream = base.tokenStream(fieldName, reader);
    if (postSynonymMap == null) {
        return baseStream;
    }
    return new SynonymFilter(baseStream, postSynonymMap, true);
}
From source file:de.berlinbuzzwords.FrenchSynonymAnalyzer.java
License:Apache License
@Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { // Set up Tokenizer final Tokenizer source = new StandardTokenizer(matchVersion, reader); // Add filters TokenStream result = new LowerCaseFilter(matchVersion, source); // Lowercase result = new SynonymFilter(result, synonymMap, false); // Synonyms result = new FrenchLightStemFilter(result); // Stemming return new TokenStreamComponents(source, result); }
From source file:org.apache.solr.analysis.FSTSynonymFilterFactory.java
License:Apache License
public TokenStream create(TokenStream input) { // if the fst is null, it means there's actually no synonyms... just return the original stream // as there is nothing to do here. return map.fst == null ? input : new SynonymFilter(input, map, ignoreCase); }
From source file:org.apache.solr.rest.schema.analysis.FSTSynonymFilterFactory.java
License:Apache License
@Override public TokenStream create(TokenStream input) { // if the fst is null, it means there's actually no synonyms... just return the original stream // as there is nothing to do here. return map.fst == null ? input : new SynonymFilter(input, map, ignoreCase); }
From source file:org.elasticsearch.index.analysis.ESSolrSynonymParserTests.java
License:Apache License
public void testLenientParser() throws IOException, ParseException { ESSolrSynonymParser parser = new ESSolrSynonymParser(true, false, true, new StandardAnalyzer()); String rules = "&,and\n" + "come,advance,approach\n"; StringReader rulesReader = new StringReader(rules); parser.parse(rulesReader);//from w w w .ja v a2s. c om SynonymMap synonymMap = parser.build(); Tokenizer tokenizer = new StandardTokenizer(); tokenizer.setReader(new StringReader("approach quietly then advance & destroy")); TokenStream ts = new SynonymFilter(tokenizer, synonymMap, false); assertTokenStreamContents(ts, new String[] { "come", "quietly", "then", "come", "destroy" }); }
From source file:org.elasticsearch.index.analysis.ESSolrSynonymParserTests.java
License:Apache License
/**
 * Parses the rule "foo,bar,baz" with an analyzer whose stop set contains "bar", then
 * applies the resulting map behind a {@code StopFilter} using the same stop set. The
 * expected output keeps "foo", has no "bar" token, and emits "foo" in place of "baz".
 *
 * @throws IOException    if parsing the rules or building the map fails on I/O
 * @throws ParseException if the rules cannot be parsed
 */
public void testLenientParserWithSomeIncorrectLines() throws IOException, ParseException {
    CharArraySet stopWords = new CharArraySet(1, true);
    stopWords.add("bar");

    String synonymRules = "foo,bar,baz";
    ESSolrSynonymParser synonymParser =
            new ESSolrSynonymParser(true, false, true, new StandardAnalyzer(stopWords));
    synonymParser.parse(new StringReader(synonymRules));
    SynonymMap synonyms = synonymParser.build();

    Tokenizer source = new StandardTokenizer();
    source.setReader(new StringReader("first word is foo, then bar and lastly baz"));
    StopFilter withoutStopWords = new StopFilter(source, stopWords);
    TokenStream filtered = new SynonymFilter(withoutStopWords, synonyms, false);

    String[] expectedTokens = { "first", "word", "is", "foo", "then", "and", "lastly", "foo" };
    assertTokenStreamContents(filtered, expectedTokens);
}
From source file:org.elasticsearch.index.analysis.ESWordnetSynonymParserTests.java
License:Apache License
/**
 * Parses five WordNet-format synonym entries (two synsets, one of which involves "&amp;")
 * and checks the tokens produced when the resulting map is applied to a sample sentence:
 * "approach" and "advance" are emitted as "come" and no "&amp;" token appears.
 *
 * @throws IOException    if parsing the rules or building the map fails on I/O
 * @throws ParseException if the rules cannot be parsed
 */
public void testLenientParser() throws IOException, ParseException {
    String synonymRules = "s(100000001,1,'&',a,1,0).\n" + "s(100000001,2,'and',a,1,0).\n"
            + "s(100000002,1,'come',v,1,0).\n" + "s(100000002,2,'advance',v,1,0).\n"
            + "s(100000002,3,'approach',v,1,0).";

    ESWordnetSynonymParser synonymParser =
            new ESWordnetSynonymParser(true, false, true, new StandardAnalyzer());
    synonymParser.parse(new StringReader(synonymRules));
    SynonymMap synonyms = synonymParser.build();

    Tokenizer source = new StandardTokenizer();
    source.setReader(new StringReader("approach quietly then advance & destroy"));
    TokenStream filtered = new SynonymFilter(source, synonyms, false);

    String[] expectedTokens = { "come", "quietly", "then", "come", "destroy" };
    assertTokenStreamContents(filtered, expectedTokens);
}