Example usage for the org.apache.lucene.analysis.synonym.SynonymFilter constructor

List of usage examples for the org.apache.lucene.analysis.synonym.SynonymFilter constructor

Introduction

On this page you can find example usages of the org.apache.lucene.analysis.synonym.SynonymFilter constructor.

Prototype

public SynonymFilter(TokenStream input, SynonymMap synonyms, boolean ignoreCase) 

Source Link

Usage

From source file:brightsolid.solr.plugins.TestTargetPositionQuerySynonyms.java

License:Apache License

/**
 * Prepares the fixture: parses a small Solr-format synonym rule set, indexes three
 * documents through an analyzer that applies those synonyms, and opens a searcher
 * over the resulting index.
 *
 * <p>Assigns the {@code directory}, {@code reader} and {@code searcher} members.
 *
 * @throws Exception if the superclass set-up, rule parsing or indexing fails
 */
@Override
public void setUp() throws Exception {
    super.setUp();

    // One comma-separated synonym group per line.
    String synonymRules = "one, uno, un\n" + "two, dos, too\n" + "three, free, tres";

    SolrSynonymParser synonymParser = new SolrSynonymParser(true, true, new MockAnalyzer(random()));
    synonymParser.parse(new StringReader(synonymRules));
    final SynonymMap synonyms = synonymParser.build();

    // Whitespace tokenization followed by case-sensitive synonym matching (ignoreCase == false).
    Analyzer synonymAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
            return new TokenStreamComponents(source, new SynonymFilter(source, synonyms, false));
        }
    };

    directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory, synonymAnalyzer);
    Document doc = new Document();

    // Stored text field with norms omitted.
    FieldType normlessType = new FieldType(org.apache.lucene.document.TextField.TYPE_STORED);
    normlessType.setOmitNorms(true);
    Field field = newField("field", "", normlessType);
    field.fieldType().setOmitNorms(true);
    doc.add(field);

    // The same Document/Field pair is reused; only the field value changes per document.
    String[] contents = { "one two three", "two three one", "three one two" };
    for (String content : contents) {
        field.setStringValue(content);
        writer.addDocument(doc);
    }

    reader = writer.getReader();
    writer.close();
    searcher = newSearcher(reader);
}

From source file:com.bizosys.unstructured.CustomAnalyzerExample.java

License:Apache License

/**
 * Builds the analysis chain for a field: standard tokenization, lower-casing,
 * Porter stemming, removal of the stop words {@code "a"} and {@code "in"}, and
 * finally synonym expansion from a small hard-coded synonym map.
 *
 * <p>The synonym entries are written in already-stemmed form because the
 * {@link SynonymFilter} sits after the stemmer in the chain.
 *
 * @param field  name of the field being analyzed (not used by this chain)
 * @param reader source of the text to tokenize
 * @return the fully wrapped token stream
 * @throws IllegalStateException if the synonym map cannot be built
 */
@Override
public TokenStream tokenStream(String field, Reader reader) {
    Tokenizer tokenizer = new StandardTokenizer(Version.LUCENE_36, reader);
    TokenStream ts = new LowerCaseFilter(Version.LUCENE_36, tokenizer);
    ts = new PorterStemFilter(ts);

    Set<String> stopwords = new HashSet<String>();
    stopwords.add("a");
    stopwords.add("in");
    ts = new StopFilter(Version.LUCENE_36, ts, stopwords);

    SynonymMap smap;
    try {
        SynonymMap.Builder sb = new SynonymMap.Builder(true);

        String base1 = "abinash";
        String syn1 = "abinasha";
        String syn11 = "abinashak";
        sb.add(new CharsRef(base1), new CharsRef(syn1), true);
        sb.add(new CharsRef(base1), new CharsRef(syn11), true);

        String base2 = "bangalor";
        String syn2 = "bangaloru";
        sb.add(new CharsRef(base2), new CharsRef(syn2), true);

        smap = sb.build();
    } catch (IOException ex) {
        // Previously the error was only printed and a null map was handed to
        // SynonymFilter, which then failed with an unrelated exception. Fail
        // here instead, keeping the real cause attached.
        throw new IllegalStateException("Failed to build the synonym map", ex);
    }

    return new SynonymFilter(ts, smap, true);
}

From source file:com.bizosys.unstructured.StopwordAndSynonymAnalyzer.java

License:Apache License

/**
 * Builds the analysis chain for a field. The stream is tokenized by
 * {@code HSearchTokenizer} and lower-cased, then optionally wrapped, in this order, by:
 * <ol>
 *   <li>a {@link SynonymFilter} mapping every non-empty synonym of each configured
 *       concept to that concept (only when the concept map is non-null and non-empty);</li>
 *   <li>a {@code StopFilter} (when enabled and the stop-word set is non-empty);</li>
 *   <li>an {@code ASCIIFoldingFilter} (when accent filtering is enabled);</li>
 *   <li>a {@code SnowballFilter} using the English stemmer (when stemming is enabled).</li>
 * </ol>
 *
 * @param field  name of the field being analyzed (not used by this chain)
 * @param reader source of the text to tokenize
 * @return the fully wrapped token stream
 * @throws NullPointerException if an {@link IOException} occurs while building the
 *         chain; this exception type is kept for compatibility with existing
 *         callers, and the original {@code IOException} is available via
 *         {@link Throwable#getCause()}
 */
@Override
public TokenStream tokenStream(String field, Reader reader) {

    TokenStream ts = new HSearchTokenizer(Version.LUCENE_36, reader);
    ts = new LowerCaseFilter(Version.LUCENE_36, ts);

    try {
        if (null != conceptWithPipeSeparatedSynonums) {
            SynonymMap.Builder sb = new SynonymMap.Builder(true);
            List<String> tempList = new ArrayList<String>();

            for (String concept : conceptWithPipeSeparatedSynonums.keySet()) {
                // Split this concept's synonym string into tempList, reusing the list.
                tempList.clear();
                LineReaderUtil.fastSplit(tempList, conceptWithPipeSeparatedSynonums.get(concept),
                        this.conceptWordSeparator);
                for (String syn : tempList) {
                    if (null == syn || syn.length() == 0) {
                        continue;
                    }
                    // Synonym -> concept; includeOrig is false.
                    sb.add(new CharsRef(syn), new CharsRef(concept), false);
                }
            }
            if (conceptWithPipeSeparatedSynonums.size() > 0) {
                SynonymMap smap = sb.build();
                if (null != smap) {
                    ts = new SynonymFilter(ts, smap, true);
                }
            }
        }

        if (isStopFilterEnabled) {
            int stopwordsT = (null == stopwords) ? 0 : stopwords.size();
            if (stopwordsT > 0) {
                ts = new StopFilter(Version.LUCENE_36, ts, stopwords);
            }
        }

        if (isAccentFilterEnabled) {
            ts = new ASCIIFoldingFilter(ts);
        }
        if (isSnoballStemEnabled) {
            ts = new SnowballFilter(ts, new EnglishStemmer());
        }

        return ts;

    } catch (IOException ex) {
        // Keep the historical exception type, but chain the real cause so the
        // original stack trace is not lost.
        NullPointerException failure = new NullPointerException(ex.toString());
        failure.initCause(ex);
        throw failure;
    }
}

From source file:com.isotrol.impe3.lucene.PortalSpanishAnalyzer.java

License:Open Source License

/**
 * Produces the token stream of the wrapped {@code base} analyzer and, when a
 * post-analysis synonym map is configured, wraps it in a case-insensitive
 * {@link SynonymFilter}.
 *
 * @param fieldName name of the field being analyzed
 * @param reader    source of the text to tokenize
 * @return the base stream, synonym-filtered if {@code postSynonymMap} is non-null
 */
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
    final TokenStream baseStream = base.tokenStream(fieldName, reader);
    return (postSynonymMap == null)
            ? baseStream
            : new SynonymFilter(baseStream, postSynonymMap, true);
}

From source file:de.berlinbuzzwords.FrenchSynonymAnalyzer.java

License:Apache License

/**
 * Assembles the analysis chain: standard tokenization, lower-casing,
 * case-sensitive synonym matching against {@code synonymMap}, then French
 * light stemming.
 *
 * @param fieldName name of the field being analyzed (not used by this chain)
 * @param reader    source of the text to tokenize
 * @return components pairing the tokenizer with the last filter of the chain
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    final Tokenizer tokenizer = new StandardTokenizer(matchVersion, reader);

    // Each stage wraps the previous one; synonyms are matched before stemming.
    final TokenStream lowerCased = new LowerCaseFilter(matchVersion, tokenizer);
    final TokenStream withSynonyms = new SynonymFilter(lowerCased, synonymMap, false);
    final TokenStream stemmed = new FrenchLightStemFilter(withSynonyms);

    return new TokenStreamComponents(tokenizer, stemmed);
}

From source file:org.apache.solr.analysis.FSTSynonymFilterFactory.java

License:Apache License

/**
 * Wraps the given stream in a {@link SynonymFilter} backed by this factory's synonym map.
 *
 * @param input the stream to filter
 * @return {@code input} itself when the map has no FST, otherwise a new synonym filter over it
 */
public TokenStream create(TokenStream input) {
    if (map.fst == null) {
        // A null FST means there are no synonyms at all, so there is nothing to
        // filter: hand back the original stream untouched.
        return input;
    }
    return new SynonymFilter(input, map, ignoreCase);
}

From source file:org.apache.solr.rest.schema.analysis.FSTSynonymFilterFactory.java

License:Apache License

/**
 * Wraps the given stream in a {@link SynonymFilter} backed by this factory's synonym map.
 *
 * @param input the stream to filter
 * @return {@code input} itself when the map has no FST, otherwise a new synonym filter over it
 */
@Override
public TokenStream create(TokenStream input) {
    if (map.fst == null) {
        // A null FST means there are no synonyms at all, so there is nothing to
        // filter: hand back the original stream untouched.
        return input;
    }
    return new SynonymFilter(input, map, ignoreCase);
}

From source file:org.elasticsearch.index.analysis.ESSolrSynonymParserTests.java

License:Apache License

/**
 * Parses two Solr-format synonym rules and checks the tokens a
 * {@link SynonymFilter} built from them emits for a sample sentence:
 * "approach" and "advance" both come out as "come", and no token is emitted
 * for "&amp;".
 *
 * @throws IOException    if reading the rules or the token stream fails
 * @throws ParseException if the rules cannot be parsed
 */
public void testLenientParser() throws IOException, ParseException {
    ESSolrSynonymParser lenientParser = new ESSolrSynonymParser(true, false, true, new StandardAnalyzer());
    lenientParser.parse(new StringReader("&,and\n" + "come,advance,approach\n"));
    SynonymMap synonyms = lenientParser.build();

    Tokenizer source = new StandardTokenizer();
    source.setReader(new StringReader("approach quietly then advance & destroy"));
    TokenStream filtered = new SynonymFilter(source, synonyms, false);

    String[] expectedTerms = { "come", "quietly", "then", "come", "destroy" };
    assertTokenStreamContents(filtered, expectedTerms);
}

From source file:org.elasticsearch.index.analysis.ESSolrSynonymParserTests.java

License:Apache License

/**
 * Parses the rule {@code foo,bar,baz} with an analyzer whose stop set contains
 * "bar", then checks the tokens emitted when the same stop set and the
 * resulting synonym map are applied to a sample sentence: "bar" is absent from
 * the output and "baz" comes out as "foo".
 *
 * @throws IOException    if reading the rules or the token stream fails
 * @throws ParseException if the rules cannot be parsed
 */
public void testLenientParserWithSomeIncorrectLines() throws IOException, ParseException {
    CharArraySet stopWords = new CharArraySet(1, true);
    stopWords.add("bar");

    ESSolrSynonymParser lenientParser =
            new ESSolrSynonymParser(true, false, true, new StandardAnalyzer(stopWords));
    lenientParser.parse(new StringReader("foo,bar,baz"));
    SynonymMap synonyms = lenientParser.build();

    Tokenizer source = new StandardTokenizer();
    source.setReader(new StringReader("first word is foo, then bar and lastly baz"));

    // Stop words are removed before synonyms are matched.
    TokenStream withoutStopWords = new StopFilter(source, stopWords);
    TokenStream filtered = new SynonymFilter(withoutStopWords, synonyms, false);

    String[] expectedTerms = { "first", "word", "is", "foo", "then", "and", "lastly", "foo" };
    assertTokenStreamContents(filtered, expectedTerms);
}

From source file:org.elasticsearch.index.analysis.ESWordnetSynonymParserTests.java

License:Apache License

/**
 * Parses five WordNet-format synonym entries (two synsets) and checks the
 * tokens a {@link SynonymFilter} built from them emits for a sample sentence:
 * "approach" and "advance" both come out as "come", and no token is emitted
 * for "&amp;".
 *
 * @throws IOException    if reading the rules or the token stream fails
 * @throws ParseException if the rules cannot be parsed
 */
public void testLenientParser() throws IOException, ParseException {
    ESWordnetSynonymParser lenientParser =
            new ESWordnetSynonymParser(true, false, true, new StandardAnalyzer());

    String wordnetRules = "s(100000001,1,'&',a,1,0).\n" + "s(100000001,2,'and',a,1,0).\n"
            + "s(100000002,1,'come',v,1,0).\n" + "s(100000002,2,'advance',v,1,0).\n"
            + "s(100000002,3,'approach',v,1,0).";
    lenientParser.parse(new StringReader(wordnetRules));
    SynonymMap synonyms = lenientParser.build();

    Tokenizer source = new StandardTokenizer();
    source.setReader(new StringReader("approach quietly then advance & destroy"));
    TokenStream filtered = new SynonymFilter(source, synonyms, false);

    String[] expectedTerms = { "come", "quietly", "then", "come", "destroy" };
    assertTokenStreamContents(filtered, expectedTerms);
}