Example usage for org.apache.lucene.analysis.synonym WordnetSynonymParser WordnetSynonymParser

List of usage examples for org.apache.lucene.analysis.synonym WordnetSynonymParser WordnetSynonymParser

Introduction

On this page you can find example usage of the org.apache.lucene.analysis.synonym.WordnetSynonymParser constructor.

Prototype

public WordnetSynonymParser(boolean dedup, boolean expand, Analyzer analyzer) 

Source Link

Usage

From source file:com.github.le11.nls.solr.TypeAwareSynonymFilterFactory.java

License:Apache License

/**
 * Load synonyms from the wordnet format, "format=wordnet".
 *
 * @param loader   resource loader used to open the synonym resource(s)
 * @param dedup    whether the parser should de-duplicate identical rules
 * @param analyzer analyzer used to normalize entries while parsing
 * @return the built synonym map
 * @throws IOException    if a synonym resource cannot be read
 * @throws ParseException if a synonym file is malformed
 */
private SynonymMap loadWordnetSynonyms(ResourceLoader loader, boolean dedup, Analyzer analyzer)
        throws IOException, ParseException {
    final boolean expand = getBoolean("expand", true);
    String synonyms = args.get("synonyms");
    if (synonyms == null)
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Missing required argument 'synonyms'.");

    // Strict UTF-8 decoding: report (rather than silently replace) bad byte sequences.
    CharsetDecoder utf8Decoder = Charset.forName("UTF-8").newDecoder()
            .onMalformedInput(CodingErrorAction.REPORT)
            .onUnmappableCharacter(CodingErrorAction.REPORT);

    WordnetSynonymParser wordnetParser = new WordnetSynonymParser(dedup, expand, analyzer);
    if (new File(synonyms).exists()) {
        // 'synonyms' names a single existing file: feed it to the parser directly.
        utf8Decoder.reset();
        wordnetParser.add(new InputStreamReader(loader.openResource(synonyms), utf8Decoder));
    } else {
        // Otherwise interpret the value as a delimited list of resource names.
        for (String resourceName : StrUtils.splitFileNames(synonyms)) {
            utf8Decoder.reset(); // the decoder is shared, so reset it before each resource
            wordnetParser.add(new InputStreamReader(loader.openResource(resourceName), utf8Decoder));
        }
    }
    return wordnetParser.build();
}

From source file:org.elasticsearch.index.analysis.SynonymTokenFilterFactory.java

License:Apache License

/**
 * Builds the synonym map for this token filter from either an inline
 * {@code synonyms} word list or a {@code synonyms_path} file, parsed in
 * Solr or Wordnet format depending on the {@code format} setting.
 *
 * @throws ElasticsearchIllegalArgumentException if neither rule source is
 *         configured, the tokenizer cannot be found, or parsing fails
 */
@Inject
public SynonymTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env,
        IndicesAnalysisService indicesAnalysisService, Map<String, TokenizerFactoryFactory> tokenizerFactories,
        @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettings, name, settings);

    // Resolve the rule source: an inline "synonyms" list takes precedence over "synonyms_path".
    Reader rulesReader = null;
    if (settings.getAsArray("synonyms", null) != null) {
        List<String> rules = Analysis.getWordList(env, settings, "synonyms");
        StringBuilder sb = new StringBuilder();
        final String lineSeparator = System.getProperty("line.separator"); // hoisted: loop-invariant
        for (String line : rules) {
            sb.append(line).append(lineSeparator);
        }
        rulesReader = new FastStringReader(sb.toString());
    } else if (settings.get("synonyms_path") != null) {
        rulesReader = Analysis.getReaderFromFile(env, settings, "synonyms_path");
    } else {
        throw new ElasticsearchIllegalArgumentException(
                "synonym requires either `synonyms` or `synonyms_path` to be configured");
    }

    this.ignoreCase = settings.getAsBoolean("ignore_case", false);
    boolean expand = settings.getAsBoolean("expand", true);

    String tokenizerName = settings.get("tokenizer", "whitespace");

    // Look the tokenizer up in the index-local registry first, then the node-level one.
    TokenizerFactoryFactory tokenizerFactoryFactory = tokenizerFactories.get(tokenizerName);
    if (tokenizerFactoryFactory == null) {
        tokenizerFactoryFactory = indicesAnalysisService.tokenizerFactoryFactory(tokenizerName);
    }
    if (tokenizerFactoryFactory == null) {
        throw new ElasticsearchIllegalArgumentException(
                "failed to find tokenizer [" + tokenizerName + "] for synonym token filter");
    }
    // NOTE(review): this passes the filter-local `settings`, while the near-identical
    // SynonymWithPayloadsTokenFilterFactory passes `indexSettings` — confirm which is intended.
    final TokenizerFactory tokenizerFactory = tokenizerFactoryFactory.create(tokenizerName, settings);

    // Analyzer used only to normalize the synonym rules while they are parsed.
    Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer tokenizer = tokenizerFactory == null
                    ? new WhitespaceTokenizer(Lucene.ANALYZER_VERSION, reader)
                    : tokenizerFactory.create(reader);
            // Lower-case during parsing too, so rules match case-insensitively at search time.
            TokenStream stream = ignoreCase ? new LowerCaseFilter(Lucene.ANALYZER_VERSION, tokenizer)
                    : tokenizer;
            return new TokenStreamComponents(tokenizer, stream);
        }
    };

    try {
        SynonymMap.Builder parser = null;

        if ("wordnet".equalsIgnoreCase(settings.get("format"))) {
            parser = new WordnetSynonymParser(true, expand, analyzer);
            ((WordnetSynonymParser) parser).parse(rulesReader);
        } else {
            // Solr format is the default when "format" is absent or unrecognized.
            parser = new SolrSynonymParser(true, expand, analyzer);
            ((SolrSynonymParser) parser).parse(rulesReader);
        }

        synonymMap = parser.build();
    } catch (Exception e) {
        throw new ElasticsearchIllegalArgumentException("failed to build synonyms", e);
    }
}

From source file:org.elasticsearch.plugin.analysis.SynonymWithPayloadsTokenFilterFactory.java

License:Apache License

/**
 * Builds the synonym map for this payload-aware token filter from either an
 * inline {@code synonyms} word list or a {@code synonyms_path} file, parsed in
 * Solr or Wordnet format depending on the {@code format} setting.
 *
 * @throws ElasticsearchIllegalArgumentException if neither rule source is
 *         configured, the tokenizer cannot be found, or parsing fails
 */
@Inject
public SynonymWithPayloadsTokenFilterFactory(Index index, @IndexSettings Settings indexSettings,
        Environment env, IndicesAnalysisService indicesAnalysisService,
        Map<String, TokenizerFactoryFactory> tokenizerFactories, @Assisted String name,
        @Assisted Settings settings) {
    super(index, indexSettings, name, settings);

    // Resolve the rule source: an inline "synonyms" list takes precedence over "synonyms_path".
    Reader rulesReader = null;
    if (settings.getAsArray("synonyms", null) != null) {
        List<String> rules = Analysis.getWordList(env, settings, "synonyms");
        StringBuilder sb = new StringBuilder();
        final String lineSeparator = System.getProperty("line.separator"); // hoisted: loop-invariant
        for (String line : rules) {
            sb.append(line).append(lineSeparator);
        }
        rulesReader = new FastStringReader(sb.toString());
    } else if (settings.get("synonyms_path") != null) {
        rulesReader = Analysis.getReaderFromFile(env, settings, "synonyms_path");
    } else {
        throw new ElasticsearchIllegalArgumentException(
                "synonym requires either `synonyms` or `synonyms_path` to be configured");
    }

    this.ignoreCase = settings.getAsBoolean("ignore_case", false);
    boolean expand = settings.getAsBoolean("expand", true);
    String tokenizerName = settings.get("tokenizer", "whitespace");

    // Look the tokenizer up in the index-local registry first, then the node-level one.
    TokenizerFactoryFactory tokenizerFactoryFactory = tokenizerFactories.get(tokenizerName);
    if (tokenizerFactoryFactory == null) {
        tokenizerFactoryFactory = indicesAnalysisService.tokenizerFactoryFactory(tokenizerName);
    }
    if (tokenizerFactoryFactory == null) {
        throw new ElasticsearchIllegalArgumentException(
                "failed to find tokenizer [" + tokenizerName + "] for synonym token filter");
    }
    final TokenizerFactory tokenizerFactory = tokenizerFactoryFactory.create(tokenizerName, indexSettings);

    // Analyzer used only to normalize the synonym rules while they are parsed.
    Analyzer analyzer = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer tokenizer = tokenizerFactory == null
                    ? new WhitespaceTokenizer(Lucene.ANALYZER_VERSION, reader)
                    : tokenizerFactory.create(reader);
            // Lower-case during parsing too, so rules match case-insensitively at search time.
            TokenStream stream = ignoreCase ? new LowerCaseFilter(Lucene.ANALYZER_VERSION, tokenizer)
                    : tokenizer;
            return new TokenStreamComponents(tokenizer, stream);
        }
    };

    try {
        SynonymMap.Builder parser = null;
        if ("wordnet".equalsIgnoreCase(settings.get("format"))) {
            parser = new WordnetSynonymParser(true, expand, analyzer);
            ((WordnetSynonymParser) parser).parse(rulesReader);
        } else {
            // Solr format is the default when "format" is absent or unrecognized.
            parser = new SolrSynonymParser(true, expand, analyzer);
            ((SolrSynonymParser) parser).parse(rulesReader);
        }
        synonymMap = parser.build();
    } catch (Exception e) {
        throw new ElasticsearchIllegalArgumentException("failed to build synonyms", e);
    }
}

From source file:pl.litwiniuk.rowicki.modsynonyms.ModificatedFSTSynonymFilterFactory.java

License:Apache License

/**
 * Load synonyms from the wordnet format, "format=wordnet".
 *
 * @param loader   resource loader used to open the synonym resource(s)
 * @param dedup    whether the parser should de-duplicate identical rules
 * @param analyzer analyzer used to normalize entries while parsing
 * @return the built synonym map
 * @throws IOException    if a synonym resource cannot be read
 * @throws ParseException if a synonym file is malformed
 */
private SynonymMap loadWordnetSynonyms(ResourceLoader loader, boolean dedup, Analyzer analyzer)
        throws IOException, ParseException {
    // Strict UTF-8 decoding: report (rather than silently replace) bad byte sequences.
    CharsetDecoder utf8Decoder = Charset.forName("UTF-8").newDecoder()
            .onMalformedInput(CodingErrorAction.REPORT)
            .onUnmappableCharacter(CodingErrorAction.REPORT);

    WordnetSynonymParser wordnetParser = new WordnetSynonymParser(dedup, expand, analyzer);
    if (new File(synonyms).exists()) {
        // 'synonyms' names a single existing file: feed it to the parser directly.
        utf8Decoder.reset();
        wordnetParser.add(new InputStreamReader(loader.openResource(synonyms), utf8Decoder));
    } else {
        // Otherwise interpret the value as a delimited list of resource names.
        for (String resourceName : splitFileNames(synonyms)) {
            utf8Decoder.reset(); // the decoder is shared, so reset it before each resource
            wordnetParser.add(new InputStreamReader(loader.openResource(resourceName), utf8Decoder));
        }
    }
    return wordnetParser.build();
}