List of usage examples for the org.apache.lucene.analysis.synonym.WordnetSynonymParser constructor
public WordnetSynonymParser(boolean dedup, boolean expand, Analyzer analyzer)
From source file: com.github.le11.nls.solr.TypeAwareSynonymFilterFactory.java
License: Apache License
/** * Load synonyms from the wordnet format, "format=wordnet". */// ww w . j a va 2 s. co m private SynonymMap loadWordnetSynonyms(ResourceLoader loader, boolean dedup, Analyzer analyzer) throws IOException, ParseException { final boolean expand = getBoolean("expand", true); String synonyms = args.get("synonyms"); if (synonyms == null) throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Missing required argument 'synonyms'."); CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder().onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); WordnetSynonymParser parser = new WordnetSynonymParser(dedup, expand, analyzer); File synonymFile = new File(synonyms); if (synonymFile.exists()) { decoder.reset(); parser.add(new InputStreamReader(loader.openResource(synonyms), decoder)); } else { List<String> files = StrUtils.splitFileNames(synonyms); for (String file : files) { decoder.reset(); parser.add(new InputStreamReader(loader.openResource(file), decoder)); } } return parser.build(); }
From source file: org.elasticsearch.index.analysis.SynonymTokenFilterFactory.java
License: Apache License
@Inject public SynonymTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, IndicesAnalysisService indicesAnalysisService, Map<String, TokenizerFactoryFactory> tokenizerFactories, @Assisted String name, @Assisted Settings settings) { super(index, indexSettings, name, settings); Reader rulesReader = null;// ww w . ja va 2 s . com if (settings.getAsArray("synonyms", null) != null) { List<String> rules = Analysis.getWordList(env, settings, "synonyms"); StringBuilder sb = new StringBuilder(); for (String line : rules) { sb.append(line).append(System.getProperty("line.separator")); } rulesReader = new FastStringReader(sb.toString()); } else if (settings.get("synonyms_path") != null) { rulesReader = Analysis.getReaderFromFile(env, settings, "synonyms_path"); } else { throw new ElasticsearchIllegalArgumentException( "synonym requires either `synonyms` or `synonyms_path` to be configured"); } this.ignoreCase = settings.getAsBoolean("ignore_case", false); boolean expand = settings.getAsBoolean("expand", true); String tokenizerName = settings.get("tokenizer", "whitespace"); TokenizerFactoryFactory tokenizerFactoryFactory = tokenizerFactories.get(tokenizerName); if (tokenizerFactoryFactory == null) { tokenizerFactoryFactory = indicesAnalysisService.tokenizerFactoryFactory(tokenizerName); } if (tokenizerFactoryFactory == null) { throw new ElasticsearchIllegalArgumentException( "failed to find tokenizer [" + tokenizerName + "] for synonym token filter"); } final TokenizerFactory tokenizerFactory = tokenizerFactoryFactory.create(tokenizerName, settings); Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer tokenizer = tokenizerFactory == null ? new WhitespaceTokenizer(Lucene.ANALYZER_VERSION, reader) : tokenizerFactory.create(reader); TokenStream stream = ignoreCase ? 
new LowerCaseFilter(Lucene.ANALYZER_VERSION, tokenizer) : tokenizer; return new TokenStreamComponents(tokenizer, stream); } }; try { SynonymMap.Builder parser = null; if ("wordnet".equalsIgnoreCase(settings.get("format"))) { parser = new WordnetSynonymParser(true, expand, analyzer); ((WordnetSynonymParser) parser).parse(rulesReader); } else { parser = new SolrSynonymParser(true, expand, analyzer); ((SolrSynonymParser) parser).parse(rulesReader); } synonymMap = parser.build(); } catch (Exception e) { throw new ElasticsearchIllegalArgumentException("failed to build synonyms", e); } }
From source file: org.elasticsearch.plugin.analysis.SynonymWithPayloadsTokenFilterFactory.java
License: Apache License
@Inject public SynonymWithPayloadsTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, IndicesAnalysisService indicesAnalysisService, Map<String, TokenizerFactoryFactory> tokenizerFactories, @Assisted String name, @Assisted Settings settings) {// w w w . ja va2s. co m super(index, indexSettings, name, settings); Reader rulesReader = null; if (settings.getAsArray("synonyms", null) != null) { List<String> rules = Analysis.getWordList(env, settings, "synonyms"); StringBuilder sb = new StringBuilder(); for (String line : rules) { sb.append(line).append(System.getProperty("line.separator")); } rulesReader = new FastStringReader(sb.toString()); } else if (settings.get("synonyms_path") != null) { rulesReader = Analysis.getReaderFromFile(env, settings, "synonyms_path"); } else { throw new ElasticsearchIllegalArgumentException( "synonym requires either `synonyms` or `synonyms_path` to be configured"); } this.ignoreCase = settings.getAsBoolean("ignore_case", false); boolean expand = settings.getAsBoolean("expand", true); String tokenizerName = settings.get("tokenizer", "whitespace"); TokenizerFactoryFactory tokenizerFactoryFactory = tokenizerFactories.get(tokenizerName); if (tokenizerFactoryFactory == null) { tokenizerFactoryFactory = indicesAnalysisService.tokenizerFactoryFactory(tokenizerName); } if (tokenizerFactoryFactory == null) { throw new ElasticsearchIllegalArgumentException( "failed to find tokenizer [" + tokenizerName + "] for synonym token filter"); } final TokenizerFactory tokenizerFactory = tokenizerFactoryFactory.create(tokenizerName, indexSettings); Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer tokenizer = tokenizerFactory == null ? new WhitespaceTokenizer(Lucene.ANALYZER_VERSION, reader) : tokenizerFactory.create(reader); TokenStream stream = ignoreCase ? 
new LowerCaseFilter(Lucene.ANALYZER_VERSION, tokenizer) : tokenizer; return new TokenStreamComponents(tokenizer, stream); } }; try { SynonymMap.Builder parser = null; if ("wordnet".equalsIgnoreCase(settings.get("format"))) { parser = new WordnetSynonymParser(true, expand, analyzer); ((WordnetSynonymParser) parser).parse(rulesReader); } else { parser = new SolrSynonymParser(true, expand, analyzer); ((SolrSynonymParser) parser).parse(rulesReader); } synonymMap = parser.build(); } catch (Exception e) { throw new ElasticsearchIllegalArgumentException("failed to build synonyms", e); } }
From source file: pl.litwiniuk.rowicki.modsynonyms.ModificatedFSTSynonymFilterFactory.java
License: Apache License
/** * Load synonyms from the wordnet format, "format=wordnet". *///from ww w . j a v a 2 s .c o m private SynonymMap loadWordnetSynonyms(ResourceLoader loader, boolean dedup, Analyzer analyzer) throws IOException, ParseException { CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder().onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); WordnetSynonymParser parser = new WordnetSynonymParser(dedup, expand, analyzer); File synonymFile = new File(synonyms); if (synonymFile.exists()) { decoder.reset(); parser.add(new InputStreamReader(loader.openResource(synonyms), decoder)); } else { List<String> files = splitFileNames(synonyms); for (String file : files) { decoder.reset(); parser.add(new InputStreamReader(loader.openResource(file), decoder)); } } return parser.build(); }