List of usage examples for the org.apache.lucene.analysis.ja.JapaneseAnalyzer constructor JapaneseAnalyzer()
public JapaneseAnalyzer()
From source file:com.github.mosuka.apache.lucene.example.cmd.SearchCommand.java
License:Apache License
@Override public void execute(Map<String, Object> attrs) { Map<String, Object> responseMap = new LinkedHashMap<String, Object>(); String responseJSON = null;//from w w w. ja va 2 s . c o m Directory indexDir = null; IndexReader reader = null; try { String index = (String) attrs.get("index"); String queryStr = (String) attrs.get("query"); indexDir = FSDirectory.open(new File(index).toPath()); QueryParser queryParser = new QueryParser("text", new JapaneseAnalyzer()); Query query = queryParser.parse(queryStr); reader = DirectoryReader.open(indexDir); IndexSearcher searcher = new IndexSearcher(reader); TopDocs topDocs = searcher.search(query, 10); List<Map<String, Object>> documentList = new LinkedList<Map<String, Object>>(); for (ScoreDoc scoreDoc : topDocs.scoreDocs) { Document document = searcher.doc(scoreDoc.doc); Map<String, Object> documentMap = new LinkedHashMap<String, Object>(); for (IndexableField f : document.getFields()) { documentMap.put(f.name(), f.stringValue()); } documentMap.put("score", scoreDoc.score); documentList.add(documentMap); } responseMap.put("status", 0); responseMap.put("message", "OK"); responseMap.put("totalHits", topDocs.totalHits); responseMap.put("maxScore", topDocs.getMaxScore()); responseMap.put("result", documentList); } catch (IOException e) { responseMap.put("status", 1); responseMap.put("message", e.getMessage()); } catch (ParseException e) { responseMap.put("status", 1); responseMap.put("message", e.getMessage()); } finally { try { if (reader != null) { reader.close(); } } catch (IOException e) { responseMap.put("status", 1); responseMap.put("message", e.getMessage()); } try { if (indexDir != null) { indexDir.close(); } } catch (IOException e) { responseMap.put("status", 1); responseMap.put("message", e.getMessage()); } } try { ObjectMapper mapper = new ObjectMapper(); responseJSON = mapper.writeValueAsString(responseMap); } catch (IOException e) { responseJSON = String.format("{\"status\":1, \"message\":\"%s\"}", 
e.getMessage()); } System.out.println(responseJSON); }
From source file:com.github.mosuka.apache.lucene.example.utils.LuceneExampleUtil.java
License:Apache License
/**
 * Builds the per-field analyzer mapping used by the examples.
 *
 * @return a new mutable map that associates the {@code "id"} field with a
 *     {@link KeywordAnalyzer} and the {@code "text"} field with a {@link JapaneseAnalyzer}
 */
public static Map<String, Analyzer> getAnalyzerMap() {
    Map<String, Analyzer> analyzersByField = new HashMap<>();
    analyzersByField.put("id", new KeywordAnalyzer());
    analyzersByField.put("text", new JapaneseAnalyzer());
    return analyzersByField;
}
From source file:de.citec.sc.sentence.preprocessing.lucene.CreateIndex.java
public static void main(String[] args) throws IOException { Analyzer analyzer = null;// w w w . ja v a2 s. c o m List<String> files = new ArrayList<>(); files.add("/Users/swalter/Downloads/german_sentences_reduced.txt"); String indexPath = "/Users/swalter/Index/GermanIndexReduced/"; Language language = Language.DE; Directory dir = FSDirectory.open(Paths.get(indexPath)); //files.add("/home/bettina/CITEC/MATOLL/preprocessSentences/idealSentences/idealSents_mecab_jdepp_rmvPunct_CoNLLU"); //String indexPath = "/home/bettina/CITEC/MATOLL/preprocessSentences/idealSentences/index"; //Language language = Language.JA; //Directory dir = FSDirectory.open(Paths.get(indexPath)); if (language.equals(Language.DE)) analyzer = new GermanAnalyzer(); if (language.equals(Language.ES)) analyzer = new SpanishAnalyzer(); if (language.equals(Language.EN)) analyzer = new EnglishAnalyzer(); if (language.equals(Language.JA)) analyzer = new JapaneseAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); iwc.setOpenMode(OpenMode.CREATE); iwc.setRAMBufferSizeMB(12000); try (IndexWriter writer = new IndexWriter(dir, iwc)) { files.forEach(f -> { try { indexDocs(writer, Paths.get(f), language); } catch (IOException ex) { Logger.getLogger(CreateIndex.class.getName()).log(Level.SEVERE, null, ex); } }); } }
From source file:org.elasticsearch.indices.analysis.KuromojiIndicesAnalysis.java
License:Apache License
/**
 * Registers the prebuilt Kuromoji analysis components with the given
 * {@code IndicesAnalysisService}.
 *
 * <p>The following names are registered: the {@code kuromoji} analyzer, the
 * {@code kuromoji_iteration_mark} char filter, the {@code kuromoji_tokenizer} tokenizer, and the
 * token filters {@code kuromoji_baseform}, {@code kuromoji_part_of_speech},
 * {@code kuromoji_readingform} and {@code kuromoji_stemmer}.
 *
 * @param settings settings passed on to the superclass constructor
 * @param indicesAnalysisService the service whose factory maps receive the registrations
 */
@Inject
public KuromojiIndicesAnalysis(Settings settings, IndicesAnalysisService indicesAnalysisService) {
    super(settings);

    // Analyzer: a single shared JapaneseAnalyzer instance with INDICES scope.
    indicesAnalysisService.analyzerProviderFactories().put(
            "kuromoji",
            new PreBuiltAnalyzerProviderFactory(
                    "kuromoji", AnalyzerScope.INDICES, new JapaneseAnalyzer()));

    // Char filter: iteration-mark normalization using the filter's default kanji/kana flags.
    indicesAnalysisService.charFilterFactories().put(
            "kuromoji_iteration_mark",
            new PreBuiltCharFilterFactoryFactory(new CharFilterFactory() {
                @Override
                public String name() {
                    return "kuromoji_iteration_mark";
                }

                @Override
                public Reader create(Reader input) {
                    return new JapaneseIterationMarkCharFilter(
                            input,
                            JapaneseIterationMarkCharFilter.NORMALIZE_KANJI_DEFAULT,
                            JapaneseIterationMarkCharFilter.NORMALIZE_KANA_DEFAULT);
                }
            }));

    // Tokenizer: constructed with arguments (null, true, Mode.SEARCH).
    indicesAnalysisService.tokenizerFactories().put(
            "kuromoji_tokenizer",
            new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
                @Override
                public String name() {
                    return "kuromoji_tokenizer";
                }

                @Override
                public Tokenizer create() {
                    return new JapaneseTokenizer(null, true, Mode.SEARCH);
                }
            }));

    // Token filter: base form.
    indicesAnalysisService.tokenFilterFactories().put(
            "kuromoji_baseform",
            new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
                @Override
                public String name() {
                    return "kuromoji_baseform";
                }

                @Override
                public TokenStream create(TokenStream stream) {
                    return new JapaneseBaseFormFilter(stream);
                }
            }));

    // Token filter: part-of-speech stop filter using the analyzer's default stop tags.
    indicesAnalysisService.tokenFilterFactories().put(
            "kuromoji_part_of_speech",
            new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
                @Override
                public String name() {
                    return "kuromoji_part_of_speech";
                }

                @Override
                public TokenStream create(TokenStream stream) {
                    return new JapanesePartOfSpeechStopFilter(
                            stream, JapaneseAnalyzer.getDefaultStopTags());
                }
            }));

    // Token filter: reading form, constructed with the boolean flag set to true.
    indicesAnalysisService.tokenFilterFactories().put(
            "kuromoji_readingform",
            new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
                @Override
                public String name() {
                    return "kuromoji_readingform";
                }

                @Override
                public TokenStream create(TokenStream stream) {
                    return new JapaneseReadingFormFilter(stream, true);
                }
            }));

    // Token filter: katakana stemmer.
    indicesAnalysisService.tokenFilterFactories().put(
            "kuromoji_stemmer",
            new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
                @Override
                public String name() {
                    return "kuromoji_stemmer";
                }

                @Override
                public TokenStream create(TokenStream stream) {
                    return new JapaneseKatakanaStemFilter(stream);
                }
            }));
}