Example usage of the org.apache.lucene.analysis.ja.JapaneseAnalyzer constructor JapaneseAnalyzer()

List of usage examples for the org.apache.lucene.analysis.ja.JapaneseAnalyzer constructor JapaneseAnalyzer()

Introduction

On this page you can find example usages of the no-argument constructor JapaneseAnalyzer() of org.apache.lucene.analysis.ja.JapaneseAnalyzer.

Prototype

public JapaneseAnalyzer() 

Source Link

Usage

From source file:com.github.mosuka.apache.lucene.example.cmd.SearchCommand.java

License:Apache License

/**
 * Runs a search against a Lucene index and prints the outcome to standard output as JSON.
 *
 * <p>The index path is read from {@code attrs.get("index")} and the query string from
 * {@code attrs.get("query")}. The query is parsed against the {@code "text"} field with a
 * {@link JapaneseAnalyzer}, and the fields and score of the top 10 hits are collected.
 *
 * <p>On success the JSON carries {@code status} 0, {@code message} "OK", {@code totalHits},
 * {@code maxScore} and {@code result}. When an {@link IOException} or {@link ParseException}
 * occurs (including an {@code IOException} raised while closing the reader or directory),
 * {@code status} is 1 and {@code message} is the exception message.
 *
 * @param attrs command attributes; the values under "index" and "query" are cast to String
 */
@Override
public void execute(Map<String, Object> attrs) {
    Map<String, Object> responseMap = new LinkedHashMap<String, Object>();

    try {
        String index = (String) attrs.get("index");
        String queryStr = (String) attrs.get("query");

        // try-with-resources closes the reader before the directory, even when the search fails.
        try (Directory indexDir = FSDirectory.open(new File(index).toPath())) {
            QueryParser queryParser = new QueryParser("text", new JapaneseAnalyzer());
            Query query = queryParser.parse(queryStr);

            try (IndexReader reader = DirectoryReader.open(indexDir)) {
                IndexSearcher searcher = new IndexSearcher(reader);

                TopDocs topDocs = searcher.search(query, 10);

                List<Map<String, Object>> documentList = new LinkedList<Map<String, Object>>();
                for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                    Document document = searcher.doc(scoreDoc.doc);

                    // LinkedHashMap keeps the fields in the order the document returns them.
                    Map<String, Object> documentMap = new LinkedHashMap<String, Object>();
                    for (IndexableField f : document.getFields()) {
                        documentMap.put(f.name(), f.stringValue());
                    }
                    documentMap.put("score", scoreDoc.score);
                    documentList.add(documentMap);
                }

                responseMap.put("status", 0);
                responseMap.put("message", "OK");
                responseMap.put("totalHits", topDocs.totalHits);
                responseMap.put("maxScore", topDocs.getMaxScore());
                responseMap.put("result", documentList);
            }
        }
    } catch (IOException | ParseException e) {
        // Overwrites any "status"/"message" already stored, so a failure while closing
        // is still reported as an error.
        responseMap.put("status", 1);
        responseMap.put("message", e.getMessage());
    }

    String responseJSON;
    try {
        ObjectMapper mapper = new ObjectMapper();
        responseJSON = mapper.writeValueAsString(responseMap);
    } catch (IOException e) {
        // This fallback is assembled by hand, so the message must be escaped;
        // otherwise a quote, backslash or line break in it would yield invalid JSON.
        String message = String.valueOf(e.getMessage())
                .replace("\\", "\\\\")
                .replace("\"", "\\\"")
                .replace("\n", "\\n")
                .replace("\r", "\\r")
                .replace("\t", "\\t");
        responseJSON = String.format("{\"status\":1, \"message\":\"%s\"}", message);
    }
    System.out.println(responseJSON);
}

From source file:com.github.mosuka.apache.lucene.example.utils.LuceneExampleUtil.java

License:Apache License

/**
 * Creates the field-name-to-analyzer mapping used by the examples.
 *
 * <p>The {@code "id"} field is mapped to a {@link KeywordAnalyzer} and the {@code "text"}
 * field to a {@link JapaneseAnalyzer}. A new map with new analyzer instances is built on
 * every call.
 *
 * @return a mutable map with exactly the entries "id" and "text"
 */
public static Map<String, Analyzer> getAnalyzerMap() {
    Map<String, Analyzer> perField = new HashMap<>();
    perField.put("id", new KeywordAnalyzer());
    perField.put("text", new JapaneseAnalyzer());
    return perField;
}

From source file:de.citec.sc.sentence.preprocessing.lucene.CreateIndex.java

/**
 * Builds a Lucene index from the hard-coded list of sentence files.
 *
 * <p>The analyzer is chosen from the configured {@code Language}. The index at
 * {@code indexPath} is opened in {@code OpenMode.CREATE} and each input file is passed to
 * {@code indexDocs}. An {@link IOException} raised while indexing a single file is logged
 * at SEVERE level and the remaining files are still processed.
 *
 * @param args command-line arguments (not used)
 * @throws IOException if the index directory or the index writer cannot be opened or closed
 * @throws IllegalArgumentException if no analyzer is available for the configured language
 */
public static void main(String[] args) throws IOException {
    List<String> files = new ArrayList<>();
    files.add("/Users/swalter/Downloads/german_sentences_reduced.txt");
    String indexPath = "/Users/swalter/Index/GermanIndexReduced/";
    Language language = Language.DE;

    // Alternative configuration (Japanese corpus), kept for reference:
    //files.add("/home/bettina/CITEC/MATOLL/preprocessSentences/idealSentences/idealSents_mecab_jdepp_rmvPunct_CoNLLU");
    //String indexPath = "/home/bettina/CITEC/MATOLL/preprocessSentences/idealSentences/index";
    //Language language = Language.JA;

    Analyzer analyzer;
    if (language.equals(Language.DE)) {
        analyzer = new GermanAnalyzer();
    } else if (language.equals(Language.ES)) {
        analyzer = new SpanishAnalyzer();
    } else if (language.equals(Language.EN)) {
        analyzer = new EnglishAnalyzer();
    } else if (language.equals(Language.JA)) {
        analyzer = new JapaneseAnalyzer();
    } else {
        // Fail with a clear message instead of handing a null analyzer to the writer config.
        throw new IllegalArgumentException("Unsupported language: " + language);
    }

    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(OpenMode.CREATE);
    iwc.setRAMBufferSizeMB(12000);

    // The directory is a resource of its own: it is closed here after the writer,
    // which the original code never did.
    try (Directory dir = FSDirectory.open(Paths.get(indexPath));
            IndexWriter writer = new IndexWriter(dir, iwc)) {
        files.forEach(f -> {
            try {
                indexDocs(writer, Paths.get(f), language);
            } catch (IOException ex) {
                Logger.getLogger(CreateIndex.class.getName()).log(Level.SEVERE, null, ex);
            }
        });
    }
}

From source file:org.elasticsearch.indices.analysis.KuromojiIndicesAnalysis.java

License:Apache License

/**
 * Registers the pre-built Kuromoji analysis components with the given service.
 *
 * <p>Registered names: analyzer {@code kuromoji}; char filter
 * {@code kuromoji_iteration_mark}; tokenizer {@code kuromoji_tokenizer}; token filters
 * {@code kuromoji_baseform}, {@code kuromoji_part_of_speech}, {@code kuromoji_readingform}
 * and {@code kuromoji_stemmer}.
 *
 * @param settings               settings handed to the superclass constructor
 * @param indicesAnalysisService service whose factory maps receive the registrations
 */
@Inject
public KuromojiIndicesAnalysis(Settings settings, IndicesAnalysisService indicesAnalysisService) {
    super(settings);

    // Analyzer
    final PreBuiltAnalyzerProviderFactory kuromojiAnalyzer = new PreBuiltAnalyzerProviderFactory("kuromoji",
            AnalyzerScope.INDICES, new JapaneseAnalyzer());
    indicesAnalysisService.analyzerProviderFactories().put("kuromoji", kuromojiAnalyzer);

    // Char filter: iteration marks, using the filter's default kanji/kana normalization flags
    final CharFilterFactory iterationMark = new CharFilterFactory() {
        @Override
        public String name() {
            return "kuromoji_iteration_mark";
        }

        @Override
        public Reader create(Reader in) {
            return new JapaneseIterationMarkCharFilter(in,
                    JapaneseIterationMarkCharFilter.NORMALIZE_KANJI_DEFAULT,
                    JapaneseIterationMarkCharFilter.NORMALIZE_KANA_DEFAULT);
        }
    };
    indicesAnalysisService.charFilterFactories().put("kuromoji_iteration_mark",
            new PreBuiltCharFilterFactoryFactory(iterationMark));

    // Tokenizer: no user dictionary (null), SEARCH mode
    final TokenizerFactory tokenizer = new TokenizerFactory() {
        @Override
        public String name() {
            return "kuromoji_tokenizer";
        }

        @Override
        public Tokenizer create() {
            return new JapaneseTokenizer(null, true, Mode.SEARCH);
        }
    };
    indicesAnalysisService.tokenizerFactories().put("kuromoji_tokenizer",
            new PreBuiltTokenizerFactoryFactory(tokenizer));

    // Token filter: base form
    final TokenFilterFactory baseForm = new TokenFilterFactory() {
        @Override
        public String name() {
            return "kuromoji_baseform";
        }

        @Override
        public TokenStream create(TokenStream input) {
            return new JapaneseBaseFormFilter(input);
        }
    };
    indicesAnalysisService.tokenFilterFactories().put("kuromoji_baseform",
            new PreBuiltTokenFilterFactoryFactory(baseForm));

    // Token filter: part-of-speech stop filter with the analyzer's default stop tags
    final TokenFilterFactory partOfSpeech = new TokenFilterFactory() {
        @Override
        public String name() {
            return "kuromoji_part_of_speech";
        }

        @Override
        public TokenStream create(TokenStream input) {
            return new JapanesePartOfSpeechStopFilter(input, JapaneseAnalyzer.getDefaultStopTags());
        }
    };
    indicesAnalysisService.tokenFilterFactories().put("kuromoji_part_of_speech",
            new PreBuiltTokenFilterFactoryFactory(partOfSpeech));

    // Token filter: reading form
    final TokenFilterFactory readingForm = new TokenFilterFactory() {
        @Override
        public String name() {
            return "kuromoji_readingform";
        }

        @Override
        public TokenStream create(TokenStream input) {
            return new JapaneseReadingFormFilter(input, true);
        }
    };
    indicesAnalysisService.tokenFilterFactories().put("kuromoji_readingform",
            new PreBuiltTokenFilterFactoryFactory(readingForm));

    // Token filter: katakana stemmer
    final TokenFilterFactory katakanaStem = new TokenFilterFactory() {
        @Override
        public String name() {
            return "kuromoji_stemmer";
        }

        @Override
        public TokenStream create(TokenStream input) {
            return new JapaneseKatakanaStemFilter(input);
        }
    };
    indicesAnalysisService.tokenFilterFactories().put("kuromoji_stemmer",
            new PreBuiltTokenFilterFactoryFactory(katakanaStem));
}