List of usage examples for the no-argument constructor CJKAnalyzer() of class org.apache.lucene.analysis.cjk.CJKAnalyzer
public CJKAnalyzer()
From source file:lucee.runtime.search.lucene2.SearchUtil.java
License:Open Source License
public static Analyzer getAnalyzer(String language) throws SearchException { if (language == null) language = "english"; else//from ww w. ja v a 2 s.c o m language = language.toLowerCase().trim(); language = lucee.runtime.search.SearchUtil.translateLanguage(language); Analyzer analyzer = analyzers.get(language); if (analyzer != null) return analyzer; if (language.equals("english")) analyzer = new StandardAnalyzer(); else if (language.equals("german")) analyzer = new GermanAnalyzer(); else if (language.equals("russian")) analyzer = new RussianAnalyzer(); else if (language.equals("dutch")) analyzer = new DutchAnalyzer(); else if (language.equals("french")) analyzer = new FrenchAnalyzer(); else if (language.equals("norwegian")) analyzer = new NorwegianAnalyzer(); else if (language.equals("portuguese")) analyzer = new PortugueseAnalyzer(); else if (language.equals("spanish")) analyzer = new SpanishAnalyzer(); else if (language.equals("brazilian")) analyzer = new BrazilianAnalyzer(); else if (language.equals("chinese")) analyzer = new ChineseAnalyzer(); else if (language.startsWith("czech")) analyzer = new CzechAnalyzer(); else if (language.equals("greek")) analyzer = new GreekAnalyzer(); else if (language.equals("thai")) analyzer = new ThaiAnalyzer(); else if (language.equals("japanese")) analyzer = new CJKAnalyzer(); else if (language.equals("korean")) analyzer = new CJKAnalyzer(); else if (language.equals("italian")) analyzer = new ItalianAnalyzer(); else if (language.equals("danish")) analyzer = new DanishAnalyzer(); else if (language.equals("norwegian")) analyzer = new NorwegianAnalyzer(); else if (language.equals("finnish")) analyzer = new SnowballAnalyzer("Finnish"); else if (language.equals("swedish")) analyzer = new SnowballAnalyzer("Swedish"); else { String clazzName = "org.apache.lucene.analysis.el." 
+ StringUtil.ucFirst(language.trim().toLowerCase()) + "Analyzer;"; Object o = ClassUtil.loadInstance(clazzName, (Object) null); if (o == null) { clazzName = "lucee.runtime.search.lucene2.analyzer." + StringUtil.ucFirst(language.trim().toLowerCase()) + "Analyzer"; o = ClassUtil.loadInstance(clazzName, (Object) null);//Class.orName(clazzName).newInstance(); } if (o instanceof Analyzer) analyzer = (Analyzer) o; else if (o == null) throw new SearchException("can't create Language Analyzer for Lanuage " + language + ", make Analyzer [" + clazzName + "] available"); else throw new SearchException("can't create Language Analyzer for Lanuage " + language + ", Analyzer [" + clazzName + "] is of invalid type"); } analyzers.put(language, analyzer); return analyzer; }
From source file:org.eclipse.epf.search.IndexBuilder.java
License:Open Source License
public boolean createIndex(boolean jarIt) throws SearchServiceException { synchronized (IndexBuilder.class) { if (indexFolder == null || pDirectory == null) { throw new IllegalStateException("Invalid indexFolder or pDirectory"); //$NON-NLS-1$ }/*from ww w . j ava 2 s .co m*/ boolean jako = false; Locale locale = Locale.getDefault(); String lang = locale.getLanguage(); if (lang.equals(Locale.JAPANESE.getLanguage()) || lang.equals(Locale.KOREA.getLanguage())) { jako = true; } Analyzer analyzer = jako ? new CJKAnalyzer() : new TextAnalyzer(); try { // RAMDirectory ramDir = new RAMDirectory(); IndexWriter fsWriter = new IndexWriter(FSDirectory.getDirectory(indexFolder.toString(), true), analyzer, true); // IndexWriter ramWriter = new IndexWriter(ramDir, // new TextAnalyzer(), true); if ((fsWriter != null)) { // fsWriter.mergeFactor = 1000; // fsWriter.maxMergeDocs = 10000; fsWriter.setMaxFieldLength(1000000); indexDocs(new File(pDirectory), fsWriter); // fsWriter.addIndexes(new Directory[] { ramDir }); fsWriter.optimize(); // ramWriter.close(); fsWriter.close(); } } catch (Exception e) { e.printStackTrace(); } // create the version file. Date today = new Date(); long milliseconds = today.getTime(); if (!jarIt) { try { FileWriter fw = new FileWriter(indexFolder + File.separator + VERSION_FILE_NAME); BufferedWriter bw = new BufferedWriter(fw); bw.write(productName + VERSION_DELIMITER + milliseconds + "\n"); //$NON-NLS-1$ if (analyzer instanceof CJKAnalyzer) { bw.write("CJKAnalyzer" + "\n"); //$NON-NLS-1$ //$NON-NLS-2$ } bw.close(); fw.close(); } catch (IOException ioe) { throw new SearchServiceException(SearchResources.createSearchIndexError); } return true; } // jar up the created index. JarCreator.jarFolder(indexFolder.toString()); System.out.println("index Jarred successfully"); //$NON-NLS-1$ try { // delete the files now that they've been jarred. 
File indexDir = new File(indexFolder.toString()); File[] files = indexDir.listFiles(); for (int i = 0; i < files.length; i++) { File tempFile = files[i]; if (!tempFile.getName().equals(JarCreator.INDEX_JAR)) { tempFile.delete(); } } // String rupName = publishDir.substring(index); File newIndexJar = new File(indexFolder + File.separator + JarCreator.INDEX_JAR); if (newIndexJar.exists()) { String fileSize = "" + newIndexJar.length(); //$NON-NLS-1$ FileWriter fw = new FileWriter(indexFolder + File.separator + VERSION_FILE_NAME); BufferedWriter bw = new BufferedWriter(fw); bw.write(productName + VERSION_DELIMITER + milliseconds + VERSION_DELIMITER + fileSize + "\n"); //$NON-NLS-1$ if (analyzer instanceof CJKAnalyzer) { bw.write("CJKAnalyzer" + "\n"); //$NON-NLS-1$ //$NON-NLS-2$ } bw.close(); fw.close(); } else { throw new SearchServiceException(SearchResources.createSearchIndexError); } } catch (IOException ioe) { throw new SearchServiceException(SearchResources.createSearchIndexError); } return true; } }
From source file:org.riotfamily.search.analysis.DefaultAnalyzerFactory.java
License:Apache License
public Analyzer getAnalyzer(String language) { if (language != null) { String snowballName = (String) snowballNames.get(language); if (snowballName != null) { String[] stopWords = getStopWords(language); if (stopWords != null) { return new SnowballAnalyzer(snowballName, stopWords); }//from www. ja va2 s. c o m return new SnowballAnalyzer(snowballName); } if (language.equals("ja") || language.equals("ko") || language.endsWith("zh")) { return new CJKAnalyzer(); } if (language.equals("th")) { return new ThaiAnalyzer(); } if (language.equals("el")) { return new GreekAnalyzer(); } if (language.equals("cs")) { return new CzechAnalyzer(); } } return defaultAnalyzer; }