Example usage for org.apache.lucene.analysis.cjk CJKAnalyzer CJKAnalyzer

List of usage examples for org.apache.lucene.analysis.cjk CJKAnalyzer CJKAnalyzer

Introduction

On this page you can find example usages of the no-argument constructor CJKAnalyzer() of org.apache.lucene.analysis.cjk.CJKAnalyzer.

Prototype

public CJKAnalyzer() 

Source Link

Document

Builds an analyzer which removes words in {@link #getDefaultStopSet()}.

Usage

From source file:lucee.runtime.search.lucene2.SearchUtil.java

License:Open Source License

/**
 * Returns the analyzer registered for the given language, creating and caching it on first use.
 *
 * <p>The language name defaults to {@code "english"} when {@code null}; otherwise it is lower-cased,
 * trimmed and passed through {@code SearchUtil.translateLanguage} before the lookup. Languages
 * without a built-in mapping are resolved reflectively by class name, first in
 * {@code org.apache.lucene.analysis.el} and then in {@code lucee.runtime.search.lucene2.analyzer}.
 *
 * @param language language name, may be {@code null}
 * @return the cached or newly created analyzer for that language
 * @throws SearchException if no analyzer class can be loaded for the language, or the loaded
 *         class is not an {@code Analyzer}
 */
public static Analyzer getAnalyzer(String language) throws SearchException {
    if (language == null)
        language = "english";
    else
        // Locale.ROOT keeps the lookup keys stable whatever the JVM default locale is
        // (e.g. a Turkish default locale would lower-case "I" to a dotless "i").
        language = language.toLowerCase(java.util.Locale.ROOT).trim();
    language = lucee.runtime.search.SearchUtil.translateLanguage(language);

    Analyzer analyzer = analyzers.get(language);
    if (analyzer != null)
        return analyzer;

    if (language.equals("english"))
        analyzer = new StandardAnalyzer();
    else if (language.equals("german"))
        analyzer = new GermanAnalyzer();
    else if (language.equals("russian"))
        analyzer = new RussianAnalyzer();
    else if (language.equals("dutch"))
        analyzer = new DutchAnalyzer();
    else if (language.equals("french"))
        analyzer = new FrenchAnalyzer();
    else if (language.equals("norwegian"))
        analyzer = new NorwegianAnalyzer();
    else if (language.equals("portuguese"))
        analyzer = new PortugueseAnalyzer();
    else if (language.equals("spanish"))
        analyzer = new SpanishAnalyzer();
    else if (language.equals("brazilian"))
        analyzer = new BrazilianAnalyzer();
    else if (language.equals("chinese"))
        analyzer = new ChineseAnalyzer();
    else if (language.startsWith("czech"))
        analyzer = new CzechAnalyzer();
    else if (language.equals("greek"))
        analyzer = new GreekAnalyzer();
    else if (language.equals("thai"))
        analyzer = new ThaiAnalyzer();
    else if (language.equals("japanese") || language.equals("korean"))
        analyzer = new CJKAnalyzer();
    else if (language.equals("italian"))
        analyzer = new ItalianAnalyzer();
    else if (language.equals("danish"))
        analyzer = new DanishAnalyzer();
    else if (language.equals("finnish"))
        analyzer = new SnowballAnalyzer("Finnish");
    else if (language.equals("swedish"))
        analyzer = new SnowballAnalyzer("Swedish");
    else {
        // No built-in mapping: derive "<Language>Analyzer" and try to load it by name.
        String simpleName = StringUtil.ucFirst(language.trim().toLowerCase(java.util.Locale.ROOT)) + "Analyzer";

        String clazzName = "org.apache.lucene.analysis.el." + simpleName;
        Object o = ClassUtil.loadInstance(clazzName, (Object) null);
        if (o == null) {
            // Fall back to the analyzers shipped in the lucee package.
            clazzName = "lucee.runtime.search.lucene2.analyzer." + simpleName;
            o = ClassUtil.loadInstance(clazzName, (Object) null);
        }
        if (o instanceof Analyzer)
            analyzer = (Analyzer) o;
        else if (o == null)
            throw new SearchException("can't create Language Analyzer for Lanuage " + language
                    + ", make Analyzer [" + clazzName + "] available");
        else
            throw new SearchException("can't create Language Analyzer for Lanuage " + language + ", Analyzer ["
                    + clazzName + "] is of invalid type");
    }
    analyzers.put(language, analyzer);
    return analyzer;
}

From source file:org.eclipse.epf.search.IndexBuilder.java

License:Open Source License

/**
 * Builds the search index for {@code pDirectory} inside {@code indexFolder} and writes the
 * accompanying version file.
 *
 * <p>When the default locale's language is Japanese or Korean the index is built with a
 * {@code CJKAnalyzer}, otherwise with a {@code TextAnalyzer}. Failures while indexing are printed
 * and do not abort the method (best-effort, as before); the index writer is always closed so the
 * index is not left locked.
 *
 * @param jarIt if {@code true}, the index folder is jarred, every file except the jar is deleted,
 *        and the jar size is appended to the version line
 * @return {@code true} once the version file has been written
 * @throws SearchServiceException if the version file cannot be written or the index jar is missing
 * @throws IllegalStateException if {@code indexFolder} or {@code pDirectory} is not set
 */
public boolean createIndex(boolean jarIt) throws SearchServiceException {
    synchronized (IndexBuilder.class) {

        if (indexFolder == null || pDirectory == null) {
            throw new IllegalStateException("Invalid indexFolder or pDirectory"); //$NON-NLS-1$
        }

        String lang = Locale.getDefault().getLanguage();
        boolean jako = lang.equals(Locale.JAPANESE.getLanguage()) || lang.equals(Locale.KOREA.getLanguage());
        Analyzer analyzer = jako ? new CJKAnalyzer() : new TextAnalyzer();

        IndexWriter fsWriter = null;
        try {
            fsWriter = new IndexWriter(FSDirectory.getDirectory(indexFolder.toString(), true), analyzer, true);
            fsWriter.setMaxFieldLength(1000000);

            indexDocs(new File(pDirectory), fsWriter);

            fsWriter.optimize();
        } catch (Exception e) {
            // Best-effort: an indexing failure is reported but does not abort the method.
            e.printStackTrace();
        } finally {
            // Always close the writer, even after a failure, so it does not stay open.
            if (fsWriter != null) {
                try {
                    fsWriter.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }

        // Timestamp recorded in the version file.
        long milliseconds = System.currentTimeMillis();
        boolean cjk = analyzer instanceof CJKAnalyzer;

        if (!jarIt) {
            try {
                writeVersionFile(productName + VERSION_DELIMITER + milliseconds + "\n", cjk); //$NON-NLS-1$
            } catch (IOException ioe) {
                throw new SearchServiceException(SearchResources.createSearchIndexError);
            }
            return true;
        }

        // jar up the created index.
        JarCreator.jarFolder(indexFolder.toString());

        System.out.println("index Jarred successfully"); //$NON-NLS-1$

        // delete the files now that they've been jarred.
        File indexDir = new File(indexFolder.toString());
        File[] files = indexDir.listFiles();
        if (files != null) { // listFiles() returns null when the folder cannot be listed
            for (int i = 0; i < files.length; i++) {
                File tempFile = files[i];
                if (!tempFile.getName().equals(JarCreator.INDEX_JAR)) {
                    tempFile.delete();
                }
            }
        }

        File newIndexJar = new File(indexFolder + File.separator + JarCreator.INDEX_JAR);
        if (!newIndexJar.exists()) {
            throw new SearchServiceException(SearchResources.createSearchIndexError);
        }

        try {
            String fileSize = "" + newIndexJar.length(); //$NON-NLS-1$
            writeVersionFile(productName + VERSION_DELIMITER + milliseconds + VERSION_DELIMITER + fileSize + "\n", cjk); //$NON-NLS-1$
        } catch (IOException ioe) {
            throw new SearchServiceException(SearchResources.createSearchIndexError);
        }

        return true;
    }
}

/**
 * Writes the version file into the index folder, closing the file even if a write fails.
 *
 * @param versionLine first line of the file, including its trailing newline
 * @param cjk whether to append the {@code CJKAnalyzer} marker line
 * @throws IOException if the file cannot be created, written or closed
 */
private void writeVersionFile(String versionLine, boolean cjk) throws IOException {
    BufferedWriter bw = new BufferedWriter(new FileWriter(indexFolder + File.separator + VERSION_FILE_NAME));
    try {
        bw.write(versionLine);
        if (cjk) {
            bw.write("CJKAnalyzer" + "\n"); //$NON-NLS-1$   //$NON-NLS-2$
        }
    } finally {
        // Closing the BufferedWriter also closes the underlying FileWriter.
        bw.close();
    }
}

From source file:org.riotfamily.search.analysis.DefaultAnalyzerFactory.java

License:Apache License

/**
 * Picks the analyzer to use for a language key.
 *
 * <p>Keys found in {@code snowballNames} get a {@code SnowballAnalyzer} (with the stop words from
 * {@code getStopWords} when available). Otherwise {@code "ja"}, {@code "ko"} and keys ending in
 * {@code "zh"} map to {@code CJKAnalyzer}, {@code "th"} to {@code ThaiAnalyzer}, {@code "el"} to
 * {@code GreekAnalyzer} and {@code "cs"} to {@code CzechAnalyzer}.
 *
 * @param language language key, may be {@code null}
 * @return the matching analyzer, or {@code defaultAnalyzer} for {@code null} or unmapped keys
 */
public Analyzer getAnalyzer(String language) {
    if (language == null) {
        return defaultAnalyzer;
    }

    String stemmer = (String) snowballNames.get(language);
    if (stemmer != null) {
        String[] customStopWords = getStopWords(language);
        return customStopWords != null
                ? new SnowballAnalyzer(stemmer, customStopWords)
                : new SnowballAnalyzer(stemmer);
    }

    // NOTE(review): "zh" is matched with endsWith while the other keys use equals - confirm intent.
    boolean cjk = language.equals("ja") || language.equals("ko") || language.endsWith("zh");
    if (cjk) {
        return new CJKAnalyzer();
    }
    if (language.equals("th")) {
        return new ThaiAnalyzer();
    }
    if (language.equals("el")) {
        return new GreekAnalyzer();
    }
    if (language.equals("cs")) {
        return new CzechAnalyzer();
    }
    return defaultAnalyzer;
}