List of usage examples for the org.apache.lucene.analysis.ca.CatalanAnalyzer constructor
public CatalanAnalyzer(CharArraySet stopwords)
From source file: fr.lipn.yasemir.Yasemir.java
License: Open Source License
/** * Initialisation method to be called before every action * @param configFile//from w ww . j a v a 2s . com */ public static void init(String configFile) { System.err.println("Reading config file..."); ConfigurationHandler.init(configFile); //setting paths YASEMIR_HOME = ConfigurationHandler.YASEMIR_HOME; INDEX_DIR = YASEMIR_HOME + System.getProperty("file.separator") + ConfigurationHandler.INDEXDIR; TERM_DIR = YASEMIR_HOME + System.getProperty("file.separator") + ConfigurationHandler.TERMIDXDIR; //TERM_DIR=INDEX_DIR+System.getProperty("file.separator")+ConfigurationHandler.TERMIDXDIR; COLLECTION_DIR = ConfigurationHandler.CORPUSDIR; idField = ConfigurationHandler.DOCIDFIELD; ID_ASATTR = ConfigurationHandler.IDFIELD_ASATTR; DOC_DELIM = ConfigurationHandler.DOC_DELIM; COLLECTION_LANG = ConfigurationHandler.CORPUSLANG; if (COLLECTION_LANG.equals("fr")) analyzer = new FrenchAnalyzer(Version.LUCENE_44); else if (COLLECTION_LANG.equals("it")) analyzer = new ItalianAnalyzer(Version.LUCENE_44); else if (COLLECTION_LANG.equals("es")) analyzer = new SpanishAnalyzer(Version.LUCENE_44); else if (COLLECTION_LANG.equals("de")) analyzer = new GermanAnalyzer(Version.LUCENE_44); else if (COLLECTION_LANG.equals("pt")) analyzer = new PortugueseAnalyzer(Version.LUCENE_44); else if (COLLECTION_LANG.equals("ca")) analyzer = new CatalanAnalyzer(Version.LUCENE_44); else if (COLLECTION_LANG.equals("nl")) analyzer = new DutchAnalyzer(Version.LUCENE_44); else if (COLLECTION_LANG.equals("ar")) analyzer = new ArabicAnalyzer(Version.LUCENE_44); else analyzer = new EnglishAnalyzer(Version.LUCENE_44); //setting search mode String sm = ConfigurationHandler.SEARCH_MODE; if (sm != null) { if (sm.equalsIgnoreCase("semantic")) MODE = SEMANTIC; else if (sm.equalsIgnoreCase("hybrid")) MODE = HYBRID; else MODE = CLASSIC; } //setting concept similarity measure String smm = ConfigurationHandler.SIM_MEASURE; if (smm != null) { if (smm.equalsIgnoreCase("pg1")) SIM_MEASURE = ConceptSimilarity.PROXYGENEA1; 
else if (smm.equalsIgnoreCase("pg2")) SIM_MEASURE = ConceptSimilarity.PROXYGENEA2; else if (smm.equalsIgnoreCase("pg3")) SIM_MEASURE = ConceptSimilarity.PROXYGENEA3; else SIM_MEASURE = ConceptSimilarity.WU; } //setting concept weights String cw = ConfigurationHandler.CONCEPTWEIGHT; if (cw != null) { if (cw.equalsIgnoreCase("fixed")) CONCEPT_WEIGHTS = ClassWeightHandler.FIXED; else if (cw.equalsIgnoreCase("idf")) CONCEPT_WEIGHTS = ClassWeightHandler.IDF; else if (cw.equalsIgnoreCase("prob")) CONCEPT_WEIGHTS = ClassWeightHandler.PROB; else if (cw.equalsIgnoreCase("gauss")) CONCEPT_WEIGHTS = ClassWeightHandler.GAUSSPROB; } //setting annotator ANNOTATOR = ConfigurationHandler.ANNOTENGINE; annotator = new SentenceBasedAnnotator(TERM_DIR); //annotator=new KNNAnnotator(TERM_DIR); //TODO: not finished (select annotator depending on configuration file) try { Class<?> cls = Class.forName(ANNOTATOR); Constructor<?> constructor = cls.getConstructor(String.class); annotator = (SemanticAnnotator) constructor.newInstance(TERM_DIR); //Object instance = constructor.newInstance("stringparam"); } catch (Exception e) { e.printStackTrace(); System.err.println( "[YaSemIR]: failed to load the specified annotator, falling back to IndexBasedAnnotator"); annotator = annotator = new SentenceBasedAnnotator(TERM_DIR); } //setting ngrams enabled or not CKPD_ENABLED = ConfigurationHandler.NGRAMS_ENABLED; //setting semantic fields semBalises = new HashSet<String>(); semBalises.addAll(ConfigurationHandler.getSemanticFields()); //setting classic fields clsBalises = new HashSet<String>(); clsBalises.addAll(ConfigurationHandler.getClassicFields()); //setting score type SCORE = ConfigurationHandler.SCORE; //setting ontologies and terminologies System.err.println("[YaSemIR]: Loading Knowledge Battery..."); HashMap<String, String> ontoSKOSconf = ConfigurationHandler.getOntologySKOSMap(); HashMap<String, String> ontoRootconf = ConfigurationHandler.getOntologyRootMap(); for (String ontoLoc : 
ontoSKOSconf.keySet()) { String ontoRoot = ontoRootconf.get(ontoLoc); Ontology o = null; if (ontoRoot.trim().isEmpty()) o = new Ontology(ontoLoc); else o = new Ontology(ontoLoc, ontoRoot); System.err.println("[YaSemIR]: loaded ontology: " + o.getBaseAddr() + " at " + ontoLoc); String termPath = ontoSKOSconf.get(ontoLoc); SKOSTerminology t = null; if (!termPath.trim().isEmpty()) { System.err.println("[YaSemIR]: loading terminology from " + termPath); t = new SKOSTerminology(o.getOntologyID(), termPath); } else { System.err.println("[YaSemIR]: no terminology provided: generating trivial terminology from " + o.getBaseAddr() + "..."); t = o.generateTerminology(); } System.err.println("[YaSemIR]: loaded terminology: " + t.getTerminologyID()); KnowledgeBattery.addOntology(o, t); } if (INDEXING_MODE) KnowledgeBattery.createTermIndex(); System.err.println("[YaSemIR]: Done."); }
From source file: perLucene.Server.java
License: Open Source License
/**
 * Replaces {@code ha} with a fresh map from a short language key to the Lucene analyzer
 * registered for it. Every analyzer is created with {@code Version.LUCENE_41}.
 *
 * <p>The keys are the exact strings used for lookups elsewhere; several of them
 * ({@code "br"}, {@code "cz"}, {@code "cn"}) are project-specific labels, so they are kept as-is.
 */
private static void initAnalyzers() {
    // Publish the new map first, then fill it through a local alias of the same object.
    final HashMap<String, Analyzer> byLanguage = new HashMap<String, Analyzer>();
    ha = byLanguage;

    final Version luceneVersion = Version.LUCENE_41;

    byLanguage.put("ar", new ArabicAnalyzer(luceneVersion));
    byLanguage.put("el", new GreekAnalyzer(luceneVersion));
    byLanguage.put("bg", new BulgarianAnalyzer(luceneVersion));
    byLanguage.put("br", new BrazilianAnalyzer(luceneVersion));
    byLanguage.put("ca", new CatalanAnalyzer(luceneVersion));
    byLanguage.put("cz", new CzechAnalyzer(luceneVersion));
    byLanguage.put("da", new DanishAnalyzer(luceneVersion));
    byLanguage.put("de", new GermanAnalyzer(luceneVersion));
    byLanguage.put("en", new EnglishAnalyzer(luceneVersion));
    byLanguage.put("es", new SpanishAnalyzer(luceneVersion));
    byLanguage.put("eu", new BasqueAnalyzer(luceneVersion));
    byLanguage.put("fa", new PersianAnalyzer(luceneVersion));
    byLanguage.put("fi", new FinnishAnalyzer(luceneVersion));
    byLanguage.put("fr", new FrenchAnalyzer(luceneVersion));
    byLanguage.put("ga", new IrishAnalyzer(luceneVersion));
    byLanguage.put("gl", new GalicianAnalyzer(luceneVersion));
    byLanguage.put("hi", new HindiAnalyzer(luceneVersion));
    byLanguage.put("hu", new HungarianAnalyzer(luceneVersion));
    byLanguage.put("hy", new ArmenianAnalyzer(luceneVersion));
    byLanguage.put("id", new IndonesianAnalyzer(luceneVersion));
    byLanguage.put("it", new ItalianAnalyzer(luceneVersion));
    byLanguage.put("lv", new LatvianAnalyzer(luceneVersion));
    byLanguage.put("nl", new DutchAnalyzer(luceneVersion));
    byLanguage.put("no", new NorwegianAnalyzer(luceneVersion));
    byLanguage.put("pt", new PortugueseAnalyzer(luceneVersion));
    byLanguage.put("ro", new RomanianAnalyzer(luceneVersion));
    byLanguage.put("ru", new RussianAnalyzer(luceneVersion));
    byLanguage.put("sv", new SwedishAnalyzer(luceneVersion));
    byLanguage.put("th", new ThaiAnalyzer(luceneVersion));
    byLanguage.put("tr", new TurkishAnalyzer(luceneVersion));
    byLanguage.put("cn", new SmartChineseAnalyzer(luceneVersion));
}