List of usage examples for the org.apache.lucene.analysis.es.SpanishAnalyzer constructor
public SpanishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet)
From source file:indexer.files.LuceneWriter.java
public boolean openIndex() { try {// w w w . j a va2s .c om //Abrimos el directorio Directory dir = FSDirectory.open(new File(pathToIndex)); //Elegimos un Analyzer . Y especificamos la versin de Lucene que usamos SpanishAnalyzer analyzer = new SpanishAnalyzer(Version.LUCENE_43, new CharArraySet(Version.LUCENE_43, Arrays.asList( StringUtils.split(FileUtils.readFileToString(new File(this.pathToStopWords), "UTF-8"))), true)); //Creamos un IndexWriterConfig IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_43, analyzer); //Siempre vamos a sobreescribir el indice que tenemos en el directorio iwc.setOpenMode(OpenMode.CREATE); indexWriter = new IndexWriter(dir, iwc); return true; } catch (Exception e) { System.out.println("Ocurrio un problema abriendo el documento para escritura: " + e.getClass() + " :: " + e.getMessage()); return false; } }
From source file:org.elasticsearch.analysis.common.SpanishAnalyzerProvider.java
License:Apache License
SpanishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new SpanishAnalyzer(Analysis.parseStopWords(env, settings, SpanishAnalyzer.getDefaultStopSet()),
Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
analyzer.setVersion(version);//from w ww. ja v a2s. co m
}
From source file:org.omegat.tokenizer.LuceneSpanishTokenizer.java
License:Open Source License
/**
 * Creates a token stream over {@code strOrig}.
 *
 * <p>When stemming is not allowed, a plain {@code StandardTokenizer} is returned
 * and {@code stopWordsAllowed} is not consulted. Otherwise a
 * {@code SpanishAnalyzer} is used, with its default stop set when
 * {@code stopWordsAllowed} is true and an empty set when it is false.
 *
 * @param strOrig          text to tokenize
 * @param stemsAllowed     whether the Spanish analyzer should be used
 * @param stopWordsAllowed whether the analyzer's default stop set is applied
 * @return a token stream reading from {@code strOrig}
 */
@Override
protected TokenStream getTokenStream(final String strOrig, final boolean stemsAllowed,
        final boolean stopWordsAllowed) {
    if (!stemsAllowed) {
        return new StandardTokenizer(getBehavior(), new StringReader(strOrig));
    }

    final Set<?> stopWords;
    if (stopWordsAllowed) {
        stopWords = SpanishAnalyzer.getDefaultStopSet();
    } else {
        stopWords = Collections.EMPTY_SET;
    }

    // NOTE(review): getBehavior() presumably supplies the Lucene Version - confirm in the base class.
    final SpanishAnalyzer analyzer = new SpanishAnalyzer(getBehavior(), stopWords);
    return analyzer.tokenStream("", new StringReader(strOrig));
}