List of usage examples for the org.apache.lucene.analysis.es.SpanishAnalyzer constructor SpanishAnalyzer()
public SpanishAnalyzer()
From source file:de.citec.lucene.CreateIndex.java
/**
 * Builds a fresh Lucene index from the hard-coded corpus file(s), using the
 * analyzer that matches the hard-coded language.
 *
 * <p>The index is opened with {@code OpenMode.CREATE}. Each listed file is
 * passed to {@code indexDocs}; an {@link IOException} from one file is logged
 * and does not stop the remaining files.
 *
 * @param args not used
 * @throws IOException if the index directory or writer cannot be opened or closed
 * @throws IllegalArgumentException if no analyzer is available for the configured language
 */
public static void main(String[] args) throws IOException {
    List<String> files = new ArrayList<>();
    files.add("/Users/swalter/Documents/EsaDeutsch/new_copus_german.txt");
    String indexPath = "/Users/swalter/Documents/EsaDeutsch/Index/";
    Language language = Language.DE;

    // Pick the analyzer for the language; fail fast instead of handing a null
    // analyzer to IndexWriterConfig when the language has no branch here.
    Analyzer analyzer;
    if (language.equals(Language.DE)) {
        analyzer = new GermanAnalyzer();
    } else if (language.equals(Language.ES)) {
        analyzer = new SpanishAnalyzer();
    } else if (language.equals(Language.EN)) {
        analyzer = new EnglishAnalyzer();
    } else {
        throw new IllegalArgumentException("No analyzer available for language: " + language);
    }

    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(OpenMode.CREATE);
    iwc.setRAMBufferSizeMB(12000);

    // Both the directory and the writer are closed on every exit path
    // (writer first, then directory).
    try (Directory dir = FSDirectory.open(Paths.get(indexPath));
            IndexWriter writer = new IndexWriter(dir, iwc)) {
        files.forEach(f -> {
            try {
                indexDocs(writer, Paths.get(f));
            } catch (IOException ex) {
                Logger.getLogger(CreateIndex.class.getName()).log(Level.SEVERE, null, ex);
            }
        });
    }
    // counter is declared elsewhere in this class; presumably maintained by indexDocs.
    System.out.println(counter);
}
From source file:de.citec.sc.sentence.preprocessing.lucene.CreateIndex.java
public static void main(String[] args) throws IOException { Analyzer analyzer = null;//ww w .j av a 2 s .c o m List<String> files = new ArrayList<>(); files.add("/Users/swalter/Downloads/german_sentences_reduced.txt"); String indexPath = "/Users/swalter/Index/GermanIndexReduced/"; Language language = Language.DE; Directory dir = FSDirectory.open(Paths.get(indexPath)); //files.add("/home/bettina/CITEC/MATOLL/preprocessSentences/idealSentences/idealSents_mecab_jdepp_rmvPunct_CoNLLU"); //String indexPath = "/home/bettina/CITEC/MATOLL/preprocessSentences/idealSentences/index"; //Language language = Language.JA; //Directory dir = FSDirectory.open(Paths.get(indexPath)); if (language.equals(Language.DE)) analyzer = new GermanAnalyzer(); if (language.equals(Language.ES)) analyzer = new SpanishAnalyzer(); if (language.equals(Language.EN)) analyzer = new EnglishAnalyzer(); if (language.equals(Language.JA)) analyzer = new JapaneseAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); iwc.setOpenMode(OpenMode.CREATE); iwc.setRAMBufferSizeMB(12000); try (IndexWriter writer = new IndexWriter(dir, iwc)) { files.forEach(f -> { try { indexDocs(writer, Paths.get(f), language); } catch (IOException ex) { Logger.getLogger(CreateIndex.class.getName()).log(Level.SEVERE, null, ex); } }); } }
From source file:de.mirkosertic.desktopsearch.AnalyzerCache.java
License:Open Source License
/**
 * Creates the cache: stores a configured {@code StandardAnalyzer} as the
 * standard analyzer, then offers one configured language-specific analyzer
 * per supported language to {@code registerIfEnabled}.
 *
 * @param aConfiguration configuration passed to every {@code registerIfEnabled}
 *        call (presumably decides which languages are actually registered)
 */
public AnalyzerCache(Configuration aConfiguration) {
    standardAnalyzer = configure(new StandardAnalyzer());
    analyzerByLanguage = new HashMap<>();

    // One registration per language, in the same order as before.
    registerIfEnabled(SupportedLanguage.ar, aConfiguration, configure(new ArabicAnalyzer()));
    registerIfEnabled(SupportedLanguage.bg, aConfiguration, configure(new BulgarianAnalyzer()));
    registerIfEnabled(SupportedLanguage.br, aConfiguration, configure(new BrazilianAnalyzer()));
    registerIfEnabled(SupportedLanguage.ca, aConfiguration, configure(new CatalanAnalyzer()));
    registerIfEnabled(SupportedLanguage.ckb, aConfiguration, configure(new SoraniAnalyzer()));
    registerIfEnabled(SupportedLanguage.cz, aConfiguration, configure(new CzechAnalyzer()));
    registerIfEnabled(SupportedLanguage.da, aConfiguration, configure(new DanishAnalyzer()));
    registerIfEnabled(SupportedLanguage.de, aConfiguration, configure(new GermanAnalyzer()));
    registerIfEnabled(SupportedLanguage.el, aConfiguration, configure(new GreekAnalyzer()));
    registerIfEnabled(SupportedLanguage.en, aConfiguration, configure(new EnglishAnalyzer()));
    registerIfEnabled(SupportedLanguage.es, aConfiguration, configure(new SpanishAnalyzer()));
    registerIfEnabled(SupportedLanguage.eu, aConfiguration, configure(new BasqueAnalyzer()));
    registerIfEnabled(SupportedLanguage.fa, aConfiguration, configure(new PersianAnalyzer()));
    registerIfEnabled(SupportedLanguage.fi, aConfiguration, configure(new FinnishAnalyzer()));
    registerIfEnabled(SupportedLanguage.fr, aConfiguration, configure(new FrenchAnalyzer()));
    registerIfEnabled(SupportedLanguage.ga, aConfiguration, configure(new IrishAnalyzer()));
    registerIfEnabled(SupportedLanguage.gl, aConfiguration, configure(new GalicianAnalyzer()));
    registerIfEnabled(SupportedLanguage.hi, aConfiguration, configure(new HindiAnalyzer()));
    registerIfEnabled(SupportedLanguage.hu, aConfiguration, configure(new HungarianAnalyzer()));
    registerIfEnabled(SupportedLanguage.hy, aConfiguration, configure(new ArmenianAnalyzer()));
    registerIfEnabled(SupportedLanguage.id, aConfiguration, configure(new IndonesianAnalyzer()));
    registerIfEnabled(SupportedLanguage.it, aConfiguration, configure(new ItalianAnalyzer()));
    registerIfEnabled(SupportedLanguage.lv, aConfiguration, configure(new LatvianAnalyzer()));
    registerIfEnabled(SupportedLanguage.nl, aConfiguration, configure(new DutchAnalyzer()));
    registerIfEnabled(SupportedLanguage.no, aConfiguration, configure(new NorwegianAnalyzer()));
    registerIfEnabled(SupportedLanguage.pt, aConfiguration, configure(new PortugueseAnalyzer()));
    registerIfEnabled(SupportedLanguage.ro, aConfiguration, configure(new RomanianAnalyzer()));
    registerIfEnabled(SupportedLanguage.ru, aConfiguration, configure(new RussianAnalyzer()));
    registerIfEnabled(SupportedLanguage.sv, aConfiguration, configure(new SwedishAnalyzer()));
    registerIfEnabled(SupportedLanguage.th, aConfiguration, configure(new ThaiAnalyzer()));
    registerIfEnabled(SupportedLanguage.tr, aConfiguration, configure(new TurkishAnalyzer()));
}
From source file:edu.pucp.igc.piscosemanticsearch.Buscador.java
/**
 * Prepares the search-side state: opens the index folder as a directory,
 * opens a reader on it, wraps the reader in an {@code IndexSearcher}, and
 * creates the Spanish analyzer.
 *
 * @throws IOException if the directory or the reader cannot be opened
 */
public void crearBuscador() throws IOException {
    // Each step depends on the previous one, so the order must stay as is.
    directorio = FSDirectory.open(fileCarpetaIndice);
    directorioLectura = DirectoryReader.open(directorio);
    indexSearcher = new IndexSearcher(directorioLectura);

    analizador = new SpanishAnalyzer();
}
From source file:edu.pucp.igc.piscosemanticsearch.Buscador.java
public ResultadosDeBusqueda buscar(String[] textosABuscar, String[] camposAConsultar) throws ParseException, IOException { ScoreDoc[] listaScoreDoc;/*from ww w.j a v a 2 s. c o m*/ Query query = MultiFieldQueryParser.parse(textosABuscar, camposAConsultar, new SpanishAnalyzer()); // System.out.println("Busqueda en el campo " + Utilities.collapseStrings(camposAConsultar, " ") + ": " + query); listaScoreDoc = indexSearcher.search(query, 100).scoreDocs; ResultadosDeBusqueda resultados = new ResultadosDeBusqueda(ScoreDocsToDocumentos(listaScoreDoc), textosABuscar[0], query.toString()); // visualizarDocumentos(listaScoreDoc); return resultados; }
From source file:edu.pucp.igc.piscosemanticsearch.Buscador.java
public ScoreDoc[] buscarDocumentosDePisco(String textoABuscar) { ScoreDoc[] listaScoreDoc = null;//from ww w . j ava 2 s . com try { // QueryParser parser = new QueryParser("pisco", analizador); Query query = MultiFieldQueryParser.parse(new String[] { textoABuscar }, new String[] { "pisco" }, new SpanishAnalyzer()); // Query query = parser.parse(textoABuscar); // System.out.println("Busqueda textual de Pisco: " + query); listaScoreDoc = indexSearcher.search(query, 1000).scoreDocs; visualizarDocumentosDePisco(listaScoreDoc); } catch (ParseException ex) { Logger.getLogger(Buscador.class.getName()).log(Level.SEVERE, null, ex); } catch (IOException ex) { Logger.getLogger(Buscador.class.getName()).log(Level.SEVERE, null, ex); } return listaScoreDoc; }
From source file:edu.pucp.igc.piscosemanticsearch.Indexador.java
/**
 * Creates the index writer over the index folder, configured with a
 * {@code SpanishAnalyzer} and {@code Version.LUCENE_4_10_1}, and stores it
 * in {@code escritor}.
 *
 * @throws IOException if the directory or the writer cannot be opened
 */
public void crearEscritor() throws IOException {
    escritor = new IndexWriter(
            FSDirectory.open(fileCarpetaIndice),
            new IndexWriterConfig(Version.LUCENE_4_10_1, new SpanishAnalyzer()));
}
From source file:pucp.s2.gc.lucene.Searcher.java
private void crearBuscador() throws IOException { directory = FSDirectory.open(new File(indexDir)); directoryReader = DirectoryReader.open(directory); directory.close();//from w ww. j a va 2 s .c om searcher = new IndexSearcher(directoryReader); //analyzer = new StandardAnalyzer(); analyzer = new SpanishAnalyzer(); }
From source file:sisTradicional.IndexFiles.java
License:Apache License
/** Index all text files under a directory. */ public static void main(String[] args) { String usage = "java org.apache.lucene.demo.IndexFiles" + " [-index INDEX_PATH] [-docs DOCS_PATH] \n\n" + "This indexes the documents in DOCS_PATH, creating a Lucene index" + "in INDEX_PATH that can be searched with SearchFiles"; String indexPath = "index"; String docsPath = "Datos/recordsdc"; //String dump = "Datos/dump"; boolean temporal = false; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { indexPath = args[i + 1];/*from ww w . ja va2 s .co m*/ i++; } else if ("-dump".equals(args[i])) { //dump = args[i+1]; i++; } } //ParserDump.start(dump); final File docDir = new File(docsPath); if (!docDir.exists() || !docDir.canRead()) { System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1); } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open((new File(indexPath)).toPath()); Analyzer analyzer = new SpanishAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); iwc.setOpenMode(OpenMode.CREATE); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir, temporal); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:sisTradicional.SearchFiles.java
License:Apache License
/**
 * Simple command-line based search demo.
 *
 * <p>Recognized arguments: {@code -index dir}, {@code -infoNeeds file} and
 * {@code -output file}; {@code -h} or {@code -help} as the first argument
 * prints the usage text and exits. For every information need returned by
 * {@code parseInfoNeeds}, the text is parsed with a {@code SpanishAnalyzer},
 * extended with the clauses collected by {@code findNames} and
 * {@code findYear}, and the resulting query is run through
 * {@code doPagingSearch}, which receives the output writer.
 *
 * <p>The process exits with status 1 if an option is given without a value.
 *
 * @param args command-line arguments as described above
 * @throws Exception if the information needs cannot be read, a query cannot
 *         be parsed, or the index or output file cannot be accessed
 */
public static void main(String[] args) throws Exception {
    nameDictionary();
    String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] -infoNeeds [infoNeedsFile] -output [resultsFile]";
    if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
        System.out.println(usage);
        System.exit(0);
    }

    String index = "index";
    String field = "contents";
    String infoNeeds = "Datos/InfoNeeds/necesidadesInformacionElegidas.xml";
    String output = "practica3/equipo12.txt";

    for (int i = 0; i < args.length; i++) {
        String option = args[i];
        boolean takesValue = "-index".equals(option) || "-infoNeeds".equals(option)
                || "-output".equals(option);
        if (!takesValue) {
            continue;
        }
        // A trailing option has no value: report usage instead of failing
        // with ArrayIndexOutOfBoundsException.
        if (i + 1 >= args.length) {
            System.err.println(usage);
            System.exit(1);
        }
        String value = args[++i];
        if ("-index".equals(option)) {
            index = value;
        } else if ("-infoNeeds".equals(option)) {
            infoNeeds = value;
        } else {
            output = value;
        }
    }

    String[][] queryString = parseInfoNeeds(infoNeeds);

    // Directory, reader and output writer are released on every exit path,
    // including parse or search failures in the middle of the loop.
    try (FSDirectory dir = FSDirectory.open((new File(index)).toPath());
            IndexReader reader = DirectoryReader.open(dir);
            PrintWriter writer = new PrintWriter(output, "UTF-8")) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new SpanishAnalyzer();
        QueryParser parser = new QueryParser(field, analyzer);

        for (String[] need : queryString) {
            // need[0] is handed to doPagingSearch; need[1] is the query text.
            String line = need[1].trim();

            // Parse once to get the analyzed text, collect extra clauses from
            // it, then parse again with those clauses appended.
            BooleanQuery b = new BooleanQuery();
            String normalized = parser.parse(line).toString(field);
            findNames(b, normalized);
            findYear(b, normalized);

            Query query = parser.parse(line + " " + b.toString());
            System.out.println("Searching for: " + query.toString(field));
            doPagingSearch(searcher, query, need[0], writer);
        }
    }
}