Example usage for org.apache.lucene.analysis.es SpanishAnalyzer SpanishAnalyzer

List of usage examples for org.apache.lucene.analysis.es SpanishAnalyzer SpanishAnalyzer

Introduction

On this page you can find example usages of the org.apache.lucene.analysis.es.SpanishAnalyzer constructor SpanishAnalyzer().

Prototype

public SpanishAnalyzer() 

Source Link

Document

Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.

Usage

From source file:de.citec.lucene.CreateIndex.java

/**
 * Rebuilds the Lucene index at a hard-coded location from a hard-coded list of corpus files.
 *
 * <p>The analyzer is chosen from the configured {@code Language}. The index is opened in
 * {@code CREATE} mode, so an existing index at the same path is replaced. A file that fails
 * with an {@link IOException} is logged and skipped; the remaining files are still indexed.
 *
 * @param args ignored
 * @throws IOException if the index directory or the index writer cannot be opened or closed
 * @throws IllegalArgumentException if no analyzer is available for the configured language
 */
public static void main(String[] args) throws IOException {
    List<String> files = new ArrayList<>();
    files.add("/Users/swalter/Documents/EsaDeutsch/new_copus_german.txt");
    String indexPath = "/Users/swalter/Documents/EsaDeutsch/Index/";
    Language language = Language.DE;

    // Fail fast on an unsupported language instead of handing a null analyzer to Lucene.
    Analyzer analyzer;
    if (language.equals(Language.DE)) {
        analyzer = new GermanAnalyzer();
    } else if (language.equals(Language.ES)) {
        analyzer = new SpanishAnalyzer();
    } else if (language.equals(Language.EN)) {
        analyzer = new EnglishAnalyzer();
    } else {
        throw new IllegalArgumentException("No analyzer available for language " + language);
    }

    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(OpenMode.CREATE);
    // Value is in MB: a very large buffer, so the JVM heap must be sized accordingly.
    iwc.setRAMBufferSizeMB(12000);

    // Both resources are closed in reverse order (writer first, then directory),
    // even when indexing fails.
    try (Directory dir = FSDirectory.open(Paths.get(indexPath));
            IndexWriter writer = new IndexWriter(dir, iwc)) {
        for (String f : files) {
            try {
                indexDocs(writer, Paths.get(f));
            } catch (IOException ex) {
                // Best effort: report the failing file and continue with the next one.
                Logger.getLogger(CreateIndex.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
    }
    // counter is declared outside this method (presumably updated by indexDocs - confirm).
    System.out.println(counter);
}

From source file:de.citec.sc.sentence.preprocessing.lucene.CreateIndex.java

/**
 * Rebuilds the Lucene sentence index at a hard-coded location from a hard-coded list of files.
 *
 * <p>The analyzer is chosen from the configured {@code Language}. The index is opened in
 * {@code CREATE} mode, so an existing index at the same path is replaced. A file that fails
 * with an {@link IOException} is logged and skipped; the remaining files are still indexed.
 *
 * @param args ignored
 * @throws IOException if the index directory or the index writer cannot be opened or closed
 * @throws IllegalArgumentException if no analyzer is available for the configured language
 */
public static void main(String[] args) throws IOException {
    List<String> files = new ArrayList<>();
    files.add("/Users/swalter/Downloads/german_sentences_reduced.txt");
    String indexPath = "/Users/swalter/Index/GermanIndexReduced/";
    Language language = Language.DE;

    // Alternative configuration previously used for the Japanese corpus:
    //files.add("/home/bettina/CITEC/MATOLL/preprocessSentences/idealSentences/idealSents_mecab_jdepp_rmvPunct_CoNLLU");
    //String indexPath = "/home/bettina/CITEC/MATOLL/preprocessSentences/idealSentences/index";
    //Language language = Language.JA;

    // Fail fast on an unsupported language instead of handing a null analyzer to Lucene.
    Analyzer analyzer;
    if (language.equals(Language.DE)) {
        analyzer = new GermanAnalyzer();
    } else if (language.equals(Language.ES)) {
        analyzer = new SpanishAnalyzer();
    } else if (language.equals(Language.EN)) {
        analyzer = new EnglishAnalyzer();
    } else if (language.equals(Language.JA)) {
        analyzer = new JapaneseAnalyzer();
    } else {
        throw new IllegalArgumentException("No analyzer available for language " + language);
    }

    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(OpenMode.CREATE);
    // Value is in MB: a very large buffer, so the JVM heap must be sized accordingly.
    iwc.setRAMBufferSizeMB(12000);

    // Both resources are closed in reverse order (writer first, then directory),
    // even when indexing fails.
    try (Directory dir = FSDirectory.open(Paths.get(indexPath));
            IndexWriter writer = new IndexWriter(dir, iwc)) {
        for (String f : files) {
            try {
                indexDocs(writer, Paths.get(f), language);
            } catch (IOException ex) {
                // Best effort: report the failing file and continue with the next one.
                Logger.getLogger(CreateIndex.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
    }
}

From source file:de.mirkosertic.desktopsearch.AnalyzerCache.java

License:Open Source License

/**
 * Creates the analyzer cache.
 *
 * <p>Stores a configured {@code StandardAnalyzer} in {@code standardAnalyzer}, initializes
 * {@code analyzerByLanguage} with an empty map, and then calls {@code registerIfEnabled}
 * once per {@code SupportedLanguage} constant listed below, passing a freshly constructed
 * and configured language-specific analyzer each time.
 *
 * <p>NOTE(review): {@code registerIfEnabled} and {@code configure} are defined outside this
 * block; presumably an analyzer is only stored when its language is enabled in
 * {@code aConfiguration} - confirm against those methods.
 *
 * @param aConfiguration configuration handed unchanged to every {@code registerIfEnabled} call
 */
public AnalyzerCache(Configuration aConfiguration) {
    standardAnalyzer = configure(new StandardAnalyzer());
    analyzerByLanguage = new HashMap<>();

    // One registration per supported language; every analyzer instance is created eagerly here.
    registerIfEnabled(SupportedLanguage.ar, aConfiguration, configure(new ArabicAnalyzer()));
    registerIfEnabled(SupportedLanguage.bg, aConfiguration, configure(new BulgarianAnalyzer()));
    registerIfEnabled(SupportedLanguage.br, aConfiguration, configure(new BrazilianAnalyzer()));
    registerIfEnabled(SupportedLanguage.ca, aConfiguration, configure(new CatalanAnalyzer()));
    registerIfEnabled(SupportedLanguage.ckb, aConfiguration, configure(new SoraniAnalyzer()));
    registerIfEnabled(SupportedLanguage.cz, aConfiguration, configure(new CzechAnalyzer()));
    registerIfEnabled(SupportedLanguage.da, aConfiguration, configure(new DanishAnalyzer()));
    registerIfEnabled(SupportedLanguage.de, aConfiguration, configure(new GermanAnalyzer()));
    registerIfEnabled(SupportedLanguage.el, aConfiguration, configure(new GreekAnalyzer()));
    registerIfEnabled(SupportedLanguage.en, aConfiguration, configure(new EnglishAnalyzer()));
    registerIfEnabled(SupportedLanguage.es, aConfiguration, configure(new SpanishAnalyzer()));
    registerIfEnabled(SupportedLanguage.eu, aConfiguration, configure(new BasqueAnalyzer()));
    registerIfEnabled(SupportedLanguage.fa, aConfiguration, configure(new PersianAnalyzer()));
    registerIfEnabled(SupportedLanguage.fi, aConfiguration, configure(new FinnishAnalyzer()));
    registerIfEnabled(SupportedLanguage.fr, aConfiguration, configure(new FrenchAnalyzer()));
    registerIfEnabled(SupportedLanguage.ga, aConfiguration, configure(new IrishAnalyzer()));
    registerIfEnabled(SupportedLanguage.gl, aConfiguration, configure(new GalicianAnalyzer()));
    registerIfEnabled(SupportedLanguage.hi, aConfiguration, configure(new HindiAnalyzer()));
    registerIfEnabled(SupportedLanguage.hu, aConfiguration, configure(new HungarianAnalyzer()));
    registerIfEnabled(SupportedLanguage.hy, aConfiguration, configure(new ArmenianAnalyzer()));
    registerIfEnabled(SupportedLanguage.id, aConfiguration, configure(new IndonesianAnalyzer()));
    registerIfEnabled(SupportedLanguage.it, aConfiguration, configure(new ItalianAnalyzer()));
    registerIfEnabled(SupportedLanguage.lv, aConfiguration, configure(new LatvianAnalyzer()));
    registerIfEnabled(SupportedLanguage.nl, aConfiguration, configure(new DutchAnalyzer()));
    registerIfEnabled(SupportedLanguage.no, aConfiguration, configure(new NorwegianAnalyzer()));
    registerIfEnabled(SupportedLanguage.pt, aConfiguration, configure(new PortugueseAnalyzer()));
    registerIfEnabled(SupportedLanguage.ro, aConfiguration, configure(new RomanianAnalyzer()));
    registerIfEnabled(SupportedLanguage.ru, aConfiguration, configure(new RussianAnalyzer()));
    registerIfEnabled(SupportedLanguage.sv, aConfiguration, configure(new SwedishAnalyzer()));
    registerIfEnabled(SupportedLanguage.th, aConfiguration, configure(new ThaiAnalyzer()));
    registerIfEnabled(SupportedLanguage.tr, aConfiguration, configure(new TurkishAnalyzer()));
}

From source file:edu.pucp.igc.piscosemanticsearch.Buscador.java

/**
 * Prepares this object for searching: opens the index directory referenced by
 * {@code fileCarpetaIndice}, opens a reader on it, wraps the reader in an
 * {@code IndexSearcher} and creates the Spanish analyzer.
 *
 * <p>The fields are assigned one after another, so a failure part-way through leaves the
 * earlier fields already set.
 *
 * @throws IOException if the directory or the index reader cannot be opened
 */
public void crearBuscador() throws IOException {
    this.directorio = FSDirectory.open(this.fileCarpetaIndice);
    this.directorioLectura = DirectoryReader.open(this.directorio);
    this.indexSearcher = new IndexSearcher(this.directorioLectura);
    this.analizador = new SpanishAnalyzer();
}

From source file:edu.pucp.igc.piscosemanticsearch.Buscador.java

/**
 * Runs a multi-field search and wraps the top 100 hits in a {@code ResultadosDeBusqueda}.
 *
 * <p>The query is built by {@code MultiFieldQueryParser.parse} from the two arrays,
 * analyzed with a new {@code SpanishAnalyzer}. The result object also receives the first
 * search text and the string form of the parsed query.
 *
 * @param textosABuscar query texts; element 0 is additionally stored in the result
 * @param camposAConsultar index fields handed to the parser together with the texts
 * @return the converted hits plus the first search text and the query string
 * @throws ParseException if a query text cannot be parsed
 * @throws IOException if the index cannot be read
 */
public ResultadosDeBusqueda buscar(String[] textosABuscar, String[] camposAConsultar)
        throws ParseException, IOException {
    final SpanishAnalyzer analizadorConsulta = new SpanishAnalyzer();
    final Query consulta = MultiFieldQueryParser.parse(textosABuscar, camposAConsultar, analizadorConsulta);

    // Only the 100 best-scoring documents are requested.
    final ScoreDoc[] aciertos = indexSearcher.search(consulta, 100).scoreDocs;

    return new ResultadosDeBusqueda(ScoreDocsToDocumentos(aciertos), textosABuscar[0], consulta.toString());
}

From source file:edu.pucp.igc.piscosemanticsearch.Buscador.java

/**
 * Searches the {@code "pisco"} field for the given text and returns up to 1000 hits.
 *
 * <p>The hits are also passed to {@code visualizarDocumentosDePisco}. Parse and I/O errors
 * are logged rather than propagated.
 *
 * @param textoABuscar text to search for
 * @return the hits; {@code null} if the failure happened before the search produced a result
 */
public ScoreDoc[] buscarDocumentosDePisco(String textoABuscar) {
    final String[] textos = { textoABuscar };
    final String[] campos = { "pisco" };

    // Stays null unless the search itself completes.
    ScoreDoc[] aciertos = null;
    try {
        Query consulta = MultiFieldQueryParser.parse(textos, campos, new SpanishAnalyzer());
        aciertos = indexSearcher.search(consulta, 1000).scoreDocs;
        visualizarDocumentosDePisco(aciertos);
    } catch (ParseException | IOException ex) {
        Logger.getLogger(Buscador.class.getName()).log(Level.SEVERE, null, ex);
    }
    return aciertos;
}

From source file:edu.pucp.igc.piscosemanticsearch.Indexador.java

/**
 * Creates the index writer stored in {@code escritor}.
 *
 * <p>The writer targets the directory referenced by {@code fileCarpetaIndice} and is
 * configured for Lucene 4.10.1 with a {@code SpanishAnalyzer}.
 *
 * @throws IOException if the directory or the writer cannot be opened
 */
public void crearEscritor() throws IOException {
    // Arguments are evaluated left to right: directory first, then analyzer and configuration.
    escritor = new IndexWriter(
            FSDirectory.open(fileCarpetaIndice),
            new IndexWriterConfig(Version.LUCENE_4_10_1, new SpanishAnalyzer()));
}

From source file:pucp.s2.gc.lucene.Searcher.java

/**
 * Opens the index at {@code indexDir} and prepares the searcher and the Spanish analyzer.
 *
 * <p>The directory is deliberately left open: the reader, and therefore the searcher, keeps
 * using it. It has to be closed together with {@code directoryReader} once searching is
 * finished. It is closed here only when the reader cannot be opened.
 *
 * @throws IOException if the directory or the index reader cannot be opened
 */
private void crearBuscador() throws IOException {
    directory = FSDirectory.open(new File(indexDir));
    try {
        directoryReader = DirectoryReader.open(directory);
    } catch (IOException e) {
        // Nothing will ever read from this directory, so release it before propagating.
        try {
            directory.close();
        } catch (IOException closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
    searcher = new IndexSearcher(directoryReader);
    analyzer = new SpanishAnalyzer();
}

From source file:sisTradicional.IndexFiles.java

License:Apache License

/**
 * Index all text files under a directory.
 *
 * <p>Recognized options:
 * <ul>
 *   <li>{@code -index INDEX_PATH}: where the index is written (default {@code index})</li>
 *   <li>{@code -docs DOCS_PATH}: directory to index (default {@code Datos/recordsdc})</li>
 *   <li>{@code -dump VALUE}: accepted for compatibility; the value is ignored</li>
 * </ul>
 * The index is opened in {@code CREATE} mode, so an existing index is replaced. The process
 * exits with status 1 when an option is missing its value or the document directory is
 * missing or unreadable. An {@link IOException} during indexing is reported on standard
 * output.
 *
 * @param args command-line options as described above
 */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles" + " [-index INDEX_PATH] [-docs DOCS_PATH] \n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index "
            + "in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    String docsPath = "Datos/recordsdc";
    boolean temporal = false;

    for (int i = 0; i < args.length; i++) {
        String option = args[i];
        if ("-index".equals(option) || "-docs".equals(option)) {
            // Reject a trailing option instead of failing with ArrayIndexOutOfBoundsException.
            if (i + 1 >= args.length) {
                System.err.println("Missing value for option " + option);
                System.err.println("Usage: " + usage);
                System.exit(1);
            }
            String value = args[++i];
            if ("-index".equals(option)) {
                indexPath = value;
            } else {
                docsPath = value;
            }
        } else if ("-dump".equals(option)) {
            // The dump path is no longer used; skip its value.
            i++;
        }
    }

    final File docDir = new File(docsPath);
    if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    long start = System.currentTimeMillis();
    System.out.println("Indexing to directory '" + indexPath + "'...");

    // try-with-resources releases the writer (and its write lock) and the directory
    // even when indexing fails half-way.
    try (Directory dir = FSDirectory.open((new File(indexPath)).toPath())) {
        Analyzer analyzer = new SpanishAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(OpenMode.CREATE);

        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            indexDocs(writer, docDir, temporal);
        }

        // Measured after the writer is closed, so the reported time includes that step.
        long elapsed = System.currentTimeMillis() - start;
        System.out.println(elapsed + " total milliseconds");
    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:sisTradicional.SearchFiles.java

License:Apache License

/**
 * Simple command-line based search demo.
 *
 * <p>Reads the information needs from an XML file via {@code parseInfoNeeds}. For each one
 * it parses the text with a {@code SpanishAnalyzer}, lets {@code findNames} and
 * {@code findYear} add clauses derived from the normalized query, and writes the hits to
 * the output file through {@code doPagingSearch}.
 *
 * <p>Recognized options: {@code -index dir}, {@code -infoNeeds file}, {@code -output file};
 * {@code -h} or {@code -help} as first argument prints the usage. The process exits with
 * status 1 when an option is missing its value.
 *
 * @param args command-line options as described above
 * @throws Exception if the index cannot be read, the output cannot be written or a query
 *         cannot be parsed
 */
public static void main(String[] args) throws Exception {
    nameDictionary();
    String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] -infoNeeds [infoNeedsFile] -output [resultsFile]";
    if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
        System.out.println(usage);
        System.exit(0);
    }

    String index = "index";
    String field = "contents";
    String infoNeeds = "Datos/InfoNeeds/necesidadesInformacionElegidas.xml";
    String output = "practica3/equipo12.txt";

    for (int i = 0; i < args.length; i++) {
        String option = args[i];
        if ("-index".equals(option) || "-infoNeeds".equals(option) || "-output".equals(option)) {
            // Reject a trailing option instead of failing with ArrayIndexOutOfBoundsException.
            if (i + 1 >= args.length) {
                System.err.println("Missing value for option " + option);
                System.err.println(usage);
                System.exit(1);
            }
            String value = args[++i];
            if ("-index".equals(option)) {
                index = value;
            } else if ("-infoNeeds".equals(option)) {
                infoNeeds = value;
            } else {
                output = value;
            }
        }
    }

    String[][] queryString = parseInfoNeeds(infoNeeds);

    // try-with-resources closes the output file, the reader and the directory even when a
    // query fails to parse or the search throws.
    try (FSDirectory dir = FSDirectory.open((new File(index)).toPath());
            IndexReader reader = DirectoryReader.open(dir);
            PrintWriter writer = new PrintWriter(output, "UTF-8")) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new SpanishAnalyzer();
        QueryParser parser = new QueryParser(field, analyzer);

        for (String[] infoNeed : queryString) {
            // infoNeed[0] is handed to doPagingSearch as-is; infoNeed[1] is the query text.
            String line = infoNeed[1].trim();

            // Run the text through the parser once to get its analyzed form, then let the
            // helpers add clauses to b based on that form.
            BooleanQuery b = new BooleanQuery();
            String normalized = parser.parse(line).toString(field);
            findNames(b, normalized);
            findYear(b, normalized);

            // The final query is the original text plus the string form of the extra clauses.
            Query query = parser.parse(line + " " + b.toString());

            System.out.println("Searching for: " + query.toString(field));

            doPagingSearch(searcher, query, infoNeed[0], writer);
        }
    }
}