workTextIndexService.Procesamiento.java Source code

Java tutorial

Introduction

Here is the source code for workTextIndexService.Procesamiento.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package workTextIndexService;

import com.mongodb.DBObject;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.es.SpanishAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;

/**
 * Text-processing helpers for the text index service: normalizes Spanish text
 * with Lucene's {@link SpanishAnalyzer}, splits text into normalized words,
 * renders a short preview of a stored document, and splits composite
 * {@code "id,collection"} identifiers.
 *
 * <p>The class keeps no state between calls; every method works exclusively
 * on local variables.
 *
 * @author Rodrigo Rivas
 */
public class Procesamiento {

    /** Maximum number of content lines included by {@link #mostrarResultado(DBObject)}. */
    private static final int LINEAS_VISTA_PREVIA = 4;

    /**
     * Runs {@code texto} through the Spanish analyzer and returns the emitted
     * terms, each followed by a single space (so a non-empty result ends with
     * a trailing space), converted to lower case.
     *
     * @param texto text to normalize; must not be {@code null}
     * @return the analyzed terms separated (and terminated) by spaces, or an
     *         empty string when the analyzer emits no terms
     * @throws RuntimeException wrapping any {@link IOException} raised while
     *         analyzing
     */
    public String normalizar(String texto) {
        // Analyzer is Closeable: release it even when analysis fails.
        try (SpanishAnalyzer analyzer = crearAnalizador()) {
            return extraerTerminos(analyzer, texto).toLowerCase();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Splits {@code texto} on spaces and normalizes each piece individually.
     * All terms the analyzer produces for one piece are concatenated without
     * separators, so every piece yields exactly one list entry; a piece for
     * which the analyzer emits no terms yields an empty string.
     *
     * @param texto space-separated text; must not be {@code null}
     * @return a new mutable list with one normalized entry per space-separated
     *         piece, in input order
     * @throws RuntimeException wrapping any {@link IOException} raised while
     *         analyzing
     */
    public List<String> obtenerPalabras(String texto) {
        List<String> palabras = new ArrayList<>();
        StringTokenizer piezas = new StringTokenizer(texto, " ");
        // One analyzer serves every word instead of building one per word.
        try (SpanishAnalyzer analyzer = crearAnalizador()) {
            while (piezas.hasMoreTokens()) {
                String normalizada = extraerTerminos(analyzer, piezas.nextToken()).toLowerCase();
                palabras.add(normalizada.replaceAll(" ", ""));
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        return palabras;
    }

    /**
     * Builds a short, human-readable preview of a stored document: its title
     * followed by at most the first four non-empty lines of its text.
     *
     * <p>Texts with fewer than four non-empty lines are rendered with the
     * lines that exist. A missing {@code "Titulo"} or {@code "Texto"} field
     * is rendered as empty text.
     *
     * @param value document exposing the {@code "Titulo"} and {@code "Texto"}
     *              fields; must not be {@code null}
     * @return the preview in the form
     *         {@code "Titulo: <title>\n\nContenido:\n<line1>\n<line2>..."}
     */
    public String mostrarResultado(DBObject value) {
        String titulo = leerCampo(value, "Titulo");
        String texto = leerCampo(value, "Texto");

        StringBuilder resultado = new StringBuilder("Titulo: ")
                .append(titulo)
                .append("\n\nContenido:\n");

        // StringTokenizer skips empty lines, so only non-empty lines count.
        StringTokenizer lineas = new StringTokenizer(texto, "\n");
        for (int i = 0; i < LINEAS_VISTA_PREVIA && lineas.hasMoreTokens(); i++) {
            if (i > 0) {
                resultado.append('\n');
            }
            resultado.append(lineas.nextToken());
        }
        return resultado.toString();
    }

    /**
     * Returns the first non-empty comma-separated component of {@code id}.
     *
     * @param id composite identifier such as {@code "id,collection"}
     * @return the first component
     * @throws java.util.NoSuchElementException if {@code id} contains no
     *         non-empty component
     */
    public String getId(String id) {
        return new StringTokenizer(id, ",").nextToken();
    }

    /**
     * Returns the second non-empty comma-separated component of {@code id}.
     *
     * @param id composite identifier such as {@code "id,collection"}
     * @return the second component
     * @throws java.util.NoSuchElementException if {@code id} has fewer than
     *         two non-empty components
     */
    public String getCollection(String id) {
        StringTokenizer componentes = new StringTokenizer(id, ",");
        componentes.nextToken(); // skip the id component
        return componentes.nextToken();
    }

    /** Creates the Spanish analyzer used by this class; the caller must close it. */
    @SuppressWarnings("deprecation")
    private static SpanishAnalyzer crearAnalizador() {
        return new SpanishAnalyzer(Version.LUCENE_4_10_1);
    }

    /**
     * Analyzes {@code texto} with {@code analyzer} and returns every emitted
     * term followed by a single space.
     */
    private static String extraerTerminos(SpanishAnalyzer analyzer, String texto) throws IOException {
        StringBuilder terminos = new StringBuilder();
        try (TokenStream stream = analyzer.tokenStream(null, new StringReader(texto))) {
            // The attribute instance is reused for every token of the stream.
            CharTermAttribute termino = stream.getAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                terminos.append(termino.toString()).append(' ');
            }
            // Part of the TokenStream consumer workflow: signal end-of-stream before close.
            stream.end();
        }
        return terminos.toString();
    }

    /** Returns the named field as text, or an empty string when the field is absent. */
    private static String leerCampo(DBObject documento, String campo) {
        Object valor = documento.get(campo);
        return valor == null ? "" : valor.toString();
    }
}