com.bindez.nlp.extract.ngram.corpus.ExtractWord.java Source code

Java tutorial

Introduction

Here is the source code for com.bindez.nlp.extract.ngram.corpus.ExtractWord.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package com.bindez.nlp.extract.ngram.corpus;

import com.bindez.nlp.spellcheck.BurmeseSpellchecker;
import com.bindez.nlp.extract.tokenizers.Word;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;

/**
 * Small utility around the Solr server supplied by {@code MyanmarCorpusSolrServer}.
 *
 * <p>{@link #query()} collects the {@code id} field of every indexed document,
 * {@link #removewhitespace()} prints a normalized copy of a text file, and
 * {@link #main(String[])} runs a short segmentation check.
 *
 * @author ahk
 */
public class ExtractWord {

    /** Input file read by {@link #removewhitespace()} when no path is given. */
    private static final String DEFAULT_INPUT_PATH = "/home/ahk/Bindez/removespace.txt";

    /** Solr client, created once in the constructor and reused by every query. */
    private final HttpSolrServer server;

    /** Creates an extractor bound to the server returned by {@code MyanmarCorpusSolrServer}. */
    public ExtractWord() {
        server = new MyanmarCorpusSolrServer().getSolrServer();
    }

    /**
     * Runs a match-all query and returns the {@code id} field of each returned document.
     *
     * <p>The total hit count reported by Solr is printed to standard output.
     * Documents whose {@code id} field is absent from the response are skipped.
     *
     * @return the string form of every returned document id, in response order; never {@code null}
     * @throws SolrServerException if the Solr request fails
     */
    public List<String> query() throws SolrServerException {
        SolrQuery query = new SolrQuery();
        query.setQuery("*:*");
        // Request every match in a single response; no paging is done.
        // NOTE(review): on a large index this loads all documents into memory at once.
        query.setRows(Integer.MAX_VALUE);

        QueryResponse response = server.query(query);
        SolrDocumentList results = response.getResults();
        System.out.println(results.getNumFound());

        List<String> ids = new ArrayList<String>(results.size());
        for (SolrDocument document : results) {
            Object id = document.getFieldValue("id");
            if (id != null) {
                ids.add(id.toString());
            }
        }
        return ids;
    }

    /**
     * Segments a fixed sample string and prints the text of each resulting word.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if segmentation fails
     */
    public static void main(String[] args) throws Exception {
        // NOTE(review): the sample consists of '?' characters, which looks like
        // non-ASCII text lost in an encoding conversion - confirm against the original.
        String st = "??? ?  ";
        List<Word> wordList = MyanmarCorpusSegmentor.segment(st);
        for (Word word : wordList) {
            System.out.println(word.getText());
        }
    }

    /**
     * Normalizes the default input file line by line and prints the result.
     *
     * @throws FileNotFoundException if the default input file cannot be opened
     * @throws IOException if reading the file fails
     * @see #removewhitespace(String)
     */
    public void removewhitespace() throws FileNotFoundException, IOException {
        removewhitespace(DEFAULT_INPUT_PATH);
    }

    /**
     * Reads the given file line by line and prints, for each line, the value returned
     * by {@code BurmeseSpellchecker.getInstance().normalization(line)}.
     *
     * <p>The exact transformation is defined by {@code BurmeseSpellchecker}; the method
     * name suggests whitespace removal, but that is not visible from this class.
     *
     * @param path path of the text file to read
     * @throws FileNotFoundException if {@code path} cannot be opened
     * @throws IOException if reading the file fails
     */
    public void removewhitespace(String path) throws FileNotFoundException, IOException {
        // NOTE(review): the file is decoded with the platform default charset;
        // consider passing an explicit charset if the input encoding is known.
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(path)));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(BurmeseSpellchecker.getInstance().normalization(line));
            }
        } finally {
            // Release the file handle even when reading or normalization throws.
            reader.close();
        }
    }
}