Java tutorial
/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package com.bindez.nlp.extract.ngram.corpus;

import com.bindez.nlp.spellcheck.BurmeseSpellchecker;
import com.bindez.nlp.extract.tokenizers.Word;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;

/**
 * Scratch utilities for the Myanmar corpus: lists the {@code id} field of every
 * document in the corpus Solr index, prints the segmentation of a sample string,
 * and prints the normalized form of each line of a local text file.
 *
 * @author ahk
 */
public class ExtractWord {

    /** File read line by line by {@link #removewhitespace()}. */
    private static final String REMOVE_SPACE_FILE = "/home/ahk/Bindez/removespace.txt";

    /** Solr client obtained once at construction and reused for every query. */
    private final HttpSolrServer server;

    /**
     * Creates an extractor bound to the server returned by
     * {@code new MyanmarCorpusSolrServer().getSolrServer()}.
     */
    public ExtractWord() {
        server = new MyanmarCorpusSolrServer().getSolrServer();
    }

    /**
     * Runs a match-all ({@code *:*}) query and collects the {@code id} field of
     * every returned document. The total hit count reported by Solr is printed
     * to standard output as a side effect.
     *
     * @return the string form of each document's {@code id}, in response order
     * @throws SolrServerException if the Solr request fails
     */
    public List<String> query() throws SolrServerException {
        SolrQuery query = new SolrQuery();
        query.setQuery("*:*");
        // NOTE(review): asking for Integer.MAX_VALUE rows fetches the whole result
        // set in a single response; consider paging if the index is large.
        query.setRows(Integer.MAX_VALUE);

        QueryResponse response = server.query(query);
        SolrDocumentList results = response.getResults();
        System.out.println(results.getNumFound());

        List<String> ids = new ArrayList<>(results.size());
        for (SolrDocument document : results) {
            ids.add(document.getFieldValue("id").toString());
        }
        return ids;
    }

    /**
     * Segments a hard-coded sample string with {@code MyanmarCorpusSegmentor}
     * and prints the text of each resulting word on its own line.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if segmentation fails
     */
    public static void main(String[] args) throws Exception {
        // NOTE(review): the sample text consists of '?' placeholders, presumably
        // left by an encoding error that replaced the original characters; it is
        // kept as found (the line break is written as \n) - restore the intended
        // text before relying on this output.
        String sample = "??? ? \n";

        List<Word> words = MyanmarCorpusSegmentor.segment(sample);
        for (Word word : words) {
            System.out.println(word.getText());
        }
    }

    /**
     * Reads {@value #REMOVE_SPACE_FILE} line by line and prints, for each line,
     * the result of {@code BurmeseSpellchecker.getInstance().normalization(line)}.
     * The stream is closed even when reading or normalization fails.
     *
     * @throws FileNotFoundException if the input file cannot be opened
     * @throws IOException if reading from or closing the file fails
     */
    public void removewhitespace() throws FileNotFoundException, IOException {
        // NOTE(review): the reader uses the platform default charset; confirm the
        // file's encoding and pass it explicitly if it may differ.
        try (FileInputStream input = new FileInputStream(REMOVE_SPACE_FILE);
                BufferedReader reader = new BufferedReader(new InputStreamReader(input))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(BurmeseSpellchecker.getInstance().normalization(line));
            }
        }
    }
}