List of usage examples for the org.apache.lucene.misc.HighFreqTerms constructor HighFreqTerms()
HighFreqTerms
From source file: luceneindexcreator.LuceneIndexCreator.java
public static void main(String[] args) { try {//from w w w .j a v a 2 s. c om Comparator<TermStats> comparator = new Comparator<TermStats>() { @Override public int compare(TermStats t1, TermStats t2) { return t1.totalTermFreq < t2.totalTermFreq ? -1 : 1; }; }; LuceneIndexCreator lw = new LuceneIndexCreator(INDEX_PATH, JSON_FILE_PATH_WEEKLY); lw.createIndex(); //Check the index has been created successfully Directory indexDirectory = FSDirectory.open(new File(INDEX_PATH)); IndexReader indexReader = DirectoryReader.open(indexDirectory); int numDocs = indexReader.numDocs(); /* Keywords SORTED BY DATE * //generation of Date indexes and the associated json files of keyword freq * ArrayList<String> indexedDates = new ArrayList<String>(); * for ( int i = 0; i < numDocs; i++){ * Document document = indexReader.document(i); * //indexRader.toString(i); * String date = document.get("Date"); * if (!contains(indexedDates, date)) { * LuceneIndexCreator lwd = new LuceneIndexCreator(PARENT_INDEX_PATH + date, JSON_FILE_PATH_WEEKLY); * lwd.createSubindexDate(date); * indexedDates.add(date); * } * Directory indexDirectoryDate = FSDirectory.open(new File(PARENT_INDEX_PATH + date)); * IndexReader indexReaderDate = DirectoryReader.open(indexDirectoryDate); * HighFreqTerms hTerms = new HighFreqTerms(); * JSONArray termResultJSONArray = new JSONArray(); * TermStats[] hTermResult = hTerms.getHighFreqTerms(indexReaderDate, 50, "content", comparator); * //creating json object * for (int j = 0; j < hTermResult.length; j++) { * JSONObject termResultJSON = new JSONObject(); * termResultJSON.put("Term", hTermResult[j].termtext.utf8ToString()); * termResultJSON.put("Frequency", hTermResult[j].totalTermFreq); * termResultJSONArray.add(termResultJSON); * //System.out.println("" + hTermResult[i].termtext.utf8ToString() + " " + hTermResult[i].totalTermFreq); * } * //outputting json * try(FileWriter file = new FileWriter("JSONResults/" + date + ".json")) { * 
file.write(termResultJSONArray.toJSONString()); * System.out.println("Successfully Copied JSON Object to File..."); * System.out.println("\nJSON Object: " + termResultJSONArray ); * * } * //date = date.substring(5, 16).trim(); * //System.out.println( "d=" + document.get("content")); * //System.out.println("date: " + date + "."); * } */ // keywords sorted by week //generation of Date indexes and the associated json files of keyword freq ArrayList<String> indexedWeeks = new ArrayList<String>(); //creating subindexes for each week for (int i = 0; i < numDocs; i++) { Document document = indexReader.document(i); //System.out.println(document.get("Week_number")); //System.out.println(document.get("Date")); String weekNum = document.get("Week_number"); //System.out.println(weekNum); if (!contains(indexedWeeks, weekNum)) { LuceneIndexCreator lww = new LuceneIndexCreator(PARENT_INDEX_PATH + "week" + weekNum, JSON_FILE_PATH_WEEKLY); lww.createSubindexWeek(weekNum); indexedWeeks.add(weekNum); } } JSONArray json1 = new JSONArray(); for (String weekNum : indexedWeeks) { Directory indexDirectoryWeek = FSDirectory.open(new File(PARENT_INDEX_PATH + "week" + weekNum)); IndexReader indexReaderWeek = DirectoryReader.open(indexDirectoryWeek); HighFreqTerms hTerms = new HighFreqTerms(); TermStats[] hTermResult = hTerms.getHighFreqTerms(indexReaderWeek, 100, "content", comparator); //creating json object JSONObject json2 = new JSONObject(); json2.put("Week", weekNum); JSONArray json3 = new JSONArray(); for (int j = 0; j < hTermResult.length; j++) { JSONObject json4 = new JSONObject(); json4.put("Term", hTermResult[j].termtext.utf8ToString()); json4.put("Frequency", hTermResult[j].totalTermFreq); json3.add(json4); } json2.put("Terms", json3); json1.add(json2); } //output json try (FileWriter file = new FileWriter("JSONResults/allWeeklyTerms.json")) { file.write(json1.toJSONString()); System.out.println("Successfully Copied JSON Object to File..."); System.out.println("\nJSON Object: " + 
json1); } // gets term freq for all docs HighFreqTerms hTerms = new HighFreqTerms(); JSONArray termResultJSONArray = new JSONArray(); //array of termStats TermStats[] hTermResult = hTerms.getHighFreqTerms(indexReader, 150, "content", comparator); //creating json object for (int i = 0; i < hTermResult.length; i++) { JSONObject termResultJSON = new JSONObject(); termResultJSON.put("Term", hTermResult[i].termtext.utf8ToString()); termResultJSON.put("Frequency", hTermResult[i].totalTermFreq); termResultJSONArray.add(termResultJSON); //System.out.println("" + hTermResult[i].termtext.utf8ToString() + " " + hTermResult[i].totalTermFreq); } //outputting json try (FileWriter file = new FileWriter("JSONResults/allTermFreq.json")) { file.write(termResultJSONArray.toJSONString()); System.out.println("Successfully Copied JSON Object to File..."); System.out.println("\nJSON Object: " + termResultJSONArray); } } catch (Exception e) { e.printStackTrace(); } }