Example usage for the org.apache.lucene.misc.HighFreqTerms constructor HighFreqTerms()

List of usage examples for the org.apache.lucene.misc.HighFreqTerms constructor HighFreqTerms()

Introduction

On this page you can find example usage of the org.apache.lucene.misc.HighFreqTerms constructor, HighFreqTerms().

Prototype

HighFreqTerms

Source Link

Usage

From source file: luceneindexcreator.LuceneIndexCreator.java

/**
 * Builds the main index, creates one sub-index per distinct "Week_number" value,
 * and exports high-frequency terms of the "content" field as JSON.
 *
 * <p>Two files are written:
 * <ul>
 *   <li>{@code JSONResults/allWeeklyTerms.json} - one object per week with keys
 *       "Week" and "Terms" (requesting up to 100 terms from each weekly sub-index);</li>
 *   <li>{@code JSONResults/allTermFreq.json} - terms for the whole index
 *       (requesting up to 150 terms).</li>
 * </ul>
 *
 * <p>Any exception is caught and its stack trace printed; nothing is rethrown.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    // Orders TermStats by ascending totalTermFreq. Long.compare returns 0 for equal
    // frequencies, which the Comparator contract requires (compare(a, a) must be 0).
    Comparator<TermStats> comparator = new Comparator<TermStats>() {
        @Override
        public int compare(TermStats t1, TermStats t2) {
            return Long.compare(t1.totalTermFreq, t2.totalTermFreq);
        }
    };

    try {
        LuceneIndexCreator lw = new LuceneIndexCreator(INDEX_PATH, JSON_FILE_PATH_WEEKLY);
        lw.createIndex();

        // Re-open the freshly created index; try-with-resources guarantees the
        // directory and reader are closed even if a later step throws.
        try (Directory indexDirectory = FSDirectory.open(new File(INDEX_PATH));
                IndexReader indexReader = DirectoryReader.open(indexDirectory)) {

            int numDocs = indexReader.numDocs();

            // Week numbers in first-seen order. Declared as ArrayList because it is
            // passed to the contains(...) helper defined elsewhere in this class.
            ArrayList<String> indexedWeeks = new ArrayList<>();

            // Create a sub-index the first time each week number is encountered.
            // NOTE(review): iterating 0..numDocs-1 assumes the index has no deleted
            // documents (doc ids are then contiguous) - confirm for reused indexes.
            for (int i = 0; i < numDocs; i++) {
                Document document = indexReader.document(i);
                String weekNum = document.get("Week_number");
                if (!contains(indexedWeeks, weekNum)) {
                    LuceneIndexCreator lww = new LuceneIndexCreator(PARENT_INDEX_PATH + "week" + weekNum,
                            JSON_FILE_PATH_WEEKLY);
                    lww.createSubindexWeek(weekNum);
                    indexedWeeks.add(weekNum);
                }
            }

            HighFreqTerms hTerms = new HighFreqTerms();

            // Collect the high-frequency terms of every weekly sub-index.
            JSONArray weeklyTerms = new JSONArray();
            for (String weekNum : indexedWeeks) {
                try (Directory indexDirectoryWeek = FSDirectory
                        .open(new File(PARENT_INDEX_PATH + "week" + weekNum));
                        IndexReader indexReaderWeek = DirectoryReader.open(indexDirectoryWeek)) {
                    TermStats[] weekResult = hTerms.getHighFreqTerms(indexReaderWeek, 100, "content",
                            comparator);

                    JSONObject weekJson = new JSONObject();
                    weekJson.put("Week", weekNum);
                    weekJson.put("Terms", termStatsToJson(weekResult));
                    weeklyTerms.add(weekJson);
                }
            }
            writeJsonFile("JSONResults/allWeeklyTerms.json", weeklyTerms);

            // High-frequency terms across all documents of the main index.
            TermStats[] allResult = hTerms.getHighFreqTerms(indexReader, 150, "content", comparator);
            writeJsonFile("JSONResults/allTermFreq.json", termStatsToJson(allResult));
        }
    } catch (Exception e) {
        // Top-level boundary of a command-line tool: report the failure and return.
        e.printStackTrace();
    }
}

/**
 * Converts term statistics into a JSON array of objects with keys "Term" and
 * "Frequency", preserving the order of the input array.
 *
 * @param termStats the term statistics to convert
 * @return a JSON array with one object per element of {@code termStats}
 */
private static JSONArray termStatsToJson(TermStats[] termStats) {
    JSONArray terms = new JSONArray();
    for (TermStats stats : termStats) {
        JSONObject entry = new JSONObject();
        entry.put("Term", stats.termtext.utf8ToString());
        entry.put("Frequency", stats.totalTermFreq);
        terms.add(entry);
    }
    return terms;
}

/**
 * Writes the JSON array to the given file and echoes it to standard output.
 *
 * @param path destination file path
 * @param json the JSON array to serialize
 * @throws java.io.IOException if the file cannot be opened or written
 */
private static void writeJsonFile(String path, JSONArray json) throws java.io.IOException {
    try (FileWriter file = new FileWriter(path)) {
        file.write(json.toJSONString());
        System.out.println("Successfully Copied JSON Object to File...");
        System.out.println("\nJSON Object: " + json);
    }
}