in.dream_lab.eventgen.factory.CsvSplitter.java Source code

Java tutorial

Introduction

Here is the source code for in.dream_lab.eventgen.factory.CsvSplitter.java

Source

/**
 * Copyright 2015 DREAM:Lab, Indian Institute of Science, Bangalore
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package in.dream_lab.eventgen.factory;

import com.opencsv.CSVReader;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.ISODateTimeFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.List;

/*
 * Splits the CSV file in round-robin manner and stores it to individual files
 * based on the number of threads
 */
public class CsvSplitter {
    public static Logger LOG = LoggerFactory.getLogger(CsvSplitter.class);

    public static int numThreads;
    public static int peakRate;

    public static List<String> extractHeadersFromCSV(String inputFileName) {
        try {
            CSVReader reader = new CSVReader(new FileReader(inputFileName));
            String[] headers = reader.readNext(); // use .intern() later
            reader.close();
            List<String> headerList = new ArrayList<String>();
            for (String s : headers) {
                headerList.add(s);
            }
            return headerList;
        } catch (FileNotFoundException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
        return null;
    }

    //Assumes sorted on timestamp csv file
    //It also treats the first event to be at 0 relative time
    public static List<TableClass> roundRobinSplitCsvToMemory(String inputSortedCSVFileName, int numThreads,
            double accFactor, String datasetType) throws IOException {
        CSVReader reader = new CSVReader(new FileReader(inputSortedCSVFileName));
        String[] nextLine;
        int ctr = 0;
        String[] headers = reader.readNext(); // use .intern() later
        List<String> headerList = new ArrayList<String>();
        for (String s : headers) {
            headerList.add(s);
        }

        List<TableClass> tableList = new ArrayList<TableClass>();
        for (int i = 0; i < numThreads; i++) {
            TableClass tableClass = new TableClass();
            tableClass.setHeader(headerList);
            tableList.add(tableClass);
        }

        TableClass tableClass = null;
        boolean flag = true;
        long startTs = 0, deltaTs = 0;

        //CODE TO ACCOMODATE PLUG DATASET SPECIAL CASE TO RUN IT FOR 10 MINS
        int numMins = 90000; // Keeping it fixed for current set of experiments
        Double cutOffTimeStamp = 0.0;//int msgs=0;
        while ((nextLine = reader.readNext()) != null) {
            // nextLine[] is an array of values from the line
            // System.out.println(nextLine[0] + "  " + nextLine[1] + "   " +
            // nextLine[2] + "  " + nextLine[3] + "  etc...");

            List<String> row = new ArrayList<String>();
            for (int i = 0; i < nextLine.length; i++) {
                row.add(nextLine[i]);
            }

            tableClass = tableList.get(ctr);
            ctr = (ctr + 1) % numThreads;

            int timestampColIndex = 0;
            DateTime date = null;
            if (datasetType.equals("TAXI")) {
                timestampColIndex = 3;
                date = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss").parseDateTime(nextLine[timestampColIndex]);
            } else if (datasetType.equals("SYS")) {
                timestampColIndex = 0;
                date = ISODateTimeFormat.dateTimeParser().parseDateTime(nextLine[timestampColIndex]);
                //date = ISODateTimeFormat.dateTimeParser().parseDateTime(
                //      nextLine[timestampColIndex]);
            } else if (datasetType.equals("PLUG")) {
                timestampColIndex = 1;
                date = new DateTime(Long.parseLong(nextLine[timestampColIndex]) * 1000);
                //date = ISODateTimeFormat.dateTimeParser().parseDateTime(
                //      nextLine[timestampColIndex]);
            }

            else if (datasetType.equals("UIDAI")) {
                timestampColIndex = 1;
                date = new DateTime(Long.parseLong(nextLine[timestampColIndex]) * 1000);
                //date = ISODateTimeFormat.dateTimeParser().parseDateTime(
                //      nextLine[timestampColIndex]);
            }

            long ts = date.getMillis();
            if (flag) {
                startTs = ts;
                flag = false;
                cutOffTimeStamp = startTs + numMins * (1.0 / accFactor) * 60 * 1000; // accFactor is actually the scaling factor or deceleration factor
                //System.out.println("GOTSTART TS : "  + ts + " cut off " + cutOffTimeStamp);
            }

            if (ts > cutOffTimeStamp) {
                //System.out.println("GOT TS : "  + ts + " cut off " + cutOffTimeStamp + "  msgs " + (++msgs));
                break; // No More data to be loaded
            }

            deltaTs = ts - startTs;
            deltaTs = (long) (accFactor * deltaTs);
            tableClass.append(deltaTs, row);
            //System.out.println("ts " + (ts - startTs) + " deltaTs " + deltaTs);
        }

        reader.close();
        return tableList;
    }

    public static void roundRobinSplitCsvToFiles(String inputFileName, int numThreads) throws IOException {

        BufferedReader bReader = new BufferedReader(new FileReader(inputFileName));
        String headerLine = bReader.readLine();
        System.out.println(headerLine);
        String line;

        BufferedWriter[] bWriters = new BufferedWriter[numThreads];

        for (int i = 0; i < numThreads; i++) {
            bWriters[i] = new BufferedWriter(new FileWriter("/var/tmp/SyS/out/output" + i + ".csv"));
            bWriters[i].write(headerLine);
            bWriters[i].newLine();
        }

        int ctr = 0;
        while ((line = bReader.readLine()) != null) {
            bWriters[ctr].write(line);
            bWriters[ctr].newLine();
            ctr = (ctr + 1) % numThreads;
        }

        bReader.close();
        for (int i = 0; i < numThreads; i++) {
            bWriters[i].flush();
            bWriters[i].close();
        }
    }

    /**
     * @param args
     * @throws ParseException
     * @throws IOException
     */
    public static void main(String[] args) throws ParseException, IOException {
        // TODO Auto-generated method stub
        int defaultNumThreads = 4, defaultPeakRate = 100;
        switch (args.length) {
        case 2:
            numThreads = Integer.parseInt(args[0]);
            peakRate = Integer.parseInt(args[1]);
            break;
        case 1:
            numThreads = Integer.parseInt(args[0]);
            peakRate = defaultPeakRate;
            break;
        case 0:
            numThreads = defaultNumThreads;
            peakRate = defaultPeakRate;
            break;
        default:
            LOG.warn("Invalid Number of Arguments! args = numThreads peakRate");
            return;
        }

        //      String inputFileName = "/var/tmp/SyS/bangalore.csv";
        String inputFileName = "/Users/anshushukla/data/trytry.csv";

        //roundRobinSplitCsvToFiles(inputFileName, numThreads);
        List<TableClass> list = roundRobinSplitCsvToMemory(inputFileName, numThreads, 0.001, "SYS");
        for (int i = 0; i < numThreads; i++)
            System.out.println(list.get(i).getRows().size());

        //Test Iterator Functionality
        //      for(RowClass t : list.get(0)){
        //         System.out.println(t);
        //      }

        LOG.info("jkl");
    }
}