PDI.Hadoop.Datamining.Tools.HistorianParser.java Source code

Java tutorial

Introduction

Here is the source code for PDI.Hadoop.Datamining.Tools.HistorianParser.java

Source

/*
 * @(#) HistorianParser.java
 *
 * Summary: Historian Parser Main
 *
 * @since   16 Nov, 2010
 * @version   0.2.7.43
 *
 * Copyright: (c) 2010 Phasor Data Inc, 2010, All Other Rights Reserved.
 *
 * Class Modification History:
 * ------------------------------------------------
 * 11/16/2010 - Eunsu Yun
 *              Generated original version of source code
 * 11/25/2010 - Duho Kim
 *              Added new headers and license agreement.
 * 12/13/2010 - Duho Kim
 *              Refactoring: Separated PDI.Hadoop.Datamining.Utils.PathUtils
 */
package PDI.Hadoop.Datamining.Tools;

import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.TimeZone;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import PDI.Hadoop.Datamining.Utils.DateUtils;
import PDI.Hadoop.Datamining.Utils.PathUtils;
import TVA.Hadoop.MapReduce.Historian.HistorianInputFormat;
import TVA.Hadoop.MapReduce.Historian.File.StandardPointFile;

/**
 * <B>PDI's openPDC Historian Parser</B><br/>
 * Map Reduce application for parsing a historian format file
 * and retrieving the preferred measurements
 * 
 * @author   Eunsu Yoon
 * @author   Duho Kim
 * @since   17 Nov, 2010
 * @version   0.2.7.43
 * 
 */
public class HistorianParser extends Configured implements Tool {

    /*
     * Internal version banner.
     * Fixed: previously read "v0.2.7.42", which disagreed with the 0.2.7.43
     * version documented in both the file header and the class Javadoc.
     */
    private static final String PDI_HISTORIAN_VER = "PDI openPDC Historian Parser v0.2.7.43";

    /*
     * Verbose status flag; set by the "-verbose" switch and forwarded to PathUtils.
     */
    private boolean bVerbose;

    /*
     * Prints the version banner to stdout.
     */
    private static void pdi_showVersion() {
        System.out.println(PDI_HISTORIAN_VER);
    }

    /*
     * Enables or disables verbose mode.
     */
    private void pdi_setVerbose(boolean bVerbose) {
        this.bVerbose = bVerbose;
    }

    /*
     * Reports the current verbose status.
     */
    private boolean pdi_isVerbose() {
        return this.bVerbose;
    }

    /*
     * Prints the usage/help message to stdout and returns -1 so callers can
     * propagate it directly as an error result from run().
     */
    private static int printUsage() {
        pdi_showVersion();
        System.out.println("Usage: HistorianParser\t[-m <numOfMapTask>]\n" + "\t\t\t[-r <numOfReduceTask>]\n "
                + "\t\t\t[-startTS <ts1(unix format)>]\n" + "\t\t\t[-endTS <ts2(unix format)>]\n"
                + "\t\t\t[-pointIDS <id1;..;idN>]\n" + "\t\t\t[-sourcePATH <input_file(directory)>]\n"
                + "\t\t\t[-destPATH <output_directory>]\n" + "\t\t\t[-outputMaxSize <output Max size in MB>]\n"
                + "\t\t\t[-compression <yes|no(default)>]\n"
                + "\t\t\t[-filePrefix <filePrefix(default: \"devarchive_archive_\")>]\n" + "\t\t\t[-verbose]");
        System.out.println(
                "Example: HistorianParser -m 8 -r 6 -startTS 1286985656 -endTS 1286985680 -pointIDS \"48;51;53;54\" -outputMaxSize 5 -sourcePATH /hist_input/ -destPATH /hist_output/ -filePrefix \"devarchive_archive_\"");

        return -1;
    }

    /**
     * The main driver for the historian map/reduce program. Parses the
     * command-line arguments, validates the mandatory parameters, resolves
     * the input paths for the requested time window, and submits the job.
     *
     * @param args command-line arguments; see {@link #printUsage()} for the
     *             accepted switches
     * @return 0 on success (or after "-v"), -1 on bad/missing arguments or
     *         when the input paths cannot be resolved
     * @throws IOException
     *         When there are communication problems with the job tracker.
     */
    public int run(String[] args) throws Exception {

        JobConf conf = new JobConf(getConf(), HistorianParser.class);
        // NOTE(review): never read afterwards, but the JobClient constructor
        // may connect to the job tracker as a side effect — kept deliberately.
        JobClient jobClient = new JobClient(conf);

        List<String> sourcePaths = new ArrayList<String>();

        String destPath = "";
        String currentDate = DateUtils.getCurrentDateString();
        String startTS = "";
        String endTS = "";
        String pointIDS = "";
        String outputSize = "";

        conf.setMapOutputKeyClass(Text.class);
        conf.setMapOutputValueClass(StandardPointFile.class);
        conf.setMapperClass(MapClass.class);
        conf.setReducerClass(ReduceClass.class);
        conf.setInputFormat(HistorianInputFormat.class);

        // Defaults for the optional switches.
        conf.set("compression", "no");
        conf.set("filePrefix", "devarchive_archive_");

        for (int i = 0; i < args.length; ++i) {
            try {
                if ("-m".equals(args[i])) {
                    conf.setNumMapTasks(Integer.parseInt(args[++i]));
                } else if ("-r".equals(args[i])) {
                    conf.setNumReduceTasks(Integer.parseInt(args[++i]));
                } else if ("-startTS".equals(args[i])) {
                    conf.set("startTS", args[++i]);
                    startTS = args[i];
                } else if ("-endTS".equals(args[i])) {
                    conf.set("endTS", args[++i]);
                    endTS = args[i];
                } else if ("-pointIDS".equals(args[i])) {
                    conf.set("pointIDS", args[++i]);
                    pointIDS = args[i];
                } else if ("-outputMaxSize".equals(args[i])) {
                    conf.set("outputSize", args[++i]);
                    outputSize = args[i];
                } else if ("-sourcePATH".equals(args[i])) {
                    // Accepts a comma-separated list; split(",") on a value
                    // without a comma yields the value itself, so no special
                    // single-path branch is needed.
                    for (String path : args[++i].split(",")) {
                        sourcePaths.add(path);
                    }
                } else if ("-destPATH".equals(args[i])) {
                    destPath = args[++i] + "/";
                } else if ("-compression".equals(args[i])) {
                    conf.set("compression", args[++i]);
                } else if ("-filePrefix".equals(args[i])) {
                    conf.set("filePrefix", args[++i]);
                } else if ("-v".equals(args[i])) {
                    pdi_showVersion();
                    return 0;
                } else if ("-verbose".equals(args[i])) {
                    this.pdi_setVerbose(true);
                } else if ("-h".equals(args[i])) {
                    return printUsage();
                }
            } catch (NumberFormatException except) {
                System.out.println("ERROR: Integer expected instead of " + args[i]);
                return printUsage();
            } catch (ArrayIndexOutOfBoundsException except) {
                System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
                return printUsage();
            }
        }

        // All of these parameters are mandatory; reject the run otherwise.
        if (sourcePaths.isEmpty() || destPath.isEmpty() || startTS.isEmpty() || endTS.isEmpty()
                || pointIDS.isEmpty() || outputSize.isEmpty() || conf.get("filePrefix").isEmpty()) {
            System.out.println("ERROR: Wrong input parameters.");
            return printUsage();
        }

        String startTime = DateUtils.unixTimestampToHumanReadableTime2(startTS);
        String endTime = DateUtils.unixTimestampToHumanReadableTime2(endTS);

        // Echo the effective configuration before submitting the job.
        System.out.println("-------------------------------------------------------");
        System.out.println("jobName      : " + currentDate);
        System.out.println("filePrefix   : " + conf.get("filePrefix"));
        for (int i = 0; i < sourcePaths.size(); i++) {
            System.out.println("sourcePath[" + i + "]: " + sourcePaths.get(i));
        }
        System.out.println("destPath     : " + destPath);
        System.out.println("startTS      : " + startTS + " (" + startTime + ")");
        System.out.println("endTS        : " + endTS + " (" + endTime + ")");
        System.out.println("pointIDS     : " + pointIDS);
        System.out.println("outputMaxSize: " + outputSize + " MB");
        System.out.println("compression  : " + conf.get("compression"));
        System.out.println("-------------------------------------------------------");

        // Recursively resolve the input files that cover [startTS, endTS].
        PathUtils utils = new PathUtils(this.pdi_isVerbose());
        if (!utils.pdi_setRecursiveInputPaths(conf, sourcePaths, startTS, endTS)) {
            return -1;
        }

        // Output goes to a timestamped directory under destPath.
        FileOutputFormat.setOutputPath(conf, utils.getOutputPath(destPath, currentDate));

        // The current timestamp also serves as the job name.
        conf.setJobName(currentDate);
        JobClient.runJob(conf); // submit and block until the job completes

        return 0;
    }

    /**
     * <b>HistorianParser binary main</b><br/>
     *
     * <pre>Example:<br/>
     * HistorianParser   -m 8 -r 6 -startTS 1286985656 -endTS 1286985680 -pointIDS "48;51;53;54"<br/>
     *          -outputMaxSize 5 -sourcePATH /hist_input/ -destPATH /hist_output/<br/>
     *          -filePrefix "devarchive_archive_"<br/></pre>
     * @throws Exception
     *         When there is some problem running the tool.
     */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new HistorianParser(), args);
        System.exit(res);
    }
}