Example usage for org.apache.hadoop.fs FileSystem copyFromLocalFile

Introduction

On this page, you can find example usage for org.apache.hadoop.fs.FileSystem.copyFromLocalFile.

Prototype

public void copyFromLocalFile(Path src, Path dst) throws IOException 

Document

The src file is on the local disk. It is added to the filesystem at the given dst name, and the source is kept intact afterwards.
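
A minimal, self-contained sketch of a typical call is shown below. It assumes the Hadoop client libraries and a cluster configuration are on the classpath; the class name and both paths are placeholders, not part of any example on this page.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFromLocalExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Copy a file from the local disk into the configured FileSystem.
        // The local source is kept intact; both paths are placeholders.
        fs.copyFromLocalFile(new Path("/tmp/report.txt"), new Path("/data/report.txt"));
        fs.close();
    }
}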

Usage

From source file: CountJob.java

License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String msgs = "";
    doJob("1", args, msgs);
    doJob("2", args, msgs);
    FileSystem hdfs = FileSystem.get(conf);

    BufferedReader bfr = new BufferedReader(
            new InputStreamReader(hdfs.open(new Path("/data/output/temp/12/part-r-00000"))));
    BufferedReader bfr2 = new BufferedReader(
            new InputStreamReader(hdfs.open(new Path("/data/output/temp/22/part-r-00000"))));
    boolean same = true;
    String line1;
    String line2;
    line1 = bfr.readLine();
    line2 = bfr2.readLine();
    while (same) {
        if ((line1 == null && line2 != null) || (line1 != null && line2 == null)) {
            same = false;
            break;
        } else if (line1 == null && line2 == null) {
            break;
        } else {
            if (line1.equals(line2)) {
                line1 = bfr.readLine();
                line2 = bfr2.readLine();
            } else {
                same = false;
                break;
            }
        }
    }
    if (same) {
        System.out.print("same " + same + "\n");
        Path localP = new Path("/tmp/output.txt");
        hdfs.copyToLocalFile(new Path("/data/output/temp/12/part-r-00000"), localP);
        hdfs.copyFromLocalFile(localP, new Path(args[1] + "/part-r-00000"));
        hdfs.createNewFile(new Path(args[1] + "/_SUCCESS"));
        System.out.print("created result");

    } else {

        System.out.print("Different");
        doJob("3", args, msgs);
        Path localP = new Path("/tmp/output.txt");
        hdfs.copyToLocalFile(new Path("/data/output/temp/32/part-r-00000"), localP);
        hdfs.copyFromLocalFile(localP, new Path(args[1] + "/part-r-00000"));
        hdfs.createNewFile(new Path(args[1] + "/_SUCCESS"));
        System.out.print("created result");

    }
    hdfs.delete(new Path("/data/output/temp/12/part-r-00000"), true);
    hdfs.delete(new Path("/data/output/temp/22/part-r-00000"), true);

}

From source file: BwaAlignmentBase.java

License: Open Source License

public ArrayList<String> copyResults(String outputSamFileName) {
    ArrayList<String> returnedValues = new ArrayList<String>();
    String outputDir = this.bwaInterpreter.getOutputHdfsDir();

    Configuration conf = new Configuration();
    try {
        FileSystem fs = FileSystem.get(conf);

        fs.copyFromLocalFile(new Path(this.bwaInterpreter.getOutputFile()),
                new Path(outputDir + "/" + outputSamFileName));

        // Delete the old results file
        File tmpSamFullFile = new File(this.bwaInterpreter.getOutputFile());
        tmpSamFullFile.delete();
    } catch (IOException e) {
        e.printStackTrace();
        this.LOG.error(e.toString());
    }

    returnedValues.add(outputDir + "/" + outputSamFileName);

    return returnedValues;
}

From source file: be.ugent.intec.halvade.utils.HalvadeFileUtils.java

License: Open Source License

/**
 * @return 0 if successful, -1 if the file size is incorrect, and -2 if an exception occurred
 */
protected static int privateUploadFileToHDFS(TaskInputOutputContext context, FileSystem fs, String from,
        String to) {
    try {
        // check if file is present on HDFS
        Path toPath = new Path(to);
        Path fromPath = new Path(from);
        File f = new File(from);
        if (!fs.exists(toPath)) {
            fs.copyFromLocalFile(fromPath, toPath);
            context.getCounter(HalvadeCounters.FOUT_TO_HDFS).increment(f.length());
        } else {
            // check if filesize is correct
            if (fs.getFileStatus(toPath).getLen() != f.length()) {
                // incorrect file size: remove and upload again
                fs.delete(toPath, false);
                fs.copyFromLocalFile(fromPath, toPath);
                context.getCounter(HalvadeCounters.FOUT_TO_HDFS).increment(f.length());
            }
        }
        if (fs.getFileStatus(toPath).getLen() != f.length())
            return -1;
        else
            return 0;
    } catch (IOException ex) {
        Logger.DEBUG("failed to upload " + from + " to HDFS: " + ex.getLocalizedMessage());
        Logger.EXCEPTION(ex);
        return -2;
    }
}

From source file: cascading.flow.hadoop.util.HadoopUtil.java

License: Open Source License

/**
 * Copies paths from one local path to a remote path. If syncTimes is true, both modification and access time are
 * changed to match the local 'from' path.
 * <p/>
 * Returns a map of file-name to remote modification times if the remote time is different from the local time.
 *
 * @param config
 * @param commonPaths
 * @param syncTimes
 */
public static Map<String, Long> syncPaths(Configuration config, Map<Path, Path> commonPaths,
        boolean syncTimes) {
    if (commonPaths == null)
        return Collections.emptyMap();

    Map<String, Long> timestampMap = new HashMap<>();

    Map<Path, Path> copyPaths = getCopyPaths(config, commonPaths); // tests remote file existence or if stale

    LocalFileSystem localFS = getLocalFS(config);
    FileSystem remoteFS = getDefaultFS(config);

    for (Map.Entry<Path, Path> entry : copyPaths.entrySet()) {
        Path localPath = entry.getKey();
        Path remotePath = entry.getValue();

        try {
            LOG.info("copying from: {}, to: {}", localPath, remotePath);
            remoteFS.copyFromLocalFile(localPath, remotePath);

            if (!syncTimes) {
                timestampMap.put(remotePath.getName(),
                        remoteFS.getFileStatus(remotePath).getModificationTime());
                continue;
            }
        } catch (IOException exception) {
            throw new FlowException("unable to copy local: " + localPath + " to remote: " + remotePath,
                    exception);
        }

        FileStatus localFileStatus = null;

        try {
            // sync the modified times so we can lazily upload jars to hdfs after job is started
            // otherwise modified time will be local to hdfs
            localFileStatus = localFS.getFileStatus(localPath);
            remoteFS.setTimes(remotePath, localFileStatus.getModificationTime(), -1); // don't set the access time
        } catch (IOException exception) {
            LOG.info(
                    "unable to set local modification time on remote file: {}, 'dfs.namenode.accesstime.precision' may be set to 0 on HDFS.",
                    remotePath);

            if (localFileStatus != null)
                timestampMap.put(remotePath.getName(), localFileStatus.getModificationTime());
        }
    }

    return timestampMap;
}

From source file: cgl.hadoop.apps.runner.RunnerMap.java

License: Open Source License

public void map(String key, String value, Context context) throws IOException, InterruptedException {

    long startTime = System.currentTimeMillis();
    String endTime = "";

    Configuration conf = context.getConfiguration();
    String programDir = conf.get(DataAnalysis.PROGRAM_DIR);
    String execName = conf.get(DataAnalysis.EXECUTABLE);
    String cmdArgs = conf.get(DataAnalysis.PARAMETERS);
    String outputDir = conf.get(DataAnalysis.OUTPUT_DIR);
    String workingDir = conf.get(DataAnalysis.WORKING_DIR);

    System.out.println("the map key : " + key);
    System.out.println("the value path : " + value.toString());
    System.out.println("Local DB : " + this.localDB);

    // We have the full file names in the value.
    String localInputFile = "";
    String outFile = "";
    String stdOutFile = "";
    String stdErrFile = "";
    String fileNameOnly = "";

    fileNameOnly = key;
    localInputFile = workingDir + File.separator + fileNameOnly;
    outFile = workingDir + File.separator + fileNameOnly + ".output";
    stdErrFile = workingDir + File.separator + fileNameOnly + ".error";
    stdOutFile = workingDir + File.separator + fileNameOnly + ".input";
    /**
    Write your code to get localInputFile, outFile,
    stdOutFile and stdErrFile
    **/

    // download the file from HDFS
    Path inputFilePath = new Path(value);
    FileSystem fs = inputFilePath.getFileSystem(conf);
    fs.copyToLocalFile(inputFilePath, new Path(localInputFile));

    // Prepare the arguments to the executable
    String execCommand = cmdArgs.replaceAll("#_INPUTFILE_#", localInputFile);
    if (cmdArgs.indexOf("#_OUTPUTFILE_#") > -1) {
        execCommand = execCommand.replaceAll("#_OUTPUTFILE_#", outFile);
    } else {
        outFile = stdOutFile;
    }

    endTime = Double.toString(((System.currentTimeMillis() - startTime) / 1000.0));
    System.out.println("Before running the executable Finished in " + endTime + " seconds");

    execCommand = this.localBlastProgram + File.separator + execName + " " + execCommand + " -db "
            + this.localDB;
    //Create the external process

    startTime = System.currentTimeMillis();

    Process p = Runtime.getRuntime().exec(execCommand);

    OutputHandler inputStream = new OutputHandler(p.getInputStream(), "INPUT", stdOutFile);
    OutputHandler errorStream = new OutputHandler(p.getErrorStream(), "ERROR", stdErrFile);

    // start the stream threads.
    inputStream.start();
    errorStream.start();

    p.waitFor();
    // end time of this process
    endTime = Double.toString(((System.currentTimeMillis() - startTime) / 1000.0));
    System.out.println("Program Finished in " + endTime + " seconds");

    //Upload the results to HDFS
    startTime = System.currentTimeMillis();

    Path outputDirPath = new Path(outputDir);
    Path outputFileName = new Path(outputDirPath, fileNameOnly);
    fs.copyFromLocalFile(new Path(outFile), outputFileName);

    endTime = Double.toString(((System.currentTimeMillis() - startTime) / 1000.0));
    System.out.println("Upload Result Finished in " + endTime + " seconds");

}

From source file: cgl.hadoop.apps.runner.RunnerMap.java

License: Open Source License

public void map(String key, String value, Context context) throws IOException, InterruptedException {

    long startTime = System.currentTimeMillis();
    String endTime = "";

    Configuration conf = context.getConfiguration();
    String programDir = conf.get(DataAnalysis.PROGRAM_DIR);
    String execName = conf.get(DataAnalysis.EXECUTABLE);
    String cmdArgs = conf.get(DataAnalysis.PARAMETERS);
    String outputDir = conf.get(DataAnalysis.OUTPUT_DIR);
    String workingDir = conf.get(DataAnalysis.WORKING_DIR);

    System.out.println("the map key : " + key);
    System.out.println("the value path : " + value.toString());
    System.out.println("Local DB : " + this.localDB);

    // We have the full file names in the value.
    String localInputFile = "";
    String outFile = "";
    String stdOutFile = "";
    String stdErrFile = "";
    String fileNameOnly = "";

    //Custom code
    String[] tmp = value.split(File.separator);
    fileNameOnly = tmp[tmp.length - 1];
    localInputFile = workingDir + File.separator + fileNameOnly;
    outFile = workingDir + File.separator + fileNameOnly + ".out";
    stdOutFile = workingDir + File.separator + fileNameOnly + ".stdout";
    stdErrFile = workingDir + File.separator + fileNameOnly + ".stderr";

    // download the file from HDFS
    Path inputFilePath = new Path(value);
    FileSystem fs = inputFilePath.getFileSystem(conf);
    fs.copyToLocalFile(inputFilePath, new Path(localInputFile));

    // Prepare the arguments to the executable
    String execCommand = cmdArgs.replaceAll("#_INPUTFILE_#", localInputFile);
    if (cmdArgs.indexOf("#_OUTPUTFILE_#") > -1) {
        execCommand = execCommand.replaceAll("#_OUTPUTFILE_#", outFile);
    } else {
        outFile = stdOutFile;
    }

    endTime = Double.toString(((System.currentTimeMillis() - startTime) / 1000.0));
    System.out.println("Before running the executable Finished in " + endTime + " seconds");

    execCommand = this.localBlastProgram + File.separator + execName + " " + execCommand + " -db "
            + this.localDB;
    //Create the external process

    startTime = System.currentTimeMillis();

    Process p = Runtime.getRuntime().exec(execCommand);

    OutputHandler inputStream = new OutputHandler(p.getInputStream(), "INPUT", stdOutFile);
    OutputHandler errorStream = new OutputHandler(p.getErrorStream(), "ERROR", stdErrFile);

    // start the stream threads.
    inputStream.start();
    errorStream.start();

    p.waitFor();
    // end time of this process
    endTime = Double.toString(((System.currentTimeMillis() - startTime) / 1000.0));
    System.out.println("Program Finished in " + endTime + " seconds");

    //Upload the results to HDFS
    startTime = System.currentTimeMillis();

    Path outputDirPath = new Path(outputDir);
    Path outputFileName = new Path(outputDirPath, fileNameOnly);
    fs.copyFromLocalFile(new Path(outFile), outputFileName);

    endTime = Double.toString(((System.currentTimeMillis() - startTime) / 1000.0));
    System.out.println("Upload Result Finished in " + endTime + " seconds");

}

From source file: com.asakusafw.testdriver.file.FileDeployer.java

License: Apache License

private void copy(File result, String destination) throws IOException {
    assert result != null;
    assert destination != null;
    Path target = new Path(destination);
    FileSystem fs = target.getFileSystem(configuration);
    fs.copyFromLocalFile(new Path(result.toURI()), target);
}

From source file: com.bigjob.Client.java

License: Apache License

private void addToLocalResources(FileSystem fs, String fileSrcPath, String fileDstPath, int appId,
        Map<String, LocalResource> localResources, String resources) throws IOException {
    String suffix = appName + "/" + appId + "/" + fileDstPath;
    Path dst = new Path(fs.getHomeDirectory(), suffix);
    LOG.debug("HDFS Destination for Script: " + dst.toString());
    if (fileSrcPath == null) {
        FSDataOutputStream ostream = null;
        try {
            ostream = FileSystem.create(fs, dst, new FsPermission((short) 0710));
            ostream.writeUTF(resources);
        } finally {
            IOUtils.closeQuietly(ostream);
        }
    } else {
        fs.copyFromLocalFile(new Path(fileSrcPath), dst);
    }
    FileStatus scFileStatus = fs.getFileStatus(dst);
    LocalResource scRsrc = LocalResource.newInstance(ConverterUtils.getYarnUrlFromURI(dst.toUri()),
            LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, scFileStatus.getLen(),
            scFileStatus.getModificationTime());
    localResources.put(fileDstPath, scRsrc);
}

From source file: com.blackberry.logtools.logmultisearch.java

License: Apache License

@SuppressWarnings("static-access")
public int run(String[] argv) throws Exception {

    //Configuring configuration and filesystem to work on HDFS
    final Configuration conf = getConf(); //Configuration processed by ToolRunner
    FileSystem fs = FileSystem.get(conf);
    //Initiate tools used for running search
    LogTools tools = new LogTools();

    //Other options
    String date_format = "RFC5424";
    String field_separator = "";
    ArrayList<String> D_options = new ArrayList<String>();
    boolean quiet = true;
    boolean silent = false;
    boolean log = false;
    boolean forcelocal = false;
    boolean forceremote = false;

    //The arguments are 
    // - strings
    // - dc number
    // - service
    // - component
    // - startTime (Something 'date' can parse, or just a time in ms from epoch)
    // - endTime (Same as start)
    // - outputDir

    //Indexing for arguments to be passed for Mapreduce
    int stringsNum = 0;
    int dcNum = 1;
    int svcNum = 2;
    int compNum = 3;
    int startNum = 4;
    int endNum = 5;
    int outNum = 6;

    //Parsing through user arguments
    String[] args = new String[7];
    int count = 0; //Count created to track the parse of all arguments
    int argcount = 0; //Count created to track number of arguments to be passed on
    while (count < argv.length) {
        String arg = argv[count];
        count++;
        if (arg.equals("--")) {
            break;
        } else if (arg.startsWith("-")) {
            if (arg.equals("--v")) {
                quiet = tools.parseV(silent);
            } else if (arg.equals("--i")) {
                conf.set("logdriver.search.case.insensitive", "true");
            } else if (arg.equals("--a")) {
                LogTools.logConsole(quiet, silent, warn, "AND searching selected");
                conf.set("logdriver.search.and", "true");
            } else if (arg.startsWith("--dateFormat=")) {
                arg = arg.replace("--dateFormat=", "");
                date_format = arg;
            } else if (arg.startsWith("--fieldSeparator=")) {
                arg = arg.replace("--fieldSeparator=", "");
                field_separator = arg;
            } else if (arg.startsWith("-strings=")) {
                arg = arg.replace("-strings=", "");
                args[stringsNum] = arg;
                argcount++;
            } else if (arg.startsWith("-dc=")) {
                arg = arg.replace("-dc=", "");
                args[dcNum] = arg;
                argcount++;
            } else if (arg.startsWith("-svc=")) {
                arg = arg.replace("-svc=", "");
                args[svcNum] = arg;
                argcount++;
            } else if (arg.startsWith("-comp=")) {
                arg = arg.replace("-comp=", "");
                args[compNum] = arg;
                argcount++;
            } else if (arg.startsWith("-start=")) {
                arg = arg.replace("-start=", "");
                args[startNum] = arg;
                argcount++;
            } else if (arg.startsWith("-end=")) {
                arg = arg.replace("-end=", "");
                args[endNum] = arg;
                argcount++;
            }
            //User inputs output directory that is to be created
            //Check to see if parent directory exists && output directory does not exist
            else if (arg.startsWith("--out=")) {
                args[outNum] = tools.parseOut(arg, fs);
                argcount++;
            } else if (arg.startsWith("-D")) {
                D_options.add(arg);
            } else if (arg.equals("--silent")) {
                silent = tools.parseSilent(quiet);
            } else if (arg.equals("--log")) {
                log = true;
            } else if (arg.equals("--l")) {
                forcelocal = tools.parsePigMode(forceremote);
            } else if (arg.equals("--r")) {
                forceremote = tools.parsePigMode(forcelocal);
            } else {
                LogTools.logConsole(quiet, silent, error, "Unrecognized option: " + arg);
                System.exit(1);
            }
        } else {
            LogTools.logConsole(quiet, silent, error, "Unrecognized option: " + arg);
            System.exit(1);
        }
    }

    //Default output should be stdout represented by "-"
    if (args[outNum] == null) {
        args[outNum] = "-";
        argcount++;
        LogTools.logConsole(quiet, silent, info, "Output set to default stdout.");
    }

    if (argcount < 7) {
        System.err.println(";****************************************" + "\n\t\t\t NOT ENOUGH ARGUMENTS\n"
                + "\n\tUSAGE: logmultisearch [REQUIRED ARGUMENTS] [OPTIONS] (Order does not matter)"
                + "\n\tREQUIRED ARGUMENTS:"
                + "\n\t\t-strings=[STRINGS_DIR|STRINGS_FILE|STRING]   String/file/directory of strings to search."
                + "\n\t\t-dc=[DATACENTER]            Data Center."
                + "\n\t\t-svc=[SERVICE]               Service."
                + "\n\t\t-comp=[COMPONENT]                Component."
                + "\n\t\t-start=[START]               Start time." + "\n\t\t-end=[END]               End time."
                + "\n\tOptions:"
                + "\n\t\t--out=[DIRECTORY]                  Desired output directory. If not defined, output to stdout."
                + "\n\t\t--v                              Verbose output."
                + "\n\t\t--r                              Force remote sort."
                + "\n\t\t--l                              Force local sort."
                + "\n\t\t--dateFormat=[FORMAT]              Valid formats are RFC822, RFC3164 (zero padded day),"
                + "\n\t                                   RFC5424 (default), or any valid format string for FastDateFormat."
                + "\n\t\t--fieldSeparator=X                  The separator to use to separate fields in intermediate"
                + "\n\t                                      files.  Defaults to 'INFORMATION SEPARATOR ONE' (U+001F)."
                + "\n\t\t--silent               Output only the data."
                + "\n\t\t--i                              Make search case insensitive."
                + "\n\t\t--a                              Enable AND searching."
                + "\n\t\t--log                  Save all the logs.\n"
                + ";****************************************");
        System.exit(1);
    }

    //Parse time inputs for start and end of search
    args[startNum] = tools.parseDate(args[startNum]);
    args[endNum] = tools.parseDate(args[endNum]);
    tools.checkTime(args[startNum], args[endNum]);

    //Retrieve 'strings' argument to be able to pass search strings to HDFS
    //Retrieve 'out' argument to determine where output of results should be sent
    String strings = args[stringsNum];
    String out = args[outNum];

    //Generate files to temporarily store output of mapreduce jobs and pig logs locally                
    File local_output = File.createTempFile("tmp.", RandomStringUtils.randomAlphanumeric(10));
    if (!log) {
        local_output.deleteOnExit();
    }
    File pig_tmp = File.createTempFile("tmp.", RandomStringUtils.randomAlphanumeric(10));
    if (!log) {
        pig_tmp.deleteOnExit();
    }

    //Name the temp directory for storing results in HDFS
    String tmp = "tmp/logmultisearch-" + RandomStringUtils.randomAlphanumeric(10);

    //Set args[stringsNum] to be location of search strings to be used for the Multisearch
    args[stringsNum] = (StringEscapeUtils.escapeJava(tmp) + "/strings");

    //Set args[outNum] to be temp output directory to be passed onto MultiSearchByTime instead of UserInput argument
    args[outNum] = (StringEscapeUtils.escapeJava(tmp) + "/rawlines");

    //Managing console output - deal with --v/--silent
    Logger LOG = LoggerFactory.getLogger(logmultisearch.class);
    tools.setConsoleOutput(local_output, quiet, silent);

    //Create temp directory in HDFS to store logsearch logs before sorting
    tools.tmpDirHDFS(quiet, silent, fs, conf, tmp, log);

    //If the strings argument is the path of a file, copy the file to HDFS.
    //If the strings argument is the path of a directory, copy all files in the directory to HDFS.
    //If the strings argument is not a path to a file/directory, write to a newly created file in HDFS.
    try {
        File f = new File(strings);
        if (f.isFile()) {
            LogTools.logConsole(quiet, silent, warn, "Strings input is a File...");

            //dos2unix file conversion
            File dos2unix = File.createTempFile("tmp.", RandomStringUtils.randomAlphanumeric(10));
            dos2unix.deleteOnExit();
            tools.dosTounix(f, dos2unix);

            //Copy over temp directory into a new directory in HDFS to be used for logmultisearch
            fs.copyFromLocalFile(new Path(dos2unix.getAbsolutePath()), new Path(tmp + "/strings"));
        } else if (f.isDirectory()) {
            LogTools.logConsole(quiet, silent, warn, "Strings input is a Directory...");

            //Get list of all files in directory to convert from dos2unix
            String[] fileList = f.list();

            //Create temp directory to store all converted files
            File tempDir = Files.createTempDir();
            tempDir.deleteOnExit();

            //Convert all files from dos2unix and write to temp directory
            for (int i = 0; i < fileList.length; i++) {
                File dos2unix = File.createTempFile("unix", fileList[i], tempDir);
                dos2unix.deleteOnExit();
                tools.dosTounix(new File(f.getAbsolutePath() + "/" + fileList[i]), dos2unix);
            }

            //Copy over temp directory into a new directory in HDFS to be used for logmultisearch
            fs.copyFromLocalFile(new Path(tempDir.getAbsolutePath()), new Path(tmp + "/strings"));
        } else {
            LogTools.logConsole(quiet, silent, warn, "Strings input is a search string...");
            //Make directory and file for strings
            fs.mkdirs(new Path(tmp + "/strings"));
            fs.createNewFile(new Path(tmp + "/strings/strings"));
            //Write search strings to file
            FSDataOutputStream hdfsOut = fs.create(new Path(tmp + "/strings/strings"));
            hdfsOut.writeUTF(strings);
            hdfsOut.close();
        }
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(1);
    }

    LogTools.logConsole(quiet, silent, warn, "Searching...");
    LogTools.logConsole(quiet, silent, warn,
            "Passing Arguments: SearchStrings=" + strings + " DC=" + args[dcNum] + " Service=" + args[svcNum]
                    + " Component=" + args[compNum] + " StartTime=" + args[startNum] + " EndTime="
                    + args[endNum] + " Output=" + out);

    //Set standard configuration for running Mapreduce and PIG
    String queue_name = "logsearch";

    //Start Mapreduce job
    tools.runMRJob(quiet, silent, conf, D_options, out, LOG, field_separator, queue_name, args,
            "MultiSearchByTime", new MultiSearchByTime());

    //Before sorting, determine the number of records and size of the results found
    long foundresults = tools.getResults(local_output);
    long size = tools.getSize(foundresults, tmp, fs);

    //Run PIG job if results found
    tools.runPig(silent, quiet, foundresults, size, tmp, out, D_options, queue_name, date_format,
            field_separator, pig_tmp, fs, conf, forcelocal, forceremote);

    //Display location of tmp files if log enabled
    tools.logs(log, local_output, pig_tmp, tmp);

    return 0;
}

From source file: com.blackberry.logtools.LogTools.java

License: Apache License

public void runPigLocal(Map<String, String> params, String out, String tmp, final boolean quiet,
        final boolean silent, Configuration conf, String queue_name, String additional_jars, File pig_tmp,
        ArrayList<String> D_options, String PIG_DIR, FileSystem fs)
        throws IllegalArgumentException, IOException {
    //Create temp file on local to hold data to sort
    final File local_tmp = Files.createTempDir();
    local_tmp.deleteOnExit();

    Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
        @Override
        public void run() {
            try {
                logConsole(quiet, silent, warn, "Deleting tmp files in local tmp");
                delete(local_tmp);
            } catch (IOException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        }
    }));

    //Set input parameter for pig job
    params.put("tmpdir", local_tmp.toString() + "/" + tmp);

    //Check for an out of '-', meaning write to stdout
    String pigout;
    if (out.equals("-")) {
        params.put("out", local_tmp + "/" + tmp + "/final");
        pigout = local_tmp + "/" + tmp + "/final";
    } else {
        params.put("out", local_tmp + "/" + StringEscapeUtils.escapeJava(out));
        pigout = StringEscapeUtils.escapeJava(out);
    }

    //Copy the tmp folder from HDFS to the local tmp directory, and delete the remote folder
    fs.copyToLocalFile(true, new Path(tmp), new Path(local_tmp + "/" + tmp));

    try {
        logConsole(quiet, silent, info, "Running PIG Command");
        conf.set("mapred.job.queue.name", queue_name);
        conf.set("pig.additional.jars", additional_jars);
        conf.set("pig.exec.reducers.bytes.per.reducer", Integer.toString(100 * 1000 * 1000));
        conf.set("pig.logfile", pig_tmp.toString());
        conf.set("hadoopversion", "23");
        //PIG temp directory set to be able to delete all temp files/directories
        conf.set("pig.temp.dir", local_tmp.getAbsolutePath());

        //Setting output separator for logdriver
        String DEFAULT_OUTPUT_SEPARATOR = "\t";
        Charset UTF_8 = Charset.forName("UTF-8");
        String outputSeparator = conf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            System.err.println(
                    ";******************** The output separator must be a single byte in UTF-8. ******************** ");
            System.exit(1);
        }
        conf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));

        dOpts(D_options, silent, out, conf);

        PigServer pigServer = new PigServer(ExecType.LOCAL, conf);
        UserGroupInformation.setConfiguration(new Configuration(false));
        pigServer.registerScript(PIG_DIR + "/formatAndSortLocal.pg", params);
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(1);
    }

    logConsole(quiet, silent, warn, "PIG Job Completed.");

    if (out.equals("-")) {
        System.out.println(";#################### DATA RESULTS ####################");
        try {
            File results = new File(pigout);
            String[] resultList = results.list();

            //Find the files in the directory, open and printout results
            for (int i = 0; i < resultList.length; i++) {
                if (resultList[i].contains("part-") && !resultList[i].contains(".crc")) {
                    BufferedReader br = new BufferedReader(
                            new FileReader(new File(pigout + "/" + resultList[i])));
                    String line;
                    line = br.readLine();
                    while (line != null) {
                        System.out.println(line);
                        line = br.readLine();
                    }
                    br.close();
                }
            }
            System.out.println(";#################### END OF RESULTS ####################");
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
        }
    } else {
        fs.copyFromLocalFile(new Path(local_tmp + "/" + StringEscapeUtils.escapeJava(out)), new Path(pigout));
        System.out.println(
                ";#################### Done. Search results are in " + pigout + " ####################");
    }
}