Example usage for org.apache.hadoop.fs FileSystem rename

Introduction

This page collects usage examples for org.apache.hadoop.fs.FileSystem.rename.

Prototype

public abstract boolean rename(Path src, Path dst) throws IOException;

Document

Renames Path src to Path dst. The call returns true on success; many FileSystem implementations signal failure by returning false rather than throwing an exception, so callers should check the return value.
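
Because rename reports most failures through its boolean result, a common pattern is to create the destination's parent directory first and then check the return value. A minimal sketch of that pattern follows (the paths here are purely illustrative):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class RenameExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path src = new Path("/tmp/source");       // illustrative source path
        Path dst = new Path("/data/destination"); // illustrative destination path

        FileSystem fs = src.getFileSystem(conf);
        // The destination's parent directory must exist for the rename to succeed.
        if (!fs.mkdirs(dst.getParent())) {
            throw new IOException("Unable to create " + dst.getParent());
        }
        // rename returns false on failure instead of throwing, so check the result.
        if (!fs.rename(src, dst)) {
            throw new IOException("Failed to rename " + src + " to " + dst);
        }
    }
}

Most of the examples below follow this same check-and-throw pattern around rename.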

Usage

From source file:com.netflix.aegisthus.tools.Utils.java

License:Apache License

public static void copy(Path from, Path to, boolean snappy, TaskAttemptContext ctx) throws IOException {
    FileSystem fromFs = from.getFileSystem(ctx.getConfiguration());
    FileSystem toFs = to.getFileSystem(ctx.getConfiguration());

    if (!to.isAbsolute()) {
        to = new Path(ctx.getConfiguration().get("mapred.working.dir"), to);
    }
    if (!snappy && onSameHdfs(ctx.getConfiguration(), from, to)) {
        LOG.info(String.format("renaming %s to %s", from, to));
        toFs.mkdirs(to.getParent());
        toFs.rename(from, to);
        return;
    }

    InputStream in = fromFs.open(from);
    OutputStream out = toFs.create(to, false);
    try {
        if (snappy) {
            in = new SnappyInputStream2(in);
        }
        byte[] buffer = new byte[65536];
        int bytesRead;
        int count = 0;
        while ((bytesRead = in.read(buffer)) >= 0) {
            if (bytesRead > 0) {
                out.write(buffer, 0, bytesRead);
            }
            if (count++ % 50 == 0) {
                ctx.progress();
            }
        }
    } finally {
        in.close();
        out.close();
    }
}

From source file:com.ning.metrics.collector.events.hadoop.writer.HadoopOutputChunk.java

License:Apache License

/**
 * Rename sourcePath to destinationPath. Parents don't have to exist (they will be created if they don't).
 *
 * @param fileSystem Filesystem object to operate on
 * @throws IOException generic IOException
 */
public void commit(final FileSystem fileSystem) throws IOException {
    final Path destinationDir = destinationPath.getParent();

    // The parent directory has to exist for an HDFS rename to succeed.
    if (!fileSystem.exists(destinationDir) && !fileSystem.mkdirs(destinationDir)) {
        throw new IOException(String.format(
                "Unable to make destination directory %s (does the parent directory exist?)", destinationDir));
    }
    if (!fileSystem.rename(sourcePath, destinationPath)) {
        throw new IOException(String.format("Unable to rename %s to %s", sourcePath, destinationPath));
    }
}

From source file:com.panguso.lc.analysis.format.Logcenter.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    context = new ClassPathXmlApplicationContext("applicationContext.xml");
    Properties prop = context.getBean("configProperties", Properties.class);
    // String time = new DateTime().toString("yyyyMMddHH");

    // hadoop.lib=/application/format/lib/
    // hadoop.conf=/application/format/conf/
    // hadoop.src=/log/src/
    // hadoop.dest=/log/dest/
    // hadoop.archive=/log/archive/
    libPath = prop.getProperty("hadoop.lib");
    confPath = prop.getProperty("hadoop.conf");
    srcPath = prop.getProperty("hadoop.src");
    destPath = prop.getProperty("hadoop.dest");
    archivePath = prop.getProperty("hadoop.archive");
    Configuration conf = getConf();
    logger.info("libPath=" + libPath);
    logger.info("confPath=" + confPath);
    logger.info("srcPath=" + srcPath);
    logger.info("destPath=" + destPath);
    logger.info("archivePath=" + archivePath);

    FileSystem fs = FileSystem.get(conf);
    // --jar
    FileStatus[] fJars = fs.listStatus(new Path(libPath));
    for (FileStatus fileStatus : fJars) {
        String jar = libPath + fileStatus.getPath().getName();
        DistributedCache.addFileToClassPath(new Path(jar), conf, FileSystem.get(conf));
    }
    // --conf
    FileStatus[] fProp = fs.listStatus(new Path(confPath));
    for (FileStatus fileStatus : fProp) {
        DistributedCache.addArchiveToClassPath(new Path(confPath + fileStatus.getPath().getName()), conf,
                FileSystem.get(conf));
    }
    FileStatus[] fDirs = fs.listStatus(new Path(srcPath));
    if (fDirs != null && fDirs.length > 0) {
        for (FileStatus file : fDirs) {
            // dir
            String currentTime = file.getPath().getName();
            String srcPathWithTime = srcPath + currentTime + "/";
            String destPathWithTime = destPath + currentTime + "/";
            String archPathWithTime = archivePath + currentTime + "/";
            // skip time buckets that have already been processed successfully
            if (analysisService.isSuccessful(currentTime)) {
                continue;
            }

            // set up the format job for this time bucket

            // remove any output left over from a previous run
            fs.delete(new Path(destPathWithTime), true);

            // check that the input path exists (currently disabled)
            // if (!fs.exists(new Path(srcPathWithTime))) {
            // logger.warn("outPath does not exist,inputPath=" +
            // srcPathWithTime);
            // analysisService.saveFailureJob(job.getJobName(),
            // currentTime);
            // return -1;
            // }
            // normalize the classpath separator from ";" to ":"
            Job job = new Job(conf);
            String jars = job.getConfiguration().get("mapred.job.classpath.files");
            job.getConfiguration().set("mapred.job.classpath.files", jars.replace(";", ":"));
            logger.info("current dir=" + currentTime);
            job.setJobName("format_" + currentTime);

            job.setJarByClass(Logcenter.class);
            job.setMapperClass(FormatAnalysisMapper.class);
            job.setReducerClass(FormatAnalysisReducer.class);
            job.setCombinerClass(FormatAnalysisReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            job.setOutputFormatClass(TextOutputFormat.class);
            // job.setNumReduceTasks(0);
            FileInputFormat.addInputPath(job, new Path(srcPathWithTime));
            FileOutputFormat.setOutputPath(job, new Path(destPathWithTime));

            // run the job and wait for completion
            boolean result = false;
            try {
                result = job.waitForCompletion(true);
            } catch (FileAlreadyExistsException e) {
                logger.warn(e.getMessage(), e);
            }
            if (!result) {
                logger.warn("job execute failure!");
                analysisService.saveFailureJob(job.getJobName(), currentTime);
                continue;
                // return -1;
            }

            // archive the processed input
            fs.delete(new Path(archPathWithTime), true);
            fs.rename(new Path(srcPathWithTime), new Path(archPathWithTime));
            analysisService.saveSuccessJob(job.getJobName(), currentTime);
        }
    }

    FileSystem.closeAll();
    return 0;
}

From source file:com.pegasus.ResultInfo.java

License:Apache License

public int run(final String[] args) throws Exception {

    Configuration conf = getConf();
    final FileSystem fs = FileSystem.get(conf);
    edge_path = new Path(conf.get("edge_path"));
    all_vertices = new Path(conf.get("all_vertices"));
    curbm_path = new Path(conf.get("iteration_state"));
    tempbm_path = new Path(conf.get("stage1out"));
    nextbm_path = new Path(conf.get("stage2out"));
    output_path = new Path(conf.get("stage3out"));
    grapherOut_path = new Path(conf.get("grapherout"));
    nreducers = Integer.parseInt(conf.get("num_reducers"));
    local_output_path = conf.get("local_output");

    // initial cleanup
    fs.delete(tempbm_path, true);
    fs.delete(nextbm_path, true);
    fs.delete(output_path, true);
    fs.delete(curbm_path, true);
    fs.delete(grapherOut_path, true);
    FileUtil.fullyDelete(new File(local_output_path));
    fs.mkdirs(curbm_path);
    //fs.mkdirs(grapherOut_path);

    FileStatus[] statusArray = fs.listStatus(all_vertices);
    for (int index = 0; index < statusArray.length; index++) {
        Path temp = statusArray[index].getPath();
        FileUtil.copy(fs, temp, fs, curbm_path, false, conf);
    }

    make_symmetric = 1;

    System.out.println("\n-----===[PEGASUS: A Peta-Scale Graph Mining System]===-----\n");

    // Iteratively calculate neighborhood function. 
    // rotate directory
    for (int i = cur_iter; i < MAX_ITERATIONS; i++) {
        cur_iter++;

        System.out.println("configStage1");
        JobClient.runJob(configStage1());
        System.out.println("configStage2");
        JobClient.runJob(configStage2());
        System.out.println("configStage3");
        JobClient.runJob(configStage3());

        FileUtil.fullyDelete(FileSystem.getLocal(getConf()), new Path(local_output_path));

        // copy neighborhood information from HDFS to local disk, and read it!
        String new_path = local_output_path + "/" + i;
        fs.copyToLocalFile(output_path, new Path(new_path));
        ResultInfo ri = readIterationOutput(new_path);

        changed_nodes[iter_counter] = ri.changed;
        unchanged_nodes[iter_counter] = ri.unchanged;

        iter_counter++;

        System.out.println("Hop " + i + " : changed = " + ri.changed + ", unchanged = " + ri.unchanged);
        fs.delete(curbm_path, true);
        fs.delete(tempbm_path, true);
        fs.delete(output_path, true);
        fs.rename(nextbm_path, curbm_path);

        // Stop when the minimum neighborhood doesn't change
        if (ri.changed == 0) {
            System.out.println("All the component ids converged. Finishing...");
            fs.rename(curbm_path, grapherOut_path);
            break;
        }
    }
    FileUtil.fullyDelete(FileSystem.getLocal(getConf()), new Path(local_output_path));

    // finishing.
    System.out.println("\n[PEGASUS] Connected component computed.");
    System.out.println("[PEGASUS] Total Iteration = " + iter_counter);
    return 0;
}

From source file:com.philiphubbard.digraph.MRCompressChains.java

License:Open Source License

public static boolean continueIteration(Job job, Path inputPathOrig, Path outputPathOrig) throws IOException {
    FileSystem fileSystem = FileSystem.get(job.getConfiguration());

    if (iter > 0) {
        Path outputPathOld = new Path(outputPathOrig.toString() + (iter - 1));
        if (fileSystem.exists(outputPathOld))
            fileSystem.delete(outputPathOld, true);
    }

    Counters jobCounters = job.getCounters();
    long numCompressions = jobCounters.findCounter(MRCompressChains.CompressionCounter.numCompressions)
            .getValue();
    if (numCompressions == 0)
        numIterWithoutCompressions++;
    else
        numIterWithoutCompressions = 0;
    int limit = job.getConfiguration().getInt(CONFIG_TERMINATION_COUNT, 1);
    boolean keepGoing = (numIterWithoutCompressions < limit);

    if (keepGoing) {
        iter++;
    } else {
        Path outputPath = new Path(outputPathOrig.toString() + iter);
        fileSystem.rename(outputPath, outputPathOrig);
    }

    return keepGoing;
}

From source file:com.placeiq.piqconnect.Runner.java

License:Apache License

public int run(final String[] args) throws Exception {
    Path pathEdges = new Path(args[0]);
    Path pathVector = new Path(args[1]);
    Path workDir = new Path(args[2]);
    Path pathOutputStage1 = new Path(workDir, "stage1");
    Path pathOutputStage2 = new Path(workDir, "stage2");
    Path pathOutputVector = new Path(workDir, "result");

    numberOfReducers = Integer.parseInt(args[3]);
    blockSize = Integer.parseInt(args[4]);

    int maxConvergence = Integer.parseInt(args[5]);
    int maxIters = Integer.parseInt(args[6]);

    if (maxConvergence < 0) {
        maxConvergence = 0;
    }
    if (maxIters < 0 || maxIters > MAX_ITERATIONS) {
        maxIters = MAX_ITERATIONS;
    }

    FileSystem fs = FileSystem.get(getConf());

    int n = 0;
    long changedNodes = Long.MAX_VALUE;
    while (n < maxIters && changedNodes > maxConvergence) {
        fs.delete(pathOutputStage1, true);
        fs.delete(pathOutputStage2, true);
        LOG.info("Start iteration " + n + " Stage1");
        Job job1 = buildJob1(pathEdges, pathVector, pathOutputStage1);
        if (!job1.waitForCompletion(true)) {
            LOG.error("Failed to execute IterationStage1 for iteration #" + n);
            return -1;
        }
        LOG.info("Start iteration " + n + " Stage2");
        Job job2 = buildJob2(pathOutputStage1, pathOutputStage2);
        if (!job2.waitForCompletion(true)) {
            LOG.error("Failed to execute IterationStage2 for iteration #" + n);
            return -1;
        }
        changedNodes = job2.getCounters().findCounter(PiqConnectCounter.NUMBER_INCOMPLETE_VECTOR).getValue();
        long unchangedNodes = job2.getCounters().findCounter(PiqConnectCounter.NUMBER_FINAL_VECTOR).getValue();
        LOG.info("End of iteration " + n + ", changedNodes=" + changedNodes + ", unchangedNodes="
                + unchangedNodes);
        LOG.info(pathOutputStage2);
        fs.delete(pathVector, true);
        if (!fs.rename(pathOutputStage2, pathVector)) {
            LOG.error("failed to rename " + pathOutputStage2 + " into " + pathVector);
            return -1;
        }
        n++;
    }
    Job job3 = buildJob3(pathVector, pathOutputVector);
    if (!job3.waitForCompletion(true)) {
        LOG.error("Failed to execute FinalResultBuilder for iteration #" + n);
        return -1;
    }
    LOG.info("Connected component computed in " + n + " iterations");
    return 0;
}

From source file:com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java

License:Apache License

/**
 * Move all of the files from the work directory to the final output
 * @param context the task context
 * @param fs the output file system
 * @param jobOutputDir the final output directory
 * @param taskOutput the work path
 * @throws IOException
 */
private void moveTaskOutputs(TaskAttemptContext context, FileSystem fs, Path jobOutputDir, Path taskOutput)
        throws IOException {
    TaskAttemptID attemptId = context.getTaskAttemptID();
    context.progress();
    if (fs.isFile(taskOutput)) {
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, workPath);
        if (!fs.rename(taskOutput, finalOutputPath)) {
            if (!fs.delete(finalOutputPath, true)) {
                throw new IOException("Failed to delete earlier output of task: " + attemptId);
            }
            if (!fs.rename(taskOutput, finalOutputPath)) {
                throw new IOException("Failed to save output of task: " + attemptId);
            }
        }
        LOG.debug("Moved " + taskOutput + " to " + finalOutputPath);
    } else if (fs.getFileStatus(taskOutput).isDir()) {
        FileStatus[] paths = fs.listStatus(taskOutput);
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, workPath);
        fs.mkdirs(finalOutputPath);
        if (paths != null) {
            for (FileStatus path : paths) {
                moveTaskOutputs(context, fs, jobOutputDir, path.getPath());
            }
        }
    }
}

From source file:com.rapleaf.hank.hadoop.DomainBuilderOutputCommitter.java

License:Apache License

public static void commitJob(String domainName, JobConf conf) throws IOException {
    Path outputPath = new Path(DomainBuilderProperties.getOutputPath(domainName, conf));
    Path tmpOutputPath = new Path(DomainBuilderProperties.getTmpOutputPath(domainName, conf));
    FileSystem fs = outputPath.getFileSystem(conf);

    // Create outputPath
    fs.mkdirs(outputPath);

    // Move temporary output to final output
    LOG.info("Moving temporary output files from: " + tmpOutputPath + " to final output path: " + outputPath);
    FileStatus[] partitions = fs.listStatus(tmpOutputPath);
    for (FileStatus partition : partitions) {
        if (partition.isDir()) {
            FileStatus[] partitionFiles = fs.listStatus(partition.getPath());
            for (FileStatus partitionFile : partitionFiles) {
                Path sourcePath = partitionFile.getPath();
                Path targetPath = new Path(new Path(outputPath, partition.getPath().getName()),
                        partitionFile.getPath().getName());
                LOG.info("Moving: " + sourcePath + " to: " + targetPath);
                if (!fs.mkdirs(targetPath.getParent())) {
                    throw new IOException("Failed at creating directory " + targetPath.getParent());
                }
                if (!fs.rename(sourcePath, targetPath)) {
                    throw new IOException("Failed at renaming " + sourcePath + " to " + targetPath);
                }
            }
        }
    }

    // Finally, cleanup
    cleanupJob(domainName, conf);
}

From source file:com.redsqirl.workflow.server.connect.HDFSInterface.java

License:Open Source License

/**
 * Move a path to another location.
 * 
 * @param old_path
 * @param new_path
 * @return Error Message
 * @throws RemoteException
 */
@Override
public String move(String old_path, String new_path) throws RemoteException {
    String error = null;
    try {
        Path oldP = new Path(old_path), newP = new Path(new_path);
        HdfsFileChecker hCh = new HdfsFileChecker(newP);
        if (!hCh.exists()) {
            FileSystem fs = NameNodeVar.getFS();
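            // Note: the boolean returned by rename is not checked here, so a
            // failed rename will not be reported through the error message.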
            fs.rename(oldP, newP);
            // fs.close();
        } else {
            error = LanguageManagerWF.getText("HdfsInterface.ouputexists");
        }
        // hCh.close();

    } catch (IOException e) {
        logger.error(e.getMessage());
        error = LanguageManagerWF.getText("HdfsInterface.errormove", new Object[] { e.getMessage() });
    }
    if (error != null) {
        logger.debug(error);
    }
    return error;
}

From source file:com.rim.logdriver.admin.LogMaintenance.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    // For some reason, Oozie needs some options to be set as system properties
    // instead of in the configuration. So copy the configs over.
    {
        Iterator<Entry<String, String>> i = conf.iterator();
        while (i.hasNext()) {
            Entry<String, String> next = i.next();
            System.setProperty(next.getKey(), next.getValue());
        }
    }

    if (args.length < 3) {
        printUsage();
        return 1;
    }

    String userName = args[0];
    String dcNumber = args[1];
    String service = args[2];
    String date = null;
    String hour = null;
    if (args.length >= 4) {
        date = args[3];
    }
    if (args.length >= 5) {
        hour = args[4];
    }

    // Set from environment variables
    oozieUrl = getConfOrEnv(conf, "OOZIE_URL");
    String mergeJobPropertiesFile = getConfOrEnv(conf, "MERGEJOB_CONF");
    String filterJobPropertiesFile = getConfOrEnv(conf, "FILTERJOB_CONF");
    String daysBeforeArchive = getConfOrEnv(conf, "DAYS_BEFORE_ARCHIVE");
    String daysBeforeDelete = getConfOrEnv(conf, "DAYS_BEFORE_DELETE");
    String maxConcurrentMergeJobs = getConfOrEnv(conf, "MAX_CONCURRENT_MERGE_JOBS");
    String maxConcurrentFilterJobs = getConfOrEnv(conf, "MAX_CONCURRENT_FILTER_JOBS");
    String zkConnectString = getConfOrEnv(conf, "ZK_CONNECT_STRING");
    String logdir = getConfOrEnv(conf, "logdriver.logdir.name");
    boolean resetOrphanedJobs = Boolean.parseBoolean(getConfOrEnv(conf, "reset.orphaned.jobs"));
    String rootDir = getConfOrEnv(conf, "service.root.dir");

    boolean doMerge = true;
    boolean doArchive = true;
    boolean doDelete = true;

    if (oozieUrl == null) {
        LOG.info("OOZIE_URL is not set.  Not merging or archiving.");
        doMerge = false;
        doArchive = false;
    }
    if (zkConnectString == null) {
        LOG.error("ZK_CONNECT_STRING is not set.  Exiting.");
        return 1;
    }
    if (mergeJobPropertiesFile == null) {
        LOG.info("MERGEJOB_CONF is not set.  Not merging.");
        doMerge = false;
    }
    if (filterJobPropertiesFile == null) {
        LOG.info("FILTERJOB_CONF is not set.  Not archiving.");
        doArchive = false;
    }
    if (daysBeforeArchive == null) {
        LOG.info("DAYS_BEFORE_ARCHIVE is not set.  Not archiving.");
        doArchive = false;
    }
    if (doArchive && Integer.parseInt(daysBeforeArchive) < 0) {
        LOG.info("DAYS_BEFORE_ARCHIVE is negative.  Not archiving.");
        doArchive = false;
    }
    if (daysBeforeDelete == null) {
        LOG.info("DAYS_BEFORE_DELETE is not set.  Not deleting.");
        doDelete = false;
    }
    if (doDelete && Integer.parseInt(daysBeforeDelete) < 0) {
        LOG.info("DAYS_BEFORE_DELETE is negative.  Not deleting.");
        doDelete = false;
    }
    if (maxConcurrentMergeJobs == null) {
        LOG.info("MAX_CONCURRENT_MERGE_JOBS is not set.  Using default value of -1.");
        maxConcurrentMergeJobs = "-1";
    }
    if (maxConcurrentFilterJobs == null) {
        LOG.info("MAX_CONCURRENT_FILTER_JOBS is not set.  Using default value of -1.");
        maxConcurrentFilterJobs = "-1";
    }
    if (logdir == null) {
        LOG.info("LOGDRIVER_LOGDIR_NAME is not set.  Using default value of 'logs'.");
        logdir = "logs";
    }
    if (rootDir == null) {
        LOG.info("SERVICE_ROOT_DIR is not set.  Using default value of 'service'.");
        rootDir = "/service";
    }

    // Now it's safe to create our Oozie Runners.
    OozieRunner mergeOozieRunner = new OozieRunner(oozieUrl, Integer.parseInt(maxConcurrentMergeJobs));
    Thread mergeOozieRunnerThread = new Thread(mergeOozieRunner);
    mergeOozieRunnerThread.setName("OozieRunner - Merge");
    mergeOozieRunnerThread.setDaemon(false);
    mergeOozieRunnerThread.start();

    OozieRunner filterOozieRunner = new OozieRunner(oozieUrl, Integer.parseInt(maxConcurrentFilterJobs));
    Thread filterOozieRunnerThread = new Thread(filterOozieRunner);
    filterOozieRunnerThread.setName("OozieRunner - Filter");
    filterOozieRunnerThread.setDaemon(false);
    filterOozieRunnerThread.start();

    // Figure out what date we start filters on.
    String filterCutoffDate = "";
    if (doArchive) {
        Calendar cal = Calendar.getInstance();
        cal.add(Calendar.DAY_OF_MONTH, Integer.parseInt("-" + daysBeforeArchive));
        filterCutoffDate = String.format("%04d%02d%02d%02d", cal.get(Calendar.YEAR),
                (cal.get(Calendar.MONTH) + 1), cal.get(Calendar.DAY_OF_MONTH), cal.get(Calendar.HOUR_OF_DAY));
        LOG.info("Archiving logs from before {}", filterCutoffDate);
    }
    String deleteCutoffDate = "";
    if (doDelete) {
        Calendar cal = Calendar.getInstance();
        cal.add(Calendar.DAY_OF_MONTH, Integer.parseInt("-" + daysBeforeDelete));
        deleteCutoffDate = String.format("%04d%02d%02d%02d", cal.get(Calendar.YEAR),
                (cal.get(Calendar.MONTH) + 1), cal.get(Calendar.DAY_OF_MONTH), cal.get(Calendar.HOUR_OF_DAY));
        LOG.info("Deleting logs from before {}", deleteCutoffDate);
    }

    long now = System.currentTimeMillis();

    // Various exceptions have been popping up here. So make sure I catch them
    // all.
    try {
        // We can hang if this fails. So make sure we abort if it fails.
        FileSystem fs = null;
        try {
            fs = FileSystem.get(conf);
            fs.exists(new Path("/")); // Test if it works.
        } catch (IOException e) {
            LOG.error("Error getting filesystem.", e);
            return 1;
        }
        // We'll need an Oozie client to check on orphaned directories.
        oozieClient = getOozieClient();

        // LockUtils are used in a couple of places
        LockUtil lu = new LockUtil(zkConnectString);

        // Patterns to recognize hour, day and incoming directories, so that they
        // can be processed.
        Pattern datePathPattern;
        Pattern hourPathPattern;
        Pattern incomingPathPattern;
        Pattern dataPathPattern;
        Pattern archivePathPattern;
        Pattern workingPathPattern;
        if (hour != null) {
            datePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")");
            hourPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/("
                    + Pattern.quote(hour) + ")");
            incomingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/("
                    + Pattern.quote(hour) + ")/([^/]+)/incoming");
            dataPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/("
                    + Pattern.quote(hour) + ")/([^/]+)/data");
            archivePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/("
                    + Pattern.quote(hour) + ")/([^/]+)/archive");
            workingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/("
                    + Pattern.quote(hour) + ")/([^/]+)/working/([^/]+)_(\\d+)");
        } else if (date != null) {
            datePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")");
            hourPathPattern = Pattern
                    .compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/"
                            + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/(\\d{2})");
            incomingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date)
                    + ")/(\\d{2})/([^/]+)/incoming");
            dataPathPattern = Pattern
                    .compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/"
                            + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/(\\d{2})/([^/]+)/data");
            archivePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date)
                    + ")/(\\d{2})/([^/]+)/archive");
            workingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date)
                    + ")/(\\d{2})/([^/]+)/working/([^/]+)_(\\d+)");
        } else {
            datePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})");
            hourPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})");
            incomingPathPattern = Pattern
                    .compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/"
                            + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})/([^/]+)/incoming");
            dataPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                    + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})/([^/]+)/data");
            archivePathPattern = Pattern
                    .compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/"
                            + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})/([^/]+)/archive");
            workingPathPattern = Pattern
                    .compile(rootDir + "/" + Pattern.quote(dcNumber) + "/" + Pattern.quote(service) + "/"
                            + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})/([^/]+)/working/([^/]+)_(\\d+)");
        }

        // Do a depth first search of the directory, processing anything that
        // looks
        // interesting along the way
        Deque<Path> paths = new ArrayDeque<Path>();
        Path rootPath = new Path(rootDir + "/" + dcNumber + "/" + service + "/" + logdir + "/");
        paths.push(rootPath);

        while (paths.size() > 0) {
            Path p = paths.pop();
            LOG.debug("{}", p.toString());

            if (!fs.exists(p)) {
                continue;
            }

            FileStatus dirStatus = fs.getFileStatus(p);
            FileStatus[] children = fs.listStatus(p);
            boolean addChildren = true;

            boolean old = dirStatus.getModificationTime() < now - WAIT_TIME;
            LOG.debug("    Was last modified {}ms ago", now - dirStatus.getModificationTime());

            if (!old) {
                LOG.debug("    Skipping, since it's not old enough.");

            } else if ((!rootPath.equals(p)) && (children.length == 0
                    || (children.length == 1 && children[0].getPath().getName().equals(READY_MARKER)))) {
                // old and no children? Delete!
                LOG.info("    Deleting empty directory {}", p.toString());
                fs.delete(p, true);

            } else {
                Matcher matcher = datePathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    LOG.debug("Checking date directory");

                    // If this is already done, then skip it. So only process if it
                    // doesn't exist.
                    if (fs.exists(new Path(p, READY_MARKER)) == false) {
                        // Check each subdirectory. If they all have ready markers, then I
                        // guess we're ready.
                        boolean ready = true;
                        for (FileStatus c : children) {
                            if (c.isDir() && fs.exists(new Path(c.getPath(), READY_MARKER)) == false) {
                                ready = false;
                                break;
                            }
                        }

                        if (ready) {
                            fs.createNewFile(new Path(p, READY_MARKER));
                        }
                    }
                }

                matcher = hourPathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    LOG.debug("Checking hour directory");

                    // If this is already done, then skip it. So only process if it
                    // doesn't exist.
                    if (fs.exists(new Path(p, READY_MARKER)) == false) {
                        // Check each subdirectory. If they all have ready markers, then I
                        // guess we're ready.
                        boolean ready = true;
                        for (FileStatus c : children) {
                            if (c.isDir() && fs.exists(new Path(c.getPath(), READY_MARKER)) == false) {
                                ready = false;
                                break;
                            }
                        }

                        if (ready) {
                            fs.createNewFile(new Path(p, READY_MARKER));
                        }
                    }
                }

                // Check to see if we have to run a merge
                matcher = incomingPathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    LOG.debug("Checking incoming directory");
                    String matchDate = matcher.group(1);
                    String matchHour = matcher.group(2);
                    String matchComponent = matcher.group(3);

                    String timestamp = matchDate + matchHour;

                    if (doDelete && timestamp.compareTo(deleteCutoffDate) < 0) {
                        LOG.info("Deleting old directory: {}", p);
                        fs.delete(p, true);
                        addChildren = false;
                    } else if (doMerge) {

                        // old, looks right, and has children? Run it!
                        boolean hasMatchingChildren = false;
                        boolean subdirTooYoung = false;

                        for (FileStatus child : children) {
                            if (!hasMatchingChildren) {
                                FileStatus[] grandchildren = fs.listStatus(child.getPath());
                                for (FileStatus gc : grandchildren) {
                                    if (VALID_FILE.matcher(gc.getPath().getName()).matches()) {
                                        hasMatchingChildren = true;
                                        break;
                                    }
                                }
                            }
                            if (!subdirTooYoung) {
                                if (child.getModificationTime() >= now - WAIT_TIME) {
                                    subdirTooYoung = true;
                                    LOG.debug("    Subdir {} is too young.", child.getPath());
                                }
                            }
                        }

                        if (!hasMatchingChildren) {
                            LOG.debug("    No files match the expected pattern ({})", VALID_FILE.pattern());
                        }

                        if (hasMatchingChildren && !subdirTooYoung) {
                            LOG.info("    Run Merge job {} :: {} {} {} {} {}", new Object[] { p.toString(),
                                    dcNumber, service, matchDate, matchHour, matchComponent });

                            Properties oozieJobProps = new Properties();
                            oozieJobProps.load(new FileInputStream(mergeJobPropertiesFile));

                            oozieJobProps.setProperty("rootDir", rootDir);
                            oozieJobProps.setProperty("dcNumber", dcNumber);
                            oozieJobProps.setProperty("service", service);
                            oozieJobProps.setProperty("date", matchDate);
                            oozieJobProps.setProperty("hour", matchHour);
                            oozieJobProps.setProperty("component", matchComponent);
                            oozieJobProps.setProperty("user.name", userName);
                            oozieJobProps.setProperty("logdir", logdir);

                            mergeOozieRunner.submit(oozieJobProps);

                            addChildren = false;
                        }
                    }
                }

                // Check to see if we need to run a filter and archive
                matcher = dataPathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    String matchDate = matcher.group(1);
                    String matchHour = matcher.group(2);
                    String matchComponent = matcher.group(3);

                    String timestamp = matchDate + matchHour;

                    if (doDelete && timestamp.compareTo(deleteCutoffDate) < 0) {
                        LOG.info("Deleting old directory: {}", p);
                        fs.delete(p, true);
                        addChildren = false;
                    } else if (doArchive && timestamp.compareTo(filterCutoffDate) < 0) {

                        Properties oozieJobProps = new Properties();
                        oozieJobProps.load(new FileInputStream(filterJobPropertiesFile));

                        oozieJobProps.setProperty("rootDir", rootDir);
                        oozieJobProps.setProperty("dcNumber", dcNumber);
                        oozieJobProps.setProperty("service", service);
                        oozieJobProps.setProperty("date", matchDate);
                        oozieJobProps.setProperty("hour", matchHour);
                        oozieJobProps.setProperty("component", matchComponent);
                        oozieJobProps.setProperty("user.name", userName);
                        oozieJobProps.setProperty("logdir", logdir);

                        // Check to see if we should just keep all or delete all here.
                        // The filter file should be here
                        String appPath = oozieJobProps.getProperty("oozie.wf.application.path");
                        appPath = appPath.replaceFirst("\\$\\{.*?\\}", "");
                        Path filterFile = new Path(appPath + "/" + service + ".yaml");
                        LOG.info("Filter file is {}", filterFile);
                        if (fs.exists(filterFile)) {
                            List<BoomFilterMapper.Filter> filters = BoomFilterMapper.loadFilters(matchComponent,
                                    fs.open(filterFile));

                            if (filters == null) {
                                LOG.warn(
                                        "    Got null when getting filters.  Not processing. {} :: {} {} {} {} {}",
                                        new Object[] { p.toString(), dcNumber, service, matchDate, matchHour,
                                                matchComponent });
                            } else if (filters.size() == 0) {
                                LOG.warn("    Got no filters.  Not processing. {} :: {} {} {} {} {}",
                                        new Object[] { p.toString(), dcNumber, service, matchDate, matchHour,
                                                matchComponent });
                            } else if (filters.size() == 1
                                    && filters.get(0) instanceof BoomFilterMapper.KeepAllFilter) {
                                LOG.info("    Keeping everything. {} :: {} {} {} {} {}",
                                        new Object[] { p.toString(), dcNumber, service, matchDate, matchHour,
                                                matchComponent });
                                // Move files from data to archive
                                // delete it all!
                                String destination = rootDir + "/" + dcNumber + "/" + service + "/" + logdir
                                        + "/" + matchDate + "/" + matchHour + "/" + matchComponent
                                        + "/archive/";

                                String[] moveArgs = { zkConnectString, dcNumber, service, matchDate, matchHour,
                                        matchComponent, "move " + p.toUri().getPath() + " " + destination };
                                ToolRunner.run(new Configuration(), new LockedFs(), moveArgs);
                            } else if (filters.size() == 1
                                    && filters.get(0) instanceof BoomFilterMapper.DropAllFilter) {
                                LOG.info("    Dropping everything. {} :: {} {} {} {} {}",
                                        new Object[] { p.toString(), dcNumber, service, matchDate, matchHour,
                                                matchComponent });
                                // delete it all!
                                String[] delArgs = { zkConnectString, dcNumber, service, matchDate, matchHour,
                                        matchComponent, "delete " + p.toUri().getPath() };
                                ToolRunner.run(new Configuration(), new LockedFs(), delArgs);
                            } else {
                                LOG.info("    Run Filter/Archive job {} :: {} {} {} {} {}",
                                        new Object[] { p.toString(), dcNumber, service, matchDate, matchHour,
                                                matchComponent });
                                filterOozieRunner.submit(oozieJobProps);
                            }
                        } else {
                            LOG.warn("Skipping filter job, since no filter file exists");
                        }

                        addChildren = false;
                    }
                }

                matcher = archivePathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    String matchDate = matcher.group(1);
                    String matchHour = matcher.group(2);

                    String timestamp = matchDate + matchHour;

                    if (doDelete && timestamp.compareTo(deleteCutoffDate) < 0) {
                        LOG.info("Deleting old directory: {}", p);
                        fs.delete(p, true);
                        addChildren = false;
                    }
                }

                matcher = workingPathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    LOG.info("  Matches working pattern");
                    if (resetOrphanedJobs) {
                        String matchDate = matcher.group(1);
                        String matchHour = matcher.group(2);
                        String matchComponent = matcher.group(3);
                        String matchOozieJobId = matcher.group(4);

                        // Check to see what's up with the oozie job. If it's still
                        // running,
                        // we don't want to touch it.
                        Status status = null;
                        try {
                            WorkflowJob jobInfo = oozieClient.getJobInfo(matchOozieJobId);
                            status = jobInfo.getStatus();
                        } catch (OozieClientException e) {
                            if (e.getMessage() != null && e.getMessage().contains("Job does not exist")) {
                                LOG.info("Oozie job not found.  Proceeding as though job was failed.", e);
                                status = Status.FAILED;
                            } else {
                                LOG.error("Oozie client error.  Not Proceeding.", e);
                            }
                        }
                        LOG.info("  Oozie job status is {}", status);
                        if (status != null && status != Status.RUNNING && status != Status.PREP
                                && status != Status.SUSPENDED) {
                            // Move everything from working/xxx/incoming/ to incoming/
                            PathInfo lockPathInfo = new PathInfo(rootDir + "/" + dcNumber + "/" + service + "/"
                                    + logdir + "/" + matchDate + "/" + matchHour + "/" + matchComponent);
                            lu.acquireWriteLock(lu.getLockPath(lockPathInfo));

                            FileStatus[] fileStatuses = fs
                                    .listStatus(new Path(p.toUri().getPath() + "/incoming/"));
                            if (fileStatuses != null) {
                                for (FileStatus fileStatus : fileStatuses) {
                                    Path toPath = new Path(fileStatus.getPath().getParent().getParent()
                                            .getParent().getParent(),
                                            "incoming/" + fileStatus.getPath().getName());

                                    LOG.info("  Moving data from {} to {}", fileStatus.getPath(), toPath);
                                    LOG.info("    mkdir {}", toPath);
                                    fs.mkdirs(toPath);

                                    Path fromDir = new Path(p.toUri().getPath(),
                                            "incoming/" + fileStatus.getPath().getName());
                                    LOG.info("    moving from {}", fromDir);
                                    FileStatus[] files = fs.listStatus(fromDir);
                                    if (files == null || files.length == 0) {
                                        LOG.info("    Nothing to move from  {}", fromDir);
                                    } else {
                                        for (FileStatus f : files) {
                                            LOG.info("    rename {} {}", f.getPath(),
                                                    new Path(toPath, f.getPath().getName()));
                                            fs.rename(f.getPath(), new Path(toPath, f.getPath().getName()));
                                        }
                                    }

                                    LOG.info("    rm {}", fileStatus.getPath().getParent().getParent());
                                    fs.delete(fileStatus.getPath().getParent().getParent(), true);
                                }

                                lu.releaseWriteLock(lu.getLockPath(lockPathInfo));

                            }
                        }
                    }

                    addChildren = false;
                }
            }

            // Add any children which are directories to the stack.
            if (addChildren) {
                for (int i = children.length - 1; i >= 0; i--) {
                    FileStatus child = children[i];
                    if (child.isDir()) {
                        paths.push(child.getPath());
                    }
                }
            }
        }

        // Since we may have deleted a bunch of directories, delete any unused
        // locks
        // from ZooKeeper.
        {
            LOG.info("Checking for unused locks in ZooKeeper");
            String scanPath = rootDir + "/" + dcNumber + "/" + service + "/" + logdir;
            if (date != null) {
                scanPath += "/" + date;
                if (hour != null) {
                    scanPath += "/" + hour;
                }
            }

            List<LockInfo> lockInfo = lu.scan(scanPath);

            for (LockInfo li : lockInfo) {
                // Check if the lock path still exists in HDFS. If it doesn't, then
                // delete it from ZooKeeper.
                String path = li.getPath();
                String hdfsPath = path.substring(LockUtil.ROOT.length());
                if (!fs.exists(new Path(hdfsPath))) {
                    ZooKeeper zk = lu.getZkClient();

                    while (!path.equals(LockUtil.ROOT)) {
                        try {
                            zk.delete(path, -1);
                        } catch (KeeperException.NotEmptyException e) {
                            // That's fine. just stop trying then.
                            break;
                        } catch (Exception e) {
                            LOG.error("Caught exception trying to delete from ZooKeeper.", e);
                            break;
                        }
                        LOG.info("Deleted from ZooKeeper: {}", path);
                        path = path.substring(0, path.lastIndexOf('/'));
                    }

                }
            }
        }
        lu.close();

        // Now that we're done, wait for the Oozie Runner to stop, and print the
        // results.
        LOG.info("Waiting for Oozie jobs to complete.");
        mergeOozieRunner.shutdown();
        mergeOozieRunnerThread.join();
        LOG.info("Oozie Job Stats : Merge  : Started={} Succeeded={} failed={} errors={}",
                new Object[] { mergeOozieRunner.getStarted(), mergeOozieRunner.getSucceeded(),
                        mergeOozieRunner.getFailed(), mergeOozieRunner.getErrors() });

        filterOozieRunner.shutdown();
        filterOozieRunnerThread.join();
        LOG.info("Oozie Job Stats : Filter : Started={} Succeeded={} failed={} errors={}",
                new Object[] { filterOozieRunner.getStarted(), filterOozieRunner.getSucceeded(),
                        filterOozieRunner.getFailed(), filterOozieRunner.getErrors() });

    } catch (Exception e) {
        LOG.error("Unexpected exception caught.", e);
        return 1;
    }

    return 0;
}