Example usage for org.apache.hadoop.mapred FileAlreadyExistsException FileAlreadyExistsException

Introduction

In this page you can find the example usage for org.apache.hadoop.mapred FileAlreadyExistsException FileAlreadyExistsException.

Prototype

public FileAlreadyExistsException(String msg)

Source Link

Usage

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputCommitter.java

License:Apache License

@Override
public void commitJob(JobContext context) throws IOException {
    Configuration configuration = context.getConfiguration();
    MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(configuration);
    BasicMapReduceTaskContext taskContext = classLoader.getTaskContextProvider().get(this.taskContext);

    String outputDatasetName = configuration.get(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET);
    PartitionedFileSet outputDataset = taskContext.getDataset(outputDatasetName);
    Partitioning partitioning = outputDataset.getPartitioning();

    Set<PartitionKey> partitionsToAdd = new HashSet<>();
    Set<String> relativePaths = new HashSet<>();
    // Go over all files in the temporary directory and keep track of partitions to add for them
    FileStatus[] allCommittedTaskPaths = getAllCommittedTaskPaths(context);
    for (FileStatus committedTaskPath : allCommittedTaskPaths) {
        FileSystem fs = committedTaskPath.getPath().getFileSystem(configuration);
        RemoteIterator<LocatedFileStatus> fileIter = fs.listFiles(committedTaskPath.getPath(), true);
        while (fileIter.hasNext()) {
            Path path = fileIter.next().getPath();
            String relativePath = getRelative(committedTaskPath.getPath(), path);

            int lastPathSepIdx = relativePath.lastIndexOf(Path.SEPARATOR);
            if (lastPathSepIdx == -1) {
                // this shouldn't happen because each relative path should consist of at least one partition key and
                // the output file name
                LOG.warn("Skipping path '{}'. It's relative path '{}' has fewer than two parts", path,
                        relativePath);//from w w w.j  a  v  a2  s  .c  o m
                continue;
            }
            // relativePath = "../key1/key2/part-m-00000"
            // relativeDir = "../key1/key2"
            // fileName = "part-m-00000"
            String relativeDir = relativePath.substring(0, lastPathSepIdx);
            String fileName = relativePath.substring(lastPathSepIdx + 1);

            Path finalDir = new Path(FileOutputFormat.getOutputPath(context), relativeDir);
            Path finalPath = new Path(finalDir, fileName);
            if (fs.exists(finalPath)) {
                throw new FileAlreadyExistsException("Final output path " + finalPath + " already exists");
            }
            PartitionKey partitionKey = getPartitionKey(partitioning, relativeDir);
            partitionsToAdd.add(partitionKey);
            relativePaths.add(relativeDir);
        }
    }

    // We need to copy to the parent of the FileOutputFormat's outputDir, since we added a _temporary_jobId suffix to
    // the original outputDir.
    Path finalOutput = FileOutputFormat.getOutputPath(context);
    FileSystem fs = finalOutput.getFileSystem(configuration);
    for (FileStatus stat : getAllCommittedTaskPaths(context)) {
        mergePaths(fs, stat, finalOutput);
    }

    // compute the metadata to be written to every output partition
    Map<String, String> metadata = ConfigurationUtil.getNamedConfigurations(this.taskContext.getConfiguration(),
            PartitionedFileSetArguments.OUTPUT_PARTITION_METADATA_PREFIX);

    // create all the necessary partitions
    for (PartitionKey partitionKey : partitionsToAdd) {
        PartitionOutput partitionOutput = outputDataset.getPartitionOutput(partitionKey);
        partitionOutput.setMetadata(metadata);
        partitionOutput.addPartition();
    }

    // close the TaskContext, which flushes dataset operations
    try {
        taskContext.flushOperations();
    } catch (Exception e) {
        Throwables.propagateIfPossible(e, IOException.class);
        throw new IOException(e);
    }

    // delete the job-specific _temporary folder and create a _done file in the o/p folder
    cleanupJob(context);

    // mark all the final output paths with a _SUCCESS file, if configured to do so (default = true)
    if (configuration.getBoolean(SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, true)) {
        for (String relativePath : relativePaths) {
            Path pathToMark = new Path(finalOutput, relativePath);
            Path markerPath = new Path(pathToMark, SUCCEEDED_FILE_NAME);
            fs.createNewFile(markerPath);
        }
    }
}

From source file:com.david.mos.out.FileOutputFormat.java

License:Apache License

public void checkOutputSpecs(JobContext job) throws FileAlreadyExistsException, IOException {
    // Ensure that the output directory is set and not already there
    Path outDir = getOutputPath(job);
    if (outDir == null) {
        throw new InvalidJobConfException("Output directory not set.");
    }/*from   ww  w .  j  a v  a2  s .  c o m*/

    // get delegation token for outDir's file system
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { outDir }, job.getConfiguration());

    if (outDir.getFileSystem(job.getConfiguration()).exists(outDir)) {
        throw new FileAlreadyExistsException("Output directory " + outDir + " already exists");
    }
}

From source file:com.rapleaf.ramhdfs.RamFileSystem.java

License:Apache License

@Override
public boolean mkdirs(Path f) throws IOException {
    if (f == null) {
        throw new IllegalArgumentException("mkdirs path arg is null");
    }/*  w  w w  . j a  v a  2 s . c om*/
    Path parent = f.getParent();
    FileObject p2f = pathToFileObject(f);
    if (isDirectory(p2f)) {
        return true;
    }
    if (parent != null) {
        FileObject parent2f = pathToFileObject(parent);
        if (parent2f != null && parent2f.exists() && !isDirectory(parent2f)) {
            throw new FileAlreadyExistsException("Parent path is not a directory: " + parent);
        }
    }
    return (parent == null || mkdirs(parent)) && (createDirectory(p2f) || isDirectory(p2f));
}

From source file:info.halo9pan.word2vec.hadoop.mr.SortOutputFormat.java

License:Apache License

@Override
public void checkOutputSpecs(JobContext job) throws InvalidJobConfException, IOException {
    // Ensure that the output directory is set
    Path outDir = getOutputPath(job);
    if (outDir == null) {
        throw new InvalidJobConfException("Output directory not set in JobConf.");
    }//from www . j  a va 2  s  .c  om

    final Configuration jobConf = job.getConfiguration();

    // get delegation token for outDir's file system
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { outDir }, jobConf);

    final FileSystem fs = outDir.getFileSystem(jobConf);

    if (fs.exists(outDir)) {
        // existing output dir is considered empty iff its only content is the
        // partition file.
        //
        final FileStatus[] outDirKids = fs.listStatus(outDir);
        boolean empty = false;
        if (outDirKids != null && outDirKids.length == 1) {
            final FileStatus st = outDirKids[0];
            final String fname = st.getPath().getName();
            empty = !st.isDirectory() && SortInputFormat.PARTITION_FILENAME.equals(fname);
        }
        if (WordSort.getUseSimplePartitioner(job) || !empty) {
            throw new FileAlreadyExistsException("Output directory " + outDir + " already exists");
        }
    }
}

From source file:info.halo9pan.word2vec.hadoop.terasort.TeraOutputFormat.java

License:Apache License

@Override
public void checkOutputSpecs(JobContext job) throws InvalidJobConfException, IOException {
    // Ensure that the output directory is set
    Path outDir = getOutputPath(job);
    if (outDir == null) {
        throw new InvalidJobConfException("Output directory not set in JobConf.");
    }//from w  ww  .j  av  a  2s. com

    final Configuration jobConf = job.getConfiguration();

    // get delegation token for outDir's file system
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { outDir }, jobConf);

    final FileSystem fs = outDir.getFileSystem(jobConf);

    if (fs.exists(outDir)) {
        // existing output dir is considered empty iff its only content is the
        // partition file.
        //
        final FileStatus[] outDirKids = fs.listStatus(outDir);
        boolean empty = false;
        if (outDirKids != null && outDirKids.length == 1) {
            final FileStatus st = outDirKids[0];
            final String fname = st.getPath().getName();
            empty = !st.isDirectory() && TeraInputFormat.PARTITION_FILENAME.equals(fname);
        }
        if (TeraSort.getUseSimplePartitioner(job) || !empty) {
            throw new FileAlreadyExistsException("Output directory " + outDir + " already exists");
        }
    }
}

From source file:org.apache.hadoop.examples.terasort.TeraOutputFormat.java

License:Apache License

@Override
public void checkOutputSpecs(JobContext job) throws InvalidJobConfException, IOException {
    // Ensure that the output directory is set
    Path outDir = getOutputPath(job);
    if (outDir == null) {
        throw new InvalidJobConfException("Output directory not set in JobConf.");
    }//from w  w w  .  j  av a2s. co  m

    final Configuration jobConf = job.getConfiguration();

    // get delegation token for outDir's file system
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { outDir }, jobConf);

    final FileSystem fs = outDir.getFileSystem(jobConf);

    try {
        // existing output dir is considered empty iff its only content is the
        // partition file.
        //
        final FileStatus[] outDirKids = fs.listStatus(outDir);
        boolean empty = false;
        if (outDirKids != null && outDirKids.length == 1) {
            final FileStatus st = outDirKids[0];
            final String fname = st.getPath().getName();
            empty = !st.isDirectory() && TeraInputFormat.PARTITION_FILENAME.equals(fname);
        }
        if (TeraSort.getUseSimplePartitioner(job) || !empty) {
            throw new FileAlreadyExistsException("Output directory " + outDir + " already exists");
        }
    } catch (FileNotFoundException ignored) {
    }
}

From source file:org.apache.hama.bsp.FileOutputFormat.java

License:Apache License

@Override
public void checkOutputSpecs(FileSystem ignored, BSPJob job)
        throws FileAlreadyExistsException, InvalidJobConfException, IOException {
    // Ensure that the output directory is set and not already there
    Path outDir = getOutputPath(job);
    if (outDir == null && job.getNumBspTask() != 0) {
        throw new InvalidJobConfException("Output directory not set in JobConf.");
    }//from  w w  w .  j  ava 2  s .c o  m
    if (outDir != null) {
        FileSystem fs = outDir.getFileSystem(job.getConfiguration());
        // normalize the output directory
        outDir = fs.makeQualified(outDir);
        setOutputPath(job, outDir);
        // check its existence
        if (fs.exists(outDir)) {
            throw new FileAlreadyExistsException("Output directory " + outDir + " already exists");
        }
    }
}

From source file:org.apache.tez.mapreduce.examples.TestOrderedWordCount.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    boolean generateSplitsInClient;

    SplitsInClientOptionParser splitCmdLineParser = new SplitsInClientOptionParser();
    try {/*from  w ww .j a  v a 2  s  .  com*/
        generateSplitsInClient = splitCmdLineParser.parse(otherArgs, false);
        otherArgs = splitCmdLineParser.getRemainingArgs();
    } catch (ParseException e1) {
        System.err.println("Invalid options");
        printUsage();
        return 2;
    }

    boolean useTezSession = conf.getBoolean("USE_TEZ_SESSION", true);
    long interJobSleepTimeout = conf.getInt("INTER_JOB_SLEEP_INTERVAL", 0) * 1000;

    boolean retainStagingDir = conf.getBoolean("RETAIN_STAGING_DIR", false);
    boolean useMRSettings = conf.getBoolean("USE_MR_CONFIGS", true);
    // TODO needs to use auto reduce parallelism
    int intermediateNumReduceTasks = conf.getInt("IREDUCE_NUM_TASKS", 2);

    if (((otherArgs.length % 2) != 0) || (!useTezSession && otherArgs.length != 2)) {
        printUsage();
        return 2;
    }

    List<String> inputPaths = new ArrayList<String>();
    List<String> outputPaths = new ArrayList<String>();

    for (int i = 0; i < otherArgs.length; i += 2) {
        inputPaths.add(otherArgs[i]);
        outputPaths.add(otherArgs[i + 1]);
    }

    UserGroupInformation.setConfiguration(conf);

    TezConfiguration tezConf = new TezConfiguration(conf);
    TestOrderedWordCount instance = new TestOrderedWordCount();

    FileSystem fs = FileSystem.get(conf);

    String stagingDirStr = conf.get(TezConfiguration.TEZ_AM_STAGING_DIR,
            TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT) + Path.SEPARATOR
            + Long.toString(System.currentTimeMillis());
    Path stagingDir = new Path(stagingDirStr);
    FileSystem pathFs = stagingDir.getFileSystem(tezConf);
    pathFs.mkdirs(new Path(stagingDirStr));

    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirStr);
    stagingDir = pathFs.makeQualified(new Path(stagingDirStr));

    TokenCache.obtainTokensForNamenodes(instance.credentials, new Path[] { stagingDir }, conf);
    TezClientUtils.ensureStagingDirExists(tezConf, stagingDir);

    // No need to add jar containing this class as assumed to be part of
    // the tez jars.

    // TEZ-674 Obtain tokens based on the Input / Output paths. For now assuming staging dir
    // is the same filesystem as the one used for Input/Output.

    if (useTezSession) {
        LOG.info("Creating Tez Session");
        tezConf.setBoolean(TezConfiguration.TEZ_AM_SESSION_MODE, true);
    } else {
        tezConf.setBoolean(TezConfiguration.TEZ_AM_SESSION_MODE, false);
    }
    TezClient tezSession = TezClient.create("OrderedWordCountSession", tezConf, null, instance.credentials);
    tezSession.start();

    DAGStatus dagStatus = null;
    DAGClient dagClient = null;
    String[] vNames = { "initialmap", "intermediate_reducer", "finalreduce" };

    Set<StatusGetOpts> statusGetOpts = EnumSet.of(StatusGetOpts.GET_COUNTERS);
    try {
        for (int dagIndex = 1; dagIndex <= inputPaths.size(); ++dagIndex) {
            if (dagIndex != 1 && interJobSleepTimeout > 0) {
                try {
                    LOG.info("Sleeping between jobs, sleepInterval=" + (interJobSleepTimeout / 1000));
                    Thread.sleep(interJobSleepTimeout);
                } catch (InterruptedException e) {
                    LOG.info("Main thread interrupted. Breaking out of job loop");
                    break;
                }
            }

            String inputPath = inputPaths.get(dagIndex - 1);
            String outputPath = outputPaths.get(dagIndex - 1);

            if (fs.exists(new Path(outputPath))) {
                throw new FileAlreadyExistsException("Output directory " + outputPath + " already exists");
            }
            LOG.info("Running OrderedWordCount DAG" + ", dagIndex=" + dagIndex + ", inputPath=" + inputPath
                    + ", outputPath=" + outputPath);

            Map<String, LocalResource> localResources = new TreeMap<String, LocalResource>();

            DAG dag = instance.createDAG(fs, conf, localResources, stagingDir, dagIndex, inputPath, outputPath,
                    generateSplitsInClient, useMRSettings, intermediateNumReduceTasks);

            boolean doPreWarm = dagIndex == 1 && useTezSession && conf.getBoolean("PRE_WARM_SESSION", true);
            int preWarmNumContainers = 0;
            if (doPreWarm) {
                preWarmNumContainers = conf.getInt("PRE_WARM_NUM_CONTAINERS", 0);
                if (preWarmNumContainers <= 0) {
                    doPreWarm = false;
                }
            }
            if (doPreWarm) {
                LOG.info("Pre-warming Session");
                PreWarmVertex preWarmVertex = PreWarmVertex.create("PreWarm", preWarmNumContainers,
                        dag.getVertex("initialmap").getTaskResource());
                preWarmVertex.addTaskLocalFiles(dag.getVertex("initialmap").getTaskLocalFiles());
                preWarmVertex.setTaskEnvironment(dag.getVertex("initialmap").getTaskEnvironment());
                preWarmVertex.setTaskLaunchCmdOpts(dag.getVertex("initialmap").getTaskLaunchCmdOpts());

                tezSession.preWarm(preWarmVertex);
            }

            if (useTezSession) {
                LOG.info("Waiting for TezSession to get into ready state");
                waitForTezSessionReady(tezSession);
                LOG.info("Submitting DAG to Tez Session, dagIndex=" + dagIndex);
                dagClient = tezSession.submitDAG(dag);
                LOG.info("Submitted DAG to Tez Session, dagIndex=" + dagIndex);
            } else {
                LOG.info("Submitting DAG as a new Tez Application");
                dagClient = tezSession.submitDAG(dag);
            }

            while (true) {
                dagStatus = dagClient.getDAGStatus(statusGetOpts);
                if (dagStatus.getState() == DAGStatus.State.RUNNING
                        || dagStatus.getState() == DAGStatus.State.SUCCEEDED
                        || dagStatus.getState() == DAGStatus.State.FAILED
                        || dagStatus.getState() == DAGStatus.State.KILLED
                        || dagStatus.getState() == DAGStatus.State.ERROR) {
                    break;
                }
                try {
                    Thread.sleep(500);
                } catch (InterruptedException e) {
                    // continue;
                }
            }

            while (dagStatus.getState() != DAGStatus.State.SUCCEEDED
                    && dagStatus.getState() != DAGStatus.State.FAILED
                    && dagStatus.getState() != DAGStatus.State.KILLED
                    && dagStatus.getState() != DAGStatus.State.ERROR) {
                if (dagStatus.getState() == DAGStatus.State.RUNNING) {
                    ExampleDriver.printDAGStatus(dagClient, vNames);
                }
                try {
                    try {
                        Thread.sleep(1000);
                    } catch (InterruptedException e) {
                        // continue;
                    }
                    dagStatus = dagClient.getDAGStatus(statusGetOpts);
                } catch (TezException e) {
                    LOG.fatal("Failed to get application progress. Exiting");
                    return -1;
                }
            }
            ExampleDriver.printDAGStatus(dagClient, vNames, true, true);
            LOG.info("DAG " + dagIndex + " completed. " + "FinalState=" + dagStatus.getState());
            if (dagStatus.getState() != DAGStatus.State.SUCCEEDED) {
                LOG.info("DAG " + dagIndex + " diagnostics: " + dagStatus.getDiagnostics());
            }
        }
    } catch (Exception e) {
        LOG.error("Error occurred when submitting/running DAGs", e);
        throw e;
    } finally {
        if (!retainStagingDir) {
            pathFs.delete(stagingDir, true);
        }
        LOG.info("Shutting down session");
        tezSession.stop();
    }

    if (!useTezSession) {
        ExampleDriver.printDAGStatus(dagClient, vNames);
        LOG.info("Application completed. " + "FinalState=" + dagStatus.getState());
    }
    return dagStatus.getState() == DAGStatus.State.SUCCEEDED ? 0 : 1;
}

From source file:org.apache.tez.mapreduce.examples.UnionExample.java

License:Apache License

public boolean run(String inputPath, String outputPath, Configuration conf) throws Exception {
    System.out.println("Running UnionExample");
    // conf and UGI
    TezConfiguration tezConf;/*from   w  w w . ja  v a  2s .  c  o m*/
    if (conf != null) {
        tezConf = new TezConfiguration(conf);
    } else {
        tezConf = new TezConfiguration();
    }
    UserGroupInformation.setConfiguration(tezConf);
    String user = UserGroupInformation.getCurrentUser().getShortUserName();

    // staging dir
    FileSystem fs = FileSystem.get(tezConf);
    String stagingDirStr = Path.SEPARATOR + "user" + Path.SEPARATOR + user + Path.SEPARATOR + ".staging"
            + Path.SEPARATOR + Path.SEPARATOR + Long.toString(System.currentTimeMillis());
    Path stagingDir = new Path(stagingDirStr);
    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirStr);
    stagingDir = fs.makeQualified(stagingDir);

    // No need to add jar containing this class as assumed to be part of
    // the tez jars.

    // TEZ-674 Obtain tokens based on the Input / Output paths. For now assuming staging dir
    // is the same filesystem as the one used for Input/Output.

    TezClient tezSession = TezClient.create("UnionExampleSession", tezConf);
    tezSession.start();

    DAGClient dagClient = null;

    try {
        if (fs.exists(new Path(outputPath))) {
            throw new FileAlreadyExistsException("Output directory " + outputPath + " already exists");
        }

        Map<String, LocalResource> localResources = new TreeMap<String, LocalResource>();

        DAG dag = createDAG(fs, tezConf, localResources, stagingDir, inputPath, outputPath);

        tezSession.waitTillReady();
        dagClient = tezSession.submitDAG(dag);

        // monitoring
        DAGStatus dagStatus = dagClient
                .waitForCompletionWithStatusUpdates(EnumSet.of(StatusGetOpts.GET_COUNTERS));
        if (dagStatus.getState() != DAGStatus.State.SUCCEEDED) {
            System.out.println("DAG diagnostics: " + dagStatus.getDiagnostics());
            return false;
        }
        return true;
    } finally {
        fs.delete(stagingDir, true);
        tezSession.stop();
    }
}

From source file:org.archive.nutchwax.IndexMerger.java

License:Apache License

/**
 * Merge all input indexes to the single output index
 *///from  w ww .j a  va 2 s .c  om
public void merge(IndexReader[] readers, Path outputIndex, Path localWorkingDir, boolean parallel)
        throws IOException {
    LOG.info("merging indexes to: " + outputIndex);

    FileSystem localFs = FileSystem.getLocal(getConf());
    if (localFs.exists(localWorkingDir)) {
        localFs.delete(localWorkingDir, true);
    }
    localFs.mkdirs(localWorkingDir);

    // Get local output target
    //
    FileSystem fs = FileSystem.get(getConf());
    if (fs.exists(outputIndex)) {
        throw new FileAlreadyExistsException("Output directory " + outputIndex + " already exists!");
    }

    Path tmpLocalOutput = new Path(localWorkingDir, "merge-output");
    Path localOutput = fs.startLocalOutput(outputIndex, tmpLocalOutput);

    //
    // Merge indices
    //
    IndexWriter writer = new IndexWriter(localOutput.toString(), null, true);
    writer.setMergeFactor(getConf().getInt("indexer.mergeFactor", IndexWriter.DEFAULT_MERGE_FACTOR));
    writer.setMaxBufferedDocs(getConf().getInt("indexer.minMergeDocs", IndexWriter.DEFAULT_MAX_BUFFERED_DOCS));
    writer.setMaxMergeDocs(getConf().getInt("indexer.maxMergeDocs", IndexWriter.DEFAULT_MAX_MERGE_DOCS));
    writer.setTermIndexInterval(
            getConf().getInt("indexer.termIndexInterval", IndexWriter.DEFAULT_TERM_INDEX_INTERVAL));
    writer.setInfoStream(LogUtil.getDebugStream(LOG));
    writer.setUseCompoundFile(false);
    writer.setSimilarity(new NutchSimilarity());
    writer.addIndexes(readers);
    writer.close();

    //
    // Put target back
    //
    fs.completeLocalOutput(outputIndex, tmpLocalOutput);
    LOG.info("done merging");
}