List of usage examples for the org.apache.hadoop.mapred.FileAlreadyExistsException constructor FileAlreadyExistsException(String msg)
public FileAlreadyExistsException(String msg)
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputCommitter.java
License:Apache License
@Override public void commitJob(JobContext context) throws IOException { Configuration configuration = context.getConfiguration(); MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(configuration); BasicMapReduceTaskContext taskContext = classLoader.getTaskContextProvider().get(this.taskContext); String outputDatasetName = configuration.get(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET); PartitionedFileSet outputDataset = taskContext.getDataset(outputDatasetName); Partitioning partitioning = outputDataset.getPartitioning(); Set<PartitionKey> partitionsToAdd = new HashSet<>(); Set<String> relativePaths = new HashSet<>(); // Go over all files in the temporary directory and keep track of partitions to add for them FileStatus[] allCommittedTaskPaths = getAllCommittedTaskPaths(context); for (FileStatus committedTaskPath : allCommittedTaskPaths) { FileSystem fs = committedTaskPath.getPath().getFileSystem(configuration); RemoteIterator<LocatedFileStatus> fileIter = fs.listFiles(committedTaskPath.getPath(), true); while (fileIter.hasNext()) { Path path = fileIter.next().getPath(); String relativePath = getRelative(committedTaskPath.getPath(), path); int lastPathSepIdx = relativePath.lastIndexOf(Path.SEPARATOR); if (lastPathSepIdx == -1) { // this shouldn't happen because each relative path should consist of at least one partition key and // the output file name LOG.warn("Skipping path '{}'. 
It's relative path '{}' has fewer than two parts", path, relativePath);//from w w w.j a v a2 s .c o m continue; } // relativePath = "../key1/key2/part-m-00000" // relativeDir = "../key1/key2" // fileName = "part-m-00000" String relativeDir = relativePath.substring(0, lastPathSepIdx); String fileName = relativePath.substring(lastPathSepIdx + 1); Path finalDir = new Path(FileOutputFormat.getOutputPath(context), relativeDir); Path finalPath = new Path(finalDir, fileName); if (fs.exists(finalPath)) { throw new FileAlreadyExistsException("Final output path " + finalPath + " already exists"); } PartitionKey partitionKey = getPartitionKey(partitioning, relativeDir); partitionsToAdd.add(partitionKey); relativePaths.add(relativeDir); } } // We need to copy to the parent of the FileOutputFormat's outputDir, since we added a _temporary_jobId suffix to // the original outputDir. Path finalOutput = FileOutputFormat.getOutputPath(context); FileSystem fs = finalOutput.getFileSystem(configuration); for (FileStatus stat : getAllCommittedTaskPaths(context)) { mergePaths(fs, stat, finalOutput); } // compute the metadata to be written to every output partition Map<String, String> metadata = ConfigurationUtil.getNamedConfigurations(this.taskContext.getConfiguration(), PartitionedFileSetArguments.OUTPUT_PARTITION_METADATA_PREFIX); // create all the necessary partitions for (PartitionKey partitionKey : partitionsToAdd) { PartitionOutput partitionOutput = outputDataset.getPartitionOutput(partitionKey); partitionOutput.setMetadata(metadata); partitionOutput.addPartition(); } // close the TaskContext, which flushes dataset operations try { taskContext.flushOperations(); } catch (Exception e) { Throwables.propagateIfPossible(e, IOException.class); throw new IOException(e); } // delete the job-specific _temporary folder and create a _done file in the o/p folder cleanupJob(context); // mark all the final output paths with a _SUCCESS file, if configured to do so (default = true) if 
(configuration.getBoolean(SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, true)) { for (String relativePath : relativePaths) { Path pathToMark = new Path(finalOutput, relativePath); Path markerPath = new Path(pathToMark, SUCCEEDED_FILE_NAME); fs.createNewFile(markerPath); } } }
From source file:com.david.mos.out.FileOutputFormat.java
License:Apache License
public void checkOutputSpecs(JobContext job) throws FileAlreadyExistsException, IOException { // Ensure that the output directory is set and not already there Path outDir = getOutputPath(job); if (outDir == null) { throw new InvalidJobConfException("Output directory not set."); }/*from ww w . j a v a2 s . c o m*/ // get delegation token for outDir's file system TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { outDir }, job.getConfiguration()); if (outDir.getFileSystem(job.getConfiguration()).exists(outDir)) { throw new FileAlreadyExistsException("Output directory " + outDir + " already exists"); } }
From source file:com.rapleaf.ramhdfs.RamFileSystem.java
License:Apache License
/**
 * Creates the directory {@code f}, creating its ancestors first through recursive calls.
 *
 * @param f the directory to create; must not be {@code null}
 * @return {@code true} if {@code f} is a directory when this method returns, {@code false} if an ancestor
 *         or {@code f} itself could not be created
 * @throws IllegalArgumentException if {@code f} is {@code null}
 * @throws FileAlreadyExistsException if the parent of {@code f} exists but is not a directory
 * @throws IOException if resolving or creating a path fails
 */
@Override
public boolean mkdirs(Path f) throws IOException {
  if (f == null) {
    throw new IllegalArgumentException("mkdirs path arg is null");
  }

  final Path parentPath = f.getParent();
  final FileObject target = pathToFileObject(f);

  // nothing to do when the directory is already there
  if (isDirectory(target)) {
    return true;
  }

  if (parentPath != null) {
    final FileObject parentObject = pathToFileObject(parentPath);
    final boolean parentIsPlainFile =
        parentObject != null && parentObject.exists() && !isDirectory(parentObject);
    if (parentIsPlainFile) {
      throw new FileAlreadyExistsException("Parent path is not a directory: " + parentPath);
    }
    // ancestors first; give up when they cannot be created
    if (!mkdirs(parentPath)) {
      return false;
    }
  }

  // a failed create still counts as success when the directory exists afterwards
  return createDirectory(target) || isDirectory(target);
}
From source file:info.halo9pan.word2vec.hadoop.mr.SortOutputFormat.java
License:Apache License
@Override public void checkOutputSpecs(JobContext job) throws InvalidJobConfException, IOException { // Ensure that the output directory is set Path outDir = getOutputPath(job); if (outDir == null) { throw new InvalidJobConfException("Output directory not set in JobConf."); }//from www . j a va 2 s .c om final Configuration jobConf = job.getConfiguration(); // get delegation token for outDir's file system TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { outDir }, jobConf); final FileSystem fs = outDir.getFileSystem(jobConf); if (fs.exists(outDir)) { // existing output dir is considered empty iff its only content is the // partition file. // final FileStatus[] outDirKids = fs.listStatus(outDir); boolean empty = false; if (outDirKids != null && outDirKids.length == 1) { final FileStatus st = outDirKids[0]; final String fname = st.getPath().getName(); empty = !st.isDirectory() && SortInputFormat.PARTITION_FILENAME.equals(fname); } if (WordSort.getUseSimplePartitioner(job) || !empty) { throw new FileAlreadyExistsException("Output directory " + outDir + " already exists"); } } }
From source file:info.halo9pan.word2vec.hadoop.terasort.TeraOutputFormat.java
License:Apache License
@Override public void checkOutputSpecs(JobContext job) throws InvalidJobConfException, IOException { // Ensure that the output directory is set Path outDir = getOutputPath(job); if (outDir == null) { throw new InvalidJobConfException("Output directory not set in JobConf."); }//from w ww .j av a 2s. com final Configuration jobConf = job.getConfiguration(); // get delegation token for outDir's file system TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { outDir }, jobConf); final FileSystem fs = outDir.getFileSystem(jobConf); if (fs.exists(outDir)) { // existing output dir is considered empty iff its only content is the // partition file. // final FileStatus[] outDirKids = fs.listStatus(outDir); boolean empty = false; if (outDirKids != null && outDirKids.length == 1) { final FileStatus st = outDirKids[0]; final String fname = st.getPath().getName(); empty = !st.isDirectory() && TeraInputFormat.PARTITION_FILENAME.equals(fname); } if (TeraSort.getUseSimplePartitioner(job) || !empty) { throw new FileAlreadyExistsException("Output directory " + outDir + " already exists"); } } }
From source file:org.apache.hadoop.examples.terasort.TeraOutputFormat.java
License:Apache License
@Override public void checkOutputSpecs(JobContext job) throws InvalidJobConfException, IOException { // Ensure that the output directory is set Path outDir = getOutputPath(job); if (outDir == null) { throw new InvalidJobConfException("Output directory not set in JobConf."); }//from w w w . j av a2s. co m final Configuration jobConf = job.getConfiguration(); // get delegation token for outDir's file system TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { outDir }, jobConf); final FileSystem fs = outDir.getFileSystem(jobConf); try { // existing output dir is considered empty iff its only content is the // partition file. // final FileStatus[] outDirKids = fs.listStatus(outDir); boolean empty = false; if (outDirKids != null && outDirKids.length == 1) { final FileStatus st = outDirKids[0]; final String fname = st.getPath().getName(); empty = !st.isDirectory() && TeraInputFormat.PARTITION_FILENAME.equals(fname); } if (TeraSort.getUseSimplePartitioner(job) || !empty) { throw new FileAlreadyExistsException("Output directory " + outDir + " already exists"); } } catch (FileNotFoundException ignored) { } }
From source file:org.apache.hama.bsp.FileOutputFormat.java
License:Apache License
@Override public void checkOutputSpecs(FileSystem ignored, BSPJob job) throws FileAlreadyExistsException, InvalidJobConfException, IOException { // Ensure that the output directory is set and not already there Path outDir = getOutputPath(job); if (outDir == null && job.getNumBspTask() != 0) { throw new InvalidJobConfException("Output directory not set in JobConf."); }//from w w w . j ava 2 s .c o m if (outDir != null) { FileSystem fs = outDir.getFileSystem(job.getConfiguration()); // normalize the output directory outDir = fs.makeQualified(outDir); setOutputPath(job, outDir); // check its existence if (fs.exists(outDir)) { throw new FileAlreadyExistsException("Output directory " + outDir + " already exists"); } } }
From source file:org.apache.tez.mapreduce.examples.TestOrderedWordCount.java
License:Apache License
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); boolean generateSplitsInClient; SplitsInClientOptionParser splitCmdLineParser = new SplitsInClientOptionParser(); try {/*from w ww .j a v a 2 s . com*/ generateSplitsInClient = splitCmdLineParser.parse(otherArgs, false); otherArgs = splitCmdLineParser.getRemainingArgs(); } catch (ParseException e1) { System.err.println("Invalid options"); printUsage(); return 2; } boolean useTezSession = conf.getBoolean("USE_TEZ_SESSION", true); long interJobSleepTimeout = conf.getInt("INTER_JOB_SLEEP_INTERVAL", 0) * 1000; boolean retainStagingDir = conf.getBoolean("RETAIN_STAGING_DIR", false); boolean useMRSettings = conf.getBoolean("USE_MR_CONFIGS", true); // TODO needs to use auto reduce parallelism int intermediateNumReduceTasks = conf.getInt("IREDUCE_NUM_TASKS", 2); if (((otherArgs.length % 2) != 0) || (!useTezSession && otherArgs.length != 2)) { printUsage(); return 2; } List<String> inputPaths = new ArrayList<String>(); List<String> outputPaths = new ArrayList<String>(); for (int i = 0; i < otherArgs.length; i += 2) { inputPaths.add(otherArgs[i]); outputPaths.add(otherArgs[i + 1]); } UserGroupInformation.setConfiguration(conf); TezConfiguration tezConf = new TezConfiguration(conf); TestOrderedWordCount instance = new TestOrderedWordCount(); FileSystem fs = FileSystem.get(conf); String stagingDirStr = conf.get(TezConfiguration.TEZ_AM_STAGING_DIR, TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT) + Path.SEPARATOR + Long.toString(System.currentTimeMillis()); Path stagingDir = new Path(stagingDirStr); FileSystem pathFs = stagingDir.getFileSystem(tezConf); pathFs.mkdirs(new Path(stagingDirStr)); tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirStr); stagingDir = pathFs.makeQualified(new Path(stagingDirStr)); TokenCache.obtainTokensForNamenodes(instance.credentials, new Path[] { stagingDir }, conf); 
TezClientUtils.ensureStagingDirExists(tezConf, stagingDir); // No need to add jar containing this class as assumed to be part of // the tez jars. // TEZ-674 Obtain tokens based on the Input / Output paths. For now assuming staging dir // is the same filesystem as the one used for Input/Output. if (useTezSession) { LOG.info("Creating Tez Session"); tezConf.setBoolean(TezConfiguration.TEZ_AM_SESSION_MODE, true); } else { tezConf.setBoolean(TezConfiguration.TEZ_AM_SESSION_MODE, false); } TezClient tezSession = TezClient.create("OrderedWordCountSession", tezConf, null, instance.credentials); tezSession.start(); DAGStatus dagStatus = null; DAGClient dagClient = null; String[] vNames = { "initialmap", "intermediate_reducer", "finalreduce" }; Set<StatusGetOpts> statusGetOpts = EnumSet.of(StatusGetOpts.GET_COUNTERS); try { for (int dagIndex = 1; dagIndex <= inputPaths.size(); ++dagIndex) { if (dagIndex != 1 && interJobSleepTimeout > 0) { try { LOG.info("Sleeping between jobs, sleepInterval=" + (interJobSleepTimeout / 1000)); Thread.sleep(interJobSleepTimeout); } catch (InterruptedException e) { LOG.info("Main thread interrupted. 
Breaking out of job loop"); break; } } String inputPath = inputPaths.get(dagIndex - 1); String outputPath = outputPaths.get(dagIndex - 1); if (fs.exists(new Path(outputPath))) { throw new FileAlreadyExistsException("Output directory " + outputPath + " already exists"); } LOG.info("Running OrderedWordCount DAG" + ", dagIndex=" + dagIndex + ", inputPath=" + inputPath + ", outputPath=" + outputPath); Map<String, LocalResource> localResources = new TreeMap<String, LocalResource>(); DAG dag = instance.createDAG(fs, conf, localResources, stagingDir, dagIndex, inputPath, outputPath, generateSplitsInClient, useMRSettings, intermediateNumReduceTasks); boolean doPreWarm = dagIndex == 1 && useTezSession && conf.getBoolean("PRE_WARM_SESSION", true); int preWarmNumContainers = 0; if (doPreWarm) { preWarmNumContainers = conf.getInt("PRE_WARM_NUM_CONTAINERS", 0); if (preWarmNumContainers <= 0) { doPreWarm = false; } } if (doPreWarm) { LOG.info("Pre-warming Session"); PreWarmVertex preWarmVertex = PreWarmVertex.create("PreWarm", preWarmNumContainers, dag.getVertex("initialmap").getTaskResource()); preWarmVertex.addTaskLocalFiles(dag.getVertex("initialmap").getTaskLocalFiles()); preWarmVertex.setTaskEnvironment(dag.getVertex("initialmap").getTaskEnvironment()); preWarmVertex.setTaskLaunchCmdOpts(dag.getVertex("initialmap").getTaskLaunchCmdOpts()); tezSession.preWarm(preWarmVertex); } if (useTezSession) { LOG.info("Waiting for TezSession to get into ready state"); waitForTezSessionReady(tezSession); LOG.info("Submitting DAG to Tez Session, dagIndex=" + dagIndex); dagClient = tezSession.submitDAG(dag); LOG.info("Submitted DAG to Tez Session, dagIndex=" + dagIndex); } else { LOG.info("Submitting DAG as a new Tez Application"); dagClient = tezSession.submitDAG(dag); } while (true) { dagStatus = dagClient.getDAGStatus(statusGetOpts); if (dagStatus.getState() == DAGStatus.State.RUNNING || dagStatus.getState() == DAGStatus.State.SUCCEEDED || dagStatus.getState() == DAGStatus.State.FAILED 
|| dagStatus.getState() == DAGStatus.State.KILLED || dagStatus.getState() == DAGStatus.State.ERROR) { break; } try { Thread.sleep(500); } catch (InterruptedException e) { // continue; } } while (dagStatus.getState() != DAGStatus.State.SUCCEEDED && dagStatus.getState() != DAGStatus.State.FAILED && dagStatus.getState() != DAGStatus.State.KILLED && dagStatus.getState() != DAGStatus.State.ERROR) { if (dagStatus.getState() == DAGStatus.State.RUNNING) { ExampleDriver.printDAGStatus(dagClient, vNames); } try { try { Thread.sleep(1000); } catch (InterruptedException e) { // continue; } dagStatus = dagClient.getDAGStatus(statusGetOpts); } catch (TezException e) { LOG.fatal("Failed to get application progress. Exiting"); return -1; } } ExampleDriver.printDAGStatus(dagClient, vNames, true, true); LOG.info("DAG " + dagIndex + " completed. " + "FinalState=" + dagStatus.getState()); if (dagStatus.getState() != DAGStatus.State.SUCCEEDED) { LOG.info("DAG " + dagIndex + " diagnostics: " + dagStatus.getDiagnostics()); } } } catch (Exception e) { LOG.error("Error occurred when submitting/running DAGs", e); throw e; } finally { if (!retainStagingDir) { pathFs.delete(stagingDir, true); } LOG.info("Shutting down session"); tezSession.stop(); } if (!useTezSession) { ExampleDriver.printDAGStatus(dagClient, vNames); LOG.info("Application completed. " + "FinalState=" + dagStatus.getState()); } return dagStatus.getState() == DAGStatus.State.SUCCEEDED ? 0 : 1; }
From source file:org.apache.tez.mapreduce.examples.UnionExample.java
License:Apache License
public boolean run(String inputPath, String outputPath, Configuration conf) throws Exception { System.out.println("Running UnionExample"); // conf and UGI TezConfiguration tezConf;/*from w w w . ja v a 2s . c o m*/ if (conf != null) { tezConf = new TezConfiguration(conf); } else { tezConf = new TezConfiguration(); } UserGroupInformation.setConfiguration(tezConf); String user = UserGroupInformation.getCurrentUser().getShortUserName(); // staging dir FileSystem fs = FileSystem.get(tezConf); String stagingDirStr = Path.SEPARATOR + "user" + Path.SEPARATOR + user + Path.SEPARATOR + ".staging" + Path.SEPARATOR + Path.SEPARATOR + Long.toString(System.currentTimeMillis()); Path stagingDir = new Path(stagingDirStr); tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDirStr); stagingDir = fs.makeQualified(stagingDir); // No need to add jar containing this class as assumed to be part of // the tez jars. // TEZ-674 Obtain tokens based on the Input / Output paths. For now assuming staging dir // is the same filesystem as the one used for Input/Output. TezClient tezSession = TezClient.create("UnionExampleSession", tezConf); tezSession.start(); DAGClient dagClient = null; try { if (fs.exists(new Path(outputPath))) { throw new FileAlreadyExistsException("Output directory " + outputPath + " already exists"); } Map<String, LocalResource> localResources = new TreeMap<String, LocalResource>(); DAG dag = createDAG(fs, tezConf, localResources, stagingDir, inputPath, outputPath); tezSession.waitTillReady(); dagClient = tezSession.submitDAG(dag); // monitoring DAGStatus dagStatus = dagClient .waitForCompletionWithStatusUpdates(EnumSet.of(StatusGetOpts.GET_COUNTERS)); if (dagStatus.getState() != DAGStatus.State.SUCCEEDED) { System.out.println("DAG diagnostics: " + dagStatus.getDiagnostics()); return false; } return true; } finally { fs.delete(stagingDir, true); tezSession.stop(); } }
From source file:org.archive.nutchwax.IndexMerger.java
License:Apache License
/** * Merge all input indexes to the single output index *///from w ww .j a va 2 s .c om public void merge(IndexReader[] readers, Path outputIndex, Path localWorkingDir, boolean parallel) throws IOException { LOG.info("merging indexes to: " + outputIndex); FileSystem localFs = FileSystem.getLocal(getConf()); if (localFs.exists(localWorkingDir)) { localFs.delete(localWorkingDir, true); } localFs.mkdirs(localWorkingDir); // Get local output target // FileSystem fs = FileSystem.get(getConf()); if (fs.exists(outputIndex)) { throw new FileAlreadyExistsException("Output directory " + outputIndex + " already exists!"); } Path tmpLocalOutput = new Path(localWorkingDir, "merge-output"); Path localOutput = fs.startLocalOutput(outputIndex, tmpLocalOutput); // // Merge indices // IndexWriter writer = new IndexWriter(localOutput.toString(), null, true); writer.setMergeFactor(getConf().getInt("indexer.mergeFactor", IndexWriter.DEFAULT_MERGE_FACTOR)); writer.setMaxBufferedDocs(getConf().getInt("indexer.minMergeDocs", IndexWriter.DEFAULT_MAX_BUFFERED_DOCS)); writer.setMaxMergeDocs(getConf().getInt("indexer.maxMergeDocs", IndexWriter.DEFAULT_MAX_MERGE_DOCS)); writer.setTermIndexInterval( getConf().getInt("indexer.termIndexInterval", IndexWriter.DEFAULT_TERM_INDEX_INTERVAL)); writer.setInfoStream(LogUtil.getDebugStream(LOG)); writer.setUseCompoundFile(false); writer.setSimilarity(new NutchSimilarity()); writer.addIndexes(readers); writer.close(); // // Put target back // fs.completeLocalOutput(outputIndex, tmpLocalOutput); LOG.info("done merging"); }