Example usages of org.apache.hadoop.fs.FileSystem.mkdirs(Path)

Introduction

This page collects example usages of org.apache.hadoop.fs.FileSystem.mkdirs(Path) taken from open-source projects.

Prototype

public boolean mkdirs(Path f) throws IOException

Source Link

Document

Calls #mkdirs(Path, FsPermission) with the default permission.

Usage

From source file:com.linkedin.whiteelephant.mapreduce.lib.job.StagedOutputJob.java

License:Apache License

/**
 * Runs the job against a temporary "staged" output directory and, if the job succeeds,
 * replaces the originally configured output path with the staged results.
 *
 * <p>A JVM shutdown hook that calls {@code killJob()} is registered for the duration of the
 * wait. The hook is removed in a {@code finally} block so that it does not stay registered
 * when the underlying {@code waitForCompletion} call throws.
 *
 * @param verbose passed through unchanged to {@code super.waitForCompletion}
 * @return the result of renaming the staged directory onto the output path when the job
 *         succeeded; {@code false} when the job itself reported failure
 * @throws IOException            propagated from the job or from the file system calls
 * @throws InterruptedException   propagated from {@code super.waitForCompletion}
 * @throws ClassNotFoundException propagated from {@code super.waitForCompletion}
 */
@Override
public boolean waitForCompletion(boolean verbose)
        throws IOException, InterruptedException, ClassNotFoundException {
    final Path actualOutputPath = FileOutputFormat.getOutputPath(this);
    final Path stagedPath = new Path(String.format("%s/%s/staged", stagingPrefix, System.currentTimeMillis()));

    // Redirect the job's output to the staging location for the duration of the run.
    FileOutputFormat.setOutputPath(this, stagedPath);

    final Thread hook = new Thread(new Runnable() {
        @Override
        public void run() {
            try {
                killJob();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    });

    Runtime.getRuntime().addShutdownHook(hook);

    final boolean retVal;
    try {
        retVal = super.waitForCompletion(verbose);
    } finally {
        // Always unregister the hook, including when the wait throws.
        Runtime.getRuntime().removeShutdownHook(hook);
    }

    if (retVal) {
        FileSystem fs = actualOutputPath.getFileSystem(getConfiguration());

        // mkdirs followed by a recursive delete of the same path: the leaf is removed again,
        // while any parent directories that mkdirs had to create remain for the rename below.
        fs.mkdirs(actualOutputPath);

        log.info(String.format("Deleting data at old path[%s]", actualOutputPath));
        fs.delete(actualOutputPath, true);

        log.info(String.format("Moving from staged path[%s] to final resting place[%s]", stagedPath,
                actualOutputPath));
        return fs.rename(stagedPath, actualOutputPath);
    }

    log.warn("retVal was false for some reason...");
    return retVal;
}

From source file:com.liveramp.cascading_ext.FileSystemHelper.java

License:Apache License

/**
 * Creates a directory, retrying up to <code>numTries</code> times and sleeping
 * <code>delayBetweenTries</code> milliseconds between consecutive attempts. No sleep is
 * performed after the final failed attempt.
 *
 * @param fs                the filesystem object
 * @param dir               the directory to be created
 * @param numTries          maximum number of attempts; if it is not positive no attempt is made
 *                          and the method fails immediately
 * @param delayBetweenTries the sleep delta between tries in millis
 * @throws IOException      if the directory could not be created within <code>numTries</code>
 *                          attempts, or if <code>mkdirs</code> itself throws
 * @throws RuntimeException wrapping the {@link InterruptedException} if the thread is interrupted
 *                          while sleeping; the thread's interrupt flag is restored first
 */
public static void safeMkdirs(FileSystem fs, Path dir, int numTries, long delayBetweenTries)
        throws IOException {
    for (int remaining = numTries; remaining > 0; remaining--) {
        if (fs.mkdirs(dir)) {
            return;
        }
        if (remaining == 1) {
            // Last attempt failed: sleeping would only delay the failure report.
            break;
        }
        try {
            Thread.sleep(delayBetweenTries);
        } catch (InterruptedException ie) {
            // Restore the interrupt status so callers further up can observe it.
            Thread.currentThread().interrupt();
            throw new RuntimeException(ie);
        }
    }

    throw new IOException("Could not mkdirs the directory \"" + dir + "\"!");
}

From source file:com.liveramp.hank.hadoop.DomainBuilderAbstractOutputFormat.java

License:Apache License

/**
 * Moves every direct child of {@code srcDir} into {@code dstDir} (keeping each child's name)
 * and then deletes {@code srcDir}. Does nothing if {@code srcDir} does not exist.
 *
 * @param srcDir directory whose contents are moved; must be a directory if it exists
 * @param dstDir destination directory; must be a directory if it already exists
 * @param fs     file system on which both paths are resolved
 * @param logger logger used for debug-level progress messages
 * @throws IllegalArgumentException if {@code srcDir} or {@code dstDir} exists but is not a directory
 * @throws IOException              if creating the destination directory or renaming a child
 *                                  fails, or if a file system call throws
 */
public static void moveContentsAndDelete(Path srcDir, Path dstDir, FileSystem fs, Logger logger)
        throws IOException {
    if (!fs.exists(srcDir)) {
        return;
    }
    // srcDir is known to exist at this point, so only the directory check is needed.
    if (!fs.isDirectory(srcDir)) {
        throw new IllegalArgumentException(srcDir + " is not a directory");
    }
    if (fs.exists(dstDir) && !fs.isDirectory(dstDir)) {
        throw new IllegalArgumentException(dstDir + " is not a directory");
    }
    if (logger.isDebugEnabled()) {
        logger.debug("Moving contents of: " + srcDir + " to: " + dstDir);
    }
    FileStatus[] files = fs.listStatus(srcDir);
    for (FileStatus file : files) {
        Path sourcePath = file.getPath();
        Path targetPath = new Path(dstDir, file.getPath().getName());
        if (logger.isDebugEnabled()) {
            logger.debug("Moving: " + sourcePath + " to: " + targetPath);
        }
        // Kept inside the loop so the destination is only created when there is something to move.
        if (!fs.mkdirs(targetPath.getParent())) {
            throw new IOException("Failed at creating directory " + targetPath.getParent());
        }
        if (!fs.rename(sourcePath, targetPath)) {
            throw new IOException("Failed at renaming " + sourcePath + " to " + targetPath);
        }
    }
    // Explicit recursive delete; the single-argument delete(Path) overload is deprecated.
    fs.delete(srcDir, true);
}

From source file:com.liveramp.hank.hadoop.DomainBuilderOutputCommitter.java

License:Apache License

/**
 * Commits a domain-builder job: creates the final output path, moves the temporary output
 * (complete partitions first, then any partitions found under the empty-partitions directory)
 * into it using a fixed-size thread pool, and finally calls {@code cleanupJob}.
 *
 * @param domainName name of the domain whose output and temporary paths are looked up
 * @param conf       job configuration used to resolve the paths and the file system
 * @throws IOException if a file system call fails, if any move task recorded an exception,
 *                     or if the calling thread is interrupted while waiting for the pool
 *                     (the interrupt flag is restored before throwing)
 */
public static void commitJob(String domainName, JobConf conf) throws IOException {
    Path outputPath = new Path(DomainBuilderProperties.getOutputPath(domainName, conf));
    Path tmpOutputPath = new Path(DomainBuilderProperties.getTmpOutputPath(domainName, conf));
    FileSystem fs = outputPath.getFileSystem(conf);

    // Create outputPath
    fs.mkdirs(outputPath);

    // Move temporary output to final output
    LOG.info("Moving temporary output files from: " + tmpOutputPath + " to final output path: " + outputPath);

    /* Current multithreading handles each partition separately.
     * Could use a higher level of granularity and have each file copying
     * performed as a separate Runnable.
     */
    final ExecutorService executor = Executors.newFixedThreadPool(N_THREADS);
    Set<Integer> copiedPartitions = new HashSet<Integer>();
    final List<MoveContentsAndDeleteTask> tasks = new ArrayList<MoveContentsAndDeleteTask>();

    try {
        // Copy complete partitions
        // NOTE(review): copyPartitionsFrom presumably submits tasks to the executor and
        // records them in `tasks` -- confirm against its definition.
        copyPartitionsFrom(tmpOutputPath, fs, copiedPartitions, tasks, executor, outputPath);

        // Copy missing partitions from the empty partitions directory
        Path emptyPartitionsPath = new Path(tmpOutputPath, DomainBuilderAbstractOutputFormat.EMPTY_PARTITIONS_DIR);
        if (fs.exists(emptyPartitionsPath)) {
            copyPartitionsFrom(emptyPartitionsPath, fs, copiedPartitions, tasks, executor, outputPath);
        }
    } finally {
        // Always stop accepting work so the pool threads can exit, even when scheduling failed.
        executor.shutdown();
    }

    try {
        boolean allCopiersFinished = false;
        while (!allCopiersFinished) {
            allCopiersFinished = executor.awaitTermination(WAIT_CYCLE_SECONDS, TimeUnit.SECONDS);
        }
    } catch (InterruptedException e) {
        // Preserve the interrupt status for callers before translating the exception.
        Thread.currentThread().interrupt();
        throw new IOException("Executor interrupted", e);
    }

    // Surface the first failure recorded by any move task.
    for (MoveContentsAndDeleteTask task : tasks) {
        if (task.exception != null) {
            throw new IOException("Partition copying failed for " + task.srcDir, task.exception);
        }
    }

    // Finally, cleanup
    cleanupJob(domainName, conf);
}

From source file:com.moz.fiji.mapreduce.util.SerializeLoggerAspect.java

License:Apache License

/**
 * Writes the collected profiling statistics for this task to a file.
 *
 * <p>The file is created under the {@code STATS_DIR} folder of the context's working
 * directory and is named after the task attempt id; an existing file is overwritten.
 * The first line is a header; the remaining lines are produced by
 * {@code writeProfileInformation}, one call per entry of the signature/time maps of
 * {@code mLogTimerAspect} and then {@code mMRLogTimerAspect}. Header columns:
 * Job Name, Job ID, Task Attempt, Function Signature, Aggregate Time (nanoseconds),
 * Number of Invocations, Time per call (nanoseconds).
 *
 * @param context The {@link TaskInputOutputContext} for this job.
 * @throws IOException If creating the directory or file, or writing to it, fails.
 */
private void serializeToFile(TaskInputOutputContext context) throws IOException {
    final Path statsDir = new Path(context.getWorkingDirectory(), STATS_DIR);
    final FileSystem fileSystem = statsDir.getFileSystem(context.getConfiguration());
    fileSystem.mkdirs(statsDir);

    final Path statsFile = new Path(statsDir, context.getTaskAttemptID().toString());
    final OutputStreamWriter writer = new OutputStreamWriter(fileSystem.create(statsFile, true), "UTF-8");
    try {
        // Header row describing the columns of every subsequent line.
        writer.write("Job Name, Job ID, Task Attempt, Function Signature, Aggregate Time (nanoseconds), "
                + "Number of Invocations, Time per call (nanoseconds)\n");

        // Entries from the first timer aspect.
        for (Map.Entry<String, LoggingInfo> timing : mLogTimerAspect.getSignatureTimeMap().entrySet()) {
            writeProfileInformation(writer, context, timing.getKey(), timing.getValue());
        }

        // Entries from the MR timer aspect.
        for (Map.Entry<String, LoggingInfo> timing : mMRLogTimerAspect.getSignatureTimeMap().entrySet()) {
            writeProfileInformation(writer, context, timing.getKey(), timing.getValue());
        }
    } finally {
        writer.close();
    }
}

From source file:com.mvad.flink.demo.streaming.lib.sink.bucketing.BucketingSink.java

License:Apache License

/**
 * Opens a new part file inside the given bucket directory.
 *
 * <p>
 * The current part file of the bucket is closed first. The bucket directory is created if it
 * does not exist, then the part counter is advanced until neither the part path nor its
 * pending counterpart exists, and the bucket's writer is opened on the in-progress variant of
 * that path.
 */
private void openNewPartFile(Path bucketPath, BucketState<T> bucketState) throws Exception {
    closeCurrentPartFile(bucketState);

    final FileSystem fileSystem = new Path(basePath).getFileSystem(hadoopConf);

    if (!fileSystem.exists(bucketPath)) {
        try {
            final boolean created = fileSystem.mkdirs(bucketPath);
            if (created) {
                LOG.debug("Created new bucket directory: {}", bucketPath);
            }
        } catch (IOException e) {
            throw new RuntimeException("Could not create new bucket path.", e);
        }
    }

    // Everything in the part file name except the counter and the suffix is fixed for this call.
    final String namePrefix = partPrefix + "-" + subtaskIndex + "-";
    Path partPath = new Path(bucketPath, namePrefix + bucketState.partCounter + partSuffix);

    // This should work since there is only one parallel subtask that tries names with
    // our subtask id. Otherwise we would run into concurrency issues here.
    while (true) {
        if (!fileSystem.exists(partPath)) {
            final Path pendingPath = new Path(partPath.getParent(), pendingPrefix + partPath.getName())
                    .suffix(pendingSuffix);
            if (!fileSystem.exists(pendingPath)) {
                break;
            }
        }
        // Name is taken (either final or pending): try the next counter value.
        bucketState.partCounter++;
        partPath = new Path(bucketPath, namePrefix + bucketState.partCounter + partSuffix);
    }

    // increase, so we don't have to check for this name next time
    bucketState.partCounter++;

    LOG.debug("Next part path is {}", partPath.toString());
    bucketState.currentFile = partPath.toString();

    final Path inProgressPath = new Path(partPath.getParent(), inProgressPrefix + partPath.getName())
            .suffix(inProgressSuffix);

    // If we don't already have a writer for this bucket, create one
    if (bucketState.writer == null) {
        bucketState.writer = writerTemplate.duplicate();
    }

    bucketState.writer.open(fileSystem, inProgressPath);
    bucketState.isWriterOpen = true;
}

From source file:com.mvdb.platform.action.VersionMerge.java

License:Apache License

/**           
 * @param hdfsFileSystem/*from w  w  w.  j  a  v a  2  s .  c  o m*/
 * @param topPath
 * @return
 * @throws IOException
 */

private static Path[] getInputPaths(FileSystem hdfsFileSystem, Path topPath, String lastMergedDirName,
        String lastcopiedDirName, Path passiveDbPathT) throws IOException {
    Path passiveDbPath = new Path(topPath, "db/mv1");
    if (hdfsFileSystem.exists(passiveDbPath) == false) {
        hdfsFileSystem.mkdirs(passiveDbPath);
    }
    List<Path> pathList = new ArrayList<Path>();
    buildInputPathList(hdfsFileSystem, topPath, pathList, lastMergedDirName, lastcopiedDirName);
    pathList.add(passiveDbPath);
    Path[] inputPaths = pathList.toArray(new Path[0]);
    return inputPaths;
}

From source file:com.mycompany.app.TestStagingDirectoryPermissions.java

License:Apache License

/**
 * Integration test that checks the permissions of the staged {@code job.xml} when the
 * configured umask is {@code 0077}.
 *
 * <p>It starts a mini DFS cluster and a mini MR cluster, writes a small input file, submits a
 * job, and while the job is running asserts that the job's staging directory contains a
 * {@code job.xml} readable by both group and other. The clusters and file system are shut
 * down in the {@code finally} block.
 */
@Test
public void perms() throws IOException, InterruptedException {
    MiniDFSCluster minidfs = null;
    FileSystem fs = null;
    MiniMRClientCluster minimr = null;
    try {
        // Restrictive umask is set before the DFS cluster is built from this configuration.
        Configuration conf = new Configuration(true);
        conf.set("fs.permission.umask-mode", "0077");
        minidfs = new MiniDFSCluster.Builder(conf).build();
        minidfs.waitActive();

        // Point the default file system at the mini DFS cluster.
        fs = minidfs.getFileSystem();
        conf.set(FileSystem.FS_DEFAULT_NAME_KEY, fs.getUri().toString());
        // path(...) is a helper defined elsewhere in this class.
        Path p = path("/in");
        fs.mkdirs(p);

        // Single small input file for the job.
        FSDataOutputStream os = fs.create(new Path(p, "input.txt"));
        os.write("hello!".getBytes("UTF-8"));
        os.close();

        // Create a per-user directory under /User before starting the MR cluster.
        String user = UserGroupInformation.getCurrentUser().getUserName();
        Path home = new Path("/User/" + user);
        fs.mkdirs(home);
        minimr = MiniMRClientClusterFactory.create(this.getClass(), 1, conf);
        JobConf job = new JobConf(minimr.getConfig());

        job.setJobName("PermsTest");
        JobClient client = new JobClient(job);
        FileInputFormat.addInputPath(job, p);
        FileOutputFormat.setOutputPath(job, path("/out"));
        job.setInputFormat(TextInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setMapperClass(MySleepMapper.class);

        job.setNumReduceTasks(1);
        // Submit without blocking so the staging directory can be inspected below.
        RunningJob submittedJob = client.submitJob(job);

        // Sleep for a bit to let localization finish
        System.out.println("Sleeping...");
        Thread.sleep(3 * 1000l);
        System.out.println("Done sleeping...");
        assertFalse(UserGroupInformation.isSecurityEnabled());

        // Exactly one entry (this job's staging directory) is expected under the staging root.
        Path stagingRoot = path("/tmp/hadoop-yarn/staging/" + user + "/.staging/");
        assertTrue(fs.exists(stagingRoot));
        assertEquals(1, fs.listStatus(stagingRoot).length);
        Path staging = fs.listStatus(stagingRoot)[0].getPath();
        Path jobXml = path(staging + "/job.xml");

        assertTrue(fs.exists(jobXml));

        // The assertion under test: job.xml must be readable by "other" and by "group".
        FileStatus fileStatus = fs.getFileStatus(jobXml);
        System.out.println("job.xml permission = " + fileStatus.getPermission());
        assertTrue(fileStatus.getPermission().getOtherAction().implies(FsAction.READ));
        assertTrue(fileStatus.getPermission().getGroupAction().implies(FsAction.READ));

        submittedJob.waitForCompletion();
    } finally {
        // Tear down in order: MR cluster, file system handle, DFS cluster.
        if (minimr != null) {
            minimr.stop();
        }
        if (fs != null) {
            fs.close();
        }
        if (minidfs != null) {
            minidfs.shutdown(true);
        }
    }
}

From source file:com.mycompany.mavenpails2.PailMove.java

/**
 * Registers the serialization classes with {@code Api.setApplicationConf} and resets the
 * temporary directory: {@code TEMP_DIR} is deleted recursively and then re-created on the
 * file system obtained from a default {@link Configuration}.
 *
 * @throws IOException if obtaining the file system, deleting, or creating the directory fails
 */
public static void setApplicationConf() throws IOException {
    // Raw Map is kept on purpose: the parameter type of Api.setApplicationConf is not visible here.
    Map settings = new HashMap();
    settings.put("io.serializations",
            "backtype.hadoop.ThriftSerialization,org.apache.hadoop.io.serializer.WritableSerialization");
    Api.setApplicationConf(settings);

    // Start from an empty temporary directory.
    FileSystem fileSystem = FileSystem.get(new Configuration());
    Path tempDir = new Path(TEMP_DIR);
    fileSystem.delete(tempDir, true);
    fileSystem.mkdirs(tempDir);
}

From source file:com.nearinfinity.blur.utils.BlurUtil.java

License:Apache License

/**
 * Ensures that {@code path} exists, creating it (via {@code mkdirs}) when it is missing.
 *
 * @param fileSystem file system on which the path is checked and created
 * @param path       path to check
 * @return {@code true} if the path already existed; {@code false} if it was missing and
 *         {@code mkdirs} was called for it
 * @throws IOException if the existence check or directory creation fails
 */
public static boolean createPath(FileSystem fileSystem, Path path) throws IOException {
    if (fileSystem.exists(path)) {
        return true;
    }
    LOG.info("Path [{0}] does not exist, creating.", path);
    fileSystem.mkdirs(path);
    return false;
}