Example usage for org.apache.hadoop.fs FileSystem mkdirs

List of usage examples for org.apache.hadoop.fs FileSystem mkdirs

Introduction

On this page you can find example usages of org.apache.hadoop.fs FileSystem mkdirs.

Prototype

public boolean mkdirs(Path f) throws IOException 

Document

Call #mkdirs(Path, FsPermission) with default permission.
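Before the collected examples, here is a minimal, self-contained sketch of the typical call (the target path is hypothetical; it assumes a default Configuration picked up from the classpath):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MkdirsExample {
    public static void main(String[] args) throws IOException {
        // Hypothetical target directory; adjust to your own layout.
        Path dir = new Path("/tmp/mkdirs-example");

        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // mkdirs(Path) behaves like 'mkdir -p': it creates the directory and
        // any missing parents, and an already existing directory is not an error.
        if (fs.mkdirs(dir)) {
            System.out.println("Directory ready: " + dir);
        } else {
            System.out.println("Could not create: " + dir);
        }
    }
}

Note that several of the examples below guard the call with exists(path); that check is optional, since calling mkdirs on an existing directory simply returns true.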

Usage

From source file:com.rockstor.compact.PathUtil.java

License:Apache License

private void checkAndMakeDir(FileSystem dfs, String dir) throws IOException {
    Path path = new Path(dir);
    if (!dfs.exists(path)) {
        dfs.mkdirs(path);
    }
}

From source file:com.rockstor.tools.RockStorFsFormat.java

License:Apache License

protected void initDfs() throws IOException {
    RockAccessor.connectHDFS();
    String rootDir = conf.get("rockstor.rootdir");
    LOG.info("connect to hdfs ok!");
    FileSystem dfs = RockAccessor.getFileSystem();
    dfs.mkdirs(new Path(rootDir));

    rootDir = conf.get("rockstor.data.home");
    dfs.mkdirs(new Path(rootDir));

    rootDir = conf.get("rockstor.compact.dir");
    dfs.mkdirs(new Path(rootDir));

    LOG.info("init rockstor work dir " + rootDir + " OK!");
    RockAccessor.disconnectHDFS();
    LOG.info("disconnect from hdfs ok!");
}

From source file:com.sensei.indexing.hadoop.reduce.ShardWriter.java

License:Apache License

/**
 * Constructor
 * @param fs
 * @param shard
 * @param tempDir
 * @param iconf
 * @throws IOException
 */
public ShardWriter(FileSystem fs, Shard shard, String tempDir, Configuration iconf) throws IOException {
    logger.info("Construct a shard writer");

    this.iconf = iconf;
    this.fs = fs;
    localFs = FileSystem.getLocal(iconf);
    perm = new Path(shard.getDirectory());
    temp = new Path(tempDir);

    long initGeneration = shard.getGeneration();

    if (localFs.exists(temp)) {
        File tempFile = new File(temp.getName());
        if (tempFile.exists())
            SenseiReducer.deleteDir(tempFile);
    }

    if (!fs.exists(perm)) {
        assert (initGeneration < 0);
        fs.mkdirs(perm);
    } else {
        moveToTrash(iconf, perm);
        fs.mkdirs(perm);
        //      restoreGeneration(fs, perm, initGeneration);
    }
    //    dir =  //new FileSystemDirectory(fs, perm, false, iconf.getConfiguration());
    //        new MixedDirectory(fs, perm, localFs, fs.startLocalOutput(perm, temp),
    //            iconf);

    // analyzer is null because we only use addIndexes, not addDocument
    //    writer =
    //        new IndexWriter(dir, null, 
    //              initGeneration < 0 ? new KeepOnlyLastCommitDeletionPolicy() : new MixedDeletionPolicy(), 
    //                    MaxFieldLength.UNLIMITED);

    //    writer =  new IndexWriter(dir, null, new KeepOnlyLastCommitDeletionPolicy(), MaxFieldLength.UNLIMITED);
    writer = new IndexWriter(FSDirectory.open(new File(tempDir)), null, new KeepOnlyLastCommitDeletionPolicy(),
            MaxFieldLength.UNLIMITED);
    setParameters(iconf);
    //    dir = null;
    //    writer = null;

}

From source file:com.sequenceiq.yarntest.mr.QuasiMonteCarlo.java

License:Apache License

/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the ID of the submitted job
 */
public static JobID submitPiEstimationMRApp(String jobName, int numMaps, long numPoints, Path tmpDir,
        Configuration conf) throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(conf);
    //setup job conf
    job.setJobName(jobName);
    job.setJarByClass(QuasiMonteCarlo.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setOutputKeyClass(BooleanWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(QmcMapper.class);

    job.setReducerClass(QmcReducer.class);
    job.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    job.setSpeculativeExecution(false);

    //setup input/output directories
    final Path inDir = new Path(tmpDir, "in");
    final Path outDir = new Path(tmpDir, "out");
    FileInputFormat.setInputPaths(job, inDir);
    FileOutputFormat.setOutputPath(job, outDir);

    final FileSystem fs = FileSystem.get(conf);
    if (fs.exists(tmpDir)) {
        fs.delete(tmpDir, true);
        //      throw new IOException("Tmp directory " + fs.makeQualified(tmpDir)
        //          + " already exists.  Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    //  try {
    //generate an input file for each map task
    for (int i = 0; i < numMaps; ++i) {
        final Path file = new Path(inDir, "part" + i);
        final LongWritable offset = new LongWritable(i * numPoints);
        final LongWritable size = new LongWritable(numPoints);
        final SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, LongWritable.class,
                LongWritable.class, CompressionType.NONE);
        try {
            writer.append(offset, size);
        } finally {
            writer.close();
        }
        System.out.println("Wrote input for Map #" + i);
    }

    //start a map/reduce job
    System.out.println("Starting Job");
    final long startTime = System.currentTimeMillis();
    job.submit();
    //      final double duration = (System.currentTimeMillis() - startTime)/1000.0;
    //      System.out.println("Job Finished in " + duration + " seconds");
    return job.getJobID();

    //    } finally {
    //      fs.delete(tmpDir, true);
    //    }
}

From source file:com.splicemachine.derby.impl.io.HdfsDirFile.java

License:Apache License

@Override
public boolean mkdirs() {
    try {
        FileSystem fs = getFileSystem();
        return fs.mkdirs(new Path(path));
    } catch (IOException e) {
        LOG.error(String.format("An exception occurred while making directories in the path '%s'.", path), e);
        return false;
    }
}

From source file:com.splout.db.benchmark.BenchmarkStoreTool.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    JCommander jComm = new JCommander(this);
    jComm.setProgramName("Benchmark-Store Tool");
    try {
        jComm.parse(args);
    } catch (ParameterException e) {
        System.out.println(e.getMessage());
        jComm.usage();
        return -1;
    } catch (Throwable t) {
        t.printStackTrace();
        jComm.usage();
        return -1;
    }

    // Create some input files that will represent the partitions to generate
    Path out = new Path(output);
    FileSystem outFs = out.getFileSystem(getConf());
    HadoopUtils.deleteIfExists(outFs, out);

    Integer min, max, eachPartition;
    int maxKeyDigits;

    try {
        String[] minMax = keySpace.split(":");
        min = Integer.parseInt(minMax[0]);
        max = Integer.parseInt(minMax[1]);
        maxKeyDigits = max.toString().length();

        eachPartition = (max - min) / nPartitions;
    } catch (Exception e) {
        throw new IllegalArgumentException(
                "Key range format is not valid. It must be minKey:maxKey where both minKey and maxKey are integers.");
    }

    FileSystem inFs = FileSystem.get(getConf());
    Path input = new Path("benchmark-store-tool-" + System.currentTimeMillis());
    HadoopUtils.deleteIfExists(inFs, input);
    inFs.mkdirs(input);

    List<PartitionEntry> partitionEntries = new ArrayList<PartitionEntry>();

    // Create as many input files as partitions
    // Each input file will have as value the range that each Mapper will write
    String paddingExp = "%0" + (padding != null ? padding : maxKeyDigits) + "d";
    for (int i = 0; i < nPartitions; i++) {
        int thisMin = (i * eachPartition);
        int thisMax = (i + 1) * eachPartition;
        HadoopUtils.stringToFile(inFs, new Path(input, i + ".txt"), i + "\t" + thisMin + ":" + thisMax);
        PartitionEntry entry = new PartitionEntry();
        entry.setMin(String.format(paddingExp, thisMin));
        entry.setMax(String.format(paddingExp, thisMax));
        entry.setShard(i);
        partitionEntries.add(entry);
    }

    partitionEntries.get(0).setMin(null);
    partitionEntries.get(partitionEntries.size() - 1).setMax(null);

    PartitionMap partitionMap = new PartitionMap(partitionEntries);
    HadoopUtils.stringToFile(outFs, new Path(out, "partition-map"), JSONSerDe.ser(partitionMap));

    List<Field> fields = new ArrayList<Field>();
    fields.add(Field.create(SploutSQLOutputFormat.PARTITION_TUPLE_FIELD, Type.INT));
    fields.addAll(Fields.parse("key:int, value:string"));
    final Schema schema = new Schema(tablename, fields);

    byte[] valueArray = new byte[valueSize];
    for (int i = 0; i < valueSize; i++) {
        valueArray[i] = 'A';
    }
    final String theValue = new String(valueArray);

    if (!FileSystem.getLocal(conf).equals(FileSystem.get(conf))) {
        File nativeLibs = new File("native");
        if (nativeLibs.exists()) {
            SploutHadoopConfiguration.addSQLite4JavaNativeLibsToDC(conf);
        }
    }

    MapOnlyJobBuilder job = new MapOnlyJobBuilder(conf);
    TableSpec tableSpec = new TableSpec(schema, schema.getFields().get(1));

    job.setOutput(new Path(out, "store"),
            new SploutSQLProxyOutputFormat(new SQLite4JavaOutputFormat(1000000, tableSpec)), ITuple.class,
            NullWritable.class);
    job.addInput(input, new HadoopInputFormat(TextInputFormat.class),
            new MapOnlyMapper<LongWritable, Text, ITuple, NullWritable>() {

                ITuple metaTuple = new Tuple(schema);

                protected void map(LongWritable key, Text value, Context context)
                        throws IOException, InterruptedException {

                    String[] partitionRange = value.toString().split("\t");
                    Integer partition = Integer.parseInt(partitionRange[0]);
                    metaTuple.set(SploutSQLOutputFormat.PARTITION_TUPLE_FIELD, partition);
                    String[] minMax = partitionRange[1].split(":");
                    Integer min = Integer.parseInt(minMax[0]);
                    Integer max = Integer.parseInt(minMax[1]);
                    for (int i = min; i < max; i++) {
                        metaTuple.set("key", i);
                        metaTuple.set("value", theValue);
                        context.write(metaTuple, NullWritable.get());
                    }
                }
            });

    job.createJob().waitForCompletion(true);

    HadoopUtils.deleteIfExists(inFs, input);
    return 0;
}

From source file:com.splout.db.common.SploutHadoopConfiguration.java

License:Apache License

/**
 * Adds the SQLite native libraries to the DistributedCache so that they will be present in the java.library.path
 * of the child Hadoop task.
 * <p/>
 * Usually you don't need to do this, as the task will already try to load them from the job's uncompressed JAR.
 * However, not all Hadoop versions are guaranteed to uncompress the JAR, so in that case it is safer to use this method.
 */
public static void addSQLite4JavaNativeLibsToDC(Configuration conf, File nativeLibsLocalPath)
        throws IOException, URISyntaxException {
    Path nativeLibHdfs = new Path("splout-native");
    FileSystem fS = FileSystem.get(conf);
    if (fS.exists(nativeLibHdfs)) {
        fS.delete(nativeLibHdfs, true);
    }
    fS.mkdirs(nativeLibHdfs);
    // Copy native libs to HDFS
    File[] natives = nativeLibsLocalPath.listFiles();
    if (natives == null) {
        throw new RuntimeException(
                "natives lib folder not present in local working directory! Are you in SPLOUT_HOME?");
    }
    for (File nativeLib : natives) {
        FileUtil.copy(nativeLib, fS, nativeLibHdfs, false, conf);
    }
    for (FileStatus nativeLibInHdfs : fS.listStatus(nativeLibHdfs)) {
        // http://hadoop.apache.org/docs/r0.20.2/native_libraries.html#Loading+native+libraries+through+DistributedCache
        DistributedCache.createSymlink(conf);
        URI uriToAdd = new URI(
                nativeLibInHdfs.getPath().makeQualified(fS) + "#" + nativeLibInHdfs.getPath().getName());
        DistributedCache.addCacheFile(uriToAdd, conf);
        log.info("Adding to distributed cache: " + uriToAdd);
    }
}

From source file:com.splout.db.hadoop.engine.SploutSQLProxyOutputFormat.java

License:Apache License

@Override
public RecordWriter<ITuple, NullWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {

    long waitTimeHeartBeater = context.getConfiguration().getLong(HeartBeater.WAIT_TIME_CONF, 5000);
    heartBeater = new HeartBeater(context, waitTimeHeartBeater);
    heartBeater.needHeartBeat();
    conf = context.getConfiguration();
    this.context = context;

    outputFormat.setConf(context.getConfiguration());

    return new RecordWriter<ITuple, NullWritable>() {

        // Temporary and permanent Paths for properly writing Hadoop output files
        private Map<Integer, Path> permPool = new HashMap<Integer, Path>();
        private Map<Integer, Path> tempPool = new HashMap<Integer, Path>();

        private void initSql(int partition) throws IOException, InterruptedException {
            // HDFS final location of the generated partition file. It will be
            // written to a temporary folder first and then committed by the
            // OutputCommitter to the proper location.
            FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(
                    SploutSQLProxyOutputFormat.this.context);
            Path perm = new Path(committer.getWorkPath(), partition + ".db");
            FileSystem fs = perm.getFileSystem(conf);

            // Make a task unique name that contains the actual index output name to
            // make debugging simpler
            // Note: if using JVM reuse, the sequence number will not be reset for a
            // new task using the jvm
            Path temp = conf.getLocalPath("mapred.local.dir",
                    "splout_task_" + SploutSQLProxyOutputFormat.this.context.getTaskAttemptID() + '.'
                            + FILE_SEQUENCE.incrementAndGet());

            FileSystem localFileSystem = FileSystem.getLocal(conf);
            if (localFileSystem.exists(temp)) {
                localFileSystem.delete(temp, true);
            }
            localFileSystem.mkdirs(temp);

            Path local = fs.startLocalOutput(perm, new Path(temp, partition + ".db"));

            //
            permPool.put(partition, perm);
            tempPool.put(partition, new Path(temp, partition + ".db"));

            outputFormat.initPartition(partition, local);
        }

        @Override
        public void close(TaskAttemptContext ctx) throws IOException, InterruptedException {
            FileSystem fs = FileSystem.get(ctx.getConfiguration());
            try {
                if (ctx != null) {
                    heartBeater.setProgress(ctx);
                }
                outputFormat.close();
                for (Map.Entry<Integer, Path> entry : permPool.entrySet()) {
                    // Hadoop - completeLocalOutput()
                    fs.completeLocalOutput(entry.getValue(), tempPool.get(entry.getKey()));
                }
            } finally { // in any case, destroy the HeartBeater
                heartBeater.cancelHeartBeat();
            }
        }

        @Override
        public void write(ITuple tuple, NullWritable ignore) throws IOException, InterruptedException {
            int partition = (Integer) tuple.get(SploutSQLOutputFormat.PARTITION_TUPLE_FIELD);
            if (tempPool.get(partition) == null) {
                initSql(partition);
            }
            outputFormat.write(tuple);
        }

    };
}

From source file:com.splout.db.hadoop.TablespaceGenerator.java

License:Apache License

protected void prepareOutput(Configuration conf) throws IOException {
    FileSystem fileSystem = outputPath.getFileSystem(conf);
    fileSystem.mkdirs(outputPath);
}

From source file:com.splout.db.integration.HadoopIntegrationTest.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    // Validate params etc
    JCommander jComm = new JCommander(this);
    jComm.setProgramName("Splout Hadoop Compatibility Integration Test");
    try {
        jComm.parse(args);
    } catch (ParameterException e) {
        System.err.println(e.getMessage());
        jComm.usage();
        System.exit(-1);
    }

    Path tmpHdfsPath = new Path(
            "tmp-" + HadoopIntegrationTest.class.getName() + "-" + System.currentTimeMillis());
    FileSystem fS = tmpHdfsPath.getFileSystem(getConf());
    fS.mkdirs(tmpHdfsPath);
    fS.mkdirs(new Path(tmpHdfsPath, "input"));
    fS.mkdirs(new Path(tmpHdfsPath, "output"));
    boolean isLocal = FileSystem.get(conf).equals(FileSystem.getLocal(conf));
    if (!isLocal) {
        SploutHadoopConfiguration.addSQLite4JavaNativeLibsToDC(conf);
    }

    tmpHdfsPath = tmpHdfsPath.makeQualified(fS);

    Path pageCounts = new Path(input);
    FileUtil.copy(FileSystem.getLocal(getConf()), pageCounts, fS, new Path(tmpHdfsPath, "input"), false,
            getConf());

    SimpleGeneratorCMD generator = new SimpleGeneratorCMD();
    generator.setConf(getConf());
    if (generator.run(new String[] { "-tb", "pagecountsintegration", "-t", "pagecounts", "-i",
            tmpHdfsPath + "/input", "-o", tmpHdfsPath + "/output", "-s",
            "projectcode:string, pagename:string, visits:int, bytes:long", "-pby", "projectcode,pagename",
            "-sep", "\" \"", "-p", "2", "-e", engine }) < 0) {
        throw new RuntimeException("Generator failed!");
    }

    SploutClient client = new SploutClient(qnode);
    QNodeStatus status = client.overview();
    long previousVersion = -1;
    if (status.getTablespaceMap().get("pagecountsintegration") != null) {
        previousVersion = status.getTablespaceMap().get("pagecountsintegration").getVersion();
    }

    DeployerCMD deployer = new DeployerCMD();
    deployer.setConf(getConf());
    if (deployer.run(new String[] { "-r", "2", "-q", qnode, "-root", tmpHdfsPath + "/output", "-ts",
            "pagecountsintegration" }) < 0) {
        throw new RuntimeException("Deployer failed!");
    }

    long waitedSoFar = 0;

    status = client.overview();
    while (status.getTablespaceMap().get("pagecountsintegration") == null
            || previousVersion == status.getTablespaceMap().get("pagecountsintegration").getVersion()) {
        Thread.sleep(2000);
        waitedSoFar += 2000;
        status = client.overview();
        if (waitedSoFar > 90000) {
            throw new RuntimeException(
                    "Deploy must have failed in Splout's server. Waiting too much for it to complete.");
        }
    }

    previousVersion = status.getTablespaceMap().get("pagecountsintegration").getVersion();

    QueryStatus qStatus = client.query("pagecountsintegration", "*", "SELECT * FROM pagecounts;", null);
    System.out.println(qStatus.getResult());

    if (qStatus.getResult() == null) {
        throw new RuntimeException("Something failed as query() is returning null!");
    }

    System.out.println("Everything fine.");
    return 1;
}