Example usage for org.apache.hadoop.fs FileSystem mkdirs

List of usage examples for org.apache.hadoop.fs FileSystem mkdirs

Introduction

This page presents example usage of org.apache.hadoop.fs.FileSystem#mkdirs, collected from open source projects.

Prototype

public boolean mkdirs(Path f) throws IOException 

Document

Call #mkdirs(Path, FsPermission) with default permission.
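
As a quick orientation before the project examples below, here is a minimal, hypothetical sketch of the call (the class name and path are made up for illustration). The single-argument overload behaves like `mkdir -p`, creating the directory and any missing parents with the default permission:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MkdirsExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Hypothetical target directory; mkdirs(Path) creates missing parents
        // and applies the default permission.
        Path dir = new Path("/tmp/mkdirs-example");
        boolean ok = fs.mkdirs(dir);
        System.out.println("mkdirs returned " + ok);
    }
}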

Usage

From source file:io.covert.binary.analysis.BuildSequenceFileFromTarball.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    File inDirOrFile = new File(args[0]);
    Path outputDir = new Path(args[1]);

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    if (!fs.exists(outputDir)) {
        fs.mkdirs(outputDir);
    }

    if (inDirOrFile.isFile()) {
        load(fs, conf, inDirOrFile, outputDir);
    } else {
        for (File file : inDirOrFile.listFiles()) {
            if (!file.isFile()) {
                System.out.println("Skipping " + file + " (not a file) ...");
                continue;
            }

            load(fs, conf, file, outputDir);
        }
    }

    return 0;
}

From source file:io.covert.binary.analysis.BuildTarBzSequenceFile.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    File inDir = new File(args[0]);
    Path name = new Path(args[1]);

    Text key = new Text();
    BytesWritable val = new BytesWritable();

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    if (!fs.exists(name)) {
        fs.mkdirs(name);
    }
    for (File file : inDir.listFiles()) {
        Path sequenceName = new Path(name, file.getName() + ".seq");
        System.out.println("Writing to " + sequenceName);
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, sequenceName, Text.class,
                BytesWritable.class, CompressionType.RECORD);
        if (!file.isFile()) {
            System.out.println("Skipping " + file + " (not a file) ...");
            continue;
        }

        final InputStream is = new FileInputStream(file);
        final TarArchiveInputStream debInputStream = (TarArchiveInputStream) new ArchiveStreamFactory()
                .createArchiveInputStream("tar", is);
        TarArchiveEntry entry = null;
        while ((entry = (TarArchiveEntry) debInputStream.getNextEntry()) != null) {
            if (!entry.isDirectory()) {

                final ByteArrayOutputStream outputFileStream = new ByteArrayOutputStream();
                IOUtils.copy(debInputStream, outputFileStream);
                outputFileStream.close();
                byte[] outputFile = outputFileStream.toByteArray();
                val.set(outputFile, 0, outputFile.length);

                MessageDigest md = MessageDigest.getInstance("MD5");
                md.update(outputFile);
                byte[] digest = md.digest();
                String hexdigest = "";
                for (int i = 0; i < digest.length; i++) {
                    hexdigest += Integer.toString((digest[i] & 0xff) + 0x100, 16).substring(1);
                }
                key.set(hexdigest);
                writer.append(key, val);
            }
        }
        debInputStream.close();
        writer.close();
    }

    return 0;
}

From source file:io.druid.indexer.JobHelper.java

License:Apache License

static void addJarToClassPath(File jarFile, Path distributedClassPath, Path intermediateClassPath,
        FileSystem fs, Job job) throws IOException {
    // Create distributed directory if it does not exist.
    // rename will always fail if destination does not exist.
    fs.mkdirs(distributedClassPath);

    // Non-snapshot jar files are uploaded to the shared classpath.
    final Path hdfsPath = new Path(distributedClassPath, jarFile.getName());
    if (!fs.exists(hdfsPath)) {
        // Multiple jobs can try to upload the jar here.
        // To avoid them overwriting each other's files, first upload to intermediateClassPath and then rename to the distributedClassPath.
        final Path intermediateHdfsPath = new Path(intermediateClassPath, jarFile.getName());
        uploadJar(jarFile, intermediateHdfsPath, fs);
        IOException exception = null;
        try {
            log.info("Renaming jar to path[%s]", hdfsPath);
            fs.rename(intermediateHdfsPath, hdfsPath);
            if (!fs.exists(hdfsPath)) {
                throw new IOException(String.format("File does not exist even after moving from[%s] to [%s]",
                        intermediateHdfsPath, hdfsPath));
            }
        } catch (IOException e) {
            // rename failed, possibly due to race condition. check if some other job has uploaded the jar file.
            try {
                if (!fs.exists(hdfsPath)) {
                    log.error(e, "IOException while Renaming jar file");
                    exception = e;
                }
            } catch (IOException e1) {
                e.addSuppressed(e1);
                exception = e;
            }
        } finally {
            try {
                if (fs.exists(intermediateHdfsPath)) {
                    fs.delete(intermediateHdfsPath, false);
                }
            } catch (IOException e) {
                if (exception == null) {
                    exception = e;
                } else {
                    exception.addSuppressed(e);
                }
            }
            if (exception != null) {
                throw exception;
            }
        }
    }
    job.addFileToClassPath(hdfsPath);
}

From source file:io.druid.segment.loading.HdfsDataSegmentPusher.java

License:Open Source License

@Override
public DataSegment push(File inDir, DataSegment segment) throws IOException {
    final String storageDir = DataSegmentPusherUtil.getHdfsStorageDir(segment);
    Path outFile = new Path(String.format("%s/%s/index.zip", config.getStorageDirectory(), storageDir));
    FileSystem fs = outFile.getFileSystem(hadoopConfig);

    fs.mkdirs(outFile.getParent());
    log.info("Compressing files from[%s] to [%s]", inDir, outFile);
    FSDataOutputStream out = null;
    long size;
    try {
        out = fs.create(outFile);

        size = CompressionUtils.zip(inDir, out);

        out.close();
    } finally {
        Closeables.closeQuietly(out);
    }

    return createDescriptorFile(segment.withLoadSpec(makeLoadSpec(outFile)).withSize(size)
            .withBinaryVersion(IndexIO.CURRENT_VERSION_ID), outFile.getParent(), fs);
}

From source file:io.druid.storage.hdfs.HdfsDataSegmentKillerTest.java

License:Apache License

private void makePartitionDirWithIndex(FileSystem fs, Path path) throws IOException {
    Assert.assertTrue(fs.mkdirs(path));
    try (FSDataOutputStream os = fs.create(new Path(path, "index.zip"))) {
    }
}

From source file:io.druid.storage.hdfs.HdfsDataSegmentPusher.java

License:Apache License

@Override
public DataSegment push(File inDir, DataSegment segment) throws IOException {
    final String storageDir = DataSegmentPusherUtil.getHdfsStorageDir(segment);

    log.info("Copying segment[%s] to HDFS at location[%s/%s]", segment.getIdentifier(),
            config.getStorageDirectory(), storageDir);

    Path outFile = new Path(String.format("%s/%s/index.zip", config.getStorageDirectory(), storageDir));
    FileSystem fs = outFile.getFileSystem(hadoopConfig);

    fs.mkdirs(outFile.getParent());
    log.info("Compressing files from[%s] to [%s]", inDir, outFile);

    final long size;
    try (FSDataOutputStream out = fs.create(outFile)) {
        size = CompressionUtils.zip(inDir, out);
    }

    return createDescriptorFile(segment.withLoadSpec(makeLoadSpec(outFile)).withSize(size)
            .withBinaryVersion(SegmentUtils.getVersionFromDir(inDir)), outFile.getParent(), fs);
}

From source file:io.fluo.stress.trie.Init.java

License:Apache License

private int buildTree(int nodeSize, FluoConfiguration props, Path tmp, int stopLevel) throws Exception {
    Job job = Job.getInstance(getConf());

    job.setJarByClass(Init.class);

    job.setJobName(Init.class.getName() + "_load");

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.getConfiguration().setInt(TRIE_NODE_SIZE_PROP, nodeSize);
    job.getConfiguration().setInt(TRIE_STOP_LEVEL_PROP, stopLevel);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(tmp, "nums"));

    job.setMapperClass(InitMapper.class);
    job.setCombinerClass(InitCombiner.class);
    job.setReducerClass(InitReducer.class);

    job.setOutputFormatClass(AccumuloFileOutputFormat.class);

    job.setPartitionerClass(RangePartitioner.class);

    FileSystem fs = FileSystem.get(job.getConfiguration());
    Connector conn = AccumuloUtil.getConnector(props);

    Path splitsPath = new Path(tmp, "splits.txt");

    Collection<Text> splits1 = writeSplits(props, fs, conn, splitsPath);

    RangePartitioner.setSplitFile(job, splitsPath.toString());
    job.setNumReduceTasks(splits1.size() + 1);

    Path outPath = new Path(tmp, "out");
    AccumuloFileOutputFormat.setOutputPath(job, outPath);

    boolean success = job.waitForCompletion(true);

    if (success) {
        Path failPath = new Path(tmp, "failures");
        fs.mkdirs(failPath);
        conn.tableOperations().importDirectory(props.getAccumuloTable(), outPath.toString(),
                failPath.toString(), false);
    }
    return success ? 0 : 1;
}

From source file:io.fluo.stress.trie.NumberIngest.java

License:Apache License

private static void setupHdfs(String hadoopPrefix, String testDir, int numMappers, int numPerMapper)
        throws IllegalArgumentException, IOException {
    Configuration config = new Configuration();
    config.addResource(new Path(hadoopPrefix + "/conf/core-site.xml"));
    config.addResource(new Path(hadoopPrefix + "/conf/hdfs-site.xml"));
    @SuppressWarnings("resource")
    FileSystem hdfs = FileSystem.get(config);

    String inputDir = testDir + "/input";

    hdfs.mkdirs(new Path(inputDir));
    try (FSDataOutputStream fos = hdfs.create(new Path(inputDir + "/data"))) {
        for (int i = 0; i < numMappers; i++) {
            fos.writeUTF(Integer.toString(numPerMapper) + "\n");
        }
    }
}

From source file:io.fluo.webindex.data.Copy.java

License:Apache License

public static void main(String[] args) throws Exception {

    if (args.length != 3) {
        log.error("Usage: Copy <pathsFile> <range> <dest>");
        System.exit(1);
    }
    final String hadoopConfDir = IndexEnv.getHadoopConfDir();
    final List<String> copyList = IndexEnv.getPathsRange(args[0], args[1]);
    if (copyList.isEmpty()) {
        log.error("No files to copy given {} {}", args[0], args[1]);
        System.exit(1);
    }

    DataConfig dataConfig = DataConfig.load();

    SparkConf sparkConf = new SparkConf().setAppName("webindex-copy");
    try (JavaSparkContext ctx = new JavaSparkContext(sparkConf)) {

        FileSystem hdfs = FileSystem.get(ctx.hadoopConfiguration());
        Path destPath = new Path(args[2]);
        if (!hdfs.exists(destPath)) {
            hdfs.mkdirs(destPath);
        }

        log.info("Copying {} files (Range {} of paths file {}) from AWS to HDFS {}", copyList.size(), args[1],
                args[0], destPath.toString());

        JavaRDD<String> copyRDD = ctx.parallelize(copyList, dataConfig.getNumExecutorInstances());

        final String prefix = DataConfig.CC_URL_PREFIX;
        final String destDir = destPath.toString();

        copyRDD.foreachPartition(iter -> {
            FileSystem fs = IndexEnv.getHDFS(hadoopConfDir);
            iter.forEachRemaining(ccPath -> {
                try {
                    Path dfsPath = new Path(destDir + "/" + getFilename(ccPath));
                    if (fs.exists(dfsPath)) {
                        log.error("File {} exists in HDFS and should have been previously filtered",
                                dfsPath.getName());
                    } else {
                        String urlToCopy = prefix + ccPath;
                        log.info("Starting copy of {} to {}", urlToCopy, destDir);
                        try (OutputStream out = fs.create(dfsPath);
                                BufferedInputStream in = new BufferedInputStream(
                                        new URL(urlToCopy).openStream())) {
                            IOUtils.copy(in, out);
                        }
                        log.info("Created {}", dfsPath.getName());
                    }
                } catch (IOException e) {
                    log.error("Exception while copying {}", ccPath, e);
                }
            });
        });
    }
}

From source file:io.gzinga.hadoop.TestHadoopGZipRandomAccess.java

License:Apache License

@Test
public void testGZipOutputStream() {
    try {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "file:///");
        FileSystem fs = FileSystem.get(conf);
        fs.mkdirs(new Path("target/test"));
        GZipOutputStreamRandomAccess gzip = new GZipOutputStreamRandomAccess(
                fs.create(new Path("target/test/testfile")));
        byte[] str = "This is line\n".getBytes();
        for (int i = 1; i <= 10000; i++) {
            if (i % 100 == 0) {
                gzip.addOffset(i / 100l);
            }
            gzip.write(str);
        }
        Assert.assertEquals(gzip.getOffsetMap().size(), 100);
        gzip.close();
        fs.copyFromLocalFile(new Path(fs.getWorkingDirectory().toString() + "/target/test-classes/testfile1"),
                new Path("target/test/testfile1"));
        FSDataInputStream fin = fs.open(new Path("target/test/testfile"));
        long len = fs.getFileStatus(new Path("target/test/testfile")).getLen();
        SeekableGZipDataInputStream sin = new SeekableGZipDataInputStream(fin, len);
        Assert.assertTrue(GZipInputStreamRandomAccess.isGzipRandomOutputFile(sin));
        fin = fs.open(new Path("target/test/testfile1"));
        sin = new SeekableGZipDataInputStream(fin, len);
        Assert.assertFalse(GZipInputStreamRandomAccess.isGzipRandomOutputFile(sin));
        fin = fs.open(new Path("target/test/testfile"));
        sin = new SeekableGZipDataInputStream(fin, len);
        GZipInputStreamRandomAccess gzin = new GZipInputStreamRandomAccess(sin);
        Assert.assertEquals(gzin.getMetadata().size(), 100);
        Assert.assertTrue(gzin.getMetadata().containsKey(1l));
        Assert.assertTrue(gzin.getMetadata().containsKey(100l));
        Assert.assertFalse(gzin.getMetadata().containsKey(200l));
        gzin.jumpToIndex(50l);
        int count1 = 0;
        while (true) {
            int l = gzin.read();
            if (l == -1) {
                break;
            }
            count1++;
        }
        gzin.jumpToIndex(60l);
        int count2 = 0;
        while (true) {
            int l = gzin.read();
            if (l == -1) {
                break;
            }
            count2++;
        }
        Assert.assertTrue(count1 > count2);
        gzin.close();
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail();
    }
}