List of usage examples for org.apache.hadoop.fs.FileSystem.mkdirs
public boolean mkdirs(Path f) throws IOException
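Before the project-specific examples below, here is a minimal standalone sketch of the call itself (not taken from any of the listed projects); the default Configuration and the /tmp/example/output path are arbitrary illustration choices.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;

public class MkdirsExample {
    public static void main(String[] args) throws IOException {
        // Uses whatever default file system the loaded *-site.xml files point at
        // (the local file system if nothing is configured).
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // mkdirs creates the directory and any missing parents, like `mkdir -p`.
        // It returns true on success, and is a no-op that still returns true if
        // the directory already exists.
        Path dir = new Path("/tmp/example/output"); // arbitrary example path
        if (fs.mkdirs(dir)) {
            System.out.println("Created (or already present): " + dir);
        }

        // An overload also accepts explicit permissions for the new directory.
        fs.mkdirs(new Path(dir, "restricted"), new FsPermission("700"));

        System.out.println("Exists: " + fs.exists(dir));
    }
}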
From source file:com.splunk.shuttl.archiver.filesystem.hadoop.HadoopArchiveFileSystemSlowTest.java
License:Apache License
@Test(groups = { "end-to-end" }) @Parameters(value = { "hadoop.host", "hadoop.port" }) public void rename_dirWithMultipleLevelsOfNonExistingFiles_renamesDirectory(String hadoopHost, String hadoopPort) throws IOException { FileSystem hadoopFileSystem = TUtilsFunctional.getHadoopFileSystem(hadoopHost, hadoopPort); String simpleClassName = getClass().getSimpleName(); Path path = new Path(simpleClassName + "/1/foo/dir/").makeQualified(hadoopFileSystem); Path otherRoot = new Path(simpleClassName + "/2/foo/dir").makeQualified(hadoopFileSystem); HadoopArchiveFileSystem realFileStructure = new HadoopArchiveFileSystem(hadoopFileSystem); try {/* w w w . j a v a2s . c o m*/ hadoopFileSystem.mkdirs(path); assertTrue(hadoopFileSystem.exists(path)); hadoopFileSystem.delete(otherRoot, true); assertFalse(hadoopFileSystem.exists(otherRoot)); // Test realFileStructure.rename(path.toUri().getPath(), otherRoot.toUri().getPath()); assertTrue(hadoopFileSystem.exists(otherRoot)); assertFalse(hadoopFileSystem.exists(path)); } finally { hadoopFileSystem.delete(new Path("/1"), true); hadoopFileSystem.delete(new Path("/2"), true); } }
From source file:com.streamsets.pipeline.spark.SparkStreamingBinding.java
License:Apache License
@Override
public void init() throws Exception {
    for (Object key : properties.keySet()) {
        logMessage("Property => " + key + " => " + properties.getProperty(key.toString()), isRunningInMesos);
    }
    final SparkConf conf = new SparkConf().setAppName("StreamSets Data Collector - Streaming Mode");
    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
    final String topic = getProperty(TOPIC);
    final long duration;
    String durationAsString = getProperty(MAX_WAIT_TIME);
    try {
        duration = Long.parseLong(durationAsString);
    } catch (NumberFormatException ex) {
        String msg = "Invalid " + MAX_WAIT_TIME + " '" + durationAsString + "' : " + ex;
        throw new IllegalArgumentException(msg, ex);
    }

    Configuration hadoopConf = new SparkHadoopUtil().newConfiguration(conf);
    if (isRunningInMesos) {
        hadoopConf = getHadoopConf(hadoopConf);
    } else {
        hadoopConf = new Configuration();
    }
    URI hdfsURI = FileSystem.getDefaultUri(hadoopConf);
    logMessage("Default FS URI: " + hdfsURI, isRunningInMesos);
    FileSystem hdfs = (new Path(hdfsURI)).getFileSystem(hadoopConf);
    Path sdcCheckpointPath = new Path(hdfs.getHomeDirectory(),
        ".streamsets-spark-streaming/" + getProperty("sdc.id") + "/" + encode(topic));
    // encode as remote pipeline name might have a colon within it
    String pipelineName = encode(getProperty("cluster.pipeline.name"));
    final Path checkPointPath = new Path(sdcCheckpointPath, pipelineName);
    hdfs.mkdirs(checkPointPath);
    if (!hdfs.isDirectory(checkPointPath)) {
        throw new IllegalStateException("Could not create checkpoint path: " + sdcCheckpointPath);
    }
    if (isRunningInMesos) {
        String scheme = hdfsURI.getScheme();
        if (scheme.equals("hdfs")) {
            File mesosBootstrapFile = BootstrapCluster.getMesosBootstrapFile();
            Path mesosBootstrapPath = new Path(checkPointPath, mesosBootstrapFile.getName());
            // in case of hdfs, copy the jar file from the local path to hdfs
            hdfs.copyFromLocalFile(false, true, new Path(mesosBootstrapFile.toURI()), mesosBootstrapPath);
            conf.setJars(new String[] { mesosBootstrapPath.toString() });
        } else if (scheme.equals("s3") || scheme.equals("s3n") || scheme.equals("s3a")) {
            // we can't upload the jar to s3 as executors won't understand the s3 scheme without the aws jar,
            // so have the jar available on http
            conf.setJars(new String[] { getProperty("mesos.jar.url") });
        } else {
            throw new IllegalStateException("Unsupported scheme: " + scheme);
        }
    }
    JavaStreamingContextFactory javaStreamingContextFactory = new JavaStreamingContextFactoryImpl(conf,
        duration, checkPointPath.toString(), getProperty(METADATA_BROKER_LIST), topic,
        properties.getProperty(AUTO_OFFSET_RESET, "").trim(), isRunningInMesos);

    ssc = JavaStreamingContext.getOrCreate(checkPointPath.toString(), hadoopConf, javaStreamingContextFactory,
        true);
    // mesos tries to stop the context internally, so don't do it here - deadlock bug in spark
    if (!isRunningInMesos) {
        final Thread shutdownHookThread = new Thread("Spark.shutdownHook") {
            @Override
            public void run() {
                LOG.debug("Gracefully stopping Spark Streaming Application");
                ssc.stop(true, true);
                LOG.info("Application stopped");
            }
        };
        Runtime.getRuntime().addShutdownHook(shutdownHookThread);
    }
    logMessage("Making calls through spark context ", isRunningInMesos);
    ssc.start();
}
From source file:com.streamsets.pipeline.stage.destination.hdfs.BaseHdfsTargetIT.java
License:Apache License
@Test
public void testDirValidity() throws Exception {
    // valid dirs
    testDir("/foo", "/foo", true);
    testDir("/foo/${YY()}", "/foo/bar-${YY()}", true);

    // non absolute dir
    testDir("foo", "/foo", false);
    testDir("/foo", "foo", false);

    FileSystem fs = miniDFS.getFileSystem();
    fs.mkdirs(new Path("/bar"));

    // no permissions
    testDir("/bar/foo", "/foo", false);
    testDir("/foo", "/bar/foo", false);
    testDir("/bar/foo/${YY()}", "/foo/${YY()}", false);
    testDir("/foo/${YY()}", "/bar/foo/${YY()}", false);
}
From source file:com.streamsets.pipeline.stage.destination.hdfs.HdfsTarget.java
License:Apache License
boolean validateHadoopDir(String configName, String dirPathTemplate, List<ConfigIssue> issues) {
    boolean ok;
    if (!dirPathTemplate.startsWith("/")) {
        issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), configName, Errors.HADOOPFS_40));
        ok = false;
    } else {
        int firstEL = dirPathTemplate.indexOf("$");
        if (firstEL > -1) {
            int lastDir = dirPathTemplate.lastIndexOf("/", firstEL);
            dirPathTemplate = dirPathTemplate.substring(0, lastDir);
        }
        dirPathTemplate = (dirPathTemplate.isEmpty()) ? "/" : dirPathTemplate;
        try {
            Path dir = new Path(dirPathTemplate);
            FileSystem fs = getFileSystemForInitDestroy();
            if (!fs.exists(dir)) {
                try {
                    if (fs.mkdirs(dir)) {
                        ok = true;
                    } else {
                        issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), configName,
                            Errors.HADOOPFS_41));
                        ok = false;
                    }
                } catch (IOException ex) {
                    issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), configName,
                        Errors.HADOOPFS_42, ex.toString()));
                    ok = false;
                }
            } else {
                try {
                    Path dummy = new Path(dir, "_sdc-dummy-" + UUID.randomUUID().toString());
                    fs.create(dummy).close();
                    fs.delete(dummy, false);
                    ok = true;
                } catch (IOException ex) {
                    issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), configName,
                        Errors.HADOOPFS_43, ex.toString()));
                    ok = false;
                }
            }
        } catch (Exception ex) {
            LOG.info("Validation Error: " + Errors.HADOOPFS_44.getMessage(), ex.toString(), ex);
            issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), configName, Errors.HADOOPFS_44,
                ex.toString()));
            ok = false;
        }
    }
    return ok;
}
From source file:com.streamsets.pipeline.stage.destination.hdfs.HdfsTargetConfigBean.java
License:Apache License
private boolean validateHadoopDir(final Stage.Context context, final String configName,
        final String configGroup, String dirPathTemplate, final List<Stage.ConfigIssue> issues) {
    final AtomicBoolean ok = new AtomicBoolean(true);
    if (!dirPathTemplate.startsWith("/")) {
        issues.add(context.createConfigIssue(configGroup, configName, Errors.HADOOPFS_40));
        ok.set(false);
    } else {
        dirPathTemplate = (dirPathTemplate.isEmpty()) ? "/" : dirPathTemplate;
        try {
            final Path dir = new Path(dirPathTemplate);
            final FileSystem fs = getFileSystemForInitDestroy();
            getUGI().doAs(new PrivilegedExceptionAction<Void>() {
                @Override
                public Void run() throws Exception {
                    if (!fs.exists(dir)) {
                        try {
                            if (fs.mkdirs(dir)) {
                                ok.set(true);
                            } else {
                                issues.add(context.createConfigIssue(configGroup, configName,
                                    Errors.HADOOPFS_41));
                                ok.set(false);
                            }
                        } catch (IOException ex) {
                            issues.add(context.createConfigIssue(configGroup, configName, Errors.HADOOPFS_42,
                                ex.toString()));
                            ok.set(false);
                        }
                    } else {
                        try {
                            Path dummy = new Path(dir, "_sdc-dummy-" + UUID.randomUUID().toString());
                            fs.create(dummy).close();
                            fs.delete(dummy, false);
                            ok.set(true);
                        } catch (IOException ex) {
                            issues.add(context.createConfigIssue(configGroup, configName, Errors.HADOOPFS_43,
                                ex.toString()));
                            ok.set(false);
                        }
                    }
                    return null;
                }
            });
        } catch (Exception ex) {
            issues.add(context.createConfigIssue(configGroup, configName, Errors.HADOOPFS_44, ex.toString()));
            ok.set(false);
        }
    }
    return ok.get();
}
From source file:com.streamsets.pipeline.stage.destination.hdfs.metadataexecutor.HdfsMetadataExecutor.java
License:Apache License
/**
 * Ensure that the given directory exists.
 *
 * Creates the directory if it doesn't exist. No-op if it does.
 */
private void ensureDirectoryExists(FileSystem fs, Path path) throws IOException {
    if (!fs.exists(path)) {
        LOG.debug("Creating directory: {}", path);
        if (!fs.mkdirs(path)) {
            throw new IOException("Can't create directory: " + path);
        }
    }
}
From source file:com.streamsets.pipeline.stage.destination.hdfs.metadataxecutor.HdfsMetadataExecutor.java
License:Apache License
@Override
public void write(Batch batch) throws StageException {
    final ELVars variables = getContext().createELVars();
    final FileSystem fs = hdfsConnection.getFs();

    Iterator<Record> it = batch.getRecords();
    while (it.hasNext()) {
        Record record = it.next();
        RecordEL.setRecordInContext(variables, record);

        // Execute all configured HDFS metadata operations as target user
        try {
            hdfsConnection.getUGI().doAs(new PrivilegedExceptionAction<Void>() {
                @Override
                public Void run() throws Exception {
                    Path workingFile = new Path(evaluate(variables, "filePath", actions.filePath));
                    LOG.info("Working on file: " + workingFile);

                    if (actions.shouldMoveFile) {
                        Path destinationFile = new Path(evaluate(variables, "newLocation", actions.newLocation));

                        Path destinationParent = destinationFile.getParent();
                        if (!fs.exists(destinationParent)) {
                            LOG.debug("Creating parent directory for destination file: {}", destinationParent);
                            if (!fs.mkdirs(destinationParent)) {
                                throw new IOException("Can't create directory: " + destinationParent);
                            }
                        }

                        LOG.debug("Renaming to: {}", destinationFile);
                        if (!fs.rename(workingFile, destinationFile)) {
                            throw new IOException("Can't rename file to: " + destinationFile);
                        }
                        workingFile = destinationFile;
                    }

                    if (actions.shouldChangeOwnership) {
                        String newOwner = evaluate(variables, "newOwner", actions.newOwner);
                        String newGroup = evaluate(variables, "newGroup", actions.newGroup);
                        LOG.debug("Applying ownership: user={} and group={}", newOwner, newGroup);
                        fs.setOwner(workingFile, newOwner, newGroup);
                    }

                    if (actions.shouldSetPermissions) {
                        String stringPerms = evaluate(variables, "newPermissions", actions.newPermissions);
                        FsPermission fsPerms = new FsPermission(stringPerms);
                        LOG.debug("Applying permissions: {} loaded from value '{}'", fsPerms, stringPerms);
                        fs.setPermission(workingFile, fsPerms);
                    }

                    if (actions.shouldSetAcls) {
                        String stringAcls = evaluate(variables, "newAcls", actions.newAcls);
                        List<AclEntry> acls = AclEntry.parseAclSpec(stringAcls, true);
                        LOG.debug("Applying ACLs: {}", stringAcls);
                        fs.setAcl(workingFile, acls);
                    }

                    // Issue event with the final file name (e.g. the renamed one if applicable)
                    EventRecord event = getContext().createEventRecord("file-changed", 1);
                    event.set(Field.create(Field.Type.MAP, new ImmutableMap.Builder<String, Field>()
                        .put("filepath", Field.create(Field.Type.STRING, workingFile.toString()))
                        .build()));
                    getContext().toEvent(event);

                    LOG.debug("Done changing metadata on file: {}", workingFile);
                    return null;
                }
            });
        } catch (Exception e) {
            LOG.error("Failure when applying metadata changes to HDFS", e);
            errorRecordHandler.onError(
                new OnRecordErrorException(record, HdfsMetadataErrors.HDFS_METADATA_000, e.getMessage()));
        }
    }
}
From source file:com.streamsets.pipeline.stage.destination.mapreduce.jobtype.avroconvert.AvroConversionBaseMapper.java
License:Apache License
@Override
protected void map(String input, String output, Context context) throws IOException, InterruptedException {
    FileSystem fs = FileSystem.get(context.getConfiguration());
    Configuration conf = context.getConfiguration();

    LOG.info("Converting input file: {}", input);
    LOG.info("Output directory: {}", output);
    Path inputPath = new Path(input);
    Path outputDir = new Path(output);
    fs.mkdirs(outputDir);

    Path tempFile = new Path(outputDir, getTempFilePrefix() + inputPath.getName());
    if (fs.exists(tempFile)) {
        if (conf.getBoolean(AvroConversionCommonConstants.OVERWRITE_TMP_FILE, false)) {
            fs.delete(tempFile, true);
        } else {
            throw new IOException("Temporary file " + tempFile + " already exists.");
        }
    }
    LOG.info("Using temp file: {}", tempFile);

    // Output file is the same as the input except for dropping the .avro extension if it exists
    // and appending .parquet or .orc
    String outputFileName = inputPath.getName().replaceAll("\\.avro$", "") + getOutputFileSuffix();
    Path finalFile = new Path(outputDir, outputFileName);
    LOG.info("Final path will be: {}", finalFile);

    // Avro reader
    SeekableInput seekableInput = new FsInput(inputPath, conf);
    DatumReader<GenericRecord> reader = new GenericDatumReader<>();
    FileReader<GenericRecord> fileReader = DataFileReader.openReader(seekableInput, reader);
    Schema avroSchema = fileReader.getSchema();

    initializeWriter(tempFile, avroSchema, conf, context);

    LOG.info("Started reading input file");
    long recordCount = 0;
    try {
        while (fileReader.hasNext()) {
            GenericRecord record = fileReader.next();
            handleAvroRecord(record);

            context.getCounter(Counters.PROCESSED_RECORDS).increment(1);
            recordCount++;
        }
    } catch (Exception e) {
        // Various random stuff can happen while converting, so we wrap the underlying exception with more details
        String message = String.format("Exception at offset %d (record %d): %s", fileReader.tell(), recordCount,
            e.toString());
        throw new IOException(message, e);
    }
    LOG.info("Done reading input file");
    closeWriter();

    LOG.info("Moving temporary file {} to final destination {}", tempFile, finalFile);
    fs.rename(tempFile, finalFile);

    if (!context.getConfiguration().getBoolean(AvroConversionCommonConstants.KEEP_INPUT_FILE, false)) {
        LOG.info("Removing input file: {}", inputPath);
        fs.delete(inputPath, true);
    }

    LOG.info("Done converting input file into output directory {}", output);
}
From source file:com.streamsets.pipeline.stage.lib.hive.HiveMetastoreUtil.java
License:Apache License
/**
 * Returns the HDFS path where the Avro schema is stored after serializing.
 * The path is suffixed with the current time so that the schema files have an ordering.
 * @param rootTableLocation Root Table Location
 * @return HDFS path string.
 */
public static String serializeSchemaToHDFS(UserGroupInformation loginUGI, final FileSystem fs,
        final String rootTableLocation, final String schemaJson) throws StageException {
    final String folderPath = rootTableLocation + HiveMetastoreUtil.SEP
        + HiveMetastoreUtil.HDFS_SCHEMA_FOLDER_NAME;
    final Path schemasFolderPath = new Path(folderPath);
    final String path = folderPath + SEP + HiveMetastoreUtil.AVRO_SCHEMA
        + DateFormatUtils.format(new Date(System.currentTimeMillis()), "yyyy-MM-dd--HH_mm_ss");
    try {
        loginUGI.doAs(new PrivilegedExceptionAction<Void>() {
            @Override
            public Void run() throws Exception {
                if (!fs.exists(schemasFolderPath)) {
                    fs.mkdirs(schemasFolderPath);
                }
                Path schemaFilePath = new Path(path);
                // This will never happen unless two HMS targets are writing; we will error out for this
                // and let the user handle it via error record handling.
                if (!fs.exists(schemaFilePath)) {
                    try (FSDataOutputStream os = fs.create(schemaFilePath)) {
                        os.writeChars(schemaJson);
                    }
                } else {
                    LOG.error(Utils.format("Schema file {} already exists in HDFS", path));
                    throw new IOException("Schema file already exists");
                }
                return null;
            }
        });
    } catch (Exception e) {
        LOG.error("Error in writing schema to HDFS: " + e.toString(), e);
        throw new StageException(Errors.HIVE_18, path, e.getMessage());
    }
    return path;
}
From source file:com.streamsets.pipeline.stage.origin.hdfs.cluster.ClusterHDFSSourceIT.java
License:Apache License
@BeforeClass
public static void setUpBeforeClass() throws Exception {
    minidfsDir = new File("target/minidfs-" + UUID.randomUUID()).getAbsoluteFile();
    minidfsDir.mkdirs();
    Assert.assertTrue(minidfsDir.exists());
    System.setProperty(MiniDFSCluster.PROP_TEST_BUILD_DATA, minidfsDir.getPath());
    Configuration conf = new HdfsConfiguration();
    conf.set("dfs.namenode.fs-limits.min-block-size", String.valueOf(32));
    EditLogFileOutputStream.setShouldSkipFsyncForTesting(true);
    miniDFS = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
    dir = new Path(miniDFS.getURI() + "/dir");
    FileSystem fs = miniDFS.getFileSystem();
    fs.mkdirs(dir);
    writeFile(fs, new Path(dir + "/forAllTests/" + "path"), 1000);
    dummyEtc = new File(minidfsDir, "dummy-etc");
    dummyEtc.mkdirs();
    Assert.assertTrue(dummyEtc.exists());
    Configuration dummyConf = new Configuration(false);
    for (String file : new String[] { "core", "hdfs", "mapred", "yarn" }) {
        File siteXml = new File(dummyEtc, file + "-site.xml");
        FileOutputStream out = new FileOutputStream(siteXml);
        dummyConf.writeXml(out);
        out.close();
    }
    resourcesDir = minidfsDir.getAbsolutePath();
    hadoopConfDir = dummyEtc.getName();
    System.setProperty("sdc.resources.dir", resourcesDir);
}