List of usage examples for org.apache.hadoop.fs.FileSystem.mkdirs
public boolean mkdirs(Path f) throws IOException
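Before the project-specific examples below, here is a minimal standalone sketch of the call itself (not taken from any of the listed projects); the default Configuration and the /tmp/example/output path are arbitrary illustration choices.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;

public class MkdirsExample {
    public static void main(String[] args) throws IOException {
        // Uses whatever default file system the loaded *-site.xml files point at
        // (the local file system if nothing is configured).
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // mkdirs creates the directory and any missing parents, like `mkdir -p`.
        // It returns true on success, and is a no-op that still returns true if
        // the directory already exists.
        Path dir = new Path("/tmp/example/output"); // arbitrary example path
        if (fs.mkdirs(dir)) {
            System.out.println("Created (or already present): " + dir);
        }

        // An overload also accepts explicit permissions for the new directory.
        fs.mkdirs(new Path(dir, "restricted"), new FsPermission("700"));

        System.out.println("Exists: " + fs.exists(dir));
    }
}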
From source file:com.splunk.shuttl.archiver.filesystem.hadoop.HadoopArchiveFileSystemSlowTest.java
License:Apache License
@Test(groups = { "end-to-end" }) @Parameters(value = { "hadoop.host", "hadoop.port" }) public void rename_dirWithMultipleLevelsOfNonExistingFiles_renamesDirectory(String hadoopHost, String hadoopPort) throws IOException { FileSystem hadoopFileSystem = TUtilsFunctional.getHadoopFileSystem(hadoopHost, hadoopPort); String simpleClassName = getClass().getSimpleName(); Path path = new Path(simpleClassName + "/1/foo/dir/").makeQualified(hadoopFileSystem); Path otherRoot = new Path(simpleClassName + "/2/foo/dir").makeQualified(hadoopFileSystem); HadoopArchiveFileSystem realFileStructure = new HadoopArchiveFileSystem(hadoopFileSystem); try {/* w w w . j a v a2s . c o m*/ hadoopFileSystem.mkdirs(path); assertTrue(hadoopFileSystem.exists(path)); hadoopFileSystem.delete(otherRoot, true); assertFalse(hadoopFileSystem.exists(otherRoot)); // Test realFileStructure.rename(path.toUri().getPath(), otherRoot.toUri().getPath()); assertTrue(hadoopFileSystem.exists(otherRoot)); assertFalse(hadoopFileSystem.exists(path)); } finally { hadoopFileSystem.delete(new Path("/1"), true); hadoopFileSystem.delete(new Path("/2"), true); } }
From source file:com.streamsets.pipeline.spark.SparkStreamingBinding.java
License:Apache License
@Override
public void init() throws Exception {
    for (Object key : properties.keySet()) {
        logMessage("Property => " + key + " => " + properties.getProperty(key.toString()), isRunningInMesos);
    }
    final SparkConf conf = new SparkConf().setAppName("StreamSets Data Collector - Streaming Mode");
    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
    final String topic = getProperty(TOPIC);
    final long duration;
    String durationAsString = getProperty(MAX_WAIT_TIME);
    try {
        duration = Long.parseLong(durationAsString);
    } catch (NumberFormatException ex) {
        String msg = "Invalid " + MAX_WAIT_TIME + " '" + durationAsString + "' : " + ex;
        throw new IllegalArgumentException(msg, ex);
    }

    Configuration hadoopConf = new SparkHadoopUtil().newConfiguration(conf);
    if (isRunningInMesos) {
        hadoopConf = getHadoopConf(hadoopConf);
    } else {
        hadoopConf = new Configuration();
    }
    URI hdfsURI = FileSystem.getDefaultUri(hadoopConf);
    logMessage("Default FS URI: " + hdfsURI, isRunningInMesos);
    FileSystem hdfs = (new Path(hdfsURI)).getFileSystem(hadoopConf);
    Path sdcCheckpointPath = new Path(hdfs.getHomeDirectory(),
        ".streamsets-spark-streaming/" + getProperty("sdc.id") + "/" + encode(topic));
    // encode as remote pipeline name might have a colon within it
    String pipelineName = encode(getProperty("cluster.pipeline.name"));
    final Path checkPointPath = new Path(sdcCheckpointPath, pipelineName);
    hdfs.mkdirs(checkPointPath);
    if (!hdfs.isDirectory(checkPointPath)) {
        throw new IllegalStateException("Could not create checkpoint path: " + sdcCheckpointPath);
    }
    if (isRunningInMesos) {
        String scheme = hdfsURI.getScheme();
        if (scheme.equals("hdfs")) {
            File mesosBootstrapFile = BootstrapCluster.getMesosBootstrapFile();
            Path mesosBootstrapPath = new Path(checkPointPath, mesosBootstrapFile.getName());
            // in case of hdfs, copy the jar file from the local path to hdfs
            hdfs.copyFromLocalFile(false, true, new Path(mesosBootstrapFile.toURI()), mesosBootstrapPath);
            conf.setJars(new String[] { mesosBootstrapPath.toString() });
        } else if (scheme.equals("s3") || scheme.equals("s3n") || scheme.equals("s3a")) {
            // we can't upload the jar to s3 as executors won't understand the s3 scheme without the aws jar,
            // so have the jar available on http
            conf.setJars(new String[] { getProperty("mesos.jar.url") });
        } else {
            throw new IllegalStateException("Unsupported scheme: " + scheme);
        }
    }
    JavaStreamingContextFactory javaStreamingContextFactory = new JavaStreamingContextFactoryImpl(conf,
        duration, checkPointPath.toString(), getProperty(METADATA_BROKER_LIST), topic,
        properties.getProperty(AUTO_OFFSET_RESET, "").trim(), isRunningInMesos);

    ssc = JavaStreamingContext.getOrCreate(checkPointPath.toString(), hadoopConf, javaStreamingContextFactory,
        true);
    // mesos tries to stop the context internally, so don't do it here - deadlock bug in spark
    if (!isRunningInMesos) {
        final Thread shutdownHookThread = new Thread("Spark.shutdownHook") {
            @Override
            public void run() {
                LOG.debug("Gracefully stopping Spark Streaming Application");
                ssc.stop(true, true);
                LOG.info("Application stopped");
            }
        };
        Runtime.getRuntime().addShutdownHook(shutdownHookThread);
    }
    logMessage("Making calls through spark context ", isRunningInMesos);
    ssc.start();
}
From source file:com.streamsets.pipeline.stage.destination.hdfs.BaseHdfsTargetIT.java
License:Apache License
@Test
public void testDirValidity() throws Exception {
    // valid dirs
    testDir("/foo", "/foo", true);
    testDir("/foo/${YY()}", "/foo/bar-${YY()}", true);

    // non absolute dir
    testDir("foo", "/foo", false);
    testDir("/foo", "foo", false);

    FileSystem fs = miniDFS.getFileSystem();
    fs.mkdirs(new Path("/bar"));

    // no permissions
    testDir("/bar/foo", "/foo", false);
    testDir("/foo", "/bar/foo", false);
    testDir("/bar/foo/${YY()}", "/foo/${YY()}", false);
    testDir("/foo/${YY()}", "/bar/foo/${YY()}", false);
}
From source file:com.streamsets.pipeline.stage.destination.hdfs.HdfsTarget.java
License:Apache License
boolean validateHadoopDir(String configName, String dirPathTemplate, List<ConfigIssue> issues) {
    boolean ok;
    if (!dirPathTemplate.startsWith("/")) {
        issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), configName, Errors.HADOOPFS_40));
        ok = false;
    } else {
        int firstEL = dirPathTemplate.indexOf("$");
        if (firstEL > -1) {
            int lastDir = dirPathTemplate.lastIndexOf("/", firstEL);
            dirPathTemplate = dirPathTemplate.substring(0, lastDir);
        }
        dirPathTemplate = (dirPathTemplate.isEmpty()) ? "/" : dirPathTemplate;
        try {
            Path dir = new Path(dirPathTemplate);
            FileSystem fs = getFileSystemForInitDestroy();
            if (!fs.exists(dir)) {
                try {
                    if (fs.mkdirs(dir)) {
                        ok = true;
                    } else {
                        issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), configName,
                            Errors.HADOOPFS_41));
                        ok = false;
                    }
                } catch (IOException ex) {
                    issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), configName,
                        Errors.HADOOPFS_42, ex.toString()));
                    ok = false;
                }
            } else {
                try {
                    Path dummy = new Path(dir, "_sdc-dummy-" + UUID.randomUUID().toString());
                    fs.create(dummy).close();
                    fs.delete(dummy, false);
                    ok = true;
                } catch (IOException ex) {
                    issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), configName,
                        Errors.HADOOPFS_43, ex.toString()));
                    ok = false;
                }
            }
        } catch (Exception ex) {
            LOG.info("Validation Error: " + Errors.HADOOPFS_44.getMessage(), ex.toString(), ex);
            issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), configName, Errors.HADOOPFS_44,
                ex.toString()));
            ok = false;
        }
    }
    return ok;
}
From source file:com.streamsets.pipeline.stage.destination.hdfs.HdfsTargetConfigBean.java
License:Apache License
private boolean validateHadoopDir(final Stage.Context context, final String configName,
        final String configGroup, String dirPathTemplate, final List<Stage.ConfigIssue> issues) {
    final AtomicBoolean ok = new AtomicBoolean(true);
    if (!dirPathTemplate.startsWith("/")) {
        issues.add(context.createConfigIssue(configGroup, configName, Errors.HADOOPFS_40));
        ok.set(false);
    } else {
        dirPathTemplate = (dirPathTemplate.isEmpty()) ? "/" : dirPathTemplate;
        try {
            final Path dir = new Path(dirPathTemplate);
            final FileSystem fs = getFileSystemForInitDestroy();
            getUGI().doAs(new PrivilegedExceptionAction<Void>() {
                @Override
                public Void run() throws Exception {
                    if (!fs.exists(dir)) {
                        try {
                            if (fs.mkdirs(dir)) {
                                ok.set(true);
                            } else {
                                issues.add(context.createConfigIssue(configGroup, configName,
                                    Errors.HADOOPFS_41));
                                ok.set(false);
                            }
                        } catch (IOException ex) {
                            issues.add(context.createConfigIssue(configGroup, configName, Errors.HADOOPFS_42,
                                ex.toString()));
                            ok.set(false);
                        }
                    } else {
                        try {
                            Path dummy = new Path(dir, "_sdc-dummy-" + UUID.randomUUID().toString());
                            fs.create(dummy).close();
                            fs.delete(dummy, false);
                            ok.set(true);
                        } catch (IOException ex) {
                            issues.add(context.createConfigIssue(configGroup, configName, Errors.HADOOPFS_43,
                                ex.toString()));
                            ok.set(false);
                        }
                    }
                    return null;
                }
            });
        } catch (Exception ex) {
            issues.add(context.createConfigIssue(configGroup, configName, Errors.HADOOPFS_44, ex.toString()));
            ok.set(false);
        }
    }
    return ok.get();
}
From source file:com.streamsets.pipeline.stage.destination.hdfs.metadataexecutor.HdfsMetadataExecutor.java
License:Apache License
/**
 * Ensure that the given directory exists.
 *
 * Creates the directory if it doesn't exist. No-op if it does.
 */
private void ensureDirectoryExists(FileSystem fs, Path path) throws IOException {
    if (!fs.exists(path)) {
        LOG.debug("Creating directory: {}", path);
        if (!fs.mkdirs(path)) {
            throw new IOException("Can't create directory: " + path);
        }
    }
}
From source file:com.streamsets.pipeline.stage.destination.hdfs.metadataxecutor.HdfsMetadataExecutor.java
License:Apache License
@Override
public void write(Batch batch) throws StageException {
    final ELVars variables = getContext().createELVars();
    final FileSystem fs = hdfsConnection.getFs();

    Iterator<Record> it = batch.getRecords();
    while (it.hasNext()) {
        Record record = it.next();
        RecordEL.setRecordInContext(variables, record);

        // Execute all configured HDFS metadata operations as target user
        try {
            hdfsConnection.getUGI().doAs(new PrivilegedExceptionAction<Void>() {
                @Override
                public Void run() throws Exception {
                    Path workingFile = new Path(evaluate(variables, "filePath", actions.filePath));
                    LOG.info("Working on file: " + workingFile);

                    if (actions.shouldMoveFile) {
                        Path destinationFile = new Path(evaluate(variables, "newLocation", actions.newLocation));

                        Path destinationParent = destinationFile.getParent();
                        if (!fs.exists(destinationParent)) {
                            LOG.debug("Creating parent directory for destination file: {}", destinationParent);
                            if (!fs.mkdirs(destinationParent)) {
                                throw new IOException("Can't create directory: " + destinationParent);
                            }
                        }

                        LOG.debug("Renaming to: {}", destinationFile);
                        if (!fs.rename(workingFile, destinationFile)) {
                            throw new IOException("Can't rename file to: " + destinationFile);
                        }
                        workingFile = destinationFile;
                    }

                    if (actions.shouldChangeOwnership) {
                        String newOwner = evaluate(variables, "newOwner", actions.newOwner);
                        String newGroup = evaluate(variables, "newGroup", actions.newGroup);
                        LOG.debug("Applying ownership: user={} and group={}", newOwner, newGroup);
                        fs.setOwner(workingFile, newOwner, newGroup);
                    }

                    if (actions.shouldSetPermissions) {
                        String stringPerms = evaluate(variables, "newPermissions", actions.newPermissions);
                        FsPermission fsPerms = new FsPermission(stringPerms);
                        LOG.debug("Applying permissions: {} loaded from value '{}'", fsPerms, stringPerms);
                        fs.setPermission(workingFile, fsPerms);
                    }

                    if (actions.shouldSetAcls) {
                        String stringAcls = evaluate(variables, "newAcls", actions.newAcls);
                        List<AclEntry> acls = AclEntry.parseAclSpec(stringAcls, true);
                        LOG.debug("Applying ACLs: {}", stringAcls);
                        fs.setAcl(workingFile, acls);
                    }

                    // Issue event with the final file name (e.g. the renamed one if applicable)
                    EventRecord event = getContext().createEventRecord("file-changed", 1);
                    event.set(Field.create(Field.Type.MAP, new ImmutableMap.Builder<String, Field>()
                        .put("filepath", Field.create(Field.Type.STRING, workingFile.toString()))
                        .build()));
                    getContext().toEvent(event);

                    LOG.debug("Done changing metadata on file: {}", workingFile);
                    return null;
                }
            });
        } catch (Exception e) {
            LOG.error("Failure when applying metadata changes to HDFS", e);
            errorRecordHandler.onError(
                new OnRecordErrorException(record, HdfsMetadataErrors.HDFS_METADATA_000, e.getMessage()));
        }
    }
}
From source file:com.streamsets.pipeline.stage.destination.mapreduce.jobtype.avroconvert.AvroConversionBaseMapper.java
License:Apache License
@Override
protected void map(String input, String output, Context context) throws IOException, InterruptedException {
    FileSystem fs = FileSystem.get(context.getConfiguration());
    Configuration conf = context.getConfiguration();

    LOG.info("Converting input file: {}", input);
    LOG.info("Output directory: {}", output);
    Path inputPath = new Path(input);
    Path outputDir = new Path(output);
    fs.mkdirs(outputDir);

    Path tempFile = new Path(outputDir, getTempFilePrefix() + inputPath.getName());
    if (fs.exists(tempFile)) {
        if (conf.getBoolean(AvroConversionCommonConstants.OVERWRITE_TMP_FILE, false)) {
            fs.delete(tempFile, true);
        } else {
            throw new IOException("Temporary file " + tempFile + " already exists.");
        }
    }
    LOG.info("Using temp file: {}", tempFile);

    // Output file is the same as the input except for dropping the .avro extension if it exists
    // and appending .parquet or .orc
    String outputFileName = inputPath.getName().replaceAll("\\.avro$", "") + getOutputFileSuffix();
    Path finalFile = new Path(outputDir, outputFileName);
    LOG.info("Final path will be: {}", finalFile);

    // Avro reader
    SeekableInput seekableInput = new FsInput(inputPath, conf);
    DatumReader<GenericRecord> reader = new GenericDatumReader<>();
    FileReader<GenericRecord> fileReader = DataFileReader.openReader(seekableInput, reader);
    Schema avroSchema = fileReader.getSchema();

    initializeWriter(tempFile, avroSchema, conf, context);

    LOG.info("Started reading input file");
    long recordCount = 0;
    try {
        while (fileReader.hasNext()) {
            GenericRecord record = fileReader.next();
            handleAvroRecord(record);

            context.getCounter(Counters.PROCESSED_RECORDS).increment(1);
            recordCount++;
        }
    } catch (Exception e) {
        // Various random stuff can happen while converting, so we wrap the underlying exception with more details
        String message = String.format("Exception at offset %d (record %d): %s", fileReader.tell(), recordCount,
            e.toString());
        throw new IOException(message, e);
    }
    LOG.info("Done reading input file");
    closeWriter();

    LOG.info("Moving temporary file {} to final destination {}", tempFile, finalFile);
    fs.rename(tempFile, finalFile);

    if (!context.getConfiguration().getBoolean(AvroConversionCommonConstants.KEEP_INPUT_FILE, false)) {
        LOG.info("Removing input file: {}", inputPath);
        fs.delete(inputPath, true);
    }

    LOG.info("Done converting input file into output directory {}", output);
}
From source file:com.streamsets.pipeline.stage.lib.hive.HiveMetastoreUtil.java
License:Apache License
/**
 * Returns the HDFS path where the Avro schema is stored after serializing.
 * The path is suffixed with the current time so that the schema files have an ordering.
 * @param rootTableLocation Root Table Location
 * @return HDFS path string.
 */
public static String serializeSchemaToHDFS(UserGroupInformation loginUGI, final FileSystem fs,
        final String rootTableLocation, final String schemaJson) throws StageException {
    final String folderPath = rootTableLocation + HiveMetastoreUtil.SEP
        + HiveMetastoreUtil.HDFS_SCHEMA_FOLDER_NAME;
    final Path schemasFolderPath = new Path(folderPath);
    final String path = folderPath + SEP + HiveMetastoreUtil.AVRO_SCHEMA
        + DateFormatUtils.format(new Date(System.currentTimeMillis()), "yyyy-MM-dd--HH_mm_ss");
    try {
        loginUGI.doAs(new PrivilegedExceptionAction<Void>() {
            @Override
            public Void run() throws Exception {
                if (!fs.exists(schemasFolderPath)) {
                    fs.mkdirs(schemasFolderPath);
                }
                Path schemaFilePath = new Path(path);
                // This will never happen unless two HMS targets are writing; we will error out for this
                // and let the user handle it via error record handling.
                if (!fs.exists(schemaFilePath)) {
                    try (FSDataOutputStream os = fs.create(schemaFilePath)) {
                        os.writeChars(schemaJson);
                    }
                } else {
                    LOG.error(Utils.format("Schema file {} already exists in HDFS", path));
                    throw new IOException("Schema file already exists");
                }
                return null;
            }
        });
    } catch (Exception e) {
        LOG.error("Error in writing schema to HDFS: " + e.toString(), e);
        throw new StageException(Errors.HIVE_18, path, e.getMessage());
    }
    return path;
}
From source file:com.streamsets.pipeline.stage.origin.hdfs.cluster.ClusterHDFSSourceIT.java
License:Apache License
@BeforeClass
public static void setUpBeforeClass() throws Exception {
    minidfsDir = new File("target/minidfs-" + UUID.randomUUID()).getAbsoluteFile();
    minidfsDir.mkdirs();
    Assert.assertTrue(minidfsDir.exists());
    System.setProperty(MiniDFSCluster.PROP_TEST_BUILD_DATA, minidfsDir.getPath());
    Configuration conf = new HdfsConfiguration();
    conf.set("dfs.namenode.fs-limits.min-block-size", String.valueOf(32));
    EditLogFileOutputStream.setShouldSkipFsyncForTesting(true);
    miniDFS = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
    dir = new Path(miniDFS.getURI() + "/dir");
    FileSystem fs = miniDFS.getFileSystem();
    fs.mkdirs(dir);
    writeFile(fs, new Path(dir + "/forAllTests/" + "path"), 1000);
    dummyEtc = new File(minidfsDir, "dummy-etc");
    dummyEtc.mkdirs();
    Assert.assertTrue(dummyEtc.exists());
    Configuration dummyConf = new Configuration(false);
    for (String file : new String[] { "core", "hdfs", "mapred", "yarn" }) {
        File siteXml = new File(dummyEtc, file + "-site.xml");
        FileOutputStream out = new FileOutputStream(siteXml);
        dummyConf.writeXml(out);
        out.close();
    }
    resourcesDir = minidfsDir.getAbsolutePath();
    hadoopConfDir = dummyEtc.getName();
    System.setProperty("sdc.resources.dir", resourcesDir);
}