List of usage examples for org.apache.hadoop.fs.Path getName()
public String getName()
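Path.getName() returns only the final component of the path, with any parent directories and URI scheme stripped. A minimal standalone sketch (not taken from the projects below) showing the return value; the host and file names are made up for illustration:

import org.apache.hadoop.fs.Path;

public class PathGetNameExample {
    public static void main(String[] args) {
        Path path = new Path("hdfs://namenode:8020/user/data/events.avro");
        System.out.println(path.getName());             // events.avro  (final component only)
        System.out.println(path.getParent().getName()); // data
    }
}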
From source file:com.splunk.shuttl.prototype.symlink.BucketBlockSymlinkPrototypeTest.java
License:Apache License
private void handleRawdataDirectory(FileStatus fs, File thawBucket) throws IOException {
    Path rawdataOnHadoop = fs.getPath();
    String rawdataName = rawdataOnHadoop.getName();
    assertEquals("rawdata", rawdataName);
    File rawdataInThaw = new File(thawBucket, rawdataName);
    assertTrue(rawdataInThaw.mkdirs()); // Create the rawdata directory
    FileStatus[] lsInRawdata = hadoopFileSystem.listStatus(fs.getPath());
    for (FileStatus fs2 : lsInRawdata) {
        if (fs2.isDir())
            throw new IllegalStateException("Cannot be another dir in the rawdata dir");
        else
            createSymlinkToPathInDir(fs2.getPath(), rawdataInThaw);
    }
}
From source file:com.splunk.shuttl.prototype.symlink.BucketBlockSymlinkPrototypeTest.java
License:Apache License
private void createSymlinkToPathInDir(Path path, File dir) throws IOException {
    File fileInDir = new File(dir, path.getName());
    DistributedFileSystem dfs = (DistributedFileSystem) hadoopFileSystem;
    ClientProtocol namenode = dfs.getClient().namenode;
    String pathOnHadoop = path.toUri().getPath();
    LocatedBlocks blockLocations = namenode.getBlockLocations(pathOnHadoop, 0, Long.MAX_VALUE);
    List<LocatedBlock> locatedBlocks = blockLocations.getLocatedBlocks();
    if (!locatedBlocks.isEmpty()) {
        doSymlinkPathInDir(fileInDir, blockLocations, locatedBlocks);
    } else {
        // Files without any blocks are empty files. Just create them.
        assertTrue(fileInDir.createNewFile());
    }
}
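Both test helpers above reuse the HDFS file's name when creating the matching local file: new File(dir, path.getName()). A condensed, hypothetical sketch of that mapping (the method and argument names are illustrative, not from the original test):

// Mirror an HDFS file into a local directory by reusing only the final
// name component of its Path.
static File localCopyTarget(Path hdfsPath, File localDir) {
    return new File(localDir, hdfsPath.getName()); // e.g. localDir + "/journal.gz"
}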
From source file:com.streamsets.pipeline.stage.destination.hdfs.metadataexecutor.HdfsMetadataExecutor.java
License:Apache License
@Override
public void write(Batch batch) throws StageException {
    final ELVars variables = getContext().createELVars();
    final FileSystem fs = hdfsConnection.getFs();

    Iterator<Record> it = batch.getRecords();
    while (it.hasNext()) {
        Record record = it.next();
        RecordEL.setRecordInContext(variables, record);

        // Execute all configured HDFS metadata operations as the target user
        try {
            hdfsConnection.getUGI().doAs((PrivilegedExceptionAction<Void>) () -> {
                Path workingFile = new Path(evaluate(variables, "filePath", actions.filePath));
                LOG.info("Working on file: " + workingFile);

                // Create empty file if configured
                if (actions.taskType == TaskType.CREATE_EMPTY_FILE) {
                    ensureDirectoryExists(fs, workingFile.getParent());
                    if (!fs.createNewFile(workingFile)) {
                        throw new IOException("Can't create file (probably already exists): " + workingFile);
                    }
                }

                if (actions.taskType == TaskType.CHANGE_EXISTING_FILE && (actions.shouldMoveFile || actions.shouldRename)) {
                    Path newPath = workingFile.getParent();
                    String newName = workingFile.getName();
                    if (actions.shouldMoveFile) {
                        newPath = new Path(evaluate(variables, "newLocation", actions.newLocation));
                    }
                    if (actions.shouldRename) {
                        newName = evaluate(variables, "newName", actions.newName);
                    }

                    Path destinationFile = new Path(newPath, newName);
                    ensureDirectoryExists(fs, newPath);
                    LOG.debug("Renaming to: {}", destinationFile);
                    if (!fs.rename(workingFile, destinationFile)) {
                        throw new IOException(Utils.format("Can't rename '{}' to '{}'", workingFile, destinationFile));
                    }
                    workingFile = destinationFile;
                }

                if (actions.taskType.isOneOf(TaskType.CHANGE_EXISTING_FILE, TaskType.CREATE_EMPTY_FILE)) {
                    if (actions.shouldChangeOwnership) {
                        String newOwner = evaluate(variables, "newOwner", actions.newOwner);
                        String newGroup = evaluate(variables, "newGroup", actions.newGroup);
                        LOG.debug("Applying ownership: user={} and group={}", newOwner, newGroup);
                        fs.setOwner(workingFile, newOwner, newGroup);
                    }

                    if (actions.shouldSetPermissions) {
                        String stringPerms = evaluate(variables, "newPermissions", actions.newPermissions);
                        FsPermission fsPerms = HdfsUtils.parseFsPermission(stringPerms);
                        LOG.debug("Applying permissions: {} loaded from value '{}'", fsPerms, stringPerms);
                        fs.setPermission(workingFile, fsPerms);
                    }

                    if (actions.shouldSetAcls) {
                        String stringAcls = evaluate(variables, "newAcls", actions.newAcls);
                        List<AclEntry> acls = AclEntry.parseAclSpec(stringAcls, true);
                        LOG.debug("Applying ACLs: {}", stringAcls);
                        fs.setAcl(workingFile, acls);
                    }
                }

                if (actions.taskType == TaskType.REMOVE_FILE) {
                    fs.delete(workingFile, true);
                }

                // Issue event with the final file name (e.g. the renamed one if applicable)
                actions.taskType.getEventCreator().create(getContext())
                    .with("filepath", workingFile.toString())
                    .with("filename", workingFile.getName())
                    .createAndSend();

                LOG.debug("Done changing metadata on file: {}", workingFile);
                return null;
            });
        } catch (Throwable e) {
            // Hadoop libraries wrap any exception other than InterruptedException, RuntimeException,
            // Error or IOException in UndeclaredThrowableException, so unwrap it here and propagate
            // the real cause to the user.
            if (e instanceof UndeclaredThrowableException) {
                e = e.getCause();
            }
            LOG.error("Failure when applying metadata changes to HDFS", e);
            errorRecordHandler.onError(
                new OnRecordErrorException(record, HdfsMetadataErrors.HDFS_METADATA_000, e.getMessage()));
        }
    }
}
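The move/rename branch above combines getParent() and getName(): whichever part the user does not override is carried over from the working file. A condensed sketch of that pattern with hypothetical names (renameKeepingParent is not part of the original stage):

// Rename a file in place: keep getParent(), swap the final name component.
static Path renameKeepingParent(FileSystem fs, Path oldPath, String newName) throws IOException {
    Path newPath = new Path(oldPath.getParent(), newName);
    if (!fs.rename(oldPath, newPath)) {
        throw new IOException("Can't rename " + oldPath + " to " + newPath);
    }
    return newPath;
}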
From source file:com.streamsets.pipeline.stage.destination.hdfs.writer.TestRecordWriterManager.java
License:Apache License
private int getFinalFileNameCount(FileSystem fs, Path dir, final String prefix) throws IOException {
    return fs.listStatus(dir, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(prefix);
        }
    }).length;
}
From source file:com.streamsets.pipeline.stage.destination.hdfs.writer.WholeFileFormatFsHelper.java
License:Apache License
private Path getRenamablePath(FileSystem fs, Path tempPath) throws IOException, OnRecordErrorException {
    String finalFileName = tempPath.getName().replaceFirst(RecordWriterManager.TMP_FILE_PREFIX, "");
    Path finalPath = new Path(tempPath.getParent(), finalFileName);
    // Checks during rename.
    checkAndHandleWholeFileExistence(fs, finalPath);
    return finalPath;
}
From source file:com.streamsets.pipeline.stage.destination.mapreduce.jobtype.avroconvert.AvroConversionBaseMapper.java
License:Apache License
@Override
protected void map(String input, String output, Context context) throws IOException, InterruptedException {
    FileSystem fs = FileSystem.get(context.getConfiguration());
    Configuration conf = context.getConfiguration();

    LOG.info("Converting input file: {}", input);
    LOG.info("Output directory: {}", output);
    Path inputPath = new Path(input);
    Path outputDir = new Path(output);
    fs.mkdirs(outputDir);

    Path tempFile = new Path(outputDir, getTempFilePrefix() + inputPath.getName());
    if (fs.exists(tempFile)) {
        if (conf.getBoolean(AvroConversionCommonConstants.OVERWRITE_TMP_FILE, false)) {
            fs.delete(tempFile, true);
        } else {
            throw new IOException("Temporary file " + tempFile + " already exists.");
        }
    }
    LOG.info("Using temp file: {}", tempFile);

    // Output file is the same as the input except that the .avro extension (if present) is dropped
    // and .parquet or .orc is appended
    String outputFileName = inputPath.getName().replaceAll("\\.avro$", "") + getOutputFileSuffix();
    Path finalFile = new Path(outputDir, outputFileName);
    LOG.info("Final path will be: {}", finalFile);

    // Avro reader
    SeekableInput seekableInput = new FsInput(inputPath, conf);
    DatumReader<GenericRecord> reader = new GenericDatumReader<>();
    FileReader<GenericRecord> fileReader = DataFileReader.openReader(seekableInput, reader);
    Schema avroSchema = fileReader.getSchema();

    initializeWriter(tempFile, avroSchema, conf, context);

    LOG.info("Started reading input file");
    long recordCount = 0;
    try {
        while (fileReader.hasNext()) {
            GenericRecord record = fileReader.next();
            handleAvroRecord(record);

            context.getCounter(Counters.PROCESSED_RECORDS).increment(1);
            recordCount++;
        }
    } catch (Exception e) {
        // Many things can go wrong while converting, so wrap the underlying exception with more details
        String message = String.format("Exception at offset %d (record %d): %s", fileReader.tell(), recordCount, e.toString());
        throw new IOException(message, e);
    }
    LOG.info("Done reading input file");

    closeWriter();

    LOG.info("Moving temporary file {} to final destination {}", tempFile, finalFile);
    fs.rename(tempFile, finalFile);

    if (!context.getConfiguration().getBoolean(AvroConversionCommonConstants.KEEP_INPUT_FILE, false)) {
        LOG.info("Removing input file {}", inputPath);
        fs.delete(inputPath, true);
    }

    LOG.info("Done converting input file into output directory {}", output);
}
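The mapper derives the output file name purely from getName() of the input path: strip the .avro extension if present, append the target suffix, and place the result in the output directory. A minimal sketch of that name manipulation (the .parquet suffix and the helper name are assumptions for illustration):

// Derive an output path from an input path: same base name, .avro swapped for .parquet.
static Path parquetOutputPath(Path inputPath, Path outputDir) {
    String outputFileName = inputPath.getName().replaceAll("\\.avro$", "") + ".parquet";
    return new Path(outputDir, outputFileName);
}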
From source file:com.streamsets.pipeline.stage.origin.hdfs.spooler.HdfsFileSystem.java
License:Apache License
public void addFiles(WrappedFile dirFile, WrappedFile startingFile, List<WrappedFile> toProcess,
        boolean includeStartingFile, boolean useLastModified) throws IOException {
    final long scanTime = System.currentTimeMillis();

    PathFilter pathFilter = new PathFilter() {
        @Override
        public boolean accept(Path entry) {
            try {
                FileStatus fileStatus = fs.getFileStatus(entry);
                if (fileStatus.isDirectory()) {
                    return false;
                }

                if (!patternMatches(entry.getName())) {
                    return false;
                }

                HdfsFile hdfsFile = new HdfsFile(fs, entry);
                // SDC-3551: Pick up only files with mtime strictly less than scan time.
                if (fileStatus.getModificationTime() < scanTime) {
                    if (startingFile == null || startingFile.toString().isEmpty()) {
                        toProcess.add(hdfsFile);
                    } else {
                        int compares = compare(hdfsFile, startingFile, useLastModified);
                        if (includeStartingFile) {
                            if (compares >= 0) {
                                toProcess.add(hdfsFile);
                            }
                        } else {
                            if (compares > 0) {
                                toProcess.add(hdfsFile);
                            }
                        }
                    }
                }
            } catch (IOException ex) {
                LOG.error("Failed to open file {}", entry.toString());
            }
            return false;
        }
    };

    fs.globStatus(new Path(dirFile.getAbsolutePath(), "*"), pathFilter);
}
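This spooler class (here and in the two methods that follow) always filters on the final name component: a PathFilter whose accept() checks entry.getName() and collects matches as a side effect while returning false. A condensed, hypothetical sketch of that idiom, with a hard-coded .json match standing in for the configurable pattern:

// Collect *.json files directly under dir, matching on the final name component only.
static List<Path> listJsonFiles(FileSystem fs, Path dir) throws IOException {
    List<Path> matched = new ArrayList<>();
    fs.globStatus(new Path(dir, "*"), new PathFilter() {
        @Override
        public boolean accept(Path entry) {
            if (entry.getName().endsWith(".json")) {
                matched.add(entry);
            }
            return false; // matches are collected via the side effect, as in the examples above
        }
    });
    return matched;
}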
From source file:com.streamsets.pipeline.stage.origin.hdfs.spooler.HdfsFileSystem.java
License:Apache License
public void archiveFiles(WrappedFile archiveDirPath, List<WrappedFile> toProcess, long timeThreshold)
        throws IOException {
    PathFilter pathFilter = new PathFilter() {
        @Override
        public boolean accept(Path entry) {
            try {
                if (!patternMatches(entry.getName())) {
                    return false;
                }
                if (timeThreshold - fs.getFileStatus(entry).getModificationTime() > 0) {
                    toProcess.add(new HdfsFile(fs, entry));
                }
            } catch (IOException ex) {
                LOG.debug("Failed to open file {}", entry.toString());
            }
            return false;
        }
    };

    Path path = new Path(archiveDirPath.getAbsolutePath(), "*");
    fs.globStatus(path, pathFilter);
    if (processSubdirectories) {
        fs.globStatus(new Path(path, "*"), pathFilter);
    }
}
From source file:com.streamsets.pipeline.stage.origin.hdfs.spooler.HdfsFileSystem.java
License:Apache License
public void handleOldFiles(WrappedFile dirpath, WrappedFile startingFile, boolean useLastModified,
        List<WrappedFile> toProcess) throws IOException {
    PathFilter pathFilter = new PathFilter() {
        @Override
        public boolean accept(Path entry) {
            if (!patternMatches(entry.getName())) {
                LOG.debug("Ignoring old file '{}' that does not match the file name pattern '{}'",
                    entry.getName(), filePattern);
                return false;
            }
            if (startingFile == null) {
                return false;
            }
            if (compare(new HdfsFile(fs, entry), startingFile, useLastModified) < 0) {
                toProcess.add(new HdfsFile(fs, entry));
            }
            return false;
        }
    };

    Path path = new Path(dirpath.getAbsolutePath(), "*");
    fs.globStatus(path, pathFilter);
    if (processSubdirectories) {
        fs.globStatus(new Path(path, "*"), pathFilter);
    }
}
From source file:com.synerzip.analytics.commoncrawl.common.WarcFileFilter.java
License:Apache License
public boolean accept(Path path) {
    if (!path.getName().contains(filter)) {
        return false;
    }
    if (max < 0) {
        return true;
    }
    if (max < ++count) {
        return false;
    }
    return true;
}