List of usage examples for org.apache.hadoop.fs Path getParent
public Path getParent()
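getParent() strips the final component from a path and returns null once the root is reached; the examples below rely on both behaviors, either to build a sibling path next to a known file or to walk up the directory tree. A minimal standalone sketch of that contract (the paths here are illustrative, not taken from the projects below):

import org.apache.hadoop.fs.Path;

public class GetParentDemo {
    public static void main(String[] args) {
        Path file = new Path("/tmp/data/part-00000");
        Path dir = file.getParent();   // /tmp/data
        Path base = dir.getParent();   // /tmp
        // Sibling files are built from the parent, as several examples below do:
        Path sibling = new Path(file.getParent(), "descriptor.json"); // /tmp/data/descriptor.json
        // getParent() returns null at the root, so upward walks must check for it:
        System.out.println(new Path("/").getParent()); // prints "null"
        System.out.println(dir + " " + base + " " + sibling);
    }
}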
From source file:org.apache.drill.test.framework.TestDriver.java
License:Apache License
private static void dfsCopy(Path src, Path dest, String fsMode) throws IOException {
    FileSystem fs;
    FileSystem localFs = FileSystem.getLocal(conf);
    if (fsMode.equals(LOCALFS)) {
        fs = FileSystem.getLocal(conf);
    } else {
        fs = FileSystem.get(conf);
    }
    try {
        if (localFs.getFileStatus(src).isDirectory()) {
            for (FileStatus file : localFs.listStatus(src)) {
                Path srcChild = file.getPath();
                Path newDest = new Path(dest + "/" + srcChild.getName());
                dfsCopy(srcChild, newDest, fsMode);
            }
        } else {
            if (!fs.exists(dest.getParent())) {
                fs.mkdirs(dest.getParent());
            }
            if (!fs.exists(dest)) {
                FileUtil.copy(localFs, src, fs, dest, false, fs.getConf());
                LOG.debug("Copying file " + src + " to " + dest);
            } else {
                LOG.debug("File " + src + " already exists as " + dest);
            }
        }
    } catch (FileAlreadyExistsException e) {
        LOG.debug("File " + src + " already exists as " + dest);
    } catch (IOException e) {
        LOG.debug("File " + src + " already exists as " + dest);
    }
}
From source file:org.apache.drill.yarn.core.DfsFacade.java
License:Apache License
public void removeDrillFile(String fileName) throws DfsFacadeException {
    Path destPath = getUploadPath(fileName);
    try {
        fs.delete(destPath, false);
    } catch (IOException e) {
        throw new DfsFacadeException("Failed to delete file: " + destPath.toString(), e);
    }
    // Remove the Drill directory, but only if it is now empty.
    Path dir = destPath.getParent();
    try {
        RemoteIterator<FileStatus> iter = fs.listStatusIterator(dir);
        if (!iter.hasNext()) {
            fs.delete(dir, false);
        }
    } catch (IOException e) {
        throw new DfsFacadeException("Failed to delete directory: " + dir.toString(), e);
    }
}
From source file:org.apache.druid.indexer.updater.HadoopConverterJob.java
License:Apache License
public List<DataSegment> run() throws IOException {
    final JobConf jobConf = new JobConf();
    jobConf.setKeepFailedTaskFiles(false);
    for (Map.Entry<String, String> entry : converterConfig.getHadoopProperties().entrySet()) {
        jobConf.set(entry.getKey(), entry.getValue(), "converterConfig.getHadoopProperties()");
    }
    final List<DataSegment> segments = converterConfig.getSegments();
    if (segments.isEmpty()) {
        throw new IAE("No segments found for datasource [%s]", converterConfig.getDataSource());
    }
    converterConfigIntoConfiguration(converterConfig, segments, jobConf);
    jobConf.setNumReduceTasks(0); // Map only. Number of map tasks determined by input format
    jobConf.setWorkingDirectory(new Path(converterConfig.getDistributedSuccessCache()));
    setJobName(jobConf, segments);
    if (converterConfig.getJobPriority() != null) {
        jobConf.setJobPriority(JobPriority.valueOf(converterConfig.getJobPriority()));
    }
    final Job job = Job.getInstance(jobConf);
    job.setInputFormatClass(ConfigInputFormat.class);
    job.setMapperClass(ConvertingMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapSpeculativeExecution(false);
    job.setOutputFormatClass(ConvertingOutputFormat.class);
    JobHelper.setupClasspath(JobHelper.distributedClassPath(jobConf.getWorkingDirectory()),
            JobHelper.distributedClassPath(getJobClassPathDir(job.getJobName(), jobConf.getWorkingDirectory())),
            job);
    Throwable throwable = null;
    try {
        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
        final boolean success = job.waitForCompletion(true);
        if (!success) {
            final TaskReport[] reports = job.getTaskReports(TaskType.MAP);
            if (reports != null) {
                for (final TaskReport report : reports) {
                    log.error("Error in task [%s] : %s", report.getTaskId(),
                            Arrays.toString(report.getDiagnostics()));
                }
            }
            return null;
        }
        try {
            loadedBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_LOADED).getValue();
            writtenBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_WRITTEN).getValue();
        } catch (IOException ex) {
            log.error(ex, "Could not fetch counters");
        }
        final JobID jobID = job.getJobID();
        final Path jobDir = getJobPath(jobID, job.getWorkingDirectory());
        final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
        final RemoteIterator<LocatedFileStatus> it = fs.listFiles(jobDir, true);
        final List<Path> goodPaths = new ArrayList<>();
        while (it.hasNext()) {
            final LocatedFileStatus locatedFileStatus = it.next();
            if (locatedFileStatus.isFile()) {
                final Path myPath = locatedFileStatus.getPath();
                if (ConvertingOutputFormat.DATA_SUCCESS_KEY.equals(myPath.getName())) {
                    goodPaths.add(new Path(myPath.getParent(), ConvertingOutputFormat.DATA_FILE_KEY));
                }
            }
        }
        if (goodPaths.isEmpty()) {
            log.warn("No good data found at [%s]", jobDir);
            return null;
        }
        final List<DataSegment> returnList = ImmutableList
                .copyOf(Lists.transform(goodPaths, new Function<Path, DataSegment>() {
                    @Nullable
                    @Override
                    public DataSegment apply(final Path input) {
                        try {
                            if (!fs.exists(input)) {
                                throw new ISE("Somehow [%s] was found but [%s] is missing at [%s]",
                                        ConvertingOutputFormat.DATA_SUCCESS_KEY,
                                        ConvertingOutputFormat.DATA_FILE_KEY, jobDir);
                            }
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                        try (final InputStream stream = fs.open(input)) {
                            return HadoopDruidConverterConfig.jsonMapper.readValue(stream, DataSegment.class);
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                    }
                }));
        if (returnList.size() == segments.size()) {
            return returnList;
        } else {
            throw new ISE(
                    "Tasks reported success but result length did not match! Expected %d found %d at path [%s]",
                    segments.size(), returnList.size(), jobDir);
        }
    } catch (InterruptedException | ClassNotFoundException e) {
        RuntimeException exception = Throwables.propagate(e);
        throwable = exception;
        throw exception;
    } catch (Throwable t) {
        throwable = t;
        throw t;
    } finally {
        try {
            cleanup(job);
        } catch (IOException e) {
            if (throwable != null) {
                throwable.addSuppressed(e);
            } else {
                log.error(e, "Could not clean up job [%s]", job.getJobID());
            }
        }
    }
}
From source file:org.apache.druid.storage.hdfs.HdfsDataSegmentFinder.java
License:Apache License
@Override
public Set<DataSegment> findSegments(String workingDirPathStr, boolean updateDescriptor)
        throws SegmentLoadingException {
    final Map<String, Pair<DataSegment, Long>> timestampedSegments = new HashMap<>();
    final Path workingDirPath = new Path(workingDirPathStr);
    FileSystem fs;
    try {
        fs = workingDirPath.getFileSystem(config);
        log.info(fs.getScheme());
        log.info("FileSystem URI:" + fs.getUri().toString());
        if (!fs.exists(workingDirPath)) {
            throw new SegmentLoadingException("Working directory [%s] doesn't exist.", workingDirPath);
        }
        if (!fs.isDirectory(workingDirPath)) {
            throw new SegmentLoadingException("Working directory [%s] is not a directory!?", workingDirPath);
        }
        final RemoteIterator<LocatedFileStatus> it = fs.listFiles(workingDirPath, true);
        while (it.hasNext()) {
            final LocatedFileStatus locatedFileStatus = it.next();
            final Path path = locatedFileStatus.getPath();
            if (path.getName().endsWith("descriptor.json")) {
                // There are 3 supported path formats:
                // - hdfs://nn1/hdfs_base_directory/data_source_name/interval/version/shardNum/descriptor.json
                // - hdfs://nn1/hdfs_base_directory/data_source_name/interval/version/shardNum_descriptor.json
                // - hdfs://nn1/hdfs_base_directory/data_source_name/interval/version/shardNum_UUID_descriptor.json
                final String[] descriptorParts = path.getName().split("_");
                Path indexZip = new Path(path.getParent(), "index.zip");
                if (descriptorParts.length > 1) {
                    Preconditions.checkState(descriptorParts.length <= 3
                            && org.apache.commons.lang.StringUtils.isNumeric(descriptorParts[0])
                            && "descriptor.json".equals(descriptorParts[descriptorParts.length - 1]),
                            "Unexpected descriptor filename format [%s]", path);
                    indexZip = new Path(path.getParent(), StringUtils.format("%s_%sindex.zip",
                            descriptorParts[0], descriptorParts.length == 2 ? "" : descriptorParts[1] + "_"));
                }
                if (fs.exists(indexZip)) {
                    final DataSegment dataSegment = mapper.readValue(fs.open(path), DataSegment.class);
                    log.info("Found segment [%s] located at [%s]", dataSegment.getIdentifier(), indexZip);
                    final Map<String, Object> loadSpec = dataSegment.getLoadSpec();
                    final String pathWithoutScheme = indexZip.toUri().getPath();
                    if (!loadSpec.get("type").equals(HdfsStorageDruidModule.SCHEME)
                            || !loadSpec.get("path").equals(pathWithoutScheme)) {
                        loadSpec.put("type", HdfsStorageDruidModule.SCHEME);
                        loadSpec.put("path", pathWithoutScheme);
                        if (updateDescriptor) {
                            log.info("Updating loadSpec in descriptor.json at [%s] with new path [%s]",
                                    path, pathWithoutScheme);
                            mapper.writeValue(fs.create(path, true), dataSegment);
                        }
                    }
                    DataSegmentFinder.putInMapRetainingNewest(timestampedSegments, dataSegment,
                            locatedFileStatus.getModificationTime());
                } else {
                    throw new SegmentLoadingException(
                            "index.zip didn't exist at [%s] while descriptor.json exists!?", indexZip);
                }
            }
        }
    } catch (IOException e) {
        throw new SegmentLoadingException(e, "Problems interacting with filesystem[%s].", workingDirPath);
    }
    return timestampedSegments.values().stream().map(x -> x.lhs).collect(Collectors.toSet());
}
From source file:org.apache.druid.storage.hdfs.HdfsDataSegmentKiller.java
License:Apache License
@Override
public void kill(DataSegment segment) throws SegmentLoadingException {
    final Path segmentPath = getPath(segment);
    log.info("Killing segment[%s] mapped to path[%s]", segment.getId(), segmentPath);
    try {
        String filename = segmentPath.getName();
        final FileSystem fs = segmentPath.getFileSystem(config);
        if (!filename.endsWith(".zip")) {
            throw new SegmentLoadingException("Unknown file type[%s]", segmentPath);
        } else {
            if (!fs.exists(segmentPath)) {
                log.warn("Segment path [%s] does not exist", segmentPath);
                return;
            }
            // There are 3 supported path formats:
            // - hdfs://nn1/hdfs_base_directory/data_source_name/interval/version/shardNum/index.zip
            // - hdfs://nn1/hdfs_base_directory/data_source_name/interval/version/shardNum_index.zip
            // - hdfs://nn1/hdfs_base_directory/data_source_name/interval/version/shardNum_UUID_index.zip
            String[] zipParts = filename.split("_");
            Path descriptorPath = new Path(segmentPath.getParent(), "descriptor.json");
            if (zipParts.length > 1) {
                Preconditions.checkState(zipParts.length <= 3
                        && StringUtils.isNumeric(zipParts[0])
                        && "index.zip".equals(zipParts[zipParts.length - 1]),
                        "Unexpected segmentPath format [%s]", segmentPath);
                descriptorPath = new Path(segmentPath.getParent(),
                        org.apache.druid.java.util.common.StringUtils.format("%s_%sdescriptor.json",
                                zipParts[0], zipParts.length == 2 ? "" : zipParts[1] + "_"));
            }
            if (!fs.delete(segmentPath, false)) {
                throw new SegmentLoadingException("Unable to kill segment, failed to delete [%s]",
                        segmentPath.toString());
            }
            // descriptor.json is a file that stores segment metadata in deep storage. The file is deprecated
            // and no longer written, but we still delete it if it exists.
            fs.delete(descriptorPath, false);
            removeEmptyParentDirectories(fs, segmentPath, zipParts.length > 1 ? 2 : 3);
        }
    } catch (IOException e) {
        throw new SegmentLoadingException(e, "Unable to kill segment");
    }
}
From source file:org.apache.druid.storage.hdfs.HdfsDataSegmentKiller.java
License:Apache License
private void removeEmptyParentDirectories(final FileSystem fs, final Path segmentPath, final int depth) {
    Path path = segmentPath;
    try {
        for (int i = 1; i <= depth; i++) {
            path = path.getParent();
            if (fs.listStatus(path).length != 0 || !fs.delete(path, false)) {
                break;
            }
        }
    } catch (Exception e) {
        log.makeAlert(e, "uncaught exception during segment killer").emit();
    }
}
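The helper above climbs a fixed number of levels because the caller knows how deep the segment layout is. A more general variant (a hypothetical helper, not part of the Druid source) climbs until it hits a non-empty directory or the filesystem root, where getParent() returns null:

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ParentCleaner {
    // Hypothetical helper: delete empty ancestors of 'start', stopping at a
    // non-empty directory or at the root (whose getParent() is null).
    static void deleteEmptyAncestors(FileSystem fs, Path start) throws IOException {
        Path dir = start.getParent();
        // dir.getParent() == null means 'dir' is the root; never delete the root itself.
        while (dir != null && dir.getParent() != null) {
            if (fs.listStatus(dir).length != 0 || !fs.delete(dir, false)) {
                break;
            }
            dir = dir.getParent();
        }
    }
}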
From source file:org.apache.druid.storage.hdfs.HdfsDataSegmentPusher.java
License:Apache License
@Override
public DataSegment push(final File inDir, final DataSegment segment, final boolean useUniquePath)
        throws IOException {
    // For HDFS, useUniquePath does not affect the directory tree but instead affects the filename, which is
    // of the form '{partitionNum}_index.zip' without unique paths and '{partitionNum}_{UUID}_index.zip' with
    // unique paths.
    final String storageDir = this.getStorageDir(segment, false);
    log.info("Copying segment[%s] to HDFS at location[%s/%s]", segment.getId(),
            fullyQualifiedStorageDirectory.get(), storageDir);
    Path tmpIndexFile = new Path(StringUtils.format("%s/%s/%s/%s_index.zip",
            fullyQualifiedStorageDirectory.get(), segment.getDataSource(), UUIDUtils.generateUuid(),
            segment.getShardSpec().getPartitionNum()));
    FileSystem fs = tmpIndexFile.getFileSystem(hadoopConfig);
    fs.mkdirs(tmpIndexFile.getParent());
    log.info("Compressing files from[%s] to [%s]", inDir, tmpIndexFile);
    final long size;
    final DataSegment dataSegment;
    try {
        try (FSDataOutputStream out = fs.create(tmpIndexFile)) {
            size = CompressionUtils.zip(inDir, out);
        }
        final String uniquePrefix = useUniquePath ? DataSegmentPusher.generateUniquePath() + "_" : "";
        final Path outIndexFile = new Path(StringUtils.format("%s/%s/%d_%sindex.zip",
                fullyQualifiedStorageDirectory.get(), storageDir,
                segment.getShardSpec().getPartitionNum(), uniquePrefix));
        dataSegment = segment.withLoadSpec(makeLoadSpec(outIndexFile.toUri()))
                .withSize(size)
                .withBinaryVersion(SegmentUtils.getVersionFromDir(inDir));
        // Create parent if it does not exist; recreation is not an error
        fs.mkdirs(outIndexFile.getParent());
        copyFilesWithChecks(fs, tmpIndexFile, outIndexFile);
    } finally {
        try {
            if (fs.exists(tmpIndexFile.getParent()) && !fs.delete(tmpIndexFile.getParent(), true)) {
                log.error("Failed to delete temp directory[%s]", tmpIndexFile.getParent());
            }
        } catch (IOException ex) {
            log.error(ex, "Failed to delete temp directory[%s]", tmpIndexFile.getParent());
        }
    }
    return dataSegment;
}
From source file:org.apache.druid.storage.hdfs.HdfsFileTimestampVersionFinder.java
License:Apache License
/**
 * Returns the latest modified file at the uri of interest.
 *
 * @param uri     Either a directory or a file on HDFS. If it is a file, the parent directory will be searched.
 * @param pattern A pattern matcher for file names in the directory of interest. Passing `null` results in
 *                matching any file in the directory.
 *
 * @return The URI of the file with the most recent modified timestamp.
 */
@Override
public URI getLatestVersion(final URI uri, final @Nullable Pattern pattern) {
    final Path path = new Path(uri);
    try {
        return RetryUtils.retry(() -> {
            final FileSystem fs = path.getFileSystem(config);
            if (!fs.exists(path)) {
                return null;
            }
            return mostRecentInDir(fs.isDirectory(path) ? path : path.getParent(), pattern);
        }, shouldRetryPredicate(), DEFAULT_RETRY_COUNT);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
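The ternary fs.isDirectory(path) ? path : path.getParent() is the idiom to note here: it normalizes a path that may name either a file or a directory into the directory to search. As a standalone sketch (hypothetical class and method names):

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DirNormalizer {
    // Hypothetical helper: return 'p' itself if it is a directory,
    // otherwise the directory containing it.
    static Path toSearchDir(FileSystem fs, Path p) throws IOException {
        return fs.isDirectory(p) ? p : p.getParent();
    }
}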
From source file:org.apache.falcon.cleanup.AbstractCleanupHandler.java
License:Apache License
private void deleteParentIfEmpty(FileSystem fs, Path parent) throws IOException {
    FileStatus[] files = fs.listStatus(parent);
    if (files != null && files.length == 0) {
        LOG.info("Parent path: {} is empty, deleting path", parent);
        fs.delete(parent, true);
        deleteParentIfEmpty(fs, parent.getParent());
    }
}
From source file:org.apache.falcon.converter.OozieProcessMapper.java
License:Apache License
protected void createWorkflow(Cluster cluster, Process process, Workflow processWorkflow, String wfName,
        Path parentWfPath) throws FalconException {
    WORKFLOWAPP wfApp = getWorkflowTemplate(DEFAULT_WF_TEMPLATE);
    wfApp.setName(wfName);
    try {
        addLibExtensionsToWorkflow(cluster, wfApp, EntityType.PROCESS, null);
    } catch (IOException e) {
        throw new FalconException("Failed to add library extensions for the workflow", e);
    }
    String userWfPath = getUserWorkflowPath(cluster, parentWfPath.getParent()).toString();
    EngineType engineType = processWorkflow.getEngine();
    for (Object object : wfApp.getDecisionOrForkOrJoin()) {
        if (!(object instanceof ACTION)) {
            continue;
        }
        ACTION action = (ACTION) object;
        String actionName = action.getName();
        if (engineType == EngineType.OOZIE && actionName.equals("user-oozie-workflow")) {
            action.getSubWorkflow().setAppPath("${nameNode}" + userWfPath);
        } else if (engineType == EngineType.PIG && actionName.equals("user-pig-job")) {
            decoratePIGAction(cluster, process, processWorkflow, action.getPig(), parentWfPath);
        } else if (engineType == EngineType.HIVE && actionName.equals("user-hive-job")) {
            decorateHiveAction(cluster, process, processWorkflow, action, parentWfPath);
        } else if (FALCON_ACTIONS.contains(actionName)) {
            decorateWithOozieRetries(action);
        }
    }
    // Create parent workflow
    marshal(cluster, wfApp, parentWfPath);
}