List of usage examples for org.apache.hadoop.fs Path getParent
public Path getParent()
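getParent() strips the final component from a path and returns null once the root is reached; the examples below rely on both behaviors, either to build a sibling path next to a known file or to walk up the directory tree. A minimal standalone sketch of that contract (the paths here are illustrative, not taken from the projects below):

import org.apache.hadoop.fs.Path;

public class GetParentDemo {
    public static void main(String[] args) {
        Path file = new Path("/tmp/data/part-00000");
        Path dir = file.getParent();   // /tmp/data
        Path base = dir.getParent();   // /tmp
        // Sibling files are built from the parent, as several examples below do:
        Path sibling = new Path(file.getParent(), "descriptor.json"); // /tmp/data/descriptor.json
        // getParent() returns null at the root, so upward walks must check for it:
        System.out.println(new Path("/").getParent()); // prints "null"
        System.out.println(dir + " " + base + " " + sibling);
    }
}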
From source file:org.apache.drill.test.framework.TestDriver.java
License:Apache License
private static void dfsCopy(Path src, Path dest, String fsMode) throws IOException {
    FileSystem fs;
    FileSystem localFs = FileSystem.getLocal(conf);
    if (fsMode.equals(LOCALFS)) {
        fs = FileSystem.getLocal(conf);
    } else {
        fs = FileSystem.get(conf);
    }
    try {
        if (localFs.getFileStatus(src).isDirectory()) {
            for (FileStatus file : localFs.listStatus(src)) {
                Path srcChild = file.getPath();
                Path newDest = new Path(dest + "/" + srcChild.getName());
                dfsCopy(srcChild, newDest, fsMode);
            }
        } else {
            if (!fs.exists(dest.getParent())) {
                fs.mkdirs(dest.getParent());
            }
            if (!fs.exists(dest)) {
                FileUtil.copy(localFs, src, fs, dest, false, fs.getConf());
                LOG.debug("Copying file " + src + " to " + dest);
            } else {
                LOG.debug("File " + src + " already exists as " + dest);
            }
        }
    } catch (FileAlreadyExistsException e) {
        LOG.debug("File " + src + " already exists as " + dest);
    } catch (IOException e) {
        LOG.debug("File " + src + " already exists as " + dest);
    }
}
From source file:org.apache.drill.yarn.core.DfsFacade.java
License:Apache License
public void removeDrillFile(String fileName) throws DfsFacadeException {
    Path destPath = getUploadPath(fileName);
    try {
        fs.delete(destPath, false);
    } catch (IOException e) {
        throw new DfsFacadeException("Failed to delete file: " + destPath.toString(), e);
    }
    // Remove the Drill directory, but only if it is now empty.
    Path dir = destPath.getParent();
    try {
        RemoteIterator<FileStatus> iter = fs.listStatusIterator(dir);
        if (!iter.hasNext()) {
            fs.delete(dir, false);
        }
    } catch (IOException e) {
        throw new DfsFacadeException("Failed to delete directory: " + dir.toString(), e);
    }
}
From source file:org.apache.druid.indexer.updater.HadoopConverterJob.java
License:Apache License
public List<DataSegment> run() throws IOException {
    final JobConf jobConf = new JobConf();
    jobConf.setKeepFailedTaskFiles(false);
    for (Map.Entry<String, String> entry : converterConfig.getHadoopProperties().entrySet()) {
        jobConf.set(entry.getKey(), entry.getValue(), "converterConfig.getHadoopProperties()");
    }
    final List<DataSegment> segments = converterConfig.getSegments();
    if (segments.isEmpty()) {
        throw new IAE("No segments found for datasource [%s]", converterConfig.getDataSource());
    }
    converterConfigIntoConfiguration(converterConfig, segments, jobConf);
    jobConf.setNumReduceTasks(0); // Map only. Number of map tasks determined by input format
    jobConf.setWorkingDirectory(new Path(converterConfig.getDistributedSuccessCache()));
    setJobName(jobConf, segments);
    if (converterConfig.getJobPriority() != null) {
        jobConf.setJobPriority(JobPriority.valueOf(converterConfig.getJobPriority()));
    }
    final Job job = Job.getInstance(jobConf);
    job.setInputFormatClass(ConfigInputFormat.class);
    job.setMapperClass(ConvertingMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapSpeculativeExecution(false);
    job.setOutputFormatClass(ConvertingOutputFormat.class);
    JobHelper.setupClasspath(JobHelper.distributedClassPath(jobConf.getWorkingDirectory()),
            JobHelper.distributedClassPath(getJobClassPathDir(job.getJobName(), jobConf.getWorkingDirectory())),
            job);
    Throwable throwable = null;
    try {
        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
        final boolean success = job.waitForCompletion(true);
        if (!success) {
            final TaskReport[] reports = job.getTaskReports(TaskType.MAP);
            if (reports != null) {
                for (final TaskReport report : reports) {
                    log.error("Error in task [%s] : %s", report.getTaskId(),
                            Arrays.toString(report.getDiagnostics()));
                }
            }
            return null;
        }
        try {
            loadedBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_LOADED).getValue();
            writtenBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_WRITTEN).getValue();
        } catch (IOException ex) {
            log.error(ex, "Could not fetch counters");
        }
        final JobID jobID = job.getJobID();
        final Path jobDir = getJobPath(jobID, job.getWorkingDirectory());
        final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
        final RemoteIterator<LocatedFileStatus> it = fs.listFiles(jobDir, true);
        final List<Path> goodPaths = new ArrayList<>();
        while (it.hasNext()) {
            final LocatedFileStatus locatedFileStatus = it.next();
            if (locatedFileStatus.isFile()) {
                final Path myPath = locatedFileStatus.getPath();
                if (ConvertingOutputFormat.DATA_SUCCESS_KEY.equals(myPath.getName())) {
                    goodPaths.add(new Path(myPath.getParent(), ConvertingOutputFormat.DATA_FILE_KEY));
                }
            }
        }
        if (goodPaths.isEmpty()) {
            log.warn("No good data found at [%s]", jobDir);
            return null;
        }
        final List<DataSegment> returnList = ImmutableList
                .copyOf(Lists.transform(goodPaths, new Function<Path, DataSegment>() {
                    @Nullable
                    @Override
                    public DataSegment apply(final Path input) {
                        try {
                            if (!fs.exists(input)) {
                                throw new ISE("Somehow [%s] was found but [%s] is missing at [%s]",
                                        ConvertingOutputFormat.DATA_SUCCESS_KEY,
                                        ConvertingOutputFormat.DATA_FILE_KEY, jobDir);
                            }
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                        try (final InputStream stream = fs.open(input)) {
                            return HadoopDruidConverterConfig.jsonMapper.readValue(stream, DataSegment.class);
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                    }
                }));
        if (returnList.size() == segments.size()) {
            return returnList;
        } else {
            throw new ISE(
                    "Tasks reported success but result length did not match! Expected %d found %d at path [%s]",
                    segments.size(), returnList.size(), jobDir);
        }
    } catch (InterruptedException | ClassNotFoundException e) {
        RuntimeException exception = Throwables.propagate(e);
        throwable = exception;
        throw exception;
    } catch (Throwable t) {
        throwable = t;
        throw t;
    } finally {
        try {
            cleanup(job);
        } catch (IOException e) {
            if (throwable != null) {
                throwable.addSuppressed(e);
            } else {
                log.error(e, "Could not clean up job [%s]", job.getJobID());
            }
        }
    }
}
From source file:org.apache.druid.storage.hdfs.HdfsDataSegmentFinder.java
License:Apache License
@Override
public Set<DataSegment> findSegments(String workingDirPathStr, boolean updateDescriptor)
        throws SegmentLoadingException {
    final Map<String, Pair<DataSegment, Long>> timestampedSegments = new HashMap<>();
    final Path workingDirPath = new Path(workingDirPathStr);
    FileSystem fs;
    try {
        fs = workingDirPath.getFileSystem(config);
        log.info(fs.getScheme());
        log.info("FileSystem URI:" + fs.getUri().toString());
        if (!fs.exists(workingDirPath)) {
            throw new SegmentLoadingException("Working directory [%s] doesn't exist.", workingDirPath);
        }
        if (!fs.isDirectory(workingDirPath)) {
            throw new SegmentLoadingException("Working directory [%s] is not a directory!?", workingDirPath);
        }
        final RemoteIterator<LocatedFileStatus> it = fs.listFiles(workingDirPath, true);
        while (it.hasNext()) {
            final LocatedFileStatus locatedFileStatus = it.next();
            final Path path = locatedFileStatus.getPath();
            if (path.getName().endsWith("descriptor.json")) {
                // There are 3 supported path formats:
                // - hdfs://nn1/hdfs_base_directory/data_source_name/interval/version/shardNum/descriptor.json
                // - hdfs://nn1/hdfs_base_directory/data_source_name/interval/version/shardNum_descriptor.json
                // - hdfs://nn1/hdfs_base_directory/data_source_name/interval/version/shardNum_UUID_descriptor.json
                final String[] descriptorParts = path.getName().split("_");
                Path indexZip = new Path(path.getParent(), "index.zip");
                if (descriptorParts.length > 1) {
                    Preconditions.checkState(descriptorParts.length <= 3
                            && org.apache.commons.lang.StringUtils.isNumeric(descriptorParts[0])
                            && "descriptor.json".equals(descriptorParts[descriptorParts.length - 1]),
                            "Unexpected descriptor filename format [%s]", path);
                    indexZip = new Path(path.getParent(), StringUtils.format("%s_%sindex.zip",
                            descriptorParts[0], descriptorParts.length == 2 ? "" : descriptorParts[1] + "_"));
                }
                if (fs.exists(indexZip)) {
                    final DataSegment dataSegment = mapper.readValue(fs.open(path), DataSegment.class);
                    log.info("Found segment [%s] located at [%s]", dataSegment.getIdentifier(), indexZip);
                    final Map<String, Object> loadSpec = dataSegment.getLoadSpec();
                    final String pathWithoutScheme = indexZip.toUri().getPath();
                    if (!loadSpec.get("type").equals(HdfsStorageDruidModule.SCHEME)
                            || !loadSpec.get("path").equals(pathWithoutScheme)) {
                        loadSpec.put("type", HdfsStorageDruidModule.SCHEME);
                        loadSpec.put("path", pathWithoutScheme);
                        if (updateDescriptor) {
                            log.info("Updating loadSpec in descriptor.json at [%s] with new path [%s]",
                                    path, pathWithoutScheme);
                            mapper.writeValue(fs.create(path, true), dataSegment);
                        }
                    }
                    DataSegmentFinder.putInMapRetainingNewest(timestampedSegments, dataSegment,
                            locatedFileStatus.getModificationTime());
                } else {
                    throw new SegmentLoadingException(
                            "index.zip didn't exist at [%s] while descriptor.json exists!?", indexZip);
                }
            }
        }
    } catch (IOException e) {
        throw new SegmentLoadingException(e, "Problems interacting with filesystem[%s].", workingDirPath);
    }
    return timestampedSegments.values().stream().map(x -> x.lhs).collect(Collectors.toSet());
}
From source file:org.apache.druid.storage.hdfs.HdfsDataSegmentKiller.java
License:Apache License
@Override
public void kill(DataSegment segment) throws SegmentLoadingException {
    final Path segmentPath = getPath(segment);
    log.info("Killing segment[%s] mapped to path[%s]", segment.getId(), segmentPath);
    try {
        String filename = segmentPath.getName();
        final FileSystem fs = segmentPath.getFileSystem(config);
        if (!filename.endsWith(".zip")) {
            throw new SegmentLoadingException("Unknown file type[%s]", segmentPath);
        } else {
            if (!fs.exists(segmentPath)) {
                log.warn("Segment path [%s] does not exist", segmentPath);
                return;
            }
            // There are 3 supported path formats:
            // - hdfs://nn1/hdfs_base_directory/data_source_name/interval/version/shardNum/index.zip
            // - hdfs://nn1/hdfs_base_directory/data_source_name/interval/version/shardNum_index.zip
            // - hdfs://nn1/hdfs_base_directory/data_source_name/interval/version/shardNum_UUID_index.zip
            String[] zipParts = filename.split("_");
            Path descriptorPath = new Path(segmentPath.getParent(), "descriptor.json");
            if (zipParts.length > 1) {
                Preconditions.checkState(zipParts.length <= 3
                        && StringUtils.isNumeric(zipParts[0])
                        && "index.zip".equals(zipParts[zipParts.length - 1]),
                        "Unexpected segmentPath format [%s]", segmentPath);
                descriptorPath = new Path(segmentPath.getParent(),
                        org.apache.druid.java.util.common.StringUtils.format("%s_%sdescriptor.json",
                                zipParts[0], zipParts.length == 2 ? "" : zipParts[1] + "_"));
            }
            if (!fs.delete(segmentPath, false)) {
                throw new SegmentLoadingException("Unable to kill segment, failed to delete [%s]",
                        segmentPath.toString());
            }
            // descriptor.json is a file that stores segment metadata in deep storage. The file is deprecated
            // and no longer written, but we still delete it if it exists.
            fs.delete(descriptorPath, false);
            removeEmptyParentDirectories(fs, segmentPath, zipParts.length > 1 ? 2 : 3);
        }
    } catch (IOException e) {
        throw new SegmentLoadingException(e, "Unable to kill segment");
    }
}
From source file:org.apache.druid.storage.hdfs.HdfsDataSegmentKiller.java
License:Apache License
private void removeEmptyParentDirectories(final FileSystem fs, final Path segmentPath, final int depth) {
    Path path = segmentPath;
    try {
        for (int i = 1; i <= depth; i++) {
            path = path.getParent();
            if (fs.listStatus(path).length != 0 || !fs.delete(path, false)) {
                break;
            }
        }
    } catch (Exception e) {
        log.makeAlert(e, "uncaught exception during segment killer").emit();
    }
}
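The helper above climbs a fixed number of levels because the caller knows how deep the segment layout is. A more general variant (a hypothetical helper, not part of the Druid source) climbs until it hits a non-empty directory or the filesystem root, where getParent() returns null:

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ParentCleaner {
    // Hypothetical helper: delete empty ancestors of 'start', stopping at a
    // non-empty directory or at the root (whose getParent() is null).
    static void deleteEmptyAncestors(FileSystem fs, Path start) throws IOException {
        Path dir = start.getParent();
        // dir.getParent() == null means 'dir' is the root; never delete the root itself.
        while (dir != null && dir.getParent() != null) {
            if (fs.listStatus(dir).length != 0 || !fs.delete(dir, false)) {
                break;
            }
            dir = dir.getParent();
        }
    }
}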
From source file:org.apache.druid.storage.hdfs.HdfsDataSegmentPusher.java
License:Apache License
@Override
public DataSegment push(final File inDir, final DataSegment segment, final boolean useUniquePath)
        throws IOException {
    // For HDFS, useUniquePath does not affect the directory tree but instead affects the filename, which is
    // of the form '{partitionNum}_index.zip' without unique paths and '{partitionNum}_{UUID}_index.zip' with
    // unique paths.
    final String storageDir = this.getStorageDir(segment, false);
    log.info("Copying segment[%s] to HDFS at location[%s/%s]", segment.getId(),
            fullyQualifiedStorageDirectory.get(), storageDir);
    Path tmpIndexFile = new Path(StringUtils.format("%s/%s/%s/%s_index.zip",
            fullyQualifiedStorageDirectory.get(), segment.getDataSource(), UUIDUtils.generateUuid(),
            segment.getShardSpec().getPartitionNum()));
    FileSystem fs = tmpIndexFile.getFileSystem(hadoopConfig);
    fs.mkdirs(tmpIndexFile.getParent());
    log.info("Compressing files from[%s] to [%s]", inDir, tmpIndexFile);
    final long size;
    final DataSegment dataSegment;
    try {
        try (FSDataOutputStream out = fs.create(tmpIndexFile)) {
            size = CompressionUtils.zip(inDir, out);
        }
        final String uniquePrefix = useUniquePath ? DataSegmentPusher.generateUniquePath() + "_" : "";
        final Path outIndexFile = new Path(StringUtils.format("%s/%s/%d_%sindex.zip",
                fullyQualifiedStorageDirectory.get(), storageDir,
                segment.getShardSpec().getPartitionNum(), uniquePrefix));
        dataSegment = segment.withLoadSpec(makeLoadSpec(outIndexFile.toUri()))
                .withSize(size)
                .withBinaryVersion(SegmentUtils.getVersionFromDir(inDir));
        // Create parent if it does not exist; recreation is not an error
        fs.mkdirs(outIndexFile.getParent());
        copyFilesWithChecks(fs, tmpIndexFile, outIndexFile);
    } finally {
        try {
            if (fs.exists(tmpIndexFile.getParent()) && !fs.delete(tmpIndexFile.getParent(), true)) {
                log.error("Failed to delete temp directory[%s]", tmpIndexFile.getParent());
            }
        } catch (IOException ex) {
            log.error(ex, "Failed to delete temp directory[%s]", tmpIndexFile.getParent());
        }
    }
    return dataSegment;
}
From source file:org.apache.druid.storage.hdfs.HdfsFileTimestampVersionFinder.java
License:Apache License
/**
 * Returns the latest modified file at the uri of interest.
 *
 * @param uri     Either a directory or a file on HDFS. If it is a file, the parent directory will be searched.
 * @param pattern A pattern matcher for file names in the directory of interest. Passing `null` results in
 *                matching any file in the directory.
 *
 * @return The URI of the file with the most recent modified timestamp.
 */
@Override
public URI getLatestVersion(final URI uri, final @Nullable Pattern pattern) {
    final Path path = new Path(uri);
    try {
        return RetryUtils.retry(() -> {
            final FileSystem fs = path.getFileSystem(config);
            if (!fs.exists(path)) {
                return null;
            }
            return mostRecentInDir(fs.isDirectory(path) ? path : path.getParent(), pattern);
        }, shouldRetryPredicate(), DEFAULT_RETRY_COUNT);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
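The ternary fs.isDirectory(path) ? path : path.getParent() is the idiom to note here: it normalizes a path that may name either a file or a directory into the directory to search. As a standalone sketch (hypothetical class and method names):

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DirNormalizer {
    // Hypothetical helper: return 'p' itself if it is a directory,
    // otherwise the directory containing it.
    static Path toSearchDir(FileSystem fs, Path p) throws IOException {
        return fs.isDirectory(p) ? p : p.getParent();
    }
}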
From source file:org.apache.falcon.cleanup.AbstractCleanupHandler.java
License:Apache License
private void deleteParentIfEmpty(FileSystem fs, Path parent) throws IOException {
    FileStatus[] files = fs.listStatus(parent);
    if (files != null && files.length == 0) {
        LOG.info("Parent path: {} is empty, deleting path", parent);
        fs.delete(parent, true);
        deleteParentIfEmpty(fs, parent.getParent());
    }
}
From source file:org.apache.falcon.converter.OozieProcessMapper.java
License:Apache License
protected void createWorkflow(Cluster cluster, Process process, Workflow processWorkflow, String wfName,
        Path parentWfPath) throws FalconException {
    WORKFLOWAPP wfApp = getWorkflowTemplate(DEFAULT_WF_TEMPLATE);
    wfApp.setName(wfName);
    try {
        addLibExtensionsToWorkflow(cluster, wfApp, EntityType.PROCESS, null);
    } catch (IOException e) {
        throw new FalconException("Failed to add library extensions for the workflow", e);
    }
    String userWfPath = getUserWorkflowPath(cluster, parentWfPath.getParent()).toString();
    EngineType engineType = processWorkflow.getEngine();
    for (Object object : wfApp.getDecisionOrForkOrJoin()) {
        if (!(object instanceof ACTION)) {
            continue;
        }
        ACTION action = (ACTION) object;
        String actionName = action.getName();
        if (engineType == EngineType.OOZIE && actionName.equals("user-oozie-workflow")) {
            action.getSubWorkflow().setAppPath("${nameNode}" + userWfPath);
        } else if (engineType == EngineType.PIG && actionName.equals("user-pig-job")) {
            decoratePIGAction(cluster, process, processWorkflow, action.getPig(), parentWfPath);
        } else if (engineType == EngineType.HIVE && actionName.equals("user-hive-job")) {
            decorateHiveAction(cluster, process, processWorkflow, action, parentWfPath);
        } else if (FALCON_ACTIONS.contains(actionName)) {
            decorateWithOozieRetries(action);
        }
    }
    // Create parent workflow
    marshal(cluster, wfApp, parentWfPath);
}