Example usage for org.apache.hadoop.fs FileSystem listFiles

List of usage examples for org.apache.hadoop.fs FileSystem listFiles

Introduction

On this page you can find example usage of org.apache.hadoop.fs FileSystem listFiles.

Prototype

public RemoteIterator<LocatedFileStatus> listFiles(final Path f, final boolean recursive)
        throws FileNotFoundException, IOException 

Document

List the statuses and block locations of the files in the given path. If the path is a directory and recursive is false, only the files directly in that directory are returned; if recursive is true, files in the entire subtree rooted at the path are returned. If the path is a file, its own status and block locations are returned.
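
A minimal, self-contained sketch of a typical call (the directory argument and the plain Configuration used here are illustrative placeholders, not taken from the examples below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListFilesExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path dir = new Path(args.length > 0 ? args[0] : "/tmp"); // directory to scan (placeholder default)
        FileSystem fs = dir.getFileSystem(conf);

        // recursive = true walks the whole subtree; false lists only files directly under dir.
        // Only files are returned, never directories.
        RemoteIterator<LocatedFileStatus> it = fs.listFiles(dir, true);
        while (it.hasNext()) {
            LocatedFileStatus status = it.next();
            System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
        }
    }
}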

Usage

From source file:edu.iu.datasource.HarpDAALDataSource.java

License:Apache License

public List<double[]> loadDenseCSVFiles(String inputFile, int nFeatures, String sep) {

    Path inputFilePaths = new Path(inputFile);
    List<String> inputFileList = new LinkedList<>();

    try {
        FileSystem fs = inputFilePaths.getFileSystem(conf);
        RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(inputFilePaths, true);

        while (iterator.hasNext()) {
            String name = iterator.next().getPath().toUri().toString();
            inputFileList.add(name);
        }

    } catch (IOException e) {
        LOG.error("Fail to get test files", e);
    }

    List<double[]> points = new LinkedList<double[]>();

    FSDataInputStream in = null;

    //loop over all the files in the list
    ListIterator<String> file_itr = inputFileList.listIterator();
    while (file_itr.hasNext()) {
        String file_name = file_itr.next();
        LOG.info("read in file name: " + file_name);

        Path file_path = new Path(file_name);
        try {

            FileSystem fs = file_path.getFileSystem(conf);
            in = fs.open(file_path);

        } catch (Exception e) {
            LOG.error("Fail to open file " + e.toString());
            return null;
        }

        //read file content
        try {
            while (true) {
                String line = in.readLine();
                if (line == null)
                    break;

                String[] lineData = line.split(sep);
                double[] cell = new double[nFeatures];

                for (int t = 0; t < nFeatures; t++)
                    cell[t] = Double.parseDouble(lineData[t]);

                points.add(cell);
            }

            in.close();

        } catch (Exception e) {
            LOG.error("Fail to read data " + e.toString());
            return null;
        }

    }

    return points;

}

From source file:edu.iu.datasource.HarpDAALDataSource.java

License:Apache License

public List<COO> loadCOOFiles(String FilePath, String regex) {
    List<String> FilePathsList = new LinkedList<>();
    Path path = new Path(FilePath);
    try {
        FileSystem fs = path.getFileSystem(this.conf);
        RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(path, true);
        while (iterator.hasNext()) {
            String name = iterator.next().getPath().toUri().toString();
            FilePathsList.add(name);
        }
    } catch (IOException e) {
        LOG.error("Fail to get test files", e);
    }

    MTReader reader = new MTReader();
    List<COO> output = reader.readCOO(FilePathsList, regex, this.conf, this.harpthreads);
    // this.totallines = reader.getTotalLines();
    // this.totalPoints = reader.getTotalPoints();
    return output;
}

From source file:edu.iu.datasource.HarpDAALDataSource.java

License:Apache License

public NumericTable loadCSRNumericTable(String inputFiles, String sep, DaalContext context) throws IOException {

    Path inputFilePaths = new Path(inputFiles);
    List<String> inputFileList = new LinkedList<>();

    try {
        FileSystem fs = inputFilePaths.getFileSystem(conf);
        RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(inputFilePaths, true);

        while (iterator.hasNext()) {
            String name = iterator.next().getPath().toUri().toString();
            inputFileList.add(name);
        }

    } catch (IOException e) {
        LOG.error("Fail to get test files", e);
    }

    if (inputFileList.size() > 1) {
        LOG.info("Error CSR data shall be within a single file");
        return null;
    }

    String filename = inputFileList.get(0);
    return loadCSRNumericTableImpl(filename, sep, context);
}

From source file:edu.iu.datasource.HarpDAALDataSource.java

License:Apache License

public NumericTable[] loadCSRNumericTableAndLabel(String inputFiles, String sep, DaalContext context)
        throws IOException {

    Path inputFilePaths = new Path(inputFiles);
    List<String> inputFileList = new LinkedList<>();

    try {
        FileSystem fs = inputFilePaths.getFileSystem(conf);
        RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(inputFilePaths, true);

        while (iterator.hasNext()) {
            String name = iterator.next().getPath().toUri().toString();
            inputFileList.add(name);
        }

    } catch (IOException e) {
        LOG.error("Fail to get test files", e);
    }

    if (inputFileList.size() > 1) {
        LOG.info("Error CSR data shall be within a single file");
        return null;
    }

    String filename = inputFileList.get(0);

    return loadCSRNumericTableAndLabelImpl(filename, sep, context);

}

From source file:gaffer.accumulostore.operation.hdfs.handler.job.tool.FetchElementsFromHdfsTool.java

License:Apache License

private void checkHdfsDirectories(final AddElementsFromHdfs operation) throws IOException {
    LOGGER.info("Checking that the correct HDFS directories exist");
    final FileSystem fs = FileSystem.get(getConf());

    final Path outputPath = new Path(operation.getOutputPath());
    LOGGER.info("Ensuring output directory {} doesn't exist", outputPath);
    if (fs.exists(outputPath)) {
        if (fs.listFiles(outputPath, true).hasNext()) {
            LOGGER.error("Output directory exists and is not empty: {}", outputPath);
            throw new IllegalArgumentException("Output directory exists and is not empty: " + outputPath);
        }
        LOGGER.info("Output directory exists and is empty so deleting: {}", outputPath);
        fs.delete(outputPath, true);
    }

    final Path failurePath = new Path(operation.getFailurePath());
    LOGGER.info("Ensuring failure directory {} exists", failurePath);
    if (fs.exists(failurePath)) {
        if (fs.listFiles(failurePath, true).hasNext()) {
            LOGGER.error("Failure directory exists and is not empty: {}", failurePath);
            throw new IllegalArgumentException("Failure directory is not empty: " + failurePath);
        }
    } else {
        LOGGER.info("Failure directory doesn't exist so creating: {}", failurePath);
        fs.mkdirs(failurePath);
    }
    IngestUtils.setDirectoryPermsForAccumulo(fs, failurePath);
}

From source file:hws.core.JobClient.java

License:Apache License

public void run(String[] args) throws Exception {
    //final String command = args[0];
    //final int n = Integer.valueOf(args[1]);
    //final Path jarPath = new Path(args[2]);
    Options options = new Options();
    /*options.addOption(OptionBuilder.withLongOpt("jar")
                           .withDescription( "Jar path" )
                           .hasArg()
                           .withArgName("JarPath")
                           .create());
    options.addOption(OptionBuilder.withLongOpt("scheduler")
                           .withDescription( "Scheduler class name" )
                           .hasArg()
                           .withArgName("ClassName")
                           .create());
    */options.addOption(OptionBuilder.withLongOpt("zk-servers")
            .withDescription("List of the ZooKeeper servers").hasArgs().withArgName("zkAddrs").create("zks"));
    //options.addOption("l", "list", false, "list modules");
    options.addOption(OptionBuilder.withLongOpt("load").withDescription("load new modules").hasArgs()
            .withArgName("XMLFiles").create());
    /*options.addOption(OptionBuilder.withLongOpt( "remove" )
                           .withDescription( "remove modules" )
                           .hasArgs()
                           .withArgName("ModuleNames")
                           .create("rm"));
    */CommandLineParser parser = new BasicParser();
    CommandLine cmd = parser.parse(options, args);

    //Path jarPath = null;
    //String schedulerClassName = null;
    String[] xmlFileNames = null;
    //String []moduleNames = null;
    String zksArgs = "";
    String[] zkServers = null;
    if (cmd.hasOption("zks")) {
        zksArgs = "-zks";
        zkServers = cmd.getOptionValues("zks");
        for (String zks : zkServers) {
            zksArgs += " " + zks;
        }
    }

    //Logger setup
    //FSDataOutputStream writer = FileSystem.get(conf).create(new Path("hdfs:///hws/apps/"+appIdStr+"/logs/jobClient.log"));
    //Logger.addOutputStream(writer);

    /*if(cmd.hasOption("l")){
       LOG.warn("Argument --list (-l) is not supported yet.");
    }
    if(cmd.hasOption("jar")){
       jarPath = new Path(cmd.getOptionValue("jar")); 
    }
    if(cmd.hasOption("scheduler")){
       schedulerClassName = cmd.getOptionValue("scheduler");
    }*/
    if (cmd.hasOption("load")) {
        xmlFileNames = cmd.getOptionValues("load");
    } /*else if(cmd.hasOption("rm")){
        moduleNames = cmd.getOptionValues("rm");
      }*/

    //LOG.info("Jar-Path "+jarPath);
    if (xmlFileNames != null) {
        String paths = "";
        for (String path : xmlFileNames) {
            paths += path + "; ";
        }
        LOG.info("Load XMLs: " + paths);
    }
    /*if(moduleNames!=null){
       String modules = "";
       for(String module: moduleNames){
          modules += module+"; ";
       }
       LOG.info("remove: "+modules);
    }*/
    // Create yarnClient
    YarnConfiguration conf = new YarnConfiguration();
    YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(conf);
    yarnClient.start();

    // Create application via yarnClient
    YarnClientApplication app = yarnClient.createApplication();

    System.out.println("LOG Path: " + ApplicationConstants.LOG_DIR_EXPANSION_VAR);
    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();

    ZkClient zk = new ZkClient(zkServers[0]); //TODO select a ZooKeeper server
    if (!zk.exists("/hadoop-watershed")) {
        zk.createPersistent("/hadoop-watershed", "");
    }
    zk.createPersistent("/hadoop-watershed/" + appId.toString(), "");

    FileSystem fs = FileSystem.get(conf);

    LOG.info("Collecting files to upload");
    fs.mkdirs(new Path("hdfs:///hws/apps/" + appId.toString()));
    fs.mkdirs(new Path("hdfs:///hws/apps/" + appId.toString() + "/logs"));

    ModulePipeline modulePipeline = ModulePipeline.fromXMLFiles(xmlFileNames);
    LOG.info("Uploading files to HDFS");
    for (String path : modulePipeline.files()) {
        uploadFile(fs, new File(path), appId);
    }
    LOG.info("Upload finished");

    String modulePipelineJson = Json.dumps(modulePipeline);
    String modulePipelineBase64 = Base64.encodeBase64String(StringUtils.getBytesUtf8(modulePipelineJson))
            .replaceAll("\\s", "");
    LOG.info("ModulePipeline: " + modulePipelineJson);
    //LOG.info("ModulePipeline: "+modulePipelineBase64);
    amContainer.setCommands(Collections.singletonList("$JAVA_HOME/bin/java" + " -Xmx256M"
            + " hws.core.JobMaster" + " -aid " + appId.toString() + " --load " + modulePipelineBase64 + " "
            + zksArgs + " 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout" + " 2>"
            + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr"));

    // Setup jar for ApplicationMaster
    //LocalResource appMasterJar = Records.newRecord(LocalResource.class);
    //setupAppMasterJar(jarPath, appMasterJar);
    //amContainer.setLocalResources(Collections.singletonMap("hws.jar", appMasterJar));

    LOG.info("Listing files for YARN-Watershed");
    RemoteIterator<LocatedFileStatus> filesIterator = fs.listFiles(new Path("hdfs:///hws/bin/"), false);
    Map<String, LocalResource> resources = new HashMap<String, LocalResource>();
    LOG.info("Files setup as resource");
    while (filesIterator.hasNext()) {
        LocatedFileStatus fileStatus = filesIterator.next();
        // Setup jar for ApplicationMaster
        LocalResource containerJar = Records.newRecord(LocalResource.class);
        ContainerUtils.setupContainerJar(fs, fileStatus.getPath(), containerJar);
        resources.put(fileStatus.getPath().getName(), containerJar);
    }
    LOG.info("container resource setup");
    amContainer.setLocalResources(resources);

    fs.close(); //closing FileSystem interface

    // Setup CLASSPATH for ApplicationMaster
    Map<String, String> appMasterEnv = new HashMap<String, String>();
    ContainerUtils.setupContainerEnv(appMasterEnv, conf);
    amContainer.setEnvironment(appMasterEnv);

    // Set up resource type requirements for ApplicationMaster
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(256);
    capability.setVirtualCores(1);

    // Finally, set-up ApplicationSubmissionContext for the application
    //ApplicationSubmissionContext appContext = 
    //app.getApplicationSubmissionContext();
    appContext.setApplicationName("Hadoop-Watershed"); // application name
    appContext.setAMContainerSpec(amContainer);
    appContext.setResource(capability);
    appContext.setQueue("default"); // queue 

    // Submit application
    LOG.info("Submitting application " + appId);
    yarnClient.submitApplication(appContext);

    LOG.info("Waiting for containers to finish");
    zk.waitUntilExists("/hadoop-watershed/" + appId.toString() + "/done", TimeUnit.MILLISECONDS, 250);
    ApplicationReport appReport = yarnClient.getApplicationReport(appId);
    YarnApplicationState appState = appReport.getYarnApplicationState();
    while (appState != YarnApplicationState.FINISHED && appState != YarnApplicationState.KILLED
            && appState != YarnApplicationState.FAILED) {
        Thread.sleep(100);
        appReport = yarnClient.getApplicationReport(appId);
        appState = appReport.getYarnApplicationState();
    }

    System.out.println("Application " + appId + " finished with" + " state " + appState + " at "
            + appReport.getFinishTime());

    System.out.println("deleting " + appId.toString() + " znode");
    zk.deleteRecursive("/hadoop-watershed/" + appId.toString()); //TODO remove app folder from ZooKeeper
}

From source file:hws.core.JobMaster.java

License:Apache License

public void onContainersAllocated(List<Container> containers) {
    FileSystem fs = null;
    try {
        fs = FileSystem.get(getConfiguration());
    } catch (IOException e) {
        Logger.severe(e.toString());
    }
    for (Container container : containers) {
        try {
            //PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter("/home/yarn/rcor/yarn/app-master-log.out")));
            Logger.info("Selecting instance to container: " + container.getId().toString());
            //dado o container, escolher a instancia que tem dado de entrada mais perto daquele container
            InstanceInfo instanceInfo = null;
            if (instances.get(modulePipeline.get(currentModuleIndex).filterInfo().name())
                    .instancesBuilt() >= modulePipeline.get(currentModuleIndex).numFilterInstances()) {
                currentModuleIndex++;
            }
            if (currentModuleIndex < modulePipeline.size()) {
                instanceInfo = instances.get(modulePipeline.get(currentModuleIndex).filterInfo().name())
                        .build();
            } else
                break;

            String instanceInfoBase64 = Base64
                    .encodeBase64String(StringUtils.getBytesUtf8(Json.dumps(instanceInfo)))
                    .replaceAll("\\s", "");
            // Launch container by create ContainerLaunchContext
            ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class);
            ctx.setCommands(Collections.singletonList(
                    "$JAVA_HOME/bin/java -Xmx256M hws.core.InstanceDriver --load " + instanceInfoBase64
                            + " -aid " + this.appIdStr + " -cid " + container.getId().toString() + " "
                            + this.zksArgs + " 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout"
                            + " 2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr"));

            Logger.info("Listing YARN-Watershed files for app-id: " + this.appIdStr);
            RemoteIterator<LocatedFileStatus> files = fs.listFiles(new Path("hdfs:///hws/bin/"), false);
            Map<String, LocalResource> resources = new HashMap<String, LocalResource>();
            Logger.info("Setup YARN-Watershed files as resources");
            while (files.hasNext()) {
                LocatedFileStatus fileStatus = files.next();
                // Setup jar for ApplicationMaster
                LocalResource containerJar = Records.newRecord(LocalResource.class);
                ContainerUtils.setupContainerJar(fs, fileStatus.getPath(), containerJar);
                resources.put(fileStatus.getPath().getName(), containerJar);
            }

            Logger.info("Listing application files for app-id: " + this.appIdStr);
            files = fs.listFiles(new Path("hdfs:///hws/apps/" + this.appIdStr + "/"), false);
            Logger.info("Setup application files as resources");
            while (files.hasNext()) {
                LocatedFileStatus fileStatus = files.next();
                // Setup jar for ApplicationMaster
                LocalResource containerJar = Records.newRecord(LocalResource.class);
                ContainerUtils.setupContainerJar(fs, fileStatus.getPath(), containerJar);
                resources.put(fileStatus.getPath().getName(), containerJar);
            }
            Logger.info("container resource setup");
            ctx.setLocalResources(resources);

            Logger.info("Environment setup");
            // Setup CLASSPATH for ApplicationMaster
            Map<String, String> containerEnv = new HashMap<String, String>();
            ContainerUtils.setupContainerEnv(containerEnv, getConfiguration());
            ctx.setEnvironment(containerEnv);
            Logger.info("Starting containers");

            Logger.info("[AM] Launching container " + container.getId());
            nmClient.startContainer(container, ctx);
            Logger.info("Container started!");
            /*String znode = "/hadoop-watershed/"+this.appIdStr+"/"+instanceInfo.filterInfo().name()+"/"+instanceInfo.instanceId();
            out.println("Saving instance znode: "+znode);
            out.flush();
            zk.createPersistent(znode, "");
            zk.createPersistent(znode+"/host", container.getNodeId().getHost());
            out.println("saved location: "+container.getNodeId().getHost());
            out.flush();
            */
            if (instances.get(modulePipeline.get(currentModuleIndex).filterInfo().name())
                    .instancesBuilt() >= modulePipeline.get(currentModuleIndex).numFilterInstances()) {
                Logger.info("Starting via ZooKeeper filter: " + instanceInfo.filterInfo().name());
                zk.createPersistent("/hadoop-watershed/" + this.appIdStr + "/"
                        + instanceInfo.filterInfo().name() + "/start", "");
            }
            //out.close();
        } catch (Exception e) {
            Logger.severe("[AM] Error launching container " + container.getId() + " " + e);
        }
    }
    try {
        fs.close();
    } catch (IOException e) {
        Logger.severe(e.toString());
    }
}

From source file:io.druid.indexer.updater.HadoopConverterJob.java

License:Apache License

public List<DataSegment> run() throws IOException {
    final JobConf jobConf = new JobConf();
    jobConf.setKeepFailedTaskFiles(false);
    for (Map.Entry<String, String> entry : converterConfig.getHadoopProperties().entrySet()) {
        jobConf.set(entry.getKey(), entry.getValue(), "converterConfig.getHadoopProperties()");
    }
    final List<DataSegment> segments = converterConfig.getSegments();
    if (segments.isEmpty()) {
        throw new IAE("No segments found for datasource [%s]", converterConfig.getDataSource());
    }
    converterConfigIntoConfiguration(converterConfig, segments, jobConf);

    jobConf.setNumReduceTasks(0);// Map only. Number of map tasks determined by input format
    jobConf.setWorkingDirectory(new Path(converterConfig.getDistributedSuccessCache()));

    setJobName(jobConf, segments);

    if (converterConfig.getJobPriority() != null) {
        jobConf.setJobPriority(JobPriority.valueOf(converterConfig.getJobPriority()));
    }

    final Job job = Job.getInstance(jobConf);

    job.setInputFormatClass(ConfigInputFormat.class);
    job.setMapperClass(ConvertingMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapSpeculativeExecution(false);
    job.setOutputFormatClass(ConvertingOutputFormat.class);

    JobHelper.setupClasspath(JobHelper.distributedClassPath(jobConf.getWorkingDirectory()),
            JobHelper.distributedClassPath(getJobClassPathDir(job.getJobName(), jobConf.getWorkingDirectory())),
            job);

    Throwable throwable = null;
    try {
        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
        final boolean success = job.waitForCompletion(true);
        if (!success) {
            final TaskReport[] reports = job.getTaskReports(TaskType.MAP);
            if (reports != null) {
                for (final TaskReport report : reports) {
                    log.error("Error in task [%s] : %s", report.getTaskId(),
                            Arrays.toString(report.getDiagnostics()));
                }
            }
            return null;
        }
        try {
            loadedBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_LOADED).getValue();
            writtenBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_WRITTEN).getValue();
        } catch (IOException ex) {
            log.error(ex, "Could not fetch counters");
        }
        final JobID jobID = job.getJobID();

        final Path jobDir = getJobPath(jobID, job.getWorkingDirectory());
        final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
        final RemoteIterator<LocatedFileStatus> it = fs.listFiles(jobDir, true);
        final List<Path> goodPaths = new ArrayList<>();
        while (it.hasNext()) {
            final LocatedFileStatus locatedFileStatus = it.next();
            if (locatedFileStatus.isFile()) {
                final Path myPath = locatedFileStatus.getPath();
                if (ConvertingOutputFormat.DATA_SUCCESS_KEY.equals(myPath.getName())) {
                    goodPaths.add(new Path(myPath.getParent(), ConvertingOutputFormat.DATA_FILE_KEY));
                }
            }
        }
        if (goodPaths.isEmpty()) {
            log.warn("No good data found at [%s]", jobDir);
            return null;
        }
        final List<DataSegment> returnList = ImmutableList
                .copyOf(Lists.transform(goodPaths, new Function<Path, DataSegment>() {
                    @Nullable
                    @Override
                    public DataSegment apply(final Path input) {
                        try {
                            if (!fs.exists(input)) {
                                throw new ISE("Somehow [%s] was found but [%s] is missing at [%s]",
                                        ConvertingOutputFormat.DATA_SUCCESS_KEY,
                                        ConvertingOutputFormat.DATA_FILE_KEY, jobDir);
                            }
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                        try (final InputStream stream = fs.open(input)) {
                            return HadoopDruidConverterConfig.jsonMapper.readValue(stream, DataSegment.class);
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                    }
                }));
        if (returnList.size() == segments.size()) {
            return returnList;
        } else {
            throw new ISE(
                    "Tasks reported success but result length did not match! Expected %d found %d at path [%s]",
                    segments.size(), returnList.size(), jobDir);
        }
    } catch (InterruptedException | ClassNotFoundException e) {
        RuntimeException exception = Throwables.propagate(e);
        throwable = exception;
        throw exception;
    } catch (Throwable t) {
        throwable = t;
        throw t;
    } finally {
        try {
            cleanup(job);
        } catch (IOException e) {
            if (throwable != null) {
                throwable.addSuppressed(e);
            } else {
                log.error(e, "Could not clean up job [%s]", job.getJobID());
            }
        }
    }
}

From source file:io.druid.storage.hdfs.HdfsDataSegmentFinder.java

License:Apache License

@Override
public Set<DataSegment> findSegments(String workingDirPathStr, boolean updateDescriptor)
        throws SegmentLoadingException {
    final Set<DataSegment> segments = Sets.newHashSet();
    final Path workingDirPath = new Path(workingDirPathStr);
    FileSystem fs;
    try {
        fs = workingDirPath.getFileSystem(config);

        log.info(fs.getScheme());
        log.info("FileSystem URI:" + fs.getUri().toString());

        if (!fs.exists(workingDirPath)) {
            throw new SegmentLoadingException("Working directory [%s] doesn't exist.", workingDirPath);
        }

        if (!fs.isDirectory(workingDirPath)) {
            throw new SegmentLoadingException("Working directory [%s] is not a directory!?", workingDirPath);
        }

        final RemoteIterator<LocatedFileStatus> it = fs.listFiles(workingDirPath, true);
        while (it.hasNext()) {
            final LocatedFileStatus locatedFileStatus = it.next();
            final Path path = locatedFileStatus.getPath();
            if (path.getName().endsWith("descriptor.json")) {
                final Path indexZip;
                final String descriptorParts[] = path.getName().split("_");
                if (descriptorParts.length == 2 && descriptorParts[1].equals("descriptor.json")
                        && org.apache.commons.lang.StringUtils.isNumeric(descriptorParts[0])) {
                    indexZip = new Path(path.getParent(),
                            StringUtils.format("%s_index.zip", descriptorParts[0]));
                } else {
                    indexZip = new Path(path.getParent(), "index.zip");
                }
                if (fs.exists(indexZip)) {
                    final DataSegment dataSegment = mapper.readValue(fs.open(path), DataSegment.class);
                    log.info("Found segment [%s] located at [%s]", dataSegment.getIdentifier(), indexZip);

                    final Map<String, Object> loadSpec = dataSegment.getLoadSpec();
                    final String pathWithoutScheme = indexZip.toUri().getPath();

                    if (!loadSpec.get("type").equals(HdfsStorageDruidModule.SCHEME)
                            || !loadSpec.get("path").equals(pathWithoutScheme)) {
                        loadSpec.put("type", HdfsStorageDruidModule.SCHEME);
                        loadSpec.put("path", pathWithoutScheme);
                        if (updateDescriptor) {
                            log.info("Updating loadSpec in descriptor.json at [%s] with new path [%s]", path,
                                    pathWithoutScheme);
                            mapper.writeValue(fs.create(path, true), dataSegment);
                        }
                    }
                    segments.add(dataSegment);
                } else {
                    throw new SegmentLoadingException(
                            "index.zip didn't exist at [%s] while descripter.json exists!?", indexZip);
                }
            }
        }
    } catch (IOException e) {
        throw new SegmentLoadingException(e, "Problems interacting with filesystem[%s].", workingDirPath);
    }

    return segments;
}

From source file:io.druid.storage.hdfs.HdfsDataSegmentPuller.java

License:Apache License

public FileUtils.FileCopyResult getSegmentFiles(final Path path, final File outDir)
        throws SegmentLoadingException {
    final LocalFileSystem localFileSystem = new LocalFileSystem();
    try {
        final FileSystem fs = path.getFileSystem(config);
        if (fs.isDirectory(path)) {

            // --------    directory     ---------

            try {
                return RetryUtils.retry(new Callable<FileUtils.FileCopyResult>() {
                    @Override
                    public FileUtils.FileCopyResult call() throws Exception {
                        if (!fs.exists(path)) {
                            throw new SegmentLoadingException("No files found at [%s]", path.toString());
                        }

                        final RemoteIterator<LocatedFileStatus> children = fs.listFiles(path, false);
                        final ArrayList<FileUtils.FileCopyResult> localChildren = new ArrayList<>();
                        final FileUtils.FileCopyResult result = new FileUtils.FileCopyResult();
                        while (children.hasNext()) {
                            final LocatedFileStatus child = children.next();
                            final Path childPath = child.getPath();
                            final String fname = childPath.getName();
                            if (fs.isDirectory(childPath)) {
                                log.warn("[%s] is a child directory, skipping", childPath.toString());
                            } else {
                                final File outFile = new File(outDir, fname);

                                // Actual copy
                                fs.copyToLocalFile(childPath, new Path(outFile.toURI()));
                                result.addFile(outFile);
                            }
                        }
                        log.info("Copied %d bytes from [%s] to [%s]", result.size(), path.toString(),
                                outDir.getAbsolutePath());
                        return result;
                    }

                }, shouldRetryPredicate(), DEFAULT_RETRY_COUNT);
            } catch (Exception e) {
                throw Throwables.propagate(e);
            }
        } else if (CompressionUtils.isZip(path.getName())) {

            // --------    zip     ---------

            final FileUtils.FileCopyResult result = CompressionUtils.unzip(new ByteSource() {
                @Override
                public InputStream openStream() throws IOException {
                    return getInputStream(path);
                }
            }, outDir, shouldRetryPredicate(), false);

            log.info("Unzipped %d bytes from [%s] to [%s]", result.size(), path.toString(),
                    outDir.getAbsolutePath());

            return result;
        } else if (CompressionUtils.isGz(path.getName())) {

            // --------    gzip     ---------

            final String fname = path.getName();
            final File outFile = new File(outDir, CompressionUtils.getGzBaseName(fname));
            final FileUtils.FileCopyResult result = CompressionUtils.gunzip(new ByteSource() {
                @Override
                public InputStream openStream() throws IOException {
                    return getInputStream(path);
                }
            }, outFile);

            log.info("Gunzipped %d bytes from [%s] to [%s]", result.size(), path.toString(),
                    outFile.getAbsolutePath());
            return result;
        } else {
            throw new SegmentLoadingException("Do not know how to handle file type at [%s]", path.toString());
        }
    } catch (IOException e) {
        throw new SegmentLoadingException(e, "Error loading [%s]", path.toString());
    }
}