Example usage for org.apache.hadoop.fs FileSystem listFiles

List of usage examples for org.apache.hadoop.fs FileSystem listFiles

Introduction

On this page you can find example usage of org.apache.hadoop.fs FileSystem listFiles.

Prototype

public RemoteIterator<LocatedFileStatus> listFiles(final Path f, final boolean recursive)
        throws FileNotFoundException, IOException 

Document

List the statuses and block locations of the files in the given path. If the path is a directory and recursive is false, only the files directly in that directory are returned; if recursive is true, files in the entire subtree rooted at the path are returned. If the path is a file, its own status and block locations are returned.
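
A minimal, self-contained sketch of a typical call (the directory argument and the plain Configuration used here are illustrative placeholders, not taken from the examples below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListFilesExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path dir = new Path(args.length > 0 ? args[0] : "/tmp"); // directory to scan (placeholder default)
        FileSystem fs = dir.getFileSystem(conf);

        // recursive = true walks the whole subtree; false lists only files directly under dir.
        // Only files are returned, never directories.
        RemoteIterator<LocatedFileStatus> it = fs.listFiles(dir, true);
        while (it.hasNext()) {
            LocatedFileStatus status = it.next();
            System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
        }
    }
}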

Usage

From source file:edu.iu.datasource.HarpDAALDataSource.java

License:Apache License

public List<double[]> loadDenseCSVFiles(String inputFile, int nFeatures, String sep) {

    Path inputFilePaths = new Path(inputFile);
    List<String> inputFileList = new LinkedList<>();

    try {
        FileSystem fs = inputFilePaths.getFileSystem(conf);
        RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(inputFilePaths, true);

        while (iterator.hasNext()) {
            String name = iterator.next().getPath().toUri().toString();
            inputFileList.add(name);
        }

    } catch (IOException e) {
        LOG.error("Fail to get test files", e);
    }

    List<double[]> points = new LinkedList<double[]>();

    FSDataInputStream in = null;

    //loop over all the files in the list
    ListIterator<String> file_itr = inputFileList.listIterator();
    while (file_itr.hasNext()) {
        String file_name = file_itr.next();
        LOG.info("read in file name: " + file_name);

        Path file_path = new Path(file_name);
        try {

            FileSystem fs = file_path.getFileSystem(conf);
            in = fs.open(file_path);

        } catch (Exception e) {
            LOG.error("Fail to open file " + e.toString());
            return null;
        }

        //read file content
        try {
            while (true) {
                String line = in.readLine();
                if (line == null)
                    break;

                String[] lineData = line.split(sep);
                double[] cell = new double[nFeatures];

                for (int t = 0; t < nFeatures; t++)
                    cell[t] = Double.parseDouble(lineData[t]);

                points.add(cell);
            }

            in.close();

        } catch (Exception e) {
            LOG.error("Fail to read data " + e.toString());
            return null;
        }

    }

    return points;

}

From source file:edu.iu.datasource.HarpDAALDataSource.java

License:Apache License

public List<COO> loadCOOFiles(String FilePath, String regex) {
    List<String> FilePathsList = new LinkedList<>();
    Path path = new Path(FilePath);
    try {
        FileSystem fs = path.getFileSystem(this.conf);
        RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(path, true);
        while (iterator.hasNext()) {
            String name = iterator.next().getPath().toUri().toString();
            FilePathsList.add(name);
        }
    } catch (IOException e) {
        LOG.error("Fail to get test files", e);
    }

    MTReader reader = new MTReader();
    List<COO> output = reader.readCOO(FilePathsList, regex, this.conf, this.harpthreads);
    // this.totallines = reader.getTotalLines();
    // this.totalPoints = reader.getTotalPoints();
    return output;
}

From source file:edu.iu.datasource.HarpDAALDataSource.java

License:Apache License

public NumericTable loadCSRNumericTable(String inputFiles, String sep, DaalContext context) throws IOException {

    Path inputFilePaths = new Path(inputFiles);
    List<String> inputFileList = new LinkedList<>();

    try {
        FileSystem fs = inputFilePaths.getFileSystem(conf);
        RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(inputFilePaths, true);

        while (iterator.hasNext()) {
            String name = iterator.next().getPath().toUri().toString();
            inputFileList.add(name);
        }

    } catch (IOException e) {
        LOG.error("Fail to get test files", e);
    }

    if (inputFileList.size() > 1) {
        LOG.info("Error CSR data shall be within a single file");
        return null;
    }

    String filename = inputFileList.get(0);
    return loadCSRNumericTableImpl(filename, sep, context);
}

From source file:edu.iu.datasource.HarpDAALDataSource.java

License:Apache License

public NumericTable[] loadCSRNumericTableAndLabel(String inputFiles, String sep, DaalContext context)
        throws IOException {

    Path inputFilePaths = new Path(inputFiles);
    List<String> inputFileList = new LinkedList<>();

    try {
        FileSystem fs = inputFilePaths.getFileSystem(conf);
        RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(inputFilePaths, true);

        while (iterator.hasNext()) {
            String name = iterator.next().getPath().toUri().toString();
            inputFileList.add(name);
        }

    } catch (IOException e) {
        LOG.error("Fail to get test files", e);
    }

    if (inputFileList.size() > 1) {
        LOG.info("Error CSR data shall be within a single file");
        return null;
    }

    String filename = inputFileList.get(0);

    return loadCSRNumericTableAndLabelImpl(filename, sep, context);

}

From source file:gaffer.accumulostore.operation.hdfs.handler.job.tool.FetchElementsFromHdfsTool.java

License:Apache License

private void checkHdfsDirectories(final AddElementsFromHdfs operation) throws IOException {
    LOGGER.info("Checking that the correct HDFS directories exist");
    final FileSystem fs = FileSystem.get(getConf());

    final Path outputPath = new Path(operation.getOutputPath());
    LOGGER.info("Ensuring output directory {} doesn't exist", outputPath);
    if (fs.exists(outputPath)) {
        if (fs.listFiles(outputPath, true).hasNext()) {
            LOGGER.error("Output directory exists and is not empty: {}", outputPath);
            throw new IllegalArgumentException("Output directory exists and is not empty: " + outputPath);
        }
        LOGGER.info("Output directory exists and is empty so deleting: {}", outputPath);
        fs.delete(outputPath, true);
    }

    final Path failurePath = new Path(operation.getFailurePath());
    LOGGER.info("Ensuring failure directory {} exists", failurePath);
    if (fs.exists(failurePath)) {
        if (fs.listFiles(failurePath, true).hasNext()) {
            LOGGER.error("Failure directory exists and is not empty: {}", failurePath);
            throw new IllegalArgumentException("Failure directory is not empty: " + failurePath);
        }
    } else {
        LOGGER.info("Failure directory doesn't exist so creating: {}", failurePath);
        fs.mkdirs(failurePath);
    }
    IngestUtils.setDirectoryPermsForAccumulo(fs, failurePath);
}

From source file:hws.core.JobClient.java

License:Apache License

public void run(String[] args) throws Exception {
    //final String command = args[0];
    //final int n = Integer.valueOf(args[1]);
    //final Path jarPath = new Path(args[2]);
    Options options = new Options();
    /*options.addOption(OptionBuilder.withLongOpt("jar")
                           .withDescription( "Jar path" )
                           .hasArg()
                           .withArgName("JarPath")
                           .create());
    options.addOption(OptionBuilder.withLongOpt("scheduler")
                           .withDescription( "Scheduler class name" )
                           .hasArg()
                           .withArgName("ClassName")
                           .create());
    */options.addOption(OptionBuilder.withLongOpt("zk-servers")
            .withDescription("List of the ZooKeeper servers").hasArgs().withArgName("zkAddrs").create("zks"));
    //options.addOption("l", "list", false, "list modules");
    options.addOption(OptionBuilder.withLongOpt("load").withDescription("load new modules").hasArgs()
            .withArgName("XMLFiles").create());
    /*options.addOption(OptionBuilder.withLongOpt( "remove" )
                           .withDescription( "remove modules" )
                           .hasArgs()
                           .withArgName("ModuleNames")
                           .create("rm"));
    */CommandLineParser parser = new BasicParser();
    CommandLine cmd = parser.parse(options, args);

    //Path jarPath = null;
    //String schedulerClassName = null;
    String[] xmlFileNames = null;
    //String []moduleNames = null;
    String zksArgs = "";
    String[] zkServers = null;
    if (cmd.hasOption("zks")) {
        zksArgs = "-zks";
        zkServers = cmd.getOptionValues("zks");
        for (String zks : zkServers) {
            zksArgs += " " + zks;
        }
    }

    //Logger setup
    //FSDataOutputStream writer = FileSystem.get(conf).create(new Path("hdfs:///hws/apps/"+appIdStr+"/logs/jobClient.log"));
    //Logger.addOutputStream(writer);

    /*if(cmd.hasOption("l")){
       LOG.warn("Argument --list (-l) is not supported yet.");
    }
    if(cmd.hasOption("jar")){
       jarPath = new Path(cmd.getOptionValue("jar")); 
    }
    if(cmd.hasOption("scheduler")){
       schedulerClassName = cmd.getOptionValue("scheduler");
    }*/
    if (cmd.hasOption("load")) {
        xmlFileNames = cmd.getOptionValues("load");
    } /*else if(cmd.hasOption("rm")){
        moduleNames = cmd.getOptionValues("rm");
      }*/

    //LOG.info("Jar-Path "+jarPath);
    if (xmlFileNames != null) {
        String paths = "";
        for (String path : xmlFileNames) {
            paths += path + "; ";
        }
        LOG.info("Load XMLs: " + paths);
    }
    /*if(moduleNames!=null){
       String modules = "";
       for(String module: moduleNames){
          modules += module+"; ";
       }
       LOG.info("remove: "+modules);
    }*/
    // Create yarnClient
    YarnConfiguration conf = new YarnConfiguration();
    YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(conf);
    yarnClient.start();

    // Create application via yarnClient
    YarnClientApplication app = yarnClient.createApplication();

    System.out.println("LOG Path: " + ApplicationConstants.LOG_DIR_EXPANSION_VAR);
    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();

    ZkClient zk = new ZkClient(zkServers[0]); //TODO select a ZooKeeper server
    if (!zk.exists("/hadoop-watershed")) {
        zk.createPersistent("/hadoop-watershed", "");
    }
    zk.createPersistent("/hadoop-watershed/" + appId.toString(), "");

    FileSystem fs = FileSystem.get(conf);

    LOG.info("Collecting files to upload");
    fs.mkdirs(new Path("hdfs:///hws/apps/" + appId.toString()));
    fs.mkdirs(new Path("hdfs:///hws/apps/" + appId.toString() + "/logs"));

    ModulePipeline modulePipeline = ModulePipeline.fromXMLFiles(xmlFileNames);
    LOG.info("Uploading files to HDFS");
    for (String path : modulePipeline.files()) {
        uploadFile(fs, new File(path), appId);
    }
    LOG.info("Upload finished");

    String modulePipelineJson = Json.dumps(modulePipeline);
    String modulePipelineBase64 = Base64.encodeBase64String(StringUtils.getBytesUtf8(modulePipelineJson))
            .replaceAll("\\s", "");
    LOG.info("ModulePipeline: " + modulePipelineJson);
    //LOG.info("ModulePipeline: "+modulePipelineBase64);
    amContainer.setCommands(Collections.singletonList("$JAVA_HOME/bin/java" + " -Xmx256M"
            + " hws.core.JobMaster" + " -aid " + appId.toString() + " --load " + modulePipelineBase64 + " "
            + zksArgs + " 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout" + " 2>"
            + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr"));

    // Setup jar for ApplicationMaster
    //LocalResource appMasterJar = Records.newRecord(LocalResource.class);
    //setupAppMasterJar(jarPath, appMasterJar);
    //amContainer.setLocalResources(Collections.singletonMap("hws.jar", appMasterJar));

    LOG.info("Listing files for YARN-Watershed");
    RemoteIterator<LocatedFileStatus> filesIterator = fs.listFiles(new Path("hdfs:///hws/bin/"), false);
    Map<String, LocalResource> resources = new HashMap<String, LocalResource>();
    LOG.info("Files setup as resource");
    while (filesIterator.hasNext()) {
        LocatedFileStatus fileStatus = filesIterator.next();
        // Setup jar for ApplicationMaster
        LocalResource containerJar = Records.newRecord(LocalResource.class);
        ContainerUtils.setupContainerJar(fs, fileStatus.getPath(), containerJar);
        resources.put(fileStatus.getPath().getName(), containerJar);
    }
    LOG.info("container resource setup");
    amContainer.setLocalResources(resources);

    fs.close(); //closing FileSystem interface

    // Setup CLASSPATH for ApplicationMaster
    Map<String, String> appMasterEnv = new HashMap<String, String>();
    ContainerUtils.setupContainerEnv(appMasterEnv, conf);
    amContainer.setEnvironment(appMasterEnv);

    // Set up resource type requirements for ApplicationMaster
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(256);
    capability.setVirtualCores(1);

    // Finally, set-up ApplicationSubmissionContext for the application
    //ApplicationSubmissionContext appContext = 
    //app.getApplicationSubmissionContext();
    appContext.setApplicationName("Hadoop-Watershed"); // application name
    appContext.setAMContainerSpec(amContainer);
    appContext.setResource(capability);
    appContext.setQueue("default"); // queue 

    // Submit application
    LOG.info("Submitting application " + appId);
    yarnClient.submitApplication(appContext);

    LOG.info("Waiting for containers to finish");
    zk.waitUntilExists("/hadoop-watershed/" + appId.toString() + "/done", TimeUnit.MILLISECONDS, 250);
    ApplicationReport appReport = yarnClient.getApplicationReport(appId);
    YarnApplicationState appState = appReport.getYarnApplicationState();
    while (appState != YarnApplicationState.FINISHED && appState != YarnApplicationState.KILLED
            && appState != YarnApplicationState.FAILED) {
        Thread.sleep(100);
        appReport = yarnClient.getApplicationReport(appId);
        appState = appReport.getYarnApplicationState();
    }

    System.out.println("Application " + appId + " finished with" + " state " + appState + " at "
            + appReport.getFinishTime());

    System.out.println("deleting " + appId.toString() + " znode");
    zk.deleteRecursive("/hadoop-watershed/" + appId.toString()); //TODO remove app folder from ZooKeeper
}

From source file:hws.core.JobMaster.java

License:Apache License

public void onContainersAllocated(List<Container> containers) {
    FileSystem fs = null;
    try {
        fs = FileSystem.get(getConfiguration());
    } catch (IOException e) {
        Logger.severe(e.toString());
    }
    for (Container container : containers) {
        try {
            //PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter("/home/yarn/rcor/yarn/app-master-log.out")));
            Logger.info("Selecting instance to container: " + container.getId().toString());
            //dado o container, escolher a instancia que tem dado de entrada mais perto daquele container
            InstanceInfo instanceInfo = null;
            if (instances.get(modulePipeline.get(currentModuleIndex).filterInfo().name())
                    .instancesBuilt() >= modulePipeline.get(currentModuleIndex).numFilterInstances()) {
                currentModuleIndex++;
            }
            if (currentModuleIndex < modulePipeline.size()) {
                instanceInfo = instances.get(modulePipeline.get(currentModuleIndex).filterInfo().name())
                        .build();
            } else
                break;

            String instanceInfoBase64 = Base64
                    .encodeBase64String(StringUtils.getBytesUtf8(Json.dumps(instanceInfo)))
                    .replaceAll("\\s", "");
            // Launch container by create ContainerLaunchContext
            ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class);
            ctx.setCommands(Collections.singletonList(
                    "$JAVA_HOME/bin/java -Xmx256M hws.core.InstanceDriver --load " + instanceInfoBase64
                            + " -aid " + this.appIdStr + " -cid " + container.getId().toString() + " "
                            + this.zksArgs + " 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout"
                            + " 2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr"));

            Logger.info("Listing YARN-Watershed files for app-id: " + this.appIdStr);
            RemoteIterator<LocatedFileStatus> files = fs.listFiles(new Path("hdfs:///hws/bin/"), false);
            Map<String, LocalResource> resources = new HashMap<String, LocalResource>();
            Logger.info("Setup YARN-Watershed files as resources");
            while (files.hasNext()) {
                LocatedFileStatus fileStatus = files.next();
                // Setup jar for ApplicationMaster
                LocalResource containerJar = Records.newRecord(LocalResource.class);
                ContainerUtils.setupContainerJar(fs, fileStatus.getPath(), containerJar);
                resources.put(fileStatus.getPath().getName(), containerJar);
            }

            Logger.info("Listing application files for app-id: " + this.appIdStr);
            files = fs.listFiles(new Path("hdfs:///hws/apps/" + this.appIdStr + "/"), false);
            Logger.info("Setup application files as resources");
            while (files.hasNext()) {
                LocatedFileStatus fileStatus = files.next();
                // Setup jar for ApplicationMaster
                LocalResource containerJar = Records.newRecord(LocalResource.class);
                ContainerUtils.setupContainerJar(fs, fileStatus.getPath(), containerJar);
                resources.put(fileStatus.getPath().getName(), containerJar);
            }
            Logger.info("container resource setup");
            ctx.setLocalResources(resources);

            Logger.info("Environment setup");
            // Setup CLASSPATH for ApplicationMaster
            Map<String, String> containerEnv = new HashMap<String, String>();
            ContainerUtils.setupContainerEnv(containerEnv, getConfiguration());
            ctx.setEnvironment(containerEnv);
            Logger.info("Starting containers");

            Logger.info("[AM] Launching container " + container.getId());
            nmClient.startContainer(container, ctx);
            Logger.info("Container started!");
            /*String znode = "/hadoop-watershed/"+this.appIdStr+"/"+instanceInfo.filterInfo().name()+"/"+instanceInfo.instanceId();
            out.println("Saving instance znode: "+znode);
            out.flush();
            zk.createPersistent(znode, "");
            zk.createPersistent(znode+"/host", container.getNodeId().getHost());
            out.println("saved location: "+container.getNodeId().getHost());
            out.flush();
            */
            if (instances.get(modulePipeline.get(currentModuleIndex).filterInfo().name())
                    .instancesBuilt() >= modulePipeline.get(currentModuleIndex).numFilterInstances()) {
                Logger.info("Starting via ZooKeeper filter: " + instanceInfo.filterInfo().name());
                zk.createPersistent("/hadoop-watershed/" + this.appIdStr + "/"
                        + instanceInfo.filterInfo().name() + "/start", "");
            }
            //out.close();
        } catch (Exception e) {
            Logger.severe("[AM] Error launching container " + container.getId() + " " + e);
        }
    }
    try {
        fs.close();
    } catch (IOException e) {
        Logger.severe(e.toString());
    }
}

From source file:io.druid.indexer.updater.HadoopConverterJob.java

License:Apache License

public List<DataSegment> run() throws IOException {
    final JobConf jobConf = new JobConf();
    jobConf.setKeepFailedTaskFiles(false);
    for (Map.Entry<String, String> entry : converterConfig.getHadoopProperties().entrySet()) {
        jobConf.set(entry.getKey(), entry.getValue(), "converterConfig.getHadoopProperties()");
    }
    final List<DataSegment> segments = converterConfig.getSegments();
    if (segments.isEmpty()) {
        throw new IAE("No segments found for datasource [%s]", converterConfig.getDataSource());
    }
    converterConfigIntoConfiguration(converterConfig, segments, jobConf);

    jobConf.setNumReduceTasks(0);// Map only. Number of map tasks determined by input format
    jobConf.setWorkingDirectory(new Path(converterConfig.getDistributedSuccessCache()));

    setJobName(jobConf, segments);

    if (converterConfig.getJobPriority() != null) {
        jobConf.setJobPriority(JobPriority.valueOf(converterConfig.getJobPriority()));
    }

    final Job job = Job.getInstance(jobConf);

    job.setInputFormatClass(ConfigInputFormat.class);
    job.setMapperClass(ConvertingMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapSpeculativeExecution(false);
    job.setOutputFormatClass(ConvertingOutputFormat.class);

    JobHelper.setupClasspath(JobHelper.distributedClassPath(jobConf.getWorkingDirectory()),
            JobHelper.distributedClassPath(getJobClassPathDir(job.getJobName(), jobConf.getWorkingDirectory())),
            job);

    Throwable throwable = null;
    try {
        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
        final boolean success = job.waitForCompletion(true);
        if (!success) {
            final TaskReport[] reports = job.getTaskReports(TaskType.MAP);
            if (reports != null) {
                for (final TaskReport report : reports) {
                    log.error("Error in task [%s] : %s", report.getTaskId(),
                            Arrays.toString(report.getDiagnostics()));
                }
            }
            return null;
        }
        try {
            loadedBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_LOADED).getValue();
            writtenBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_WRITTEN).getValue();
        } catch (IOException ex) {
            log.error(ex, "Could not fetch counters");
        }
        final JobID jobID = job.getJobID();

        final Path jobDir = getJobPath(jobID, job.getWorkingDirectory());
        final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
        final RemoteIterator<LocatedFileStatus> it = fs.listFiles(jobDir, true);
        final List<Path> goodPaths = new ArrayList<>();
        while (it.hasNext()) {
            final LocatedFileStatus locatedFileStatus = it.next();
            if (locatedFileStatus.isFile()) {
                final Path myPath = locatedFileStatus.getPath();
                if (ConvertingOutputFormat.DATA_SUCCESS_KEY.equals(myPath.getName())) {
                    goodPaths.add(new Path(myPath.getParent(), ConvertingOutputFormat.DATA_FILE_KEY));
                }
            }
        }
        if (goodPaths.isEmpty()) {
            log.warn("No good data found at [%s]", jobDir);
            return null;
        }
        final List<DataSegment> returnList = ImmutableList
                .copyOf(Lists.transform(goodPaths, new Function<Path, DataSegment>() {
                    @Nullable
                    @Override
                    public DataSegment apply(final Path input) {
                        try {
                            if (!fs.exists(input)) {
                                throw new ISE("Somehow [%s] was found but [%s] is missing at [%s]",
                                        ConvertingOutputFormat.DATA_SUCCESS_KEY,
                                        ConvertingOutputFormat.DATA_FILE_KEY, jobDir);
                            }
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                        try (final InputStream stream = fs.open(input)) {
                            return HadoopDruidConverterConfig.jsonMapper.readValue(stream, DataSegment.class);
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                    }
                }));
        if (returnList.size() == segments.size()) {
            return returnList;
        } else {
            throw new ISE(
                    "Tasks reported success but result length did not match! Expected %d found %d at path [%s]",
                    segments.size(), returnList.size(), jobDir);
        }
    } catch (InterruptedException | ClassNotFoundException e) {
        RuntimeException exception = Throwables.propagate(e);
        throwable = exception;
        throw exception;
    } catch (Throwable t) {
        throwable = t;
        throw t;
    } finally {
        try {
            cleanup(job);
        } catch (IOException e) {
            if (throwable != null) {
                throwable.addSuppressed(e);
            } else {
                log.error(e, "Could not clean up job [%s]", job.getJobID());
            }
        }
    }
}

From source file:io.druid.storage.hdfs.HdfsDataSegmentFinder.java

License:Apache License

@Override
public Set<DataSegment> findSegments(String workingDirPathStr, boolean updateDescriptor)
        throws SegmentLoadingException {
    final Set<DataSegment> segments = Sets.newHashSet();
    final Path workingDirPath = new Path(workingDirPathStr);
    FileSystem fs;
    try {
        fs = workingDirPath.getFileSystem(config);

        log.info(fs.getScheme());
        log.info("FileSystem URI:" + fs.getUri().toString());

        if (!fs.exists(workingDirPath)) {
            throw new SegmentLoadingException("Working directory [%s] doesn't exist.", workingDirPath);
        }

        if (!fs.isDirectory(workingDirPath)) {
            throw new SegmentLoadingException("Working directory [%s] is not a directory!?", workingDirPath);
        }

        final RemoteIterator<LocatedFileStatus> it = fs.listFiles(workingDirPath, true);
        while (it.hasNext()) {
            final LocatedFileStatus locatedFileStatus = it.next();
            final Path path = locatedFileStatus.getPath();
            if (path.getName().endsWith("descriptor.json")) {
                final Path indexZip;
                final String descriptorParts[] = path.getName().split("_");
                if (descriptorParts.length == 2 && descriptorParts[1].equals("descriptor.json")
                        && org.apache.commons.lang.StringUtils.isNumeric(descriptorParts[0])) {
                    indexZip = new Path(path.getParent(),
                            StringUtils.format("%s_index.zip", descriptorParts[0]));
                } else {
                    indexZip = new Path(path.getParent(), "index.zip");
                }
                if (fs.exists(indexZip)) {
                    final DataSegment dataSegment = mapper.readValue(fs.open(path), DataSegment.class);
                    log.info("Found segment [%s] located at [%s]", dataSegment.getIdentifier(), indexZip);

                    final Map<String, Object> loadSpec = dataSegment.getLoadSpec();
                    final String pathWithoutScheme = indexZip.toUri().getPath();

                    if (!loadSpec.get("type").equals(HdfsStorageDruidModule.SCHEME)
                            || !loadSpec.get("path").equals(pathWithoutScheme)) {
                        loadSpec.put("type", HdfsStorageDruidModule.SCHEME);
                        loadSpec.put("path", pathWithoutScheme);
                        if (updateDescriptor) {
                            log.info("Updating loadSpec in descriptor.json at [%s] with new path [%s]", path,
                                    pathWithoutScheme);
                            mapper.writeValue(fs.create(path, true), dataSegment);
                        }
                    }
                    segments.add(dataSegment);
                } else {
                    throw new SegmentLoadingException(
                            "index.zip didn't exist at [%s] while descripter.json exists!?", indexZip);
                }
            }
        }
    } catch (IOException e) {
        throw new SegmentLoadingException(e, "Problems interacting with filesystem[%s].", workingDirPath);
    }

    return segments;
}

From source file:io.druid.storage.hdfs.HdfsDataSegmentPuller.java

License:Apache License

public FileUtils.FileCopyResult getSegmentFiles(final Path path, final File outDir)
        throws SegmentLoadingException {
    final LocalFileSystem localFileSystem = new LocalFileSystem();
    try {
        final FileSystem fs = path.getFileSystem(config);
        if (fs.isDirectory(path)) {

            // --------    directory     ---------

            try {
                return RetryUtils.retry(new Callable<FileUtils.FileCopyResult>() {
                    @Override
                    public FileUtils.FileCopyResult call() throws Exception {
                        if (!fs.exists(path)) {
                            throw new SegmentLoadingException("No files found at [%s]", path.toString());
                        }

                        final RemoteIterator<LocatedFileStatus> children = fs.listFiles(path, false);
                        final ArrayList<FileUtils.FileCopyResult> localChildren = new ArrayList<>();
                        final FileUtils.FileCopyResult result = new FileUtils.FileCopyResult();
                        while (children.hasNext()) {
                            final LocatedFileStatus child = children.next();
                            final Path childPath = child.getPath();
                            final String fname = childPath.getName();
                            if (fs.isDirectory(childPath)) {
                                log.warn("[%s] is a child directory, skipping", childPath.toString());
                            } else {
                                final File outFile = new File(outDir, fname);

                                // Actual copy
                                fs.copyToLocalFile(childPath, new Path(outFile.toURI()));
                                result.addFile(outFile);
                            }
                        }
                        log.info("Copied %d bytes from [%s] to [%s]", result.size(), path.toString(),
                                outDir.getAbsolutePath());
                        return result;
                    }

                }, shouldRetryPredicate(), DEFAULT_RETRY_COUNT);
            } catch (Exception e) {
                throw Throwables.propagate(e);
            }
        } else if (CompressionUtils.isZip(path.getName())) {

            // --------    zip     ---------

            final FileUtils.FileCopyResult result = CompressionUtils.unzip(new ByteSource() {
                @Override
                public InputStream openStream() throws IOException {
                    return getInputStream(path);
                }
            }, outDir, shouldRetryPredicate(), false);

            log.info("Unzipped %d bytes from [%s] to [%s]", result.size(), path.toString(),
                    outDir.getAbsolutePath());

            return result;
        } else if (CompressionUtils.isGz(path.getName())) {

            // --------    gzip     ---------

            final String fname = path.getName();
            final File outFile = new File(outDir, CompressionUtils.getGzBaseName(fname));
            final FileUtils.FileCopyResult result = CompressionUtils.gunzip(new ByteSource() {
                @Override
                public InputStream openStream() throws IOException {
                    return getInputStream(path);
                }
            }, outFile);

            log.info("Gunzipped %d bytes from [%s] to [%s]", result.size(), path.toString(),
                    outFile.getAbsolutePath());
            return result;
        } else {
            throw new SegmentLoadingException("Do not know how to handle file type at [%s]", path.toString());
        }
    } catch (IOException e) {
        throw new SegmentLoadingException(e, "Error loading [%s]", path.toString());
    }
}