List of usage examples for org.apache.hadoop.fs.FileSystem.listFiles
public RemoteIterator<LocatedFileStatus> listFiles(final Path f, final boolean recursive) throws FileNotFoundException, IOException
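Before the full examples below, here is a minimal sketch of the typical call pattern. The class name, argument handling, and default path are illustrative only; listFiles returns a RemoteIterator over files (never directories), and the boolean controls whether subdirectories are traversed recursively.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListFilesExample {
    public static void main(String[] args) throws IOException {
        // Directory to scan; "/tmp" is just a placeholder default.
        Path dir = new Path(args.length > 0 ? args[0] : "/tmp");
        Configuration conf = new Configuration();
        FileSystem fs = dir.getFileSystem(conf);

        // Second argument = true descends into subdirectories recursively.
        RemoteIterator<LocatedFileStatus> it = fs.listFiles(dir, true);
        while (it.hasNext()) {
            LocatedFileStatus status = it.next();
            System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
        }
    }
}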
From source file: edu.iu.datasource.HarpDAALDataSource.java
License: Apache License

public List<double[]> loadDenseCSVFiles(String inputFile, int nFeatures, String sep) {
    Path inputFilePaths = new Path(inputFile);
    List<String> inputFileList = new LinkedList<>();
    try {
        FileSystem fs = inputFilePaths.getFileSystem(conf);
        RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(inputFilePaths, true);
        while (iterator.hasNext()) {
            String name = iterator.next().getPath().toUri().toString();
            inputFileList.add(name);
        }
    } catch (IOException e) {
        LOG.error("Fail to get test files", e);
    }

    List<double[]> points = new LinkedList<double[]>();
    FSDataInputStream in = null;

    // loop over all the files in the list
    ListIterator<String> file_itr = inputFileList.listIterator();
    while (file_itr.hasNext()) {
        String file_name = file_itr.next();
        LOG.info("read in file name: " + file_name);

        Path file_path = new Path(file_name);
        try {
            FileSystem fs = file_path.getFileSystem(conf);
            in = fs.open(file_path);
        } catch (Exception e) {
            LOG.error("Fail to open file " + e.toString());
            return null;
        }

        // read file content
        try {
            while (true) {
                String line = in.readLine();
                if (line == null)
                    break;
                String[] lineData = line.split(sep);
                double[] cell = new double[nFeatures];
                for (int t = 0; t < nFeatures; t++)
                    cell[t] = Double.parseDouble(lineData[t]);
                points.add(cell);
            }
            in.close();
        } catch (Exception e) {
            LOG.error("Fail to read data " + e.toString());
            return null;
        }
    }

    return points;
}
From source file: edu.iu.datasource.HarpDAALDataSource.java
License: Apache License

public List<COO> loadCOOFiles(String FilePath, String regex) {
    List<String> FilePathsList = new LinkedList<>();
    Path path = new Path(FilePath);
    try {
        FileSystem fs = path.getFileSystem(this.conf);
        RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(path, true);
        while (iterator.hasNext()) {
            String name = iterator.next().getPath().toUri().toString();
            FilePathsList.add(name);
        }
    } catch (IOException e) {
        LOG.error("Fail to get test files", e);
    }

    MTReader reader = new MTReader();
    List<COO> output = reader.readCOO(FilePathsList, regex, this.conf, this.harpthreads);
    // this.totallines = reader.getTotalLines();
    // this.totalPoints = reader.getTotalPoints();
    return output;
}
From source file: edu.iu.datasource.HarpDAALDataSource.java
License: Apache License

public NumericTable loadCSRNumericTable(String inputFiles, String sep, DaalContext context) throws IOException {
    Path inputFilePaths = new Path(inputFiles);
    List<String> inputFileList = new LinkedList<>();
    try {
        FileSystem fs = inputFilePaths.getFileSystem(conf);
        RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(inputFilePaths, true);
        while (iterator.hasNext()) {
            String name = iterator.next().getPath().toUri().toString();
            inputFileList.add(name);
        }
    } catch (IOException e) {
        LOG.error("Fail to get test files", e);
    }

    if (inputFileList.size() > 1) {
        LOG.info("Error CSR data shall be within a single file");
        return null;
    }

    String filename = inputFileList.get(0);
    return loadCSRNumericTableImpl(filename, sep, context);
}
From source file: edu.iu.datasource.HarpDAALDataSource.java
License: Apache License

public NumericTable[] loadCSRNumericTableAndLabel(String inputFiles, String sep, DaalContext context)
        throws IOException {
    Path inputFilePaths = new Path(inputFiles);
    List<String> inputFileList = new LinkedList<>();
    try {
        FileSystem fs = inputFilePaths.getFileSystem(conf);
        RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(inputFilePaths, true);
        while (iterator.hasNext()) {
            String name = iterator.next().getPath().toUri().toString();
            inputFileList.add(name);
        }
    } catch (IOException e) {
        LOG.error("Fail to get test files", e);
    }

    if (inputFileList.size() > 1) {
        LOG.info("Error CSR data shall be within a single file");
        return null;
    }

    String filename = inputFileList.get(0);
    return loadCSRNumericTableAndLabelImpl(filename, sep, context);
}
From source file: gaffer.accumulostore.operation.hdfs.handler.job.tool.FetchElementsFromHdfsTool.java
License: Apache License

private void checkHdfsDirectories(final AddElementsFromHdfs operation) throws IOException {
    LOGGER.info("Checking that the correct HDFS directories exist");
    final FileSystem fs = FileSystem.get(getConf());

    final Path outputPath = new Path(operation.getOutputPath());
    LOGGER.info("Ensuring output directory {} doesn't exist", outputPath);
    if (fs.exists(outputPath)) {
        if (fs.listFiles(outputPath, true).hasNext()) {
            LOGGER.error("Output directory exists and is not empty: {}", outputPath);
            throw new IllegalArgumentException("Output directory exists and is not empty: " + outputPath);
        }
        LOGGER.info("Output directory exists and is empty so deleting: {}", outputPath);
        fs.delete(outputPath, true);
    }

    final Path failurePath = new Path(operation.getFailurePath());
    LOGGER.info("Ensuring failure directory {} exists", failurePath);
    if (fs.exists(failurePath)) {
        if (fs.listFiles(failurePath, true).hasNext()) {
            LOGGER.error("Failure directory exists and is not empty: {}", failurePath);
            throw new IllegalArgumentException("Failure directory is not empty: " + failurePath);
        }
    } else {
        LOGGER.info("Failure directory doesn't exist so creating: {}", failurePath);
        fs.mkdirs(failurePath);
    }
    IngestUtils.setDirectoryPermsForAccumulo(fs, failurePath);
}
From source file: hws.core.JobClient.java
License: Apache License

public void run(String[] args) throws Exception {
    //final String command = args[0];
    //final int n = Integer.valueOf(args[1]);
    //final Path jarPath = new Path(args[2]);
    Options options = new Options();
    /*options.addOption(OptionBuilder.withLongOpt("jar")
            .withDescription("Jar path")
            .hasArg()
            .withArgName("JarPath")
            .create());
    options.addOption(OptionBuilder.withLongOpt("scheduler")
            .withDescription("Scheduler class name")
            .hasArg()
            .withArgName("ClassName")
            .create());
    */
    options.addOption(OptionBuilder.withLongOpt("zk-servers")
            .withDescription("List of the ZooKeeper servers").hasArgs().withArgName("zkAddrs").create("zks"));
    //options.addOption("l", "list", false, "list modules");
    options.addOption(OptionBuilder.withLongOpt("load").withDescription("load new modules").hasArgs()
            .withArgName("XMLFiles").create());
    /*options.addOption(OptionBuilder.withLongOpt("remove")
            .withDescription("remove modules")
            .hasArgs()
            .withArgName("ModuleNames")
            .create("rm"));
    */
    CommandLineParser parser = new BasicParser();
    CommandLine cmd = parser.parse(options, args);

    //Path jarPath = null;
    //String schedulerClassName = null;
    String[] xmlFileNames = null;
    //String []moduleNames = null;
    String zksArgs = "";
    String[] zkServers = null;
    if (cmd.hasOption("zks")) {
        zksArgs = "-zks";
        zkServers = cmd.getOptionValues("zks");
        for (String zks : zkServers) {
            zksArgs += " " + zks;
        }
    }

    //Logger setup
    //FSDataOutputStream writer = FileSystem.get(conf).create(new Path("hdfs:///hws/apps/"+appIdStr+"/logs/jobClient.log"));
    //Logger.addOutputStream(writer);

    /*if(cmd.hasOption("l")){
        LOG.warn("Argument --list (-l) is not supported yet.");
    }
    if(cmd.hasOption("jar")){
        jarPath = new Path(cmd.getOptionValue("jar"));
    }
    if(cmd.hasOption("scheduler")){
        schedulerClassName = cmd.getOptionValue("scheduler");
    }*/
    if (cmd.hasOption("load")) {
        xmlFileNames = cmd.getOptionValues("load");
    }
    /*else if(cmd.hasOption("rm")){
        moduleNames = cmd.getOptionValues("rm");
    }*/

    //LOG.info("Jar-Path "+jarPath);
    if (xmlFileNames != null) {
        String paths = "";
        for (String path : xmlFileNames) {
            paths += path + "; ";
        }
        LOG.info("Load XMLs: " + paths);
    }
    /*if(moduleNames!=null){
        String modules = "";
        for(String module: moduleNames){
            modules += module+"; ";
        }
        LOG.info("remove: "+modules);
    }*/

    // Create yarnClient
    YarnConfiguration conf = new YarnConfiguration();
    YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(conf);
    yarnClient.start();

    // Create application via yarnClient
    YarnClientApplication app = yarnClient.createApplication();
    System.out.println("LOG Path: " + ApplicationConstants.LOG_DIR_EXPANSION_VAR);

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();

    ZkClient zk = new ZkClient(zkServers[0]); //TODO select a ZooKeeper server
    if (!zk.exists("/hadoop-watershed")) {
        zk.createPersistent("/hadoop-watershed", "");
    }
    zk.createPersistent("/hadoop-watershed/" + appId.toString(), "");

    FileSystem fs = FileSystem.get(conf);

    LOG.info("Collecting files to upload");
    fs.mkdirs(new Path("hdfs:///hws/apps/" + appId.toString()));
    fs.mkdirs(new Path("hdfs:///hws/apps/" + appId.toString() + "/logs"));

    ModulePipeline modulePipeline = ModulePipeline.fromXMLFiles(xmlFileNames);
    LOG.info("Uploading files to HDFS");
    for (String path : modulePipeline.files()) {
        uploadFile(fs, new File(path), appId);
    }
    LOG.info("Upload finished");

    String modulePipelineJson = Json.dumps(modulePipeline);
    String modulePipelineBase64 = Base64.encodeBase64String(StringUtils.getBytesUtf8(modulePipelineJson))
            .replaceAll("\\s", "");
    LOG.info("ModulePipeline: " + modulePipelineJson);
    //LOG.info("ModulePipeline: "+modulePipelineBase64);

    amContainer.setCommands(Collections.singletonList("$JAVA_HOME/bin/java" + " -Xmx256M"
            + " hws.core.JobMaster" + " -aid " + appId.toString() + " --load " + modulePipelineBase64 + " "
            + zksArgs + " 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout" + " 2>"
            + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr"));

    // Setup jar for ApplicationMaster
    //LocalResource appMasterJar = Records.newRecord(LocalResource.class);
    //setupAppMasterJar(jarPath, appMasterJar);
    //amContainer.setLocalResources(Collections.singletonMap("hws.jar", appMasterJar));

    LOG.info("Listing files for YARN-Watershed");
    RemoteIterator<LocatedFileStatus> filesIterator = fs.listFiles(new Path("hdfs:///hws/bin/"), false);
    Map<String, LocalResource> resources = new HashMap<String, LocalResource>();
    LOG.info("Files setup as resource");
    while (filesIterator.hasNext()) {
        LocatedFileStatus fileStatus = filesIterator.next();
        // Setup jar for ApplicationMaster
        LocalResource containerJar = Records.newRecord(LocalResource.class);
        ContainerUtils.setupContainerJar(fs, fileStatus.getPath(), containerJar);
        resources.put(fileStatus.getPath().getName(), containerJar);
    }
    LOG.info("container resource setup");
    amContainer.setLocalResources(resources);

    fs.close(); //closing FileSystem interface

    // Setup CLASSPATH for ApplicationMaster
    Map<String, String> appMasterEnv = new HashMap<String, String>();
    ContainerUtils.setupContainerEnv(appMasterEnv, conf);
    amContainer.setEnvironment(appMasterEnv);

    // Set up resource type requirements for ApplicationMaster
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(256);
    capability.setVirtualCores(1);

    // Finally, set-up ApplicationSubmissionContext for the application
    //ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    appContext.setApplicationName("Hadoop-Watershed"); // application name
    appContext.setAMContainerSpec(amContainer);
    appContext.setResource(capability);
    appContext.setQueue("default"); // queue

    // Submit application
    LOG.info("Submitting application " + appId);
    yarnClient.submitApplication(appContext);

    LOG.info("Waiting for containers to finish");
    zk.waitUntilExists("/hadoop-watershed/" + appId.toString() + "/done", TimeUnit.MILLISECONDS, 250);
    ApplicationReport appReport = yarnClient.getApplicationReport(appId);
    YarnApplicationState appState = appReport.getYarnApplicationState();
    while (appState != YarnApplicationState.FINISHED && appState != YarnApplicationState.KILLED
            && appState != YarnApplicationState.FAILED) {
        Thread.sleep(100);
        appReport = yarnClient.getApplicationReport(appId);
        appState = appReport.getYarnApplicationState();
    }

    System.out.println("Application " + appId + " finished with" + " state " + appState + " at "
            + appReport.getFinishTime());

    System.out.println("deleting " + appId.toString() + " znode");
    zk.deleteRecursive("/hadoop-watershed/" + appId.toString()); //TODO remove app folder from ZooKeeper
}
From source file: hws.core.JobMaster.java
License: Apache License

public void onContainersAllocated(List<Container> containers) {
    FileSystem fs = null;
    try {
        fs = FileSystem.get(getConfiguration());
    } catch (IOException e) {
        Logger.severe(e.toString());
    }

    for (Container container : containers) {
        try {
            //PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter("/home/yarn/rcor/yarn/app-master-log.out")));
            Logger.info("Selecting instance to container: " + container.getId().toString());
            // given the container, choose the instance whose input data is closest to that container
            InstanceInfo instanceInfo = null;
            if (instances.get(modulePipeline.get(currentModuleIndex).filterInfo().name())
                    .instancesBuilt() >= modulePipeline.get(currentModuleIndex).numFilterInstances()) {
                currentModuleIndex++;
            }
            if (currentModuleIndex < modulePipeline.size()) {
                instanceInfo = instances.get(modulePipeline.get(currentModuleIndex).filterInfo().name())
                        .build();
            } else
                break;

            String instanceInfoBase64 = Base64
                    .encodeBase64String(StringUtils.getBytesUtf8(Json.dumps(instanceInfo)))
                    .replaceAll("\\s", "");

            // Launch container by creating a ContainerLaunchContext
            ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class);
            ctx.setCommands(Collections.singletonList(
                    "$JAVA_HOME/bin/java -Xmx256M hws.core.InstanceDriver --load " + instanceInfoBase64
                            + " -aid " + this.appIdStr + " -cid " + container.getId().toString() + " "
                            + this.zksArgs + " 1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout"
                            + " 2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr"));

            Logger.info("Listing YARN-Watershed files for app-id: " + this.appIdStr);
            RemoteIterator<LocatedFileStatus> files = fs.listFiles(new Path("hdfs:///hws/bin/"), false);
            Map<String, LocalResource> resources = new HashMap<String, LocalResource>();
            Logger.info("Setup YARN-Watershed files as resources");
            while (files.hasNext()) {
                LocatedFileStatus fileStatus = files.next();
                // Setup jar for ApplicationMaster
                LocalResource containerJar = Records.newRecord(LocalResource.class);
                ContainerUtils.setupContainerJar(fs, fileStatus.getPath(), containerJar);
                resources.put(fileStatus.getPath().getName(), containerJar);
            }

            Logger.info("Listing application files for app-id: " + this.appIdStr);
            files = fs.listFiles(new Path("hdfs:///hws/apps/" + this.appIdStr + "/"), false);
            Logger.info("Setup application files as resources");
            while (files.hasNext()) {
                LocatedFileStatus fileStatus = files.next();
                // Setup jar for ApplicationMaster
                LocalResource containerJar = Records.newRecord(LocalResource.class);
                ContainerUtils.setupContainerJar(fs, fileStatus.getPath(), containerJar);
                resources.put(fileStatus.getPath().getName(), containerJar);
            }

            Logger.info("container resource setup");
            ctx.setLocalResources(resources);

            Logger.info("Environment setup");
            // Setup CLASSPATH for ApplicationMaster
            Map<String, String> containerEnv = new HashMap<String, String>();
            ContainerUtils.setupContainerEnv(containerEnv, getConfiguration());
            ctx.setEnvironment(containerEnv);

            Logger.info("Starting containers");
            Logger.info("[AM] Launching container " + container.getId());
            nmClient.startContainer(container, ctx);
            Logger.info("Container started!");

            /*String znode = "/hadoop-watershed/"+this.appIdStr+"/"+instanceInfo.filterInfo().name()+"/"+instanceInfo.instanceId();
            out.println("Saving instance znode: "+znode);
            out.flush();
            zk.createPersistent(znode, "");
            zk.createPersistent(znode+"/host", container.getNodeId().getHost());
            out.println("saved location: "+container.getNodeId().getHost());
            out.flush();
            */

            if (instances.get(modulePipeline.get(currentModuleIndex).filterInfo().name())
                    .instancesBuilt() >= modulePipeline.get(currentModuleIndex).numFilterInstances()) {
                Logger.info("Starting via ZooKeeper filter: " + instanceInfo.filterInfo().name());
                zk.createPersistent("/hadoop-watershed/" + this.appIdStr + "/"
                        + instanceInfo.filterInfo().name() + "/start", "");
            }
            //out.close();
        } catch (Exception e) {
            Logger.severe("[AM] Error launching container " + container.getId() + " " + e);
        }
    }

    try {
        fs.close();
    } catch (IOException e) {
        Logger.severe(e.toString());
    }
}
From source file: io.druid.indexer.updater.HadoopConverterJob.java
License: Apache License

public List<DataSegment> run() throws IOException {
    final JobConf jobConf = new JobConf();
    jobConf.setKeepFailedTaskFiles(false);
    for (Map.Entry<String, String> entry : converterConfig.getHadoopProperties().entrySet()) {
        jobConf.set(entry.getKey(), entry.getValue(), "converterConfig.getHadoopProperties()");
    }
    final List<DataSegment> segments = converterConfig.getSegments();
    if (segments.isEmpty()) {
        throw new IAE("No segments found for datasource [%s]", converterConfig.getDataSource());
    }
    converterConfigIntoConfiguration(converterConfig, segments, jobConf);

    jobConf.setNumReduceTasks(0); // Map only. Number of map tasks determined by input format
    jobConf.setWorkingDirectory(new Path(converterConfig.getDistributedSuccessCache()));

    setJobName(jobConf, segments);

    if (converterConfig.getJobPriority() != null) {
        jobConf.setJobPriority(JobPriority.valueOf(converterConfig.getJobPriority()));
    }

    final Job job = Job.getInstance(jobConf);

    job.setInputFormatClass(ConfigInputFormat.class);
    job.setMapperClass(ConvertingMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapSpeculativeExecution(false);
    job.setOutputFormatClass(ConvertingOutputFormat.class);

    JobHelper.setupClasspath(JobHelper.distributedClassPath(jobConf.getWorkingDirectory()),
            JobHelper.distributedClassPath(getJobClassPathDir(job.getJobName(), jobConf.getWorkingDirectory())),
            job);

    Throwable throwable = null;
    try {
        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
        final boolean success = job.waitForCompletion(true);
        if (!success) {
            final TaskReport[] reports = job.getTaskReports(TaskType.MAP);
            if (reports != null) {
                for (final TaskReport report : reports) {
                    log.error("Error in task [%s] : %s", report.getTaskId(),
                            Arrays.toString(report.getDiagnostics()));
                }
            }
            return null;
        }
        try {
            loadedBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_LOADED).getValue();
            writtenBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_WRITTEN).getValue();
        } catch (IOException ex) {
            log.error(ex, "Could not fetch counters");
        }
        final JobID jobID = job.getJobID();

        final Path jobDir = getJobPath(jobID, job.getWorkingDirectory());
        final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
        final RemoteIterator<LocatedFileStatus> it = fs.listFiles(jobDir, true);
        final List<Path> goodPaths = new ArrayList<>();
        while (it.hasNext()) {
            final LocatedFileStatus locatedFileStatus = it.next();
            if (locatedFileStatus.isFile()) {
                final Path myPath = locatedFileStatus.getPath();
                if (ConvertingOutputFormat.DATA_SUCCESS_KEY.equals(myPath.getName())) {
                    goodPaths.add(new Path(myPath.getParent(), ConvertingOutputFormat.DATA_FILE_KEY));
                }
            }
        }
        if (goodPaths.isEmpty()) {
            log.warn("No good data found at [%s]", jobDir);
            return null;
        }
        final List<DataSegment> returnList = ImmutableList
                .copyOf(Lists.transform(goodPaths, new Function<Path, DataSegment>() {
                    @Nullable
                    @Override
                    public DataSegment apply(final Path input) {
                        try {
                            if (!fs.exists(input)) {
                                throw new ISE("Somehow [%s] was found but [%s] is missing at [%s]",
                                        ConvertingOutputFormat.DATA_SUCCESS_KEY,
                                        ConvertingOutputFormat.DATA_FILE_KEY, jobDir);
                            }
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                        try (final InputStream stream = fs.open(input)) {
                            return HadoopDruidConverterConfig.jsonMapper.readValue(stream, DataSegment.class);
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                    }
                }));
        if (returnList.size() == segments.size()) {
            return returnList;
        } else {
            throw new ISE(
                    "Tasks reported success but result length did not match! Expected %d found %d at path [%s]",
                    segments.size(), returnList.size(), jobDir);
        }
    } catch (InterruptedException | ClassNotFoundException e) {
        RuntimeException exception = Throwables.propagate(e);
        throwable = exception;
        throw exception;
    } catch (Throwable t) {
        throwable = t;
        throw t;
    } finally {
        try {
            cleanup(job);
        } catch (IOException e) {
            if (throwable != null) {
                throwable.addSuppressed(e);
            } else {
                log.error(e, "Could not clean up job [%s]", job.getJobID());
            }
        }
    }
}
From source file: io.druid.storage.hdfs.HdfsDataSegmentFinder.java
License: Apache License

@Override
public Set<DataSegment> findSegments(String workingDirPathStr, boolean updateDescriptor)
        throws SegmentLoadingException {
    final Set<DataSegment> segments = Sets.newHashSet();
    final Path workingDirPath = new Path(workingDirPathStr);
    FileSystem fs;
    try {
        fs = workingDirPath.getFileSystem(config);

        log.info(fs.getScheme());
        log.info("FileSystem URI:" + fs.getUri().toString());

        if (!fs.exists(workingDirPath)) {
            throw new SegmentLoadingException("Working directory [%s] doesn't exist.", workingDirPath);
        }

        if (!fs.isDirectory(workingDirPath)) {
            throw new SegmentLoadingException("Working directory [%s] is not a directory!?", workingDirPath);
        }

        final RemoteIterator<LocatedFileStatus> it = fs.listFiles(workingDirPath, true);
        while (it.hasNext()) {
            final LocatedFileStatus locatedFileStatus = it.next();
            final Path path = locatedFileStatus.getPath();
            if (path.getName().endsWith("descriptor.json")) {
                final Path indexZip;
                final String descriptorParts[] = path.getName().split("_");
                if (descriptorParts.length == 2 && descriptorParts[1].equals("descriptor.json")
                        && org.apache.commons.lang.StringUtils.isNumeric(descriptorParts[0])) {
                    indexZip = new Path(path.getParent(),
                            StringUtils.format("%s_index.zip", descriptorParts[0]));
                } else {
                    indexZip = new Path(path.getParent(), "index.zip");
                }
                if (fs.exists(indexZip)) {
                    final DataSegment dataSegment = mapper.readValue(fs.open(path), DataSegment.class);
                    log.info("Found segment [%s] located at [%s]", dataSegment.getIdentifier(), indexZip);

                    final Map<String, Object> loadSpec = dataSegment.getLoadSpec();
                    final String pathWithoutScheme = indexZip.toUri().getPath();

                    if (!loadSpec.get("type").equals(HdfsStorageDruidModule.SCHEME)
                            || !loadSpec.get("path").equals(pathWithoutScheme)) {
                        loadSpec.put("type", HdfsStorageDruidModule.SCHEME);
                        loadSpec.put("path", pathWithoutScheme);
                        if (updateDescriptor) {
                            log.info("Updating loadSpec in descriptor.json at [%s] with new path [%s]", path,
                                    pathWithoutScheme);
                            mapper.writeValue(fs.create(path, true), dataSegment);
                        }
                    }
                    segments.add(dataSegment);
                } else {
                    throw new SegmentLoadingException(
                            "index.zip didn't exist at [%s] while descripter.json exists!?", indexZip);
                }
            }
        }
    } catch (IOException e) {
        throw new SegmentLoadingException(e, "Problems interacting with filesystem[%s].", workingDirPath);
    }

    return segments;
}
From source file: io.druid.storage.hdfs.HdfsDataSegmentPuller.java
License: Apache License

public FileUtils.FileCopyResult getSegmentFiles(final Path path, final File outDir)
        throws SegmentLoadingException {
    final LocalFileSystem localFileSystem = new LocalFileSystem();
    try {
        final FileSystem fs = path.getFileSystem(config);
        if (fs.isDirectory(path)) {

            // -------- directory ---------

            try {
                return RetryUtils.retry(new Callable<FileUtils.FileCopyResult>() {
                    @Override
                    public FileUtils.FileCopyResult call() throws Exception {
                        if (!fs.exists(path)) {
                            throw new SegmentLoadingException("No files found at [%s]", path.toString());
                        }

                        final RemoteIterator<LocatedFileStatus> children = fs.listFiles(path, false);
                        final ArrayList<FileUtils.FileCopyResult> localChildren = new ArrayList<>();
                        final FileUtils.FileCopyResult result = new FileUtils.FileCopyResult();
                        while (children.hasNext()) {
                            final LocatedFileStatus child = children.next();
                            final Path childPath = child.getPath();
                            final String fname = childPath.getName();
                            if (fs.isDirectory(childPath)) {
                                log.warn("[%s] is a child directory, skipping", childPath.toString());
                            } else {
                                final File outFile = new File(outDir, fname);

                                // Actual copy
                                fs.copyToLocalFile(childPath, new Path(outFile.toURI()));
                                result.addFile(outFile);
                            }
                        }
                        log.info("Copied %d bytes from [%s] to [%s]", result.size(), path.toString(),
                                outDir.getAbsolutePath());
                        return result;
                    }
                }, shouldRetryPredicate(), DEFAULT_RETRY_COUNT);
            } catch (Exception e) {
                throw Throwables.propagate(e);
            }
        } else if (CompressionUtils.isZip(path.getName())) {

            // -------- zip ---------

            final FileUtils.FileCopyResult result = CompressionUtils.unzip(new ByteSource() {
                @Override
                public InputStream openStream() throws IOException {
                    return getInputStream(path);
                }
            }, outDir, shouldRetryPredicate(), false);

            log.info("Unzipped %d bytes from [%s] to [%s]", result.size(), path.toString(),
                    outDir.getAbsolutePath());

            return result;
        } else if (CompressionUtils.isGz(path.getName())) {

            // -------- gzip ---------

            final String fname = path.getName();
            final File outFile = new File(outDir, CompressionUtils.getGzBaseName(fname));
            final FileUtils.FileCopyResult result = CompressionUtils.gunzip(new ByteSource() {
                @Override
                public InputStream openStream() throws IOException {
                    return getInputStream(path);
                }
            }, outFile);

            log.info("Gunzipped %d bytes from [%s] to [%s]", result.size(), path.toString(),
                    outFile.getAbsolutePath());
            return result;
        } else {
            throw new SegmentLoadingException("Do not know how to handle file type at [%s]", path.toString());
        }
    } catch (IOException e) {
        throw new SegmentLoadingException(e, "Error loading [%s]", path.toString());
    }
}