List of usage examples for org.apache.hadoop.fs FileSystem listFiles
public RemoteIterator<LocatedFileStatus> listFiles(final Path f, final boolean recursive) throws FileNotFoundException, IOException
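listFiles returns a RemoteIterator over the files (never directories) under the given path; with recursive set to true it descends into subdirectories, and it throws FileNotFoundException if the path does not exist. Before the project-specific examples below, here is a minimal self-contained sketch of the usual iteration pattern; the path /tmp/data is a placeholder.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListFilesExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path dir = new Path("/tmp/data"); // placeholder path
    FileSystem fs = dir.getFileSystem(conf);
    // listFiles yields only files, never directories; recursive=true descends
    // into subdirectories of dir.
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(dir, true);
    while (it.hasNext()) {
      LocatedFileStatus status = it.next();
      System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
    }
  }
}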
From source file:org.apache.drill.TestDynamicUDFSupport.java
License:Apache License
@Test
public void testSuccessfulRegistration() throws Exception {
  copyDefaultJarsToStagingArea();

  String summary = "The following UDFs in jar %s have been registered:\n"
      + "[custom_lower(VARCHAR-REQUIRED)]";

  testBuilder()
      .sqlQuery("create function using jar '%s'", default_binary_name)
      .unOrdered()
      .baselineColumns("ok", "summary")
      .baselineValues(true, String.format(summary, default_binary_name))
      .go();

  RemoteFunctionRegistry remoteFunctionRegistry = getDrillbitContext().getRemoteFunctionRegistry();
  FileSystem fs = remoteFunctionRegistry.getFs();

  assertFalse("Staging area should be empty",
      fs.listFiles(remoteFunctionRegistry.getStagingArea(), false).hasNext());
  assertFalse("Temporary area should be empty",
      fs.listFiles(remoteFunctionRegistry.getTmpArea(), false).hasNext());
  assertTrue("Binary should be present in registry area",
      fs.exists(new Path(remoteFunctionRegistry.getRegistryArea(), default_binary_name)));
  assertTrue("Source should be present in registry area",
      fs.exists(new Path(remoteFunctionRegistry.getRegistryArea(), default_source_name)));

  Registry registry = remoteFunctionRegistry.getRegistry(new DataChangeVersion());
  assertEquals("Registry should contain one jar", registry.getJarList().size(), 1);
  assertEquals(registry.getJar(0).getName(), default_binary_name);
}
From source file:org.apache.drill.TestDynamicUDFSupport.java
License:Apache License
@Test
public void testSuccessfulRegistrationAfterSeveralRetryAttempts() throws Exception {
  RemoteFunctionRegistry remoteFunctionRegistry = spyRemoteFunctionRegistry();
  copyDefaultJarsToStagingArea();

  doThrow(new VersionMismatchException("Version mismatch detected", 1))
      .doThrow(new VersionMismatchException("Version mismatch detected", 1))
      .doCallRealMethod()
      .when(remoteFunctionRegistry).updateRegistry(any(Registry.class), any(DataChangeVersion.class));

  String summary = "The following UDFs in jar %s have been registered:\n"
      + "[custom_lower(VARCHAR-REQUIRED)]";

  testBuilder()
      .sqlQuery("create function using jar '%s'", default_binary_name)
      .unOrdered()
      .baselineColumns("ok", "summary")
      .baselineValues(true, String.format(summary, default_binary_name))
      .go();

  verify(remoteFunctionRegistry, times(3)).updateRegistry(any(Registry.class), any(DataChangeVersion.class));

  FileSystem fs = remoteFunctionRegistry.getFs();

  assertFalse("Staging area should be empty",
      fs.listFiles(remoteFunctionRegistry.getStagingArea(), false).hasNext());
  assertFalse("Temporary area should be empty",
      fs.listFiles(remoteFunctionRegistry.getTmpArea(), false).hasNext());
  assertTrue("Binary should be present in registry area",
      fs.exists(new Path(remoteFunctionRegistry.getRegistryArea(), default_binary_name)));
  assertTrue("Source should be present in registry area",
      fs.exists(new Path(remoteFunctionRegistry.getRegistryArea(), default_source_name)));

  Registry registry = remoteFunctionRegistry.getRegistry(new DataChangeVersion());
  assertEquals("Registry should contain one jar", registry.getJarList().size(), 1);
  assertEquals(registry.getJar(0).getName(), default_binary_name);
}
From source file:org.apache.drill.TestDynamicUDFSupport.java
License:Apache License
@Test
public void testSuccessfulUnregistrationAfterSeveralRetryAttempts() throws Exception {
  RemoteFunctionRegistry remoteFunctionRegistry = spyRemoteFunctionRegistry();
  copyDefaultJarsToStagingArea();
  test("create function using jar '%s'", default_binary_name);

  reset(remoteFunctionRegistry);
  doThrow(new VersionMismatchException("Version mismatch detected", 1))
      .doThrow(new VersionMismatchException("Version mismatch detected", 1))
      .doCallRealMethod()
      .when(remoteFunctionRegistry).updateRegistry(any(Registry.class), any(DataChangeVersion.class));

  String summary = "The following UDFs in jar %s have been unregistered:\n"
      + "[custom_lower(VARCHAR-REQUIRED)]";

  testBuilder()
      .sqlQuery("drop function using jar '%s'", default_binary_name)
      .unOrdered()
      .baselineColumns("ok", "summary")
      .baselineValues(true, String.format(summary, default_binary_name))
      .go();

  verify(remoteFunctionRegistry, times(3)).updateRegistry(any(Registry.class), any(DataChangeVersion.class));

  FileSystem fs = remoteFunctionRegistry.getFs();

  assertFalse("Registry area should be empty",
      fs.listFiles(remoteFunctionRegistry.getRegistryArea(), false).hasNext());
  assertEquals("Registry should be empty",
      remoteFunctionRegistry.getRegistry(new DataChangeVersion()).getJarList().size(), 0);
}
From source file:org.apache.drill.TestDynamicUDFSupport.java
License:Apache License
@Test
public void testExceedRetryAttemptsDuringRegistration() throws Exception {
  RemoteFunctionRegistry remoteFunctionRegistry = spyRemoteFunctionRegistry();
  copyDefaultJarsToStagingArea();

  doThrow(new VersionMismatchException("Version mismatch detected", 1))
      .when(remoteFunctionRegistry).updateRegistry(any(Registry.class), any(DataChangeVersion.class));

  String summary = "Failed to update remote function registry. Exceeded retry attempts limit.";

  testBuilder()
      .sqlQuery("create function using jar '%s'", default_binary_name)
      .unOrdered()
      .baselineColumns("ok", "summary")
      .baselineValues(false, summary)
      .go();

  verify(remoteFunctionRegistry, times(remoteFunctionRegistry.getRetryAttempts() + 1))
      .updateRegistry(any(Registry.class), any(DataChangeVersion.class));

  FileSystem fs = remoteFunctionRegistry.getFs();

  assertTrue("Binary should be present in staging area",
      fs.exists(new Path(remoteFunctionRegistry.getStagingArea(), default_binary_name)));
  assertTrue("Source should be present in staging area",
      fs.exists(new Path(remoteFunctionRegistry.getStagingArea(), default_source_name)));
  assertFalse("Registry area should be empty",
      fs.listFiles(remoteFunctionRegistry.getRegistryArea(), false).hasNext());
  assertFalse("Temporary area should be empty",
      fs.listFiles(remoteFunctionRegistry.getTmpArea(), false).hasNext());
  assertEquals("Registry should be empty",
      remoteFunctionRegistry.getRegistry(new DataChangeVersion()).getJarList().size(), 0);
}
From source file:org.apache.drill.TestDynamicUDFSupport.java
License:Apache License
@Test
public void testRegistrationFailDuringRegistryUpdate() throws Exception {
  final RemoteFunctionRegistry remoteFunctionRegistry = spyRemoteFunctionRegistry();
  final FileSystem fs = remoteFunctionRegistry.getFs();
  final String errorMessage = "Failure during remote registry update.";

  doAnswer(new Answer<Void>() {
    @Override
    public Void answer(InvocationOnMock invocation) throws Throwable {
      assertTrue("Binary should be present in registry area",
          fs.exists(new Path(remoteFunctionRegistry.getRegistryArea(), default_binary_name)));
      assertTrue("Source should be present in registry area",
          fs.exists(new Path(remoteFunctionRegistry.getRegistryArea(), default_source_name)));
      throw new RuntimeException(errorMessage);
    }
  }).when(remoteFunctionRegistry).updateRegistry(any(Registry.class), any(DataChangeVersion.class));

  copyDefaultJarsToStagingArea();

  testBuilder()
      .sqlQuery("create function using jar '%s'", default_binary_name)
      .unOrdered()
      .baselineColumns("ok", "summary")
      .baselineValues(false, errorMessage)
      .go();

  assertFalse("Registry area should be empty",
      fs.listFiles(remoteFunctionRegistry.getRegistryArea(), false).hasNext());
  assertFalse("Temporary area should be empty",
      fs.listFiles(remoteFunctionRegistry.getTmpArea(), false).hasNext());
  assertTrue("Binary should be present in staging area",
      fs.exists(new Path(remoteFunctionRegistry.getStagingArea(), default_binary_name)));
  assertTrue("Source should be present in staging area",
      fs.exists(new Path(remoteFunctionRegistry.getStagingArea(), default_source_name)));
}
From source file:org.apache.druid.indexer.updater.HadoopConverterJob.java
License:Apache License
public List<DataSegment> run() throws IOException {
  final JobConf jobConf = new JobConf();
  jobConf.setKeepFailedTaskFiles(false);
  for (Map.Entry<String, String> entry : converterConfig.getHadoopProperties().entrySet()) {
    jobConf.set(entry.getKey(), entry.getValue(), "converterConfig.getHadoopProperties()");
  }
  final List<DataSegment> segments = converterConfig.getSegments();
  if (segments.isEmpty()) {
    throw new IAE("No segments found for datasource [%s]", converterConfig.getDataSource());
  }
  converterConfigIntoConfiguration(converterConfig, segments, jobConf);
  jobConf.setNumReduceTasks(0); // Map only. Number of map tasks determined by input format
  jobConf.setWorkingDirectory(new Path(converterConfig.getDistributedSuccessCache()));

  setJobName(jobConf, segments);

  if (converterConfig.getJobPriority() != null) {
    jobConf.setJobPriority(JobPriority.valueOf(converterConfig.getJobPriority()));
  }

  final Job job = Job.getInstance(jobConf);
  job.setInputFormatClass(ConfigInputFormat.class);
  job.setMapperClass(ConvertingMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setMapSpeculativeExecution(false);
  job.setOutputFormatClass(ConvertingOutputFormat.class);

  JobHelper.setupClasspath(
      JobHelper.distributedClassPath(jobConf.getWorkingDirectory()),
      JobHelper.distributedClassPath(getJobClassPathDir(job.getJobName(), jobConf.getWorkingDirectory())),
      job
  );

  Throwable throwable = null;
  try {
    job.submit();
    log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
    final boolean success = job.waitForCompletion(true);
    if (!success) {
      final TaskReport[] reports = job.getTaskReports(TaskType.MAP);
      if (reports != null) {
        for (final TaskReport report : reports) {
          log.error("Error in task [%s] : %s", report.getTaskId(), Arrays.toString(report.getDiagnostics()));
        }
      }
      return null;
    }
    try {
      loadedBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_LOADED).getValue();
      writtenBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_WRITTEN).getValue();
    } catch (IOException ex) {
      log.error(ex, "Could not fetch counters");
    }
    final JobID jobID = job.getJobID();

    final Path jobDir = getJobPath(jobID, job.getWorkingDirectory());
    final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
    final RemoteIterator<LocatedFileStatus> it = fs.listFiles(jobDir, true);
    final List<Path> goodPaths = new ArrayList<>();
    while (it.hasNext()) {
      final LocatedFileStatus locatedFileStatus = it.next();
      if (locatedFileStatus.isFile()) {
        final Path myPath = locatedFileStatus.getPath();
        if (ConvertingOutputFormat.DATA_SUCCESS_KEY.equals(myPath.getName())) {
          goodPaths.add(new Path(myPath.getParent(), ConvertingOutputFormat.DATA_FILE_KEY));
        }
      }
    }
    if (goodPaths.isEmpty()) {
      log.warn("No good data found at [%s]", jobDir);
      return null;
    }
    final List<DataSegment> returnList = ImmutableList.copyOf(
        Lists.transform(goodPaths, new Function<Path, DataSegment>() {
          @Nullable
          @Override
          public DataSegment apply(final Path input) {
            try {
              if (!fs.exists(input)) {
                throw new ISE(
                    "Somehow [%s] was found but [%s] is missing at [%s]",
                    ConvertingOutputFormat.DATA_SUCCESS_KEY,
                    ConvertingOutputFormat.DATA_FILE_KEY,
                    jobDir
                );
              }
            } catch (final IOException e) {
              throw Throwables.propagate(e);
            }
            try (final InputStream stream = fs.open(input)) {
              return HadoopDruidConverterConfig.jsonMapper.readValue(stream, DataSegment.class);
            } catch (final IOException e) {
              throw Throwables.propagate(e);
            }
          }
        })
    );
    if (returnList.size() == segments.size()) {
      return returnList;
    } else {
      throw new ISE(
          "Tasks reported success but result length did not match! Expected %d found %d at path [%s]",
          segments.size(), returnList.size(), jobDir
      );
    }
  } catch (InterruptedException | ClassNotFoundException e) {
    RuntimeException exception = Throwables.propagate(e);
    throwable = exception;
    throw exception;
  } catch (Throwable t) {
    throwable = t;
    throw t;
  } finally {
    try {
      cleanup(job);
    } catch (IOException e) {
      if (throwable != null) {
        throwable.addSuppressed(e);
      } else {
        log.error(e, "Could not clean up job [%s]", job.getJobID());
      }
    }
  }
}
From source file:org.apache.druid.storage.hdfs.HdfsDataSegmentFinder.java
License:Apache License
@Override
public Set<DataSegment> findSegments(String workingDirPathStr, boolean updateDescriptor)
    throws SegmentLoadingException {
  final Map<String, Pair<DataSegment, Long>> timestampedSegments = new HashMap<>();
  final Path workingDirPath = new Path(workingDirPathStr);
  FileSystem fs;
  try {
    fs = workingDirPath.getFileSystem(config);

    log.info(fs.getScheme());
    log.info("FileSystem URI:" + fs.getUri().toString());

    if (!fs.exists(workingDirPath)) {
      throw new SegmentLoadingException("Working directory [%s] doesn't exist.", workingDirPath);
    }
    if (!fs.isDirectory(workingDirPath)) {
      throw new SegmentLoadingException("Working directory [%s] is not a directory!?", workingDirPath);
    }

    final RemoteIterator<LocatedFileStatus> it = fs.listFiles(workingDirPath, true);
    while (it.hasNext()) {
      final LocatedFileStatus locatedFileStatus = it.next();
      final Path path = locatedFileStatus.getPath();
      if (path.getName().endsWith("descriptor.json")) {
        // There are 3 supported path formats:
        //   - hdfs://nn1/hdfs_base_directory/data_source_name/interval/version/shardNum/descriptor.json
        //   - hdfs://nn1/hdfs_base_directory/data_source_name/interval/version/shardNum_descriptor.json
        //   - hdfs://nn1/hdfs_base_directory/data_source_name/interval/version/shardNum_UUID_descriptor.json
        final String[] descriptorParts = path.getName().split("_");
        Path indexZip = new Path(path.getParent(), "index.zip");
        if (descriptorParts.length > 1) {
          Preconditions.checkState(
              descriptorParts.length <= 3
                  && org.apache.commons.lang.StringUtils.isNumeric(descriptorParts[0])
                  && "descriptor.json".equals(descriptorParts[descriptorParts.length - 1]),
              "Unexpected descriptor filename format [%s]", path
          );
          indexZip = new Path(
              path.getParent(),
              StringUtils.format("%s_%sindex.zip", descriptorParts[0],
                  descriptorParts.length == 2 ? "" : descriptorParts[1] + "_")
          );
        }

        if (fs.exists(indexZip)) {
          final DataSegment dataSegment = mapper.readValue(fs.open(path), DataSegment.class);
          log.info("Found segment [%s] located at [%s]", dataSegment.getIdentifier(), indexZip);

          final Map<String, Object> loadSpec = dataSegment.getLoadSpec();
          final String pathWithoutScheme = indexZip.toUri().getPath();

          if (!loadSpec.get("type").equals(HdfsStorageDruidModule.SCHEME)
              || !loadSpec.get("path").equals(pathWithoutScheme)) {
            loadSpec.put("type", HdfsStorageDruidModule.SCHEME);
            loadSpec.put("path", pathWithoutScheme);
            if (updateDescriptor) {
              log.info("Updating loadSpec in descriptor.json at [%s] with new path [%s]",
                  path, pathWithoutScheme);
              mapper.writeValue(fs.create(path, true), dataSegment);
            }
          }
          DataSegmentFinder.putInMapRetainingNewest(timestampedSegments, dataSegment,
              locatedFileStatus.getModificationTime());
        } else {
          throw new SegmentLoadingException(
              "index.zip didn't exist at [%s] while descriptor.json exists!?", indexZip);
        }
      }
    }
  } catch (IOException e) {
    throw new SegmentLoadingException(e, "Problems interacting with filesystem[%s].", workingDirPath);
  }

  return timestampedSegments.values().stream().map(x -> x.lhs).collect(Collectors.toSet());
}
From source file:org.apache.druid.storage.hdfs.HdfsDataSegmentPuller.java
License:Apache License
FileUtils.FileCopyResult getSegmentFiles(final Path path, final File outDir) throws SegmentLoadingException {
  try {
    org.apache.commons.io.FileUtils.forceMkdir(outDir);
  } catch (IOException e) {
    throw new SegmentLoadingException(e, "");
  }
  try {
    final FileSystem fs = path.getFileSystem(config);
    if (fs.isDirectory(path)) {
      // -------- directory ---------
      try {
        return RetryUtils.retry(() -> {
          if (!fs.exists(path)) {
            throw new SegmentLoadingException("No files found at [%s]", path.toString());
          }
          final RemoteIterator<LocatedFileStatus> children = fs.listFiles(path, false);
          final FileUtils.FileCopyResult result = new FileUtils.FileCopyResult();
          while (children.hasNext()) {
            final LocatedFileStatus child = children.next();
            final Path childPath = child.getPath();
            final String fname = childPath.getName();
            if (fs.isDirectory(childPath)) {
              log.warn("[%s] is a child directory, skipping", childPath.toString());
            } else {
              final File outFile = new File(outDir, fname);
              try (final FSDataInputStream in = fs.open(childPath)) {
                NativeIO.chunkedCopy(in, outFile);
              }
              result.addFile(outFile);
            }
          }
          log.info("Copied %d bytes from [%s] to [%s]", result.size(), path.toString(),
              outDir.getAbsolutePath());
          return result;
        }, shouldRetryPredicate(), DEFAULT_RETRY_COUNT);
      } catch (Exception e) {
        throw new RuntimeException(e);
      }
    } else if (CompressionUtils.isZip(path.getName())) {
      // -------- zip ---------
      final FileUtils.FileCopyResult result = CompressionUtils.unzip(
          new ByteSource() {
            @Override
            public InputStream openStream() throws IOException {
              return getInputStream(path);
            }
          },
          outDir, shouldRetryPredicate(), false
      );
      log.info("Unzipped %d bytes from [%s] to [%s]", result.size(), path.toString(),
          outDir.getAbsolutePath());
      return result;
    } else if (CompressionUtils.isGz(path.getName())) {
      // -------- gzip ---------
      final String fname = path.getName();
      final File outFile = new File(outDir, CompressionUtils.getGzBaseName(fname));
      final FileUtils.FileCopyResult result = CompressionUtils.gunzip(
          new ByteSource() {
            @Override
            public InputStream openStream() throws IOException {
              return getInputStream(path);
            }
          },
          outFile
      );
      log.info("Gunzipped %d bytes from [%s] to [%s]", result.size(), path.toString(),
          outFile.getAbsolutePath());
      return result;
    } else {
      throw new SegmentLoadingException("Do not know how to handle file type at [%s]", path.toString());
    }
  } catch (IOException e) {
    throw new SegmentLoadingException(e, "Error loading [%s]", path.toString());
  }
}
From source file:org.apache.falcon.extensions.store.ExtensionStore.java
License:Apache License
public String getResource(final String extensionResourcePath) throws FalconException {
  StringBuilder definition = new StringBuilder();
  Path resourcePath = new Path(extensionResourcePath);
  FileSystem fileSystem = HadoopClientFactory.get().createFalconFileSystem(resourcePath.toUri());
  try {
    if (fileSystem.isFile(resourcePath)) {
      definition.append(getExtensionResource(extensionResourcePath));
    } else {
      RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem.listFiles(resourcePath, false);
      while (fileStatusListIterator.hasNext()) {
        LocatedFileStatus fileStatus = fileStatusListIterator.next();
        Path filePath = fileStatus.getPath();
        definition.append("Contents of file ").append(filePath.getName()).append(":\n");
        definition.append(getExtensionResource(filePath.toString())).append("\n \n");
      }
    }
  } catch (IOException e) {
    LOG.error("Exception while getting file(s) with path : " + extensionResourcePath, e);
    throw new StoreAccessException(e);
  }
  return definition.toString();
}
From source file:org.apache.falcon.regression.core.util.HadoopUtil.java
License:Apache License
/**
 * Recursively retrieves all data file names from a given location.
 *
 * @param fs       filesystem
 * @param location given location
 * @return list of all files
 * @throws IOException
 */
public static List<Path> getAllFilesRecursivelyHDFS(FileSystem fs, Path location) throws IOException {
  List<Path> returnList = new ArrayList<>();
  RemoteIterator<LocatedFileStatus> remoteIterator;
  try {
    remoteIterator = fs.listFiles(location, true);
  } catch (FileNotFoundException e) {
    LOGGER.info("Path '" + location + "' is not found on " + fs.getUri());
    return returnList;
  }
  while (remoteIterator.hasNext()) {
    Path path = remoteIterator.next().getPath();
    if (!path.toUri().toString().contains("_SUCCESS")) {
      returnList.add(path);
    }
  }
  return returnList;
}
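A possible call site for the helper above, included as a hedged sketch; it is not from the Falcon sources, and the namenode URI and data path are placeholders.

// Hypothetical usage of getAllFilesRecursivelyHDFS; the URI and path below are placeholders.
FileSystem fs = FileSystem.get(new URI("hdfs://namenode:8020"), new Configuration());
List<Path> dataFiles = HadoopUtil.getAllFilesRecursivelyHDFS(fs, new Path("/falcon/staging/data"));
for (Path dataFile : dataFiles) {
  System.out.println(dataFile);
}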