A list of usage examples for the org.apache.hadoop.fs.LocatedFileStatus#getLen() method.
public long getLen()
From source file:ch.cern.db.hdfs.DistributedFileSystemMetadata.java
License:GNU General Public License
/**
 * Recursively walks every file under {@code path} and accumulates their block
 * locations, stopping early once MAX_NUMBER_OF_LOCATIONS have been gathered.
 * If the data-node block-metadata feature is enabled, the plain locations are
 * swapped for BlockStorageLocation instances that also carry volume/disk ids.
 *
 * @param path root path whose files are inspected
 * @return the collected block locations (possibly truncated at the cap)
 * @throws IOException if listing files or resolving locations fails
 */
public LinkedList<BlockLocation> getBlockLocations(Path path) throws IOException {
    LOG.info("Collecting block locations...");

    LinkedList<BlockLocation> locations = new LinkedList<BlockLocation>();
    RemoteIterator<LocatedFileStatus> fileIterator = listFiles(path, true);

    int nextCode = hasNextCode(fileIterator);
    while (nextCode > 0) {
        // NOTE(review): codes > 1 appear to mean "poll again without consuming
        // an element" — confirm against hasNextCode()'s contract.
        if (nextCode > 1) {
            nextCode = hasNextCode(fileIterator);
            continue;
        }

        LocatedFileStatus status = fileIterator.next();
        if (status.isFile()) {
            BlockLocation[] fileLocations = getFileBlockLocations(status, 0, status.getLen());
            locations.addAll(Arrays.asList(fileLocations));
        }

        int collected = locations.size();
        if (collected > 0 && collected % 5000 == 0)
            LOG.info("Collected " + collected + " locations. Still in progress...");

        if (collected >= MAX_NUMBER_OF_LOCATIONS) {
            LOG.info("Reached max number of locations to collect. The amount will be representative enough.");
            break;
        }

        nextCode = hasNextCode(fileIterator);
    }
    LOG.info("Collected " + locations.size() + " locations.");

    if (isHdfsBlocksMetadataEnabled()) {
        // Replace the plain locations with ones enriched with storage (disk) ids.
        BlockStorageLocation[] storageLocations = getFileBlockStorageLocations(locations);
        locations.clear();
        locations.addAll(Arrays.asList(storageLocations));
    } else {
        LOG.error("VolumnId/DiskId can not be collected since "
                + "dfs.datanode.hdfs-blocks-metadata.enabled is not enabled.");
    }

    return locations;
}
From source file:com.facebook.presto.hive.BackgroundHiveSplitLoader.java
License:Apache License
/**
 * Drains one queued file iterator (or starts loading the next partition when
 * none is queued) and pushes the resulting Hive splits to the split source.
 *
 * @return COMPLETED_FUTURE when this round of work finished; otherwise the
 *         split source's backpressure future, signalling the caller to wait
 *         before invoking this method again
 * @throws IOException if listing or inspecting files fails
 */
private CompletableFuture<?> loadSplits() throws IOException {
    HiveFileIterator files = fileIterators.poll();
    if (files == null) {
        // No file iterator pending: begin loading the next partition instead.
        HivePartitionMetadata partition = partitions.poll();
        if (partition == null) {
            return COMPLETED_FUTURE;
        }
        loadPartition(partition);
        return COMPLETED_FUTURE;
    }

    while (files.hasNext() && !stopped) {
        LocatedFileStatus file = files.next();
        if (isDirectory(file)) {
            if (recursiveDirWalkerEnabled) {
                // Queue a nested iterator for the sub-directory; it is drained on a later call.
                HiveFileIterator fileIterator = new HiveFileIterator(file.getPath(), files.getFileSystem(),
                        files.getDirectoryLister(), files.getNamenodeStats(), files.getPartitionName(),
                        files.getInputFormat(), files.getSchema(), files.getPartitionKeys(),
                        files.getEffectivePredicate(), files.getColumnCoercions());
                fileIterators.add(fileIterator);
            }
        } else {
            boolean splittable = isSplittable(files.getInputFormat(),
                    hdfsEnvironment.getFileSystem(session.getUser(), file.getPath()), file.getPath());

            CompletableFuture<?> future = hiveSplitSource.addToQueue(createHiveSplits(files.getPartitionName(),
                    file.getPath().toString(), file.getBlockLocations(), 0, file.getLen(), files.getSchema(),
                    files.getPartitionKeys(), splittable, session, OptionalInt.empty(),
                    files.getEffectivePredicate(), files.getColumnCoercions()));
            if (!future.isDone()) {
                // The split queue is full: put this iterator back at the HEAD so we
                // resume exactly where we left off, and hand the backpressure
                // future to the caller.
                fileIterators.addFirst(files);
                return future;
            }
        }
    }

    // No need to put the iterator back, since it's either empty or we've stopped
    return COMPLETED_FUTURE;
}
From source file:com.facebook.presto.hive.util.InternalHiveSplitFactory.java
License:Apache License
/**
 * Builds an internal split covering the whole file described by {@code status}.
 * The split is marked splittable only when both the caller allows it and the
 * input format of the file supports splitting.
 */
private Optional<InternalHiveSplit> createInternalHiveSplit(LocatedFileStatus status, OptionalInt bucketNumber,
        boolean splittable) {
    boolean effectivelySplittable = splittable && isSplittable(inputFormat, fileSystem, status.getPath());
    long fileLength = status.getLen();
    // Delegate to the generic overload: the split spans [0, fileLength) of the file.
    return createInternalHiveSplit(status.getPath(), status.getBlockLocations(), 0, fileLength, fileLength,
            bucketNumber, effectivelySplittable);
}
From source file:com.toy.TomcatContainerRunnable.java
License:Apache License
@Override public void run() { LOG.info("Setting up Tomcat container launch for container id {} / war {}", container.getId(), war); ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class); // Set the local resources Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(); try {//from ww w . jav a 2 s . co m final RemoteIterator<LocatedFileStatus> libs = fs.listFiles(path, false); while (libs.hasNext()) { final LocatedFileStatus next = libs.next(); LOG.debug("Register {} for container", next.getPath()); LocalResource lib = Records.newRecord(LocalResource.class); lib.setType(LocalResourceType.FILE); lib.setVisibility(LocalResourceVisibility.APPLICATION); lib.setResource(ConverterUtils.getYarnUrlFromURI(next.getPath().toUri())); lib.setTimestamp(next.getModificationTime()); lib.setSize(next.getLen()); localResources.put(next.getPath().getName(), lib); } ctx.setLocalResources(localResources); } catch (IOException e) { LOG.error("Error while fetching Tomcat libraries : {}", e.getLocalizedMessage(), e); } // Build classpath StringBuilder classPathEnv = new StringBuilder(ApplicationConstants.Environment.CLASSPATH.$()) .append(File.pathSeparatorChar).append("./*"); for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH, YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) { classPathEnv.append(File.pathSeparatorChar); classPathEnv.append(c.trim()); } classPathEnv.append(File.pathSeparatorChar).append("./log4j.properties"); // add the runtime classpath needed for tests to work if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) { classPathEnv.append(':'); classPathEnv.append(System.getProperty("java.class.path")); } Map<String, String> env = new HashMap<String, String>(); env.put("CLASSPATH", classPathEnv.toString()); env.put(Constants.WAR, war); env.put(Constants.ZOOKEEPER_QUORUM, System.getenv(Constants.ZOOKEEPER_QUORUM)); ctx.setEnvironment(env); // Set the necessary command to execute 
the application master Vector<CharSequence> vargs = new Vector<CharSequence>(30); // Set java executable command LOG.info("Setting up app master command"); vargs.add(ApplicationConstants.Environment.JAVA_HOME.$() + "/bin/java"); // Set Xmx based on am memory size vargs.add("-Xmx" + 32 + "m"); vargs.add("com.toy.TomcatLauncher"); vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/Tomcat.stdout"); vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/Tomcat.stderr"); // Get final commmand StringBuilder command = new StringBuilder(); for (CharSequence str : vargs) { command.append(str).append(" "); } LOG.info("Completed setting up app master command " + command.toString()); List<String> commands = new ArrayList<String>(); commands.add(command.toString()); ctx.setCommands(commands); nmClientAsync.startContainerAsync(container, ctx); }
From source file:org.apache.falcon.service.SharedLibraryHostingService.java
License:Apache License
/**
 * Copies extension artifacts from the Falcon server's extension store to the
 * given cluster's file system, preserving permissions. The copy is skipped
 * entirely when the extension service/store is unavailable or when the cluster
 * already shares the Falcon server's default file system. Files whose path is
 * under the HiveDR mirroring filter path are excluded, and files whose target
 * already exists with the same length are assumed up-to-date.
 *
 * @param cluster   cluster entity whose file system receives the artifacts
 * @param clusterFs file system handle of that cluster
 * @throws FalconException if the copy fails
 */
private void pushExtensionArtifactsToCluster(final Cluster cluster, final FileSystem clusterFs)
        throws FalconException {
    if (!Services.get().isRegistered(ExtensionService.SERVICE_NAME)) {
        LOG.info("ExtensionService not registered, return");
        return;
    }

    ExtensionStore store = ExtensionStore.get();
    if (!store.isExtensionStoreInitialized()) {
        LOG.info("Extension store not initialized by Extension service. Make sure Extension service is added in "
                + "start up properties");
        return;
    }

    final String filterPath = "/apps/falcon/extensions/mirroring/";
    Path extensionStorePath = store.getExtensionStorePath();
    LOG.info("extensionStorePath :{}", extensionStorePath);
    FileSystem falconFileSystem = HadoopClientFactory.get().createFalconFileSystem(extensionStorePath.toUri());
    String nameNode = StringUtils
            .removeEnd(falconFileSystem.getConf().get(HadoopClientFactory.FS_DEFAULT_NAME_KEY), File.separator);

    String clusterStorageUrl = StringUtils.removeEnd(ClusterHelper.getStorageUrl(cluster), File.separator);

    // If default fs for Falcon server is same as cluster fs abort copy
    if (nameNode.equalsIgnoreCase(clusterStorageUrl)) {
        LOG.info("clusterStorageUrl :{} same return", clusterStorageUrl);
        return;
    }

    try {
        RemoteIterator<LocatedFileStatus> fileStatusListIterator = falconFileSystem
                .listFiles(extensionStorePath, true);

        while (fileStatusListIterator.hasNext()) {
            LocatedFileStatus srcfileStatus = fileStatusListIterator.next();
            Path filePath = Path.getPathWithoutSchemeAndAuthority(srcfileStatus.getPath());

            if (filePath != null && filePath.toString().startsWith(filterPath)) {
                // HiveDR uses the filter path as its store path in DRStatusStore, so skip
                // it; copy only the extension artifacts.
                continue;
            }

            if (srcfileStatus.isDirectory()) {
                // Recreate directories on the cluster with the source-side permissions.
                if (!clusterFs.exists(filePath)) {
                    HadoopClientFactory.mkdirs(clusterFs, filePath, srcfileStatus.getPermission());
                }
            } else {
                if (clusterFs.exists(filePath)) {
                    FileStatus targetfstat = clusterFs.getFileStatus(filePath);
                    // Equal lengths on both sides are treated as "already copied".
                    if (targetfstat.getLen() == srcfileStatus.getLen()) {
                        continue;
                    }
                }

                Path parentPath = filePath.getParent();
                if (!clusterFs.exists(parentPath)) {
                    FsPermission dirPerm = falconFileSystem.getFileStatus(parentPath).getPermission();
                    HadoopClientFactory.mkdirs(clusterFs, parentPath, dirPerm);
                }

                FileUtil.copy(falconFileSystem, srcfileStatus, clusterFs, filePath, false, true,
                        falconFileSystem.getConf());
                FileUtil.chmod(clusterFs.makeQualified(filePath).toString(),
                        srcfileStatus.getPermission().toString());
            }
        }
    } catch (IOException | InterruptedException e) {
        throw new FalconException("Failed to copy extension artifacts to cluster" + cluster.getName(), e);
    }
}
From source file:org.apache.flink.streaming.connectors.fs.bucketing.BucketingSinkFaultTolerance2ITCase.java
License:Apache License
@Override public void postSubmit() throws Exception { // We read the files and verify that we have read all the strings. If a valid-length // file exists we only read the file to that point. (This test should work with // FileSystems that support truncate() and with others as well.) Pattern messageRegex = Pattern.compile("message (\\d*)"); // Keep a set of the message IDs that we read. The size must equal the read count and // the NUM_STRINGS. If numRead is bigger than the size of the set we have seen some // elements twice. Set<Integer> readNumbers = Sets.newHashSet(); int numRead = 0; RemoteIterator<LocatedFileStatus> files = dfs.listFiles(new Path(outPath), true); while (files.hasNext()) { LocatedFileStatus file = files.next(); if (!file.getPath().toString().endsWith(".valid-length")) { int validLength = (int) file.getLen(); if (dfs.exists(file.getPath().suffix(".valid-length"))) { FSDataInputStream inStream = dfs.open(file.getPath().suffix(".valid-length")); String validLengthString = inStream.readUTF(); validLength = Integer.parseInt(validLengthString); System.out.println("VALID LENGTH: " + validLength); }//from w ww . 
j a v a 2 s .c o m FSDataInputStream inStream = dfs.open(file.getPath()); byte[] buffer = new byte[validLength]; inStream.readFully(0, buffer, 0, validLength); inStream.close(); ByteArrayInputStream bais = new ByteArrayInputStream(buffer); InputStreamReader inStreamReader = new InputStreamReader(bais); BufferedReader br = new BufferedReader(inStreamReader); String line = br.readLine(); while (line != null) { Matcher matcher = messageRegex.matcher(line); if (matcher.matches()) { numRead++; int messageId = Integer.parseInt(matcher.group(1)); readNumbers.add(messageId); } else { Assert.fail("Read line does not match expected pattern."); } line = br.readLine(); } br.close(); inStreamReader.close(); bais.close(); } } // Verify that we read all strings (at-least-once) Assert.assertEquals(NUM_STRINGS, readNumbers.size()); // Verify that we don't have duplicates (boom!, exactly-once) Assert.assertEquals(NUM_STRINGS, numRead); }
From source file:org.apache.flink.streaming.connectors.fs.bucketing.BucketingSinkFaultToleranceITCase.java
License:Apache License
@Override public void postSubmit() throws Exception { // We read the files and verify that we have read all the strings. If a valid-length // file exists we only read the file to that point. (This test should work with // FileSystems that support truncate() and with others as well.) Pattern messageRegex = Pattern.compile("message (\\d*)"); // Keep a set of the message IDs that we read. The size must equal the read count and // the NUM_STRINGS. If numRead is bigger than the size of the set we have seen some // elements twice. Set<Integer> readNumbers = Sets.newHashSet(); HashSet<String> uniqMessagesRead = new HashSet<>(); HashSet<String> messagesInCommittedFiles = new HashSet<>(); RemoteIterator<LocatedFileStatus> files = dfs.listFiles(new Path(outPath), true); while (files.hasNext()) { LocatedFileStatus file = files.next(); if (!file.getPath().toString().endsWith(".valid-length")) { int validLength = (int) file.getLen(); if (dfs.exists(file.getPath().suffix(".valid-length"))) { FSDataInputStream inStream = dfs.open(file.getPath().suffix(".valid-length")); String validLengthString = inStream.readUTF(); validLength = Integer.parseInt(validLengthString); System.out.println("VALID LENGTH: " + validLength); }//w w w . jav a2 s . 
co m FSDataInputStream inStream = dfs.open(file.getPath()); byte[] buffer = new byte[validLength]; inStream.readFully(0, buffer, 0, validLength); inStream.close(); ByteArrayInputStream bais = new ByteArrayInputStream(buffer); InputStreamReader inStreamReader = new InputStreamReader(bais); BufferedReader br = new BufferedReader(inStreamReader); String line = br.readLine(); while (line != null) { Matcher matcher = messageRegex.matcher(line); if (matcher.matches()) { uniqMessagesRead.add(line); // check that in the committed files there are no duplicates if (!file.getPath().toString().endsWith(IN_PROGRESS_SUFFIX) && !file.getPath().toString().endsWith(PENDING_SUFFIX)) { if (!messagesInCommittedFiles.add(line)) { Assert.fail("Duplicate entry in committed bucket."); } } int messageId = Integer.parseInt(matcher.group(1)); readNumbers.add(messageId); } else { Assert.fail("Read line does not match expected pattern."); } line = br.readLine(); } br.close(); inStreamReader.close(); bais.close(); } } // Verify that we read all strings (at-least-once) Assert.assertEquals(NUM_STRINGS, readNumbers.size()); // Verify that we don't have duplicates (boom!, exactly-once) Assert.assertEquals(NUM_STRINGS, uniqMessagesRead.size()); }
From source file:org.apache.impala.catalog.HdfsTable.java
License:Apache License
/**
 * Drops and re-loads the block metadata for all partitions in 'partsByPath' whose
 * location is under the given 'dirPath'. It involves the following steps:
 * - Clear the current block metadata of the partitions.
 * - Call FileSystem.listStatus() on 'dirPath' to fetch the BlockLocations for each
 *   file under it recursively.
 * - For every valid data file, map it to a partition from 'partsByPath' (if one exists)
 *   and enumerate all its blocks and their corresponding hosts and disk IDs.
 * Requires that 'dirPath' and all paths in 'partsByPath' have consistent qualification
 * (either fully qualified or unqualified), for isDescendantPath().
 * TODO: Split this method into more logical methods for cleaner code.
 */
private void loadBlockMetadata(Path dirPath, HashMap<Path, List<HdfsPartition>> partsByPath) {
    try {
        FileSystem fs = dirPath.getFileSystem(CONF);
        // No need to load blocks for empty partitions list.
        if (partsByPath.size() == 0 || !fs.exists(dirPath))
            return;
        if (LOG.isTraceEnabled()) {
            LOG.trace("Loading block md for " + name_ + " directory " + dirPath.toString());
        }

        // Clear the state of partitions under dirPath since they are going to be updated
        // based on the current snapshot of files in the directory.
        List<HdfsPartition> dirPathPartitions = partsByPath.get(dirPath);
        if (dirPathPartitions != null) {
            // The dirPath is a partition directory. This means the path is the root of an
            // unpartitioned table, or the path of at least one partition.
            for (HdfsPartition partition : dirPathPartitions) {
                partition.setFileDescriptors(new ArrayList<FileDescriptor>());
            }
        } else {
            // The dirPath is not a partition directory. We expect it to be an ancestor of
            // partition paths (e.g., the table root). Clear all partitions whose paths are
            // a descendant of dirPath.
            for (Map.Entry<Path, List<HdfsPartition>> entry : partsByPath.entrySet()) {
                Path partDir = entry.getKey();
                if (!FileSystemUtil.isDescendantPath(partDir, dirPath))
                    continue;
                for (HdfsPartition partition : entry.getValue()) {
                    partition.setFileDescriptors(new ArrayList<FileDescriptor>());
                }
            }
        }

        // For file systems that do not support BlockLocation API, we manually synthesize
        // block location metadata based on file formats.
        if (!FileSystemUtil.supportsStorageIds(fs)) {
            synthesizeBlockMetadata(fs, dirPath, partsByPath);
            return;
        }

        int unknownDiskIdCount = 0;
        RemoteIterator<LocatedFileStatus> fileStatusIter = fs.listFiles(dirPath, true);
        while (fileStatusIter.hasNext()) {
            LocatedFileStatus fileStatus = fileStatusIter.next();
            if (!FileSystemUtil.isValidDataFile(fileStatus))
                continue;
            // Find the partition that this file belongs (if any).
            Path partPathDir = fileStatus.getPath().getParent();
            Preconditions.checkNotNull(partPathDir);
            List<HdfsPartition> partitions = partsByPath.get(partPathDir);
            // Skip if this file does not belong to any known partition.
            if (partitions == null) {
                if (LOG.isTraceEnabled()) {
                    LOG.trace("File " + fileStatus.getPath().toString() + " doesn't correspond "
                            + " to a known partition. Skipping metadata load for this file.");
                }
                continue;
            }
            String fileName = fileStatus.getPath().getName();
            FileDescriptor fd = new FileDescriptor(fileName, fileStatus.getLen(),
                    fileStatus.getModificationTime());
            BlockLocation[] locations = fileStatus.getBlockLocations();
            String partPathDirName = partPathDir.toString();
            for (BlockLocation loc : locations) {
                Set<String> cachedHosts = Sets.newHashSet(loc.getCachedHosts());
                // Enumerate all replicas of the block, adding any unknown hosts
                // to hostIndex_. We pick the network address from getNames() and
                // map it to the corresponding hostname from getHosts().
                List<BlockReplica> replicas = Lists.newArrayListWithExpectedSize(loc.getNames().length);
                for (int i = 0; i < loc.getNames().length; ++i) {
                    TNetworkAddress networkAddress = BlockReplica.parseLocation(loc.getNames()[i]);
                    replicas.add(new BlockReplica(hostIndex_.getIndex(networkAddress),
                            cachedHosts.contains(loc.getHosts()[i])));
                }
                FileBlock currentBlock = new FileBlock(loc.getOffset(), loc.getLength(), replicas);
                THdfsFileBlock tHdfsFileBlock = currentBlock.toThrift();
                fd.addThriftFileBlock(tHdfsFileBlock);
                unknownDiskIdCount += loadDiskIds(loc, tHdfsFileBlock);
            }
            if (LOG.isTraceEnabled()) {
                LOG.trace("Adding file md dir: " + partPathDirName + " file: " + fileName);
            }
            // Update the partitions' metadata that this file belongs to.
            for (HdfsPartition partition : partitions) {
                partition.getFileDescriptors().add(fd);
                numHdfsFiles_++;
                totalHdfsBytes_ += fd.getFileLength();
            }
        }
        if (unknownDiskIdCount > 0) {
            if (LOG.isWarnEnabled()) {
                LOG.warn("Unknown disk id count for filesystem " + fs + ":" + unknownDiskIdCount);
            }
        }
    } catch (IOException e) {
        throw new RuntimeException(
                "Error loading block metadata for directory " + dirPath.toString() + ": " + e.getMessage(), e);
    }
}
From source file:org.apache.impala.catalog.HdfsTable.java
License:Apache License
/** * For filesystems that don't support BlockLocation API, synthesize file blocks * by manually splitting the file range into fixed-size blocks. That way, scan * ranges can be derived from file blocks as usual. All synthesized blocks are given * an invalid network address so that the scheduler will treat them as remote. *//* w ww . j av a2s. c om*/ private void synthesizeBlockMetadata(FileSystem fs, Path dirPath, HashMap<Path, List<HdfsPartition>> partsByPath) throws IOException { RemoteIterator<LocatedFileStatus> fileStatusIter = fs.listFiles(dirPath, true); while (fileStatusIter.hasNext()) { LocatedFileStatus fileStatus = fileStatusIter.next(); if (!FileSystemUtil.isValidDataFile(fileStatus)) continue; Path partPathDir = fileStatus.getPath().getParent(); Preconditions.checkNotNull(partPathDir); List<HdfsPartition> partitions = partsByPath.get(partPathDir); // Skip if this file does not belong to any known partition. if (partitions == null) { if (LOG.isTraceEnabled()) { LOG.trace("File " + fileStatus.getPath().toString() + " doesn't correspond " + " to a known partition. Skipping metadata load for this file."); } continue; } String fileName = fileStatus.getPath().getName(); FileDescriptor fd = new FileDescriptor(fileName, fileStatus.getLen(), fileStatus.getModificationTime()); Preconditions.checkState(partitions.size() > 0); // For the purpose of synthesizing block metadata, we assume that all partitions // with the same location have the same file format. HdfsFileFormat fileFormat = partitions.get(0).getFileFormat(); synthesizeFdBlockMetadata(fs, fd, fileFormat); // Update the partitions' metadata that this file belongs to. for (HdfsPartition partition : partitions) { partition.getFileDescriptors().add(fd); numHdfsFiles_++; totalHdfsBytes_ += fd.getFileLength(); } } }
From source file:org.apache.tajo.engine.planner.TestPlannerUtil.java
License:Apache License
@Test public void testGetNonZeroLengthDataFiles() throws Exception { String queryFiles = ClassLoader.getSystemResource("queries").toString() + "/TestSelectQuery"; Path path = new Path(queryFiles); TableDesc tableDesc = new TableDesc(); tableDesc.setName("Test"); tableDesc.setPath(path.toUri());/* w w w .ja v a 2s. c o m*/ FileSystem fs = path.getFileSystem(util.getConfiguration()); List<Path> expectedFiles = new ArrayList<Path>(); RemoteIterator<LocatedFileStatus> files = fs.listFiles(path, true); while (files.hasNext()) { LocatedFileStatus file = files.next(); if (file.isFile() && file.getLen() > 0) { expectedFiles.add(file.getPath()); } } int fileNum = expectedFiles.size() / 5; int numResultFiles = 0; for (int i = 0; i <= 5; i++) { int start = i * fileNum; FragmentProto[] fragments = PhysicalPlanUtil.getNonZeroLengthDataFiles(util.getConfiguration(), tableDesc, start, fileNum); assertNotNull(fragments); numResultFiles += fragments.length; int expectedSize = fileNum; if (i == 5) { //last expectedSize = expectedFiles.size() - (fileNum * 5); } comparePath(expectedFiles, fragments, start, expectedSize); } assertEquals(expectedFiles.size(), numResultFiles); }