List of usage examples for org.apache.hadoop.fs.FileStatus.getPath()
public Path getPath()
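FileStatus.getPath() returns the Path of the file or directory that the status describes. The examples below use it to build split locations, filter directory listings by name, and recurse into subdirectories. As a minimal, self-contained sketch of the call (the /tmp directory is just a placeholder), listing a directory and printing each entry's path looks like this:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetPathExample
{
    public static void main(String[] args)
            throws Exception
    {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // list the directory and print each entry's full path and its final name component
        for (FileStatus status : fs.listStatus(new Path("/tmp"))) {
            System.out.println(status.getPath() + " (name: " + status.getPath().getName() + ")");
        }
    }
}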
From source file:com.facebook.presto.hive.HiveSplitIterable.java
License:Apache License
private List<HiveSplit> createHiveSplits(String partitionName, FileStatus file, BlockLocation[] blockLocations,
        long start, long length, Properties schema, List<HivePartitionKey> partitionKeys, boolean splittable)
        throws IOException
{
    ImmutableList.Builder<HiveSplit> builder = ImmutableList.builder();
    if (splittable) {
        for (BlockLocation blockLocation : blockLocations) {
            // get the addresses for the block
            List<HostAddress> addresses = toHostAddress(blockLocation.getHosts());

            // divide the block into uniform chunks that are smaller than the max split size
            int chunks = Math.max(1, (int) (blockLocation.getLength() / maxSplitSize.toBytes()));
            // when block does not divide evenly into chunks, make the chunk size slightly bigger than necessary
            long targetChunkSize = (long) Math.ceil(blockLocation.getLength() * 1.0 / chunks);

            long chunkOffset = 0;
            while (chunkOffset < blockLocation.getLength()) {
                // adjust the actual chunk size to account for the overrun when chunks are slightly bigger than necessary (see above)
                long chunkLength = Math.min(targetChunkSize, blockLocation.getLength() - chunkOffset);

                builder.add(new HiveSplit(clientId,
                        table.getDbName(),
                        table.getTableName(),
                        partitionName,
                        false,
                        file.getPath().toString(),
                        blockLocation.getOffset() + chunkOffset,
                        chunkLength,
                        schema,
                        partitionKeys,
                        addresses));

                chunkOffset += chunkLength;
            }
            checkState(chunkOffset == blockLocation.getLength(), "Error splitting blocks");
        }
    }
    else {
        // not splittable, use the hosts from the first block
        builder.add(new HiveSplit(clientId,
                table.getDbName(),
                table.getTableName(),
                partitionName,
                false,
                file.getPath().toString(),
                start,
                length,
                schema,
                partitionKeys,
                toHostAddress(blockLocations[0].getHosts())));
    }
    return builder.build();
}
From source file:com.facebook.presto.hive.HiveSplitSourceProvider.java
License:Apache License
private void loadPartitionSplits(final HiveSplitSource hiveSplitSource, SuspendingExecutor suspendingExecutor,
        final ConnectorSession session)
{
    final Semaphore semaphore = new Semaphore(maxPartitionBatchSize);
    try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) {
        ImmutableList.Builder<ListenableFuture<Void>> futureBuilder = ImmutableList.builder();

        Iterator<String> nameIterator = partitionNames.iterator();
        for (Partition partition : partitions) {
            checkState(nameIterator.hasNext(), "different number of partitions and partition names!");
            final String partitionName = nameIterator.next();
            final Properties schema = getPartitionSchema(table, partition);
            final List<HivePartitionKey> partitionKeys = getPartitionKeys(table, partition);

            Path path = new Path(getPartitionLocation(table, partition));
            final Configuration configuration = hdfsEnvironment.getConfiguration(path);
            final InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, false);

            FileSystem fs = path.getFileSystem(configuration);

            if (inputFormat instanceof SymlinkTextInputFormat) {
                JobConf jobConf = new JobConf(configuration);
                FileInputFormat.setInputPaths(jobConf, path);
                InputSplit[] splits = inputFormat.getSplits(jobConf, 0);
                for (InputSplit rawSplit : splits) {
                    FileSplit split = ((SymlinkTextInputFormat.SymlinkTextInputSplit) rawSplit).getTargetSplit();

                    // get the filesystem for the target path -- it may be a different hdfs instance
                    FileSystem targetFilesystem = split.getPath().getFileSystem(configuration);
                    FileStatus fileStatus = targetFilesystem.getFileStatus(split.getPath());
                    hiveSplitSource.addToQueue(createHiveSplits(partitionName,
                            fileStatus,
                            targetFilesystem.getFileBlockLocations(fileStatus, split.getStart(), split.getLength()),
                            split.getStart(),
                            split.getLength(),
                            schema,
                            partitionKeys,
                            false,
                            session));
                }
                continue;
            }

            // TODO: this is currently serial across all partitions and should be done in suspendingExecutor
            if (bucket.isPresent()) {
                Optional<FileStatus> bucketFile = getBucketFile(bucket.get(), fs, path);
                if (bucketFile.isPresent()) {
                    FileStatus file = bucketFile.get();
                    BlockLocation[] blockLocations = fs.getFileBlockLocations(file, 0, file.getLen());
                    boolean splittable = isSplittable(inputFormat, fs, file.getPath());

                    hiveSplitSource.addToQueue(createHiveSplits(partitionName,
                            file,
                            blockLocations,
                            0,
                            file.getLen(),
                            schema,
                            partitionKeys,
                            splittable,
                            session));
                    continue;
                }
            }

            // Acquire semaphore so that we only have a fixed number of outstanding partitions being processed asynchronously
            // NOTE: there must not be any calls that throw in the space between acquiring the semaphore and setting the Future
            // callback to release it. Otherwise, we will need a try-finally block around this section.
            try {
                semaphore.acquire();
            }
            catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                return;
            }

            ListenableFuture<Void> partitionFuture = createAsyncWalker(fs, suspendingExecutor).beginWalk(path,
                    new FileStatusCallback()
                    {
                        @Override
                        public void process(FileStatus file, BlockLocation[] blockLocations)
                        {
                            try {
                                boolean splittable = isSplittable(inputFormat,
                                        file.getPath().getFileSystem(configuration),
                                        file.getPath());

                                hiveSplitSource.addToQueue(createHiveSplits(partitionName,
                                        file,
                                        blockLocations,
                                        0,
                                        file.getLen(),
                                        schema,
                                        partitionKeys,
                                        splittable,
                                        session));
                            }
                            catch (IOException e) {
                                hiveSplitSource.fail(e);
                            }
                        }
                    });

            // release the semaphore when the partition finishes
            Futures.addCallback(partitionFuture, new FutureCallback<Void>()
            {
                @Override
                public void onSuccess(Void result)
                {
                    semaphore.release();
                }

                @Override
                public void onFailure(Throwable t)
                {
                    semaphore.release();
                }
            });

            futureBuilder.add(partitionFuture);
        }

        // when all partitions finish, mark the queue as finished
        Futures.addCallback(Futures.allAsList(futureBuilder.build()), new FutureCallback<List<Void>>()
        {
            @Override
            public void onSuccess(List<Void> result)
            {
                hiveSplitSource.finished();
            }

            @Override
            public void onFailure(Throwable t)
            {
                hiveSplitSource.fail(t);
            }
        });
    }
    catch (Throwable e) {
        hiveSplitSource.fail(e);
        Throwables.propagateIfInstanceOf(e, Error.class);
    }
}
From source file:com.facebook.presto.hive.HiveSplitSourceProvider.java
License:Apache License
private static Optional<FileStatus> getBucketFile(HiveBucket bucket, FileSystem fs, Path path)
{
    FileStatus[] statuses = listStatus(fs, path);

    // a bucketed partition must contain exactly one file per bucket
    if (statuses.length != bucket.getBucketCount()) {
        return Optional.absent();
    }

    Map<String, FileStatus> map = new HashMap<>();
    List<String> paths = new ArrayList<>();
    for (FileStatus status : statuses) {
        // every entry must be a plain file
        if (!isFile(status)) {
            return Optional.absent();
        }
        String pathString = status.getPath().toString();
        map.put(pathString, status);
        paths.add(pathString);
    }

    // Hive sorts the paths as strings lexicographically
    Collections.sort(paths);

    String pathString = paths.get(bucket.getBucketNumber());
    return Optional.of(map.get(pathString));
}
From source file:com.facebook.presto.hive.HiveSplitSourceProvider.java
License:Apache License
private List<HiveSplit> createHiveSplits(String partitionName, FileStatus file, BlockLocation[] blockLocations,
        long start, long length, Properties schema, List<HivePartitionKey> partitionKeys, boolean splittable,
        ConnectorSession session)
        throws IOException
{
    ImmutableList.Builder<HiveSplit> builder = ImmutableList.builder();
    if (splittable) {
        for (BlockLocation blockLocation : blockLocations) {
            // get the addresses for the block
            List<HostAddress> addresses = toHostAddress(blockLocation.getHosts());

            long maxBytes = maxSplitSize.toBytes();
            if (remainingInitialSplits > 0) {
                maxBytes = maxInitialSplitSize.toBytes();
            }

            // divide the block into uniform chunks that are smaller than the max split size
            int chunks = Math.max(1, (int) (blockLocation.getLength() / maxBytes));
            // when block does not divide evenly into chunks, make the chunk size slightly bigger than necessary
            long targetChunkSize = (long) Math.ceil(blockLocation.getLength() * 1.0 / chunks);

            long chunkOffset = 0;
            while (chunkOffset < blockLocation.getLength()) {
                // adjust the actual chunk size to account for the overrun when chunks are slightly bigger than necessary (see above)
                long chunkLength = Math.min(targetChunkSize, blockLocation.getLength() - chunkOffset);

                builder.add(new HiveSplit(connectorId,
                        table.getDbName(),
                        table.getTableName(),
                        partitionName,
                        file.getPath().toString(),
                        blockLocation.getOffset() + chunkOffset,
                        chunkLength,
                        schema,
                        partitionKeys,
                        addresses,
                        session));

                chunkOffset += chunkLength;
                remainingInitialSplits--;
            }
            checkState(chunkOffset == blockLocation.getLength(), "Error splitting blocks");
        }
    }
    else {
        // not splittable, use the hosts from the first block if it exists
        List<HostAddress> addresses = ImmutableList.of();
        if (blockLocations.length > 0) {
            addresses = toHostAddress(blockLocations[0].getHosts());
        }

        builder.add(new HiveSplit(connectorId,
                table.getDbName(),
                table.getTableName(),
                partitionName,
                file.getPath().toString(),
                start,
                length,
                schema,
                partitionKeys,
                addresses,
                session));
    }
    return builder.build();
}
From source file:com.facebook.presto.hive.metastore.file.FileHiveMetastore.java
License:Apache License
private List<ArrayDeque<String>> listPartitions(Path director, List<Column> partitionColumns)
{
    if (partitionColumns.isEmpty()) {
        return ImmutableList.of();
    }

    try {
        // partition directories are named <columnName>=<value>
        String directoryPrefix = partitionColumns.get(0).getName() + '=';

        List<ArrayDeque<String>> partitionValues = new ArrayList<>();
        for (FileStatus fileStatus : metadataFileSystem.listStatus(director)) {
            if (!fileStatus.isDirectory()) {
                continue;
            }
            if (!fileStatus.getPath().getName().startsWith(directoryPrefix)) {
                continue;
            }

            // recurse into the child directory for the remaining partition columns
            List<ArrayDeque<String>> childPartitionValues;
            if (partitionColumns.size() == 1) {
                childPartitionValues = ImmutableList.of(new ArrayDeque<>());
            }
            else {
                childPartitionValues = listPartitions(fileStatus.getPath(),
                        partitionColumns.subList(1, partitionColumns.size()));
            }

            // prepend this level's value to every child partition tuple
            String value = fileStatus.getPath().getName().substring(directoryPrefix.length());
            for (ArrayDeque<String> childPartition : childPartitionValues) {
                childPartition.addFirst(value);
                partitionValues.add(childPartition);
            }
        }
        return partitionValues;
    }
    catch (IOException e) {
        throw new PrestoException(HIVE_METASTORE_ERROR, "Error listing partition directories", e);
    }
}
From source file:com.facebook.presto.hive.metastore.file.FileHiveMetastore.java
License:Apache License
private List<Path> getChildSchemaDirectories(Path metadataDirectory)
{
    try {
        if (!metadataFileSystem.isDirectory(metadataDirectory)) {
            return ImmutableList.of();
        }

        ImmutableList.Builder<Path> childSchemaDirectories = ImmutableList.builder();
        for (FileStatus child : metadataFileSystem.listStatus(metadataDirectory)) {
            if (!child.isDirectory()) {
                continue;
            }
            Path childPath = child.getPath();
            // skip hidden directories
            if (childPath.getName().startsWith(".")) {
                continue;
            }
            // only directories containing a schema file count as schema directories
            if (metadataFileSystem.isFile(new Path(childPath, PRESTO_SCHEMA_FILE_NAME))) {
                childSchemaDirectories.add(childPath);
            }
        }
        return childSchemaDirectories.build();
    }
    catch (IOException e) {
        throw new PrestoException(HIVE_METASTORE_ERROR, e);
    }
}
From source file:com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore.java
License:Apache License
private static RecursiveDeleteResult doRecursiveDeleteFiles(FileSystem fileSystem, Path directory,
        List<String> filePrefixes, boolean deleteEmptyDirectories)
{
    FileStatus[] allFiles;
    try {
        allFiles = fileSystem.listStatus(directory);
    }
    catch (IOException e) {
        // the listing failed, so nothing under this directory can be reported as deleted
        ImmutableList.Builder<String> notDeletedItems = ImmutableList.builder();
        notDeletedItems.add(directory.toString() + "/**");
        return new RecursiveDeleteResult(false, notDeletedItems.build());
    }

    boolean allDescendentsDeleted = true;
    ImmutableList.Builder<String> notDeletedEligibleItems = ImmutableList.builder();
    for (FileStatus fileStatus : allFiles) {
        if (HadoopFileStatus.isFile(fileStatus)) {
            Path filePath = fileStatus.getPath();
            String fileName = filePath.getName();
            // only files whose names start with one of the given prefixes are eligible for deletion
            boolean eligible = false;
            for (String filePrefix : filePrefixes) {
                if (fileName.startsWith(filePrefix)) {
                    eligible = true;
                    break;
                }
            }
            if (eligible) {
                if (!deleteIfExists(fileSystem, filePath, false)) {
                    allDescendentsDeleted = false;
                    notDeletedEligibleItems.add(filePath.toString());
                }
            }
            else {
                allDescendentsDeleted = false;
            }
        }
        else if (HadoopFileStatus.isDirectory(fileStatus)) {
            RecursiveDeleteResult subResult = doRecursiveDeleteFiles(fileSystem, fileStatus.getPath(),
                    filePrefixes, deleteEmptyDirectories);
            if (!subResult.isDirectoryNoLongerExists()) {
                allDescendentsDeleted = false;
            }
            if (!subResult.getNotDeletedEligibleItems().isEmpty()) {
                notDeletedEligibleItems.addAll(subResult.getNotDeletedEligibleItems());
            }
        }
        else {
            // entries that are neither files nor directories are never deleted
            allDescendentsDeleted = false;
            notDeletedEligibleItems.add(fileStatus.getPath().toString());
        }
    }

    if (allDescendentsDeleted && deleteEmptyDirectories) {
        verify(notDeletedEligibleItems.build().isEmpty());
        if (!deleteIfExists(fileSystem, directory, false)) {
            return new RecursiveDeleteResult(false, ImmutableList.of(directory.toString() + "/"));
        }
        return new RecursiveDeleteResult(true, ImmutableList.of());
    }
    return new RecursiveDeleteResult(false, notDeletedEligibleItems.build());
}
From source file:com.facebook.presto.hive.s3.PrestoS3FileSystem.java
License:Apache License
@Override
public boolean rename(Path src, Path dst)
        throws IOException
{
    boolean srcDirectory;
    try {
        srcDirectory = directory(src);
    }
    catch (FileNotFoundException e) {
        return false;
    }

    try {
        if (!directory(dst)) {
            // cannot copy a file to an existing file
            return keysEqual(src, dst);
        }
        // move source under destination directory
        dst = new Path(dst, src.getName());
    }
    catch (FileNotFoundException e) {
        // destination does not exist
    }

    if (keysEqual(src, dst)) {
        return true;
    }

    if (srcDirectory) {
        for (FileStatus file : listStatus(src)) {
            rename(file.getPath(), new Path(dst, file.getPath().getName()));
        }
        deleteObject(keyFromPath(src) + DIRECTORY_SUFFIX);
    }
    else {
        s3.copyObject(getBucketName(uri), keyFromPath(src), getBucketName(uri), keyFromPath(dst));
        delete(src, true);
    }

    return true;
}
From source file:com.facebook.presto.hive.s3.PrestoS3FileSystem.java
License:Apache License
@Override
public boolean delete(Path path, boolean recursive)
        throws IOException
{
    try {
        if (!directory(path)) {
            return deleteObject(keyFromPath(path));
        }
    }
    catch (FileNotFoundException e) {
        return false;
    }

    if (!recursive) {
        throw new IOException("Directory " + path + " is not empty");
    }

    // delete all children, then remove the directory marker object
    for (FileStatus file : listStatus(path)) {
        delete(file.getPath(), true);
    }
    deleteObject(keyFromPath(path) + DIRECTORY_SUFFIX);

    return true;
}
From source file:com.flipkart.fdp.migration.distcp.codec.GenericHadoopCodec.java
License:Apache License
public List<FileTuple> getInputPaths(Collection<String> paths, Collection<String> excludeList)
        throws Exception
{
    System.out.println("A total of " + paths.size() + " paths to scan...");

    List<FileTuple> fileList = new ArrayList<FileTuple>();
    List<String> inputPaths = new ArrayList<String>();

    // Process regular expression based paths
    for (String path : paths) {
        System.out.println("Processing path: " + path);
        FileStatus[] stats = fs.globStatus(new Path(path));
        if (stats == null || stats.length <= 0) {
            continue;
        }
        for (FileStatus fstat : stats) {
            if (fstat.isFile()) {
                fileList.add(new FileTuple(MirrorUtils.getSimplePath(fstat.getPath()), fstat.getLen(),
                        fstat.getModificationTime()));
            }
            else {
                inputPaths.add(MirrorUtils.getSimplePath(fstat.getPath()));
            }
        }
    }

    if (inputPaths.size() > 0) {
        for (String path : inputPaths) {
            List<FileTuple> fstat = getFileStatusRecursive(new Path(path), excludeList);
            fileList.addAll(fstat);
        }
    }
    return fileList;
}