List of usage examples for org.apache.hadoop.fs.Path#getFileSystem
public FileSystem getFileSystem(Configuration conf) throws IOException
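A minimal, self-contained sketch of the typical call pattern before the real-world examples below; the path and configuration here are illustrative, not taken from any of the listed sources:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;

public class GetFileSystemExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // The FileSystem implementation is resolved from the path's URI scheme;
        // a scheme-less path falls back to fs.defaultFS (the local filesystem by default).
        Path path = new Path("file:///tmp/example.txt");
        FileSystem fs = path.getFileSystem(conf);
        if (!fs.exists(path)) {
            fs.create(path).close(); // create an empty marker file, like the _SUCCESS idiom below
        }
    }
}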
From source file:com.facebook.hiveio.output.HiveApiOutputCommitter.java
License:Apache License
/**
 * Write success file to Hadoop if required
 *
 * @param conf Configuration
 * @throws IOException I/O errors
 */
private static void writeSuccessFile(Configuration conf) throws IOException {
    if (!HadoopUtils.needSuccessMarker(conf)) {
        return;
    }
    Path outputPath = HadoopUtils.getOutputPath(conf);
    FileSystem fs = outputPath.getFileSystem(conf);
    if (fs.exists(outputPath)) {
        Path successPath = new Path(outputPath, "_SUCCESS");
        if (!fs.exists(successPath)) {
            LOG.info("Writing success file to {}", successPath);
            fs.create(successPath).close();
        }
    }
}
From source file:com.facebook.hiveio.output.HiveApiOutputCommitter.java
License:Apache License
/**
 * Table has no partitions, just copy data
 *
 * @param conf Configuration
 * @param outputInfo OutputInfo
 * @throws IOException I/O errors
 */
private void noPartitionsCopyData(Configuration conf, OutputInfo outputInfo) throws IOException {
    Preconditions.checkArgument(!outputInfo.hasPartitionInfo());
    Path tablePath = new Path(outputInfo.getTableRoot());
    Path writePath = new Path(outputInfo.getPartitionPath());
    FileSystem tableFs = tablePath.getFileSystem(conf);
    FileSystem writePathFs = writePath.getFileSystem(conf);
    if (!tableFs.getUri().equals(writePathFs.getUri())) {
        LOG.error("Table's root path fs {} is not the same as its partition path fs {}",
                tableFs.getUri(), writePathFs.getUri());
        throw new IllegalStateException("Table's root path fs " + tableFs.getUri()
                + " is not the same as its partition path fs " + writePathFs.getUri());
    }
    LOG.info("No partitions, copying data from {} to {}", writePath, tablePath);
    FileSystems.move(tableFs, writePath, writePath, tablePath);
    tableFs.delete(writePath, true);
}
From source file:com.facebook.hiveio.output.HiveApiOutputFormat.java
License:Apache License
/**
 * Check if the given table is empty, that is, has no files
 *
 * @param conf Configuration to use
 * @param description HiveOutputDescription
 * @param oti OutputInfo
 * @throws IOException Hadoop Filesystem issues
 */
private void checkTableIsEmpty(Configuration conf, HiveOutputDescription description, OutputInfo oti)
        throws IOException {
    Path tablePath = new Path(oti.getTableRoot());
    FileSystem fs = tablePath.getFileSystem(conf);
    if (fs.exists(tablePath) && FileSystems.dirHasNonHiddenFiles(fs, tablePath)) {
        throw new IOException("Table " + description.getTableDesc().getTableName()
                + " has existing data");
    }
}
From source file:com.facebook.presto.hive.AbstractTestHiveClientS3.java
License:Apache License
@Test
public void testGetFileStatus() throws Exception {
    Path basePath = new Path("s3://presto-test-hive/");
    Path tablePath = new Path(basePath, "presto_test_s3");
    Path filePath = new Path(tablePath, "test1.csv");
    FileSystem fs = basePath.getFileSystem(hdfsEnvironment.getConfiguration(basePath));
    assertTrue(isDirectory(fs.getFileStatus(basePath)));
    assertTrue(isDirectory(fs.getFileStatus(tablePath)));
    assertFalse(isDirectory(fs.getFileStatus(filePath)));
    assertFalse(fs.exists(new Path(basePath, "foo")));
}
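The test above relies on getFileSystem picking the implementation from the URI scheme (s3:// here). A short sketch, assuming only the local filesystem is available, that makes the scheme-based dispatch visible; the class name printed is what stock Hadoop ships, not part of the Presto test:

Configuration conf = new Configuration();
FileSystem local = new Path("file:///tmp").getFileSystem(conf);
System.out.println(local.getClass().getName()); // org.apache.hadoop.fs.LocalFileSystem
// An s3:// or hdfs:// path would resolve to a different implementation,
// provided the corresponding client classes are on the classpath.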
From source file:com.facebook.presto.hive.AbstractTestHiveFileFormats.java
License:Apache License
public FileSplit createTestFile(String filePath, HiveOutputFormat<?, ?> outputFormat,
        @SuppressWarnings("deprecation") SerDe serDe, String compressionCodec) throws Exception {
    JobConf jobConf = new JobConf();
    Properties tableProperties = new Properties();
    tableProperties.setProperty("columns", COLUMN_NAMES_STRING);
    tableProperties.setProperty("columns.types", COLUMN_TYPES);
    serDe.initialize(new Configuration(), tableProperties);

    if (compressionCodec != null) {
        CompressionCodec codec = new CompressionCodecFactory(new Configuration())
                .getCodecByName(compressionCodec);
        jobConf.set(COMPRESS_CODEC, codec.getClass().getName());
        jobConf.set(COMPRESS_TYPE, SequenceFile.CompressionType.BLOCK.toString());
    }

    RecordWriter recordWriter = outputFormat.getHiveRecordWriter(jobConf, new Path(filePath), Text.class,
            compressionCodec != null, tableProperties, new Progressable() {
                @Override
                public void progress() {
                }
            });

    try {
        serDe.initialize(new Configuration(), tableProperties);
        SettableStructObjectInspector objectInspector = getStandardStructObjectInspector(COLUMN_NAMES,
                FIELD_INSPECTORS);
        Object row = objectInspector.create();
        List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());
        for (int rowNumber = 0; rowNumber < NUM_ROWS; rowNumber++) {
            for (int i = 0; i < TEST_VALUES.size(); i++) {
                Object key = TEST_VALUES.get(i).getKey();
                if (key instanceof Slice) {
                    key = ((Slice) key).getBytes();
                }
                objectInspector.setStructFieldData(row, fields.get(i), key);
            }
            Writable record = serDe.serialize(row, objectInspector);
            recordWriter.write(record);
        }
    }
    finally {
        recordWriter.close(false);
    }

    Path path = new Path(filePath);
    path.getFileSystem(new Configuration()).setVerifyChecksum(true);
    File file = new File(filePath);
    return new FileSplit(path, 0, file.length(), new String[0]);
}
From source file:com.facebook.presto.hive.BenchmarkHiveFileFormats.java
License:Apache License
private static FileSplit createFileSplit(File file) {
    try {
        Path lineitemPath = new Path(file.toURI());
        lineitemPath.getFileSystem(new Configuration()).setVerifyChecksum(false);
        return new FileSplit(lineitemPath, 0, file.length(), new String[0]);
    }
    catch (IOException e) {
        throw Throwables.propagate(e);
    }
}
From source file:com.facebook.presto.hive.DwrfRecordCursorProvider.java
License:Apache License
@Override
public Optional<HiveRecordCursor> createHiveRecordCursor(String clientId, Configuration configuration,
        ConnectorSession session, Path path, long start, long length, Properties schema,
        List<HiveColumnHandle> columns, List<HivePartitionKey> partitionKeys,
        TupleDomain<HiveColumnHandle> tupleDomain, DateTimeZone hiveStorageTimeZone, TypeManager typeManager) {
    @SuppressWarnings("deprecation")
    Deserializer deserializer = getDeserializer(schema);
    if (!(deserializer instanceof OrcSerde)) {
        return Optional.absent();
    }

    StructObjectInspector rowInspector = getTableObjectInspector(schema);
    if (!all(rowInspector.getAllStructFieldRefs(), isSupportedDwrfType())) {
        throw new IllegalArgumentException("DWRF does not support DATE type");
    }

    ReaderWriterProfiler.setProfilerOptions(configuration);

    RecordReader recordReader;
    try {
        FileSystem fileSystem = path.getFileSystem(configuration);
        Reader reader = OrcFile.createReader(fileSystem, path, new JobConf(configuration));
        boolean[] include = findIncludedColumns(reader.getTypes(), columns);
        recordReader = reader.rows(start, length, include);
    }
    catch (Exception e) {
        throw Throwables.propagate(e);
    }

    return Optional.<HiveRecordCursor>of(new DwrfHiveRecordCursor(recordReader, length, schema, partitionKeys,
            columns, hiveStorageTimeZone, DateTimeZone.forID(session.getTimeZoneKey().getId()), typeManager));
}
From source file:com.facebook.presto.hive.FileSystemCache.java
License:Apache License
private static Callable<FileSystem> createFileSystemFromPath(final Path path, final Configuration conf) {
    return new Callable<FileSystem>() {
        @Override
        public FileSystem call() throws Exception {
            return path.getFileSystem(conf);
        }
    };
}
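A hypothetical caller, not from the Presto source, showing how such a Callable pairs with a Guava cache so the (potentially expensive) filesystem lookup runs only on a cache miss; the fileSystemCache variable and its key type are assumptions:

import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;

Cache<Path, FileSystem> fileSystemCache = CacheBuilder.newBuilder().build();
// Cache.get(key, loader) invokes the Callable only when the key is absent;
// it throws ExecutionException if the loader fails.
FileSystem fs = fileSystemCache.get(path, createFileSystemFromPath(path, conf));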
From source file:com.facebook.presto.hive.HiveClient.java
License:Apache License
private FileSystem getFileSystem(Path path) throws IOException {
    return path.getFileSystem(hdfsEnvironment.getConfiguration(path));
}
From source file:com.facebook.presto.hive.HiveSplitIterable.java
License:Apache License
private void loadPartitionSplits(final HiveSplitQueue hiveSplitQueue, SuspendingExecutor suspendingExecutor)
        throws InterruptedException {
    final Semaphore semaphore = new Semaphore(maxPartitionBatchSize);
    try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) {
        ImmutableList.Builder<ListenableFuture<Void>> futureBuilder = ImmutableList.builder();

        Iterator<String> nameIterator = partitionNames.iterator();
        for (Partition partition : partitions) {
            checkState(nameIterator.hasNext(), "different number of partitions and partition names!");
            semaphore.acquire();
            final String partitionName = nameIterator.next();
            final Properties schema = getPartitionSchema(table, partition);
            final List<HivePartitionKey> partitionKeys = getPartitionKeys(table, partition);

            Path path = new Path(getPartitionLocation(table, partition));
            final Configuration configuration = hdfsEnvironment.getConfiguration(path);
            final InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, false);
            Path partitionPath = hdfsEnvironment.getFileSystemWrapper().wrap(path);

            FileSystem fs = partitionPath.getFileSystem(configuration);
            final LastSplitMarkingQueue markerQueue = new LastSplitMarkingQueue(hiveSplitQueue);

            if (inputFormat instanceof SymlinkTextInputFormat) {
                JobConf jobConf = new JobConf(configuration);
                FileInputFormat.setInputPaths(jobConf, partitionPath);
                InputSplit[] splits = inputFormat.getSplits(jobConf, 0);
                for (InputSplit rawSplit : splits) {
                    FileSplit split = ((SymlinkTextInputFormat.SymlinkTextInputSplit) rawSplit).getTargetSplit();

                    // get the filesystem for the target path -- it may be a different hdfs instance
                    FileSystem targetFilesystem = split.getPath().getFileSystem(configuration);
                    FileStatus fileStatus = targetFilesystem.getFileStatus(split.getPath());
                    markerQueue.addToQueue(createHiveSplits(partitionName, fileStatus,
                            targetFilesystem.getFileBlockLocations(fileStatus, split.getStart(), split.getLength()),
                            split.getStart(), split.getLength(), schema, partitionKeys, false));
                }
                markerQueue.finish();
                continue;
            }

            ListenableFuture<Void> partitionFuture = new AsyncRecursiveWalker(fs, suspendingExecutor)
                    .beginWalk(partitionPath, new FileStatusCallback() {
                        @Override
                        public void process(FileStatus file, BlockLocation[] blockLocations) {
                            if (bucket.isPresent() && !fileMatchesBucket(file.getPath().getName(), bucket.get())) {
                                return;
                            }

                            try {
                                boolean splittable = isSplittable(inputFormat,
                                        file.getPath().getFileSystem(configuration), file.getPath());

                                markerQueue.addToQueue(createHiveSplits(partitionName, file, blockLocations, 0,
                                        file.getLen(), schema, partitionKeys, splittable));
                            }
                            catch (IOException e) {
                                hiveSplitQueue.fail(e);
                            }
                        }
                    });

            // release the semaphore when the partition finishes
            Futures.addCallback(partitionFuture, new FutureCallback<Void>() {
                @Override
                public void onSuccess(Void result) {
                    markerQueue.finish();
                    semaphore.release();
                }

                @Override
                public void onFailure(Throwable t) {
                    markerQueue.finish();
                    semaphore.release();
                }
            });

            futureBuilder.add(partitionFuture);
        }

        // when all partitions finish, mark the queue as finished
        Futures.addCallback(Futures.allAsList(futureBuilder.build()), new FutureCallback<List<Void>>() {
            @Override
            public void onSuccess(List<Void> result) {
                hiveSplitQueue.finished();
            }

            @Override
            public void onFailure(Throwable t) {
                hiveSplitQueue.fail(t);
            }
        });
    }
    catch (Throwable e) {
        hiveSplitQueue.fail(e);
        Throwables.propagateIfInstanceOf(e, Error.class);
    }
}