List of usage examples for org.apache.hadoop.fs.Path#getFileSystem
public FileSystem getFileSystem(Configuration conf) throws IOException
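Before the project examples, here is a minimal, self-contained sketch of the call itself (not taken from any of the projects below; the hdfs://namenode:8020 URI and the /tmp/example.txt path are placeholders). Path.getFileSystem(Configuration) resolves the FileSystem implementation (HDFS, local, S3, ...) from the path's URI scheme and the supplied Configuration, so the same code works against any supported filesystem.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileSystemExample
{
    public static void main(String[] args)
            throws IOException
    {
        // Placeholder URI: use "file:///tmp/example.txt" to target the local filesystem instead.
        Path path = new Path("hdfs://namenode:8020/tmp/example.txt");

        // An empty Configuration uses whatever core-site.xml/hdfs-site.xml is on the classpath.
        Configuration conf = new Configuration();

        // The scheme of the path plus the configuration decide which FileSystem is returned.
        FileSystem fs = path.getFileSystem(conf);

        if (fs.exists(path)) {
            FileStatus status = fs.getFileStatus(path);
            System.out.println(path + " has length " + status.getLen());
        }
    }
}

The Presto examples below follow the same pattern; some pass a JobConf, which is a Configuration subclass and can be handed to getFileSystem directly.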
From source file: com.facebook.presto.hive.HiveSplitSourceProvider.java
License: Apache License

private void loadPartitionSplits(final HiveSplitSource hiveSplitSource, SuspendingExecutor suspendingExecutor,
        final ConnectorSession session)
{
    final Semaphore semaphore = new Semaphore(maxPartitionBatchSize);
    try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) {
        ImmutableList.Builder<ListenableFuture<Void>> futureBuilder = ImmutableList.builder();

        Iterator<String> nameIterator = partitionNames.iterator();
        for (Partition partition : partitions) {
            checkState(nameIterator.hasNext(), "different number of partitions and partition names!");
            final String partitionName = nameIterator.next();
            final Properties schema = getPartitionSchema(table, partition);
            final List<HivePartitionKey> partitionKeys = getPartitionKeys(table, partition);

            Path path = new Path(getPartitionLocation(table, partition));
            final Configuration configuration = hdfsEnvironment.getConfiguration(path);
            final InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, false);

            FileSystem fs = path.getFileSystem(configuration);

            if (inputFormat instanceof SymlinkTextInputFormat) {
                JobConf jobConf = new JobConf(configuration);
                FileInputFormat.setInputPaths(jobConf, path);
                InputSplit[] splits = inputFormat.getSplits(jobConf, 0);
                for (InputSplit rawSplit : splits) {
                    FileSplit split = ((SymlinkTextInputFormat.SymlinkTextInputSplit) rawSplit).getTargetSplit();

                    // get the filesystem for the target path -- it may be a different hdfs instance
                    FileSystem targetFilesystem = split.getPath().getFileSystem(configuration);
                    FileStatus fileStatus = targetFilesystem.getFileStatus(split.getPath());
                    hiveSplitSource.addToQueue(createHiveSplits(partitionName, fileStatus,
                            targetFilesystem.getFileBlockLocations(fileStatus, split.getStart(), split.getLength()),
                            split.getStart(), split.getLength(), schema, partitionKeys, false, session));
                }
                continue;
            }

            // TODO: this is currently serial across all partitions and should be done in suspendingExecutor
            if (bucket.isPresent()) {
                Optional<FileStatus> bucketFile = getBucketFile(bucket.get(), fs, path);
                if (bucketFile.isPresent()) {
                    FileStatus file = bucketFile.get();
                    BlockLocation[] blockLocations = fs.getFileBlockLocations(file, 0, file.getLen());
                    boolean splittable = isSplittable(inputFormat, fs, file.getPath());

                    hiveSplitSource.addToQueue(createHiveSplits(partitionName, file, blockLocations,
                            0, file.getLen(), schema, partitionKeys, splittable, session));
                    continue;
                }
            }

            // Acquire semaphore so that we only have a fixed number of outstanding partitions being processed asynchronously
            // NOTE: there must not be any calls that throw in the space between acquiring the semaphore and setting the Future
            // callback to release it. Otherwise, we will need a try-finally block around this section.
            try {
                semaphore.acquire();
            }
            catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                return;
            }

            ListenableFuture<Void> partitionFuture = createAsyncWalker(fs, suspendingExecutor).beginWalk(path,
                    new FileStatusCallback()
                    {
                        @Override
                        public void process(FileStatus file, BlockLocation[] blockLocations)
                        {
                            try {
                                boolean splittable = isSplittable(inputFormat,
                                        file.getPath().getFileSystem(configuration), file.getPath());

                                hiveSplitSource.addToQueue(createHiveSplits(partitionName, file, blockLocations,
                                        0, file.getLen(), schema, partitionKeys, splittable, session));
                            }
                            catch (IOException e) {
                                hiveSplitSource.fail(e);
                            }
                        }
                    });

            // release the semaphore when the partition finishes
            Futures.addCallback(partitionFuture, new FutureCallback<Void>()
            {
                @Override
                public void onSuccess(Void result)
                {
                    semaphore.release();
                }

                @Override
                public void onFailure(Throwable t)
                {
                    semaphore.release();
                }
            });

            futureBuilder.add(partitionFuture);
        }

        // when all partitions finish, mark the queue as finished
        Futures.addCallback(Futures.allAsList(futureBuilder.build()), new FutureCallback<List<Void>>()
        {
            @Override
            public void onSuccess(List<Void> result)
            {
                hiveSplitSource.finished();
            }

            @Override
            public void onFailure(Throwable t)
            {
                hiveSplitSource.fail(t);
            }
        });
    }
    catch (Throwable e) {
        hiveSplitSource.fail(e);
        Throwables.propagateIfInstanceOf(e, Error.class);
    }
}
From source file: com.facebook.presto.hive.orc.DwrfRecordCursorProvider.java
License: Apache License

@Override
public Optional<HiveRecordCursor> createHiveRecordCursor(String clientId, Configuration configuration,
        ConnectorSession session, Path path, long start, long length, Properties schema,
        List<HiveColumnHandle> columns, List<HivePartitionKey> partitionKeys,
        TupleDomain<HiveColumnHandle> effectivePredicate, DateTimeZone hiveStorageTimeZone,
        TypeManager typeManager)
{
    if (!isDeserializerClass(schema, OrcSerde.class)) {
        return Optional.empty();
    }

    StructObjectInspector rowInspector = getTableObjectInspector(schema);
    if (rowInspector.getAllStructFieldRefs().stream()
            .anyMatch(field -> hasDateType(field.getFieldObjectInspector()))) {
        throw new IllegalArgumentException("DWRF does not support DATE type");
    }

    ReaderWriterProfiler.setProfilerOptions(configuration);

    RecordReader recordReader;
    try {
        FileSystem fileSystem = path.getFileSystem(configuration);
        Reader reader = OrcFile.createReader(fileSystem, path, new JobConf(configuration));
        boolean[] include = findIncludedColumns(reader.getTypes(), columns);
        recordReader = reader.rows(start, length, include);
    }
    catch (Exception e) {
        throw Throwables.propagate(e);
    }

    return Optional.<HiveRecordCursor>of(new DwrfHiveRecordCursor(recordReader, length, schema, partitionKeys,
            columns, hiveStorageTimeZone, typeManager));
}
From source file: com.facebook.presto.hive.orc.OrcRecordCursorProvider.java
License: Apache License

@Override
public Optional<HiveRecordCursor> createHiveRecordCursor(String clientId, Configuration configuration,
        ConnectorSession session, Path path, long start, long length, Properties schema,
        List<HiveColumnHandle> columns, List<HivePartitionKey> partitionKeys,
        TupleDomain<HiveColumnHandle> effectivePredicate, DateTimeZone hiveStorageTimeZone,
        TypeManager typeManager)
{
    if (!enabled) {
        return Optional.empty();
    }

    if (!isDeserializerClass(schema, OrcSerde.class)) {
        return Optional.empty();
    }

    RecordReader recordReader;
    try {
        FileSystem fileSystem = path.getFileSystem(configuration);
        Reader reader = OrcFile.createReader(fileSystem, path);
        boolean[] include = findIncludedColumns(reader.getTypes(), columns);
        recordReader = reader.rows(start, length, include);
    }
    catch (Exception e) {
        throw Throwables.propagate(e);
    }

    return Optional.<HiveRecordCursor>of(new OrcHiveRecordCursor(recordReader, length, schema, partitionKeys,
            columns, hiveStorageTimeZone, typeManager));
}
From source file: com.facebook.presto.hive.OrcRecordCursorProvider.java
License: Apache License

@Override
public Optional<HiveRecordCursor> createHiveRecordCursor(String clientId, Configuration configuration,
        ConnectorSession session, Path path, long start, long length, Properties schema,
        List<HiveColumnHandle> columns, List<HivePartitionKey> partitionKeys,
        TupleDomain<HiveColumnHandle> tupleDomain, DateTimeZone hiveStorageTimeZone, TypeManager typeManager)
{
    @SuppressWarnings("deprecation")
    Deserializer deserializer = getDeserializer(schema);
    if (!(deserializer instanceof OrcSerde)) {
        return Optional.absent();
    }

    RecordReader recordReader;
    try {
        FileSystem fileSystem = path.getFileSystem(configuration);
        Reader reader = OrcFile.createReader(fileSystem, path);
        boolean[] include = findIncludedColumns(reader.getTypes(), columns);
        recordReader = reader.rows(start, length, include);
    }
    catch (Exception e) {
        throw Throwables.propagate(e);
    }

    return Optional.<HiveRecordCursor>of(new OrcHiveRecordCursor(recordReader, length, schema, partitionKeys,
            columns, hiveStorageTimeZone, DateTimeZone.forID(session.getTimeZoneKey().getId()), typeManager));
}
From source file: com.facebook.presto.hive.parquet.ParquetTester.java
License: Apache License

private static void assertFileContents(JobConf jobConf, ObjectInspector objectInspector, TempFile tempFile,
        Iterable<?> expectedValues, Type type)
        throws IOException, InterruptedException
{
    Path path = new Path(tempFile.getFile().toURI());
    FileSystem fileSystem = path.getFileSystem(jobConf);
    ParquetMetadata parquetMetadata = ParquetMetadataReader.readFooter(fileSystem, path);
    FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
    MessageType fileSchema = fileMetaData.getSchema();

    long size = fileSystem.getFileStatus(path).getLen();
    FSDataInputStream inputStream = fileSystem.open(path);
    ParquetDataSource dataSource = new HdfsParquetDataSource(path, size, inputStream);

    TypeManager typeManager = new TypeRegistry();
    ParquetReader parquetReader = new ParquetReader(fileSchema, fileSchema, parquetMetadata.getBlocks(),
            dataSource, typeManager);
    assertEquals(parquetReader.getPosition(), 0);

    int rowsProcessed = 0;
    Iterator<?> iterator = expectedValues.iterator();
    for (int batchSize = parquetReader.nextBatch(); batchSize >= 0; batchSize = parquetReader.nextBatch()) {
        ColumnDescriptor columnDescriptor = fileSchema.getColumns().get(0);
        Block block = parquetReader.readPrimitive(columnDescriptor, type);
        for (int i = 0; i < batchSize; i++) {
            assertTrue(iterator.hasNext());
            Object expected = iterator.next();
            Object actual = decodeObject(type, block, i);
            assertEquals(actual, expected);
        }
        rowsProcessed += batchSize;
        assertEquals(parquetReader.getPosition(), rowsProcessed);
    }
    assertFalse(iterator.hasNext());
    assertEquals(parquetReader.getPosition(), rowsProcessed);
    parquetReader.close();
}
From source file: com.facebook.presto.hive.parquet.predicate.ParquetPredicateUtils.java
License: Apache License

private static Map<Integer, ParquetDictionaryDescriptor> getDictionariesByColumnOrdinal(
        BlockMetaData blockMetadata, Path path, Configuration configuration, MessageType requestedSchema,
        TupleDomain<HiveColumnHandle> effectivePredicate)
{
    // todo should we call release?
    ParquetCodecFactory codecFactory = new ParquetCodecFactory(configuration);
    ImmutableMap.Builder<Integer, ParquetDictionaryDescriptor> dictionaries = ImmutableMap.builder();
    for (int ordinal = 0; ordinal < blockMetadata.getColumns().size(); ordinal++) {
        ColumnChunkMetaData columnChunkMetaData = blockMetadata.getColumns().get(ordinal);
        for (int i = 0; i < requestedSchema.getColumns().size(); i++) {
            ColumnDescriptor columnDescriptor = requestedSchema.getColumns().get(i);
            if (isColumnPredicate(columnDescriptor, effectivePredicate)
                    && columnChunkMetaData.getPath().equals(ColumnPath.get(columnDescriptor.getPath()))
                    && isOnlyDictionaryEncodingPages(columnChunkMetaData.getEncodings())) {
                DictionaryPage dictionaryPage;
                try (FSDataInputStream inputStream = path.getFileSystem(configuration).open(path)) {
                    inputStream.seek(columnChunkMetaData.getStartingPos());
                    int totalSize = Ints.checkedCast(columnChunkMetaData.getTotalSize());
                    byte[] buffer = new byte[totalSize];
                    inputStream.readFully(buffer);
                    dictionaryPage = readDictionaryPage(buffer, codecFactory, columnChunkMetaData.getCodec());
                    dictionaries.put(ordinal, new ParquetDictionaryDescriptor(columnDescriptor, dictionaryPage));
                }
                catch (IOException ignored) {
                }
                break;
            }
        }
    }
    return dictionaries.build();
}
From source file: com.facebook.presto.hive.parquet.reader.ParquetFileReader.java
License: Apache License

public ParquetFileReader(Configuration configuration, Path file, List<BlockMetaData> blocks,
        List<ColumnDescriptor> columns)
        throws IOException
{
    this.file = file;
    this.inputStream = file.getFileSystem(configuration).open(file);
    this.blocks = blocks;
    if (!blocks.isEmpty()) {
        for (ColumnDescriptor columnDescriptor : columns) {
            for (ColumnChunkMetaData metadata : blocks.get(0).getColumns()) {
                if (metadata.getPath().equals(ColumnPath.get(columnDescriptor.getPath()))) {
                    columnMetadata.put(columnDescriptor, metadata);
                }
            }
        }
    }
    this.codecFactory = new ParquetCodecFactory(configuration);
}
From source file: com.facebook.presto.hive.ParquetRecordWriterUtil.java
License: Apache License

public static RecordWriter createParquetWriter(Path target, JobConf conf, Properties properties,
        boolean compress, ConnectorSession session)
        throws IOException, ReflectiveOperationException
{
    conf.setLong(ParquetOutputFormat.BLOCK_SIZE, getParquetWriterBlockSize(session).toBytes());
    conf.setLong(ParquetOutputFormat.PAGE_SIZE, getParquetWriterPageSize(session).toBytes());

    RecordWriter recordWriter = new MapredParquetOutputFormat().getHiveRecordWriter(conf, target, Text.class,
            compress, properties, Reporter.NULL);

    Object realWriter = REAL_WRITER_FIELD.get(recordWriter);
    Object internalWriter = INTERNAL_WRITER_FIELD.get(realWriter);
    ParquetFileWriter fileWriter = (ParquetFileWriter) FILE_WRITER_FIELD.get(internalWriter);

    return new ExtendedRecordWriter()
    {
        private long length;

        @Override
        public long getWrittenBytes()
        {
            return length;
        }

        @Override
        public void write(Writable value)
                throws IOException
        {
            recordWriter.write(value);
            length = fileWriter.getPos();
        }

        @Override
        public void close(boolean abort)
                throws IOException
        {
            recordWriter.close(abort);
            if (!abort) {
                length = target.getFileSystem(conf).getFileStatus(target).getLen();
            }
        }
    };
}
From source file: com.facebook.presto.hive.TestOrcPageSourceMemoryTracking.java
License: Apache License

public static FileSplit createTestFile(String filePath, HiveOutputFormat<?, ?> outputFormat,
        @SuppressWarnings("deprecation") SerDe serDe, String compressionCodec, List<TestColumn> testColumns,
        int numRows)
        throws Exception
{
    // filter out partition keys, which are not written to the file
    testColumns = ImmutableList.copyOf(filter(testColumns, not(TestColumn::isPartitionKey)));

    JobConf jobConf = new JobConf();
    ReaderWriterProfiler.setProfilerOptions(jobConf);

    Properties tableProperties = new Properties();
    tableProperties.setProperty("columns", Joiner.on(',').join(transform(testColumns, TestColumn::getName)));
    tableProperties.setProperty("columns.types", Joiner.on(',').join(transform(testColumns, TestColumn::getType)));
    serDe.initialize(CONFIGURATION, tableProperties);

    if (compressionCodec != null) {
        CompressionCodec codec = new CompressionCodecFactory(CONFIGURATION).getCodecByName(compressionCodec);
        jobConf.set(COMPRESS_CODEC, codec.getClass().getName());
        jobConf.set(COMPRESS_TYPE, SequenceFile.CompressionType.BLOCK.toString());
    }

    RecordWriter recordWriter = createRecordWriter(new Path(filePath), CONFIGURATION);

    try {
        SettableStructObjectInspector objectInspector = getStandardStructObjectInspector(
                ImmutableList.copyOf(transform(testColumns, TestColumn::getName)),
                ImmutableList.copyOf(transform(testColumns, TestColumn::getObjectInspector)));

        Object row = objectInspector.create();

        List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());

        for (int rowNumber = 0; rowNumber < numRows; rowNumber++) {
            for (int i = 0; i < testColumns.size(); i++) {
                Object writeValue = testColumns.get(i).getWriteValue();
                if (writeValue instanceof Slice) {
                    writeValue = ((Slice) writeValue).getBytes();
                }
                objectInspector.setStructFieldData(row, fields.get(i), writeValue);
            }

            Writable record = serDe.serialize(row, objectInspector);
            recordWriter.write(record);

            if (rowNumber % STRIPE_ROWS == STRIPE_ROWS - 1) {
                flushStripe(recordWriter);
            }
        }
    }
    finally {
        recordWriter.close(false);
    }

    Path path = new Path(filePath);
    path.getFileSystem(CONFIGURATION).setVerifyChecksum(true);
    File file = new File(filePath);
    return new FileSplit(path, 0, file.length(), new String[0]);
}
From source file: com.facebook.presto.raptor.storage.OrcRowSink.java
License: Apache License

private static RecordWriter createRecordWriter(Path target, JobConf conf)
{
    try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(FileSystem.class.getClassLoader())) {
        FileSystem fileSystem = target.getFileSystem(conf);
        fileSystem.setWriteChecksum(false);
        OrcFile.WriterOptions options = OrcFile.writerOptions(conf).fileSystem(fileSystem).compress(SNAPPY);
        return WRITER_CONSTRUCTOR.newInstance(target, options);
    }
    catch (ReflectiveOperationException | IOException e) {
        throw new PrestoException(RAPTOR_ERROR, "Failed to create writer", e);
    }
}