Example usage for org.apache.hadoop.fs Path getFileSystem

Introduction

On this page you can find example usage of org.apache.hadoop.fs.Path#getFileSystem, drawn from real source files.

Prototype

public FileSystem getFileSystem(Configuration conf) throws IOException 

Document

Return the FileSystem that owns this Path.
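
Before the longer examples below, here is a minimal, self-contained sketch of the typical call pattern (the cluster address and file path are hypothetical): build a Configuration, ask the Path for the FileSystem that owns it, then use that FileSystem to inspect the file.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PathGetFileSystemExample {
    public static void main(String[] args) throws IOException {
        // Configuration picks up core-site.xml / hdfs-site.xml from the classpath;
        // the Path's scheme (hdfs://, file://, ...) decides which FileSystem is returned.
        Configuration conf = new Configuration();
        Path path = new Path("hdfs://namenode:8020/tmp/example.txt"); // hypothetical path

        // Resolve the FileSystem that owns this Path
        FileSystem fs = path.getFileSystem(conf);

        if (fs.exists(path)) {
            FileStatus status = fs.getFileStatus(path);
            System.out.println(path + " is " + status.getLen() + " bytes");
        }
    }
}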

Usage

From source file: com.facebook.presto.hive.HiveSplitSourceProvider.java

License: Apache License

private void loadPartitionSplits(final HiveSplitSource hiveSplitSource, SuspendingExecutor suspendingExecutor,
        final ConnectorSession session) {
    final Semaphore semaphore = new Semaphore(maxPartitionBatchSize);
    try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) {
        ImmutableList.Builder<ListenableFuture<Void>> futureBuilder = ImmutableList.builder();

        Iterator<String> nameIterator = partitionNames.iterator();
        for (Partition partition : partitions) {
            checkState(nameIterator.hasNext(), "different number of partitions and partition names!");
            final String partitionName = nameIterator.next();
            final Properties schema = getPartitionSchema(table, partition);
            final List<HivePartitionKey> partitionKeys = getPartitionKeys(table, partition);

            Path path = new Path(getPartitionLocation(table, partition));
            final Configuration configuration = hdfsEnvironment.getConfiguration(path);
            final InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, false);

            FileSystem fs = path.getFileSystem(configuration);

            if (inputFormat instanceof SymlinkTextInputFormat) {
                JobConf jobConf = new JobConf(configuration);
                FileInputFormat.setInputPaths(jobConf, path);
                InputSplit[] splits = inputFormat.getSplits(jobConf, 0);
                for (InputSplit rawSplit : splits) {
                    FileSplit split = ((SymlinkTextInputFormat.SymlinkTextInputSplit) rawSplit)
                            .getTargetSplit();

                    // get the filesystem for the target path -- it may be a different hdfs instance
                    FileSystem targetFilesystem = split.getPath().getFileSystem(configuration);
                    FileStatus fileStatus = targetFilesystem.getFileStatus(split.getPath());
                    hiveSplitSource.addToQueue(createHiveSplits(partitionName, fileStatus,
                            targetFilesystem.getFileBlockLocations(fileStatus, split.getStart(),
                                    split.getLength()),
                            split.getStart(), split.getLength(), schema, partitionKeys, false, session));
                }
                continue;
            }

            // TODO: this is currently serial across all partitions and should be done in suspendingExecutor
            if (bucket.isPresent()) {
                Optional<FileStatus> bucketFile = getBucketFile(bucket.get(), fs, path);
                if (bucketFile.isPresent()) {
                    FileStatus file = bucketFile.get();
                    BlockLocation[] blockLocations = fs.getFileBlockLocations(file, 0, file.getLen());
                    boolean splittable = isSplittable(inputFormat, fs, file.getPath());

                    hiveSplitSource.addToQueue(createHiveSplits(partitionName, file, blockLocations, 0,
                            file.getLen(), schema, partitionKeys, splittable, session));
                    continue;
                }
            }

            // Acquire semaphore so that we only have a fixed number of outstanding partitions being processed asynchronously
            // NOTE: there must not be any calls that throw in the space between acquiring the semaphore and setting the Future
            // callback to release it. Otherwise, we will need a try-finally block around this section.
            try {
                semaphore.acquire();
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                return;
            }

            ListenableFuture<Void> partitionFuture = createAsyncWalker(fs, suspendingExecutor).beginWalk(path,
                    new FileStatusCallback() {
                        @Override
                        public void process(FileStatus file, BlockLocation[] blockLocations) {
                            try {
                                boolean splittable = isSplittable(inputFormat,
                                        file.getPath().getFileSystem(configuration), file.getPath());

                                hiveSplitSource.addToQueue(createHiveSplits(partitionName, file, blockLocations,
                                        0, file.getLen(), schema, partitionKeys, splittable, session));
                            } catch (IOException e) {
                                hiveSplitSource.fail(e);
                            }
                        }
                    });

            // release the semaphore when the partition finishes
            Futures.addCallback(partitionFuture, new FutureCallback<Void>() {
                @Override
                public void onSuccess(Void result) {
                    semaphore.release();
                }

                @Override
                public void onFailure(Throwable t) {
                    semaphore.release();
                }
            });

            futureBuilder.add(partitionFuture);
        }

        // when all partitions finish, mark the queue as finished
        Futures.addCallback(Futures.allAsList(futureBuilder.build()), new FutureCallback<List<Void>>() {
            @Override
            public void onSuccess(List<Void> result) {
                hiveSplitSource.finished();
            }

            @Override
            public void onFailure(Throwable t) {
                hiveSplitSource.fail(t);
            }
        });
    } catch (Throwable e) {
        hiveSplitSource.fail(e);
        Throwables.propagateIfInstanceOf(e, Error.class);
    }
}

From source file: com.facebook.presto.hive.orc.DwrfRecordCursorProvider.java

License: Apache License

@Override
public Optional<HiveRecordCursor> createHiveRecordCursor(String clientId, Configuration configuration,
        ConnectorSession session, Path path, long start, long length, Properties schema,
        List<HiveColumnHandle> columns, List<HivePartitionKey> partitionKeys,
        TupleDomain<HiveColumnHandle> effectivePredicate, DateTimeZone hiveStorageTimeZone,
        TypeManager typeManager) {
    if (!isDeserializerClass(schema, OrcSerde.class)) {
        return Optional.empty();
    }

    StructObjectInspector rowInspector = getTableObjectInspector(schema);
    if (rowInspector.getAllStructFieldRefs().stream()
            .anyMatch(field -> hasDateType(field.getFieldObjectInspector()))) {
        throw new IllegalArgumentException("DWRF does not support DATE type");
    }

    ReaderWriterProfiler.setProfilerOptions(configuration);

    RecordReader recordReader;
    try {
        FileSystem fileSystem = path.getFileSystem(configuration);
        Reader reader = OrcFile.createReader(fileSystem, path, new JobConf(configuration));
        boolean[] include = findIncludedColumns(reader.getTypes(), columns);
        recordReader = reader.rows(start, length, include);
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }

    return Optional.<HiveRecordCursor>of(new DwrfHiveRecordCursor(recordReader, length, schema, partitionKeys,
            columns, hiveStorageTimeZone, typeManager));
}

From source file: com.facebook.presto.hive.orc.OrcRecordCursorProvider.java

License: Apache License

@Override
public Optional<HiveRecordCursor> createHiveRecordCursor(String clientId, Configuration configuration,
        ConnectorSession session, Path path, long start, long length, Properties schema,
        List<HiveColumnHandle> columns, List<HivePartitionKey> partitionKeys,
        TupleDomain<HiveColumnHandle> effectivePredicate, DateTimeZone hiveStorageTimeZone,
        TypeManager typeManager) {
    if (!enabled) {
        return Optional.empty();
    }

    if (!isDeserializerClass(schema, OrcSerde.class)) {
        return Optional.empty();
    }

    RecordReader recordReader;
    try {
        FileSystem fileSystem = path.getFileSystem(configuration);
        Reader reader = OrcFile.createReader(fileSystem, path);
        boolean[] include = findIncludedColumns(reader.getTypes(), columns);
        recordReader = reader.rows(start, length, include);
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }

    return Optional.<HiveRecordCursor>of(new OrcHiveRecordCursor(recordReader, length, schema, partitionKeys,
            columns, hiveStorageTimeZone, typeManager));
}

From source file: com.facebook.presto.hive.OrcRecordCursorProvider.java

License: Apache License

@Override
public Optional<HiveRecordCursor> createHiveRecordCursor(String clientId, Configuration configuration,
        ConnectorSession session, Path path, long start, long length, Properties schema,
        List<HiveColumnHandle> columns, List<HivePartitionKey> partitionKeys,
        TupleDomain<HiveColumnHandle> tupleDomain, DateTimeZone hiveStorageTimeZone, TypeManager typeManager) {
    @SuppressWarnings("deprecation")
    Deserializer deserializer = getDeserializer(schema);
    if (!(deserializer instanceof OrcSerde)) {
        return Optional.absent();
    }

    RecordReader recordReader;
    try {
        FileSystem fileSystem = path.getFileSystem(configuration);
        Reader reader = OrcFile.createReader(fileSystem, path);
        boolean[] include = findIncludedColumns(reader.getTypes(), columns);
        recordReader = reader.rows(start, length, include);
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }

    return Optional.<HiveRecordCursor>of(new OrcHiveRecordCursor(recordReader, length, schema, partitionKeys,
            columns, hiveStorageTimeZone, DateTimeZone.forID(session.getTimeZoneKey().getId()), typeManager));
}

From source file: com.facebook.presto.hive.parquet.ParquetTester.java

License: Apache License

private static void assertFileContents(JobConf jobConf, ObjectInspector objectInspector, TempFile tempFile,
        Iterable<?> expectedValues, Type type) throws IOException, InterruptedException {
    Path path = new Path(tempFile.getFile().toURI());
    FileSystem fileSystem = path.getFileSystem(jobConf);
    ParquetMetadata parquetMetadata = ParquetMetadataReader.readFooter(fileSystem, path);
    FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
    MessageType fileSchema = fileMetaData.getSchema();

    long size = fileSystem.getFileStatus(path).getLen();
    FSDataInputStream inputStream = fileSystem.open(path);
    ParquetDataSource dataSource = new HdfsParquetDataSource(path, size, inputStream);

    TypeManager typeManager = new TypeRegistry();
    ParquetReader parquetReader = new ParquetReader(fileSchema, fileSchema, parquetMetadata.getBlocks(),
            dataSource, typeManager);
    assertEquals(parquetReader.getPosition(), 0);

    int rowsProcessed = 0;
    Iterator<?> iterator = expectedValues.iterator();
    for (int batchSize = parquetReader.nextBatch(); batchSize >= 0; batchSize = parquetReader.nextBatch()) {
        ColumnDescriptor columnDescriptor = fileSchema.getColumns().get(0);
        Block block = parquetReader.readPrimitive(columnDescriptor, type);
        for (int i = 0; i < batchSize; i++) {
            assertTrue(iterator.hasNext());
            Object expected = iterator.next();
            Object actual = decodeObject(type, block, i);
            assertEquals(actual, expected);
        }
        rowsProcessed += batchSize;
        assertEquals(parquetReader.getPosition(), rowsProcessed);
    }
    assertFalse(iterator.hasNext());

    assertEquals(parquetReader.getPosition(), rowsProcessed);
    parquetReader.close();
}

From source file: com.facebook.presto.hive.parquet.predicate.ParquetPredicateUtils.java

License: Apache License

private static Map<Integer, ParquetDictionaryDescriptor> getDictionariesByColumnOrdinal(
        BlockMetaData blockMetadata, Path path, Configuration configuration, MessageType requestedSchema,
        TupleDomain<HiveColumnHandle> effectivePredicate) {
    // todo should we call release?
    ParquetCodecFactory codecFactory = new ParquetCodecFactory(configuration);

    ImmutableMap.Builder<Integer, ParquetDictionaryDescriptor> dictionaries = ImmutableMap.builder();
    for (int ordinal = 0; ordinal < blockMetadata.getColumns().size(); ordinal++) {
        ColumnChunkMetaData columnChunkMetaData = blockMetadata.getColumns().get(ordinal);

        for (int i = 0; i < requestedSchema.getColumns().size(); i++) {
            ColumnDescriptor columnDescriptor = requestedSchema.getColumns().get(i);
            if (isColumnPredicate(columnDescriptor, effectivePredicate)
                    && columnChunkMetaData.getPath().equals(ColumnPath.get(columnDescriptor.getPath()))
                    && isOnlyDictionaryEncodingPages(columnChunkMetaData.getEncodings())) {
                DictionaryPage dictionaryPage;
                try (FSDataInputStream inputStream = path.getFileSystem(configuration).open(path)) {
                    inputStream.seek(columnChunkMetaData.getStartingPos());

                    int totalSize = Ints.checkedCast(columnChunkMetaData.getTotalSize());
                    byte[] buffer = new byte[totalSize];
                    inputStream.readFully(buffer);

                    dictionaryPage = readDictionaryPage(buffer, codecFactory, columnChunkMetaData.getCodec());
                    dictionaries.put(ordinal,
                            new ParquetDictionaryDescriptor(columnDescriptor, dictionaryPage));
                } catch (IOException ignored) {
                }
                break;
            }
        }
    }
    return dictionaries.build();
}

From source file: com.facebook.presto.hive.parquet.reader.ParquetFileReader.java

License: Apache License

public ParquetFileReader(Configuration configuration, Path file, List<BlockMetaData> blocks,
        List<ColumnDescriptor> columns) throws IOException {
    this.file = file;
    this.inputStream = file.getFileSystem(configuration).open(file);
    this.blocks = blocks;
    if (!blocks.isEmpty()) {
        for (ColumnDescriptor columnDescriptor : columns) {
            for (ColumnChunkMetaData metadata : blocks.get(0).getColumns()) {
                if (metadata.getPath().equals(ColumnPath.get(columnDescriptor.getPath()))) {
                    columnMetadata.put(columnDescriptor, metadata);
                }
            }
        }
    }
    this.codecFactory = new ParquetCodecFactory(configuration);
}

From source file: com.facebook.presto.hive.ParquetRecordWriterUtil.java

License: Apache License

public static RecordWriter createParquetWriter(Path target, JobConf conf, Properties properties,
        boolean compress, ConnectorSession session) throws IOException, ReflectiveOperationException {
    conf.setLong(ParquetOutputFormat.BLOCK_SIZE, getParquetWriterBlockSize(session).toBytes());
    conf.setLong(ParquetOutputFormat.PAGE_SIZE, getParquetWriterPageSize(session).toBytes());

    RecordWriter recordWriter = new MapredParquetOutputFormat().getHiveRecordWriter(conf, target, Text.class,
            compress, properties, Reporter.NULL);

    Object realWriter = REAL_WRITER_FIELD.get(recordWriter);
    Object internalWriter = INTERNAL_WRITER_FIELD.get(realWriter);
    ParquetFileWriter fileWriter = (ParquetFileWriter) FILE_WRITER_FIELD.get(internalWriter);

    return new ExtendedRecordWriter() {
        private long length;

        @Override
        public long getWrittenBytes() {
            return length;
        }

        @Override
        public void write(Writable value) throws IOException {
            recordWriter.write(value);
            length = fileWriter.getPos();
        }

        @Override
        public void close(boolean abort) throws IOException {
            recordWriter.close(abort);
            if (!abort) {
                length = target.getFileSystem(conf).getFileStatus(target).getLen();
            }
        }
    };
}

From source file: com.facebook.presto.hive.TestOrcPageSourceMemoryTracking.java

License: Apache License

public static FileSplit createTestFile(String filePath, HiveOutputFormat<?, ?> outputFormat,
        @SuppressWarnings("deprecation") SerDe serDe, String compressionCodec, List<TestColumn> testColumns,
        int numRows) throws Exception {
    // filter out partition keys, which are not written to the file
    testColumns = ImmutableList.copyOf(filter(testColumns, not(TestColumn::isPartitionKey)));

    JobConf jobConf = new JobConf();
    ReaderWriterProfiler.setProfilerOptions(jobConf);

    Properties tableProperties = new Properties();
    tableProperties.setProperty("columns", Joiner.on(',').join(transform(testColumns, TestColumn::getName)));
    tableProperties.setProperty("columns.types",
            Joiner.on(',').join(transform(testColumns, TestColumn::getType)));
    serDe.initialize(CONFIGURATION, tableProperties);

    if (compressionCodec != null) {
        CompressionCodec codec = new CompressionCodecFactory(CONFIGURATION).getCodecByName(compressionCodec);
        jobConf.set(COMPRESS_CODEC, codec.getClass().getName());
        jobConf.set(COMPRESS_TYPE, SequenceFile.CompressionType.BLOCK.toString());
    }

    RecordWriter recordWriter = createRecordWriter(new Path(filePath), CONFIGURATION);

    try {
        SettableStructObjectInspector objectInspector = getStandardStructObjectInspector(
                ImmutableList.copyOf(transform(testColumns, TestColumn::getName)),
                ImmutableList.copyOf(transform(testColumns, TestColumn::getObjectInspector)));

        Object row = objectInspector.create();

        List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());

        for (int rowNumber = 0; rowNumber < numRows; rowNumber++) {
            for (int i = 0; i < testColumns.size(); i++) {
                Object writeValue = testColumns.get(i).getWriteValue();
                if (writeValue instanceof Slice) {
                    writeValue = ((Slice) writeValue).getBytes();
                }
                objectInspector.setStructFieldData(row, fields.get(i), writeValue);
            }

            Writable record = serDe.serialize(row, objectInspector);
            recordWriter.write(record);
            if (rowNumber % STRIPE_ROWS == STRIPE_ROWS - 1) {
                flushStripe(recordWriter);
            }
        }
    } finally {
        recordWriter.close(false);
    }

    Path path = new Path(filePath);
    path.getFileSystem(CONFIGURATION).setVerifyChecksum(true);
    File file = new File(filePath);
    return new FileSplit(path, 0, file.length(), new String[0]);
}

From source file: com.facebook.presto.raptor.storage.OrcRowSink.java

License: Apache License

private static RecordWriter createRecordWriter(Path target, JobConf conf) {
    try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(FileSystem.class.getClassLoader())) {
        FileSystem fileSystem = target.getFileSystem(conf);
        fileSystem.setWriteChecksum(false);
        OrcFile.WriterOptions options = OrcFile.writerOptions(conf).fileSystem(fileSystem).compress(SNAPPY);
        return WRITER_CONSTRUCTOR.newInstance(target, options);
    } catch (ReflectiveOperationException | IOException e) {
        throw new PrestoException(RAPTOR_ERROR, "Failed to create writer", e);
    }
}