Example usage for org.apache.hadoop.fs Path getFileSystem

List of usage examples for org.apache.hadoop.fs Path getFileSystem

Introduction

On this page you can find usage examples for org.apache.hadoop.fs Path getFileSystem.

Prototype

public FileSystem getFileSystem(Configuration conf) throws IOException 

Document

Return the FileSystem that owns this Path.
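
A minimal, self-contained sketch of the call (the HDFS URI below is hypothetical; a Path without a scheme resolves against fs.defaultFS from the supplied Configuration):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileSystemExample {
    public static void main(String[] args) throws IOException {
        // The Configuration supplies fs.defaultFS and any scheme-specific settings.
        Configuration conf = new Configuration();

        // A fully qualified path resolves to the FileSystem for its scheme;
        // this URI is only an illustration.
        Path path = new Path("hdfs://namenode:8020/tmp/example.txt");

        // Return the FileSystem that owns this Path.
        FileSystem fs = path.getFileSystem(conf);

        System.out.println("FileSystem URI: " + fs.getUri());
        System.out.println("Path exists: " + fs.exists(path));
    }
}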

Usage

From source file:com.facebook.hiveio.output.HiveApiOutputCommitter.java

License:Apache License

/**
 * Write success file to Hadoop if required
 *
 * @param conf Configuration
 * @throws IOException I/O errors
 */
private static void writeSuccessFile(Configuration conf) throws IOException {
    if (!HadoopUtils.needSuccessMarker(conf)) {
        return;
    }
    Path outputPath = HadoopUtils.getOutputPath(conf);
    FileSystem fs = outputPath.getFileSystem(conf);
    if (fs.exists(outputPath)) {
        Path successPath = new Path(outputPath, "_SUCCESS");
        if (!fs.exists(successPath)) {
            LOG.info("Writing success file to {}", successPath);
            fs.create(successPath).close();
        }
    }
}

From source file:com.facebook.hiveio.output.HiveApiOutputCommitter.java

License:Apache License

/**
 * Table has no partitions, just copy data
 *
 * @param conf Configuration
 * @param outputInfo OutputInfo
 * @throws IOException I/O errors
 */
private void noPartitionsCopyData(Configuration conf, OutputInfo outputInfo) throws IOException {
    Preconditions.checkArgument(!outputInfo.hasPartitionInfo());
    Path tablePath = new Path(outputInfo.getTableRoot());
    Path writePath = new Path(outputInfo.getPartitionPath());
    FileSystem tableFs = tablePath.getFileSystem(conf);
    FileSystem writePathFs = writePath.getFileSystem(conf);
    if (!tableFs.getUri().equals(writePathFs.getUri())) {
        LOG.error("Table's root path fs {} is not on same as its partition path fs {}", tableFs.getUri(),
                writePathFs.getUri());
        throw new IllegalStateException("Table's root path fs " + tableFs.getUri()
                + " is not on same as its partition path fs " + writePathFs.getUri());
    }
    LOG.info("No partitions, copying data from {} to {}", writePath, tablePath);
    FileSystems.move(tableFs, writePath, writePath, tablePath);
    tableFs.delete(writePath, true);
}

From source file:com.facebook.hiveio.output.HiveApiOutputFormat.java

License:Apache License

/**
 * Check if the given table is empty, that is has no files
 * @param conf Configuration to use
 * @param description HiveOutputDescription
 * @param oti OutputInfo
 * @throws IOException Hadoop Filesystem issues
 */
private void checkTableIsEmpty(Configuration conf, HiveOutputDescription description, OutputInfo oti)
        throws IOException {
    Path tablePath = new Path(oti.getTableRoot());
    FileSystem fs = tablePath.getFileSystem(conf);

    if (fs.exists(tablePath) && FileSystems.dirHasNonHiddenFiles(fs, tablePath)) {
        throw new IOException("Table " + description.getTableDesc().getTableName() + " has existing data");
    }
}

From source file:com.facebook.presto.hive.AbstractTestHiveClientS3.java

License:Apache License

@Test
public void testGetFileStatus() throws Exception {
    Path basePath = new Path("s3://presto-test-hive/");
    Path tablePath = new Path(basePath, "presto_test_s3");
    Path filePath = new Path(tablePath, "test1.csv");
    FileSystem fs = basePath.getFileSystem(hdfsEnvironment.getConfiguration(basePath));

    assertTrue(isDirectory(fs.getFileStatus(basePath)));
    assertTrue(isDirectory(fs.getFileStatus(tablePath)));
    assertFalse(isDirectory(fs.getFileStatus(filePath)));
    assertFalse(fs.exists(new Path(basePath, "foo")));
}

From source file:com.facebook.presto.hive.AbstractTestHiveFileFormats.java

License:Apache License

public FileSplit createTestFile(String filePath, HiveOutputFormat<?, ?> outputFormat,
        @SuppressWarnings("deprecation") SerDe serDe, String compressionCodec) throws Exception {
    JobConf jobConf = new JobConf();
    Properties tableProperties = new Properties();
    tableProperties.setProperty("columns", COLUMN_NAMES_STRING);
    tableProperties.setProperty("columns.types", COLUMN_TYPES);
    serDe.initialize(new Configuration(), tableProperties);

    if (compressionCodec != null) {
        CompressionCodec codec = new CompressionCodecFactory(new Configuration())
                .getCodecByName(compressionCodec);
        jobConf.set(COMPRESS_CODEC, codec.getClass().getName());
        jobConf.set(COMPRESS_TYPE, SequenceFile.CompressionType.BLOCK.toString());
    }

    RecordWriter recordWriter = outputFormat.getHiveRecordWriter(jobConf, new Path(filePath), Text.class,
            compressionCodec != null, tableProperties, new Progressable() {
                @Override
                public void progress() {
                }
            });

    try {
        serDe.initialize(new Configuration(), tableProperties);

        SettableStructObjectInspector objectInspector = getStandardStructObjectInspector(COLUMN_NAMES,
                FIELD_INSPECTORS);
        Object row = objectInspector.create();

        List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());

        for (int rowNumber = 0; rowNumber < NUM_ROWS; rowNumber++) {
            for (int i = 0; i < TEST_VALUES.size(); i++) {
                Object key = TEST_VALUES.get(i).getKey();
                if (key instanceof Slice) {
                    key = ((Slice) key).getBytes();
                }
                objectInspector.setStructFieldData(row, fields.get(i), key);
            }

            Writable record = serDe.serialize(row, objectInspector);
            recordWriter.write(record);
        }
    } finally {
        recordWriter.close(false);
    }

    Path path = new Path(filePath);
    path.getFileSystem(new Configuration()).setVerifyChecksum(true);
    File file = new File(filePath);
    return new FileSplit(path, 0, file.length(), new String[0]);
}

From source file:com.facebook.presto.hive.BenchmarkHiveFileFormats.java

License:Apache License

private static FileSplit createFileSplit(File file) {
    try {
        Path lineitemPath = new Path(file.toURI());
        lineitemPath.getFileSystem(new Configuration()).setVerifyChecksum(false);
        return new FileSplit(lineitemPath, 0, file.length(), new String[0]);
    } catch (IOException e) {
        throw Throwables.propagate(e);
    }
}

From source file:com.facebook.presto.hive.DwrfRecordCursorProvider.java

License:Apache License

@Override
public Optional<HiveRecordCursor> createHiveRecordCursor(String clientId, Configuration configuration,
        ConnectorSession session, Path path, long start, long length, Properties schema,
        List<HiveColumnHandle> columns, List<HivePartitionKey> partitionKeys,
        TupleDomain<HiveColumnHandle> tupleDomain, DateTimeZone hiveStorageTimeZone, TypeManager typeManager) {
    @SuppressWarnings("deprecation")
    Deserializer deserializer = getDeserializer(schema);
    if (!(deserializer instanceof OrcSerde)) {
        return Optional.absent();
    }

    StructObjectInspector rowInspector = getTableObjectInspector(schema);
    if (!all(rowInspector.getAllStructFieldRefs(), isSupportedDwrfType())) {
        throw new IllegalArgumentException("DWRF does not support DATE type");
    }

    ReaderWriterProfiler.setProfilerOptions(configuration);

    RecordReader recordReader;
    try {
        FileSystem fileSystem = path.getFileSystem(configuration);
        Reader reader = OrcFile.createReader(fileSystem, path, new JobConf(configuration));
        boolean[] include = findIncludedColumns(reader.getTypes(), columns);
        recordReader = reader.rows(start, length, include);
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }

    return Optional.<HiveRecordCursor>of(new DwrfHiveRecordCursor(recordReader, length, schema, partitionKeys,
            columns, hiveStorageTimeZone, DateTimeZone.forID(session.getTimeZoneKey().getId()), typeManager));
}

From source file:com.facebook.presto.hive.FileSystemCache.java

License:Apache License

private static Callable<FileSystem> createFileSystemFromPath(final Path path, final Configuration conf) {
    return new Callable<FileSystem>() {
        @Override
        public FileSystem call() throws Exception {
            return path.getFileSystem(conf);
        }
    };
}

From source file:com.facebook.presto.hive.HiveClient.java

License:Apache License

private FileSystem getFileSystem(Path path) throws IOException {
    return path.getFileSystem(hdfsEnvironment.getConfiguration(path));
}

From source file:com.facebook.presto.hive.HiveSplitIterable.java

License:Apache License

private void loadPartitionSplits(final HiveSplitQueue hiveSplitQueue, SuspendingExecutor suspendingExecutor)
        throws InterruptedException {
    final Semaphore semaphore = new Semaphore(maxPartitionBatchSize);
    try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) {
        ImmutableList.Builder<ListenableFuture<Void>> futureBuilder = ImmutableList.builder();

        Iterator<String> nameIterator = partitionNames.iterator();
        for (Partition partition : partitions) {
            checkState(nameIterator.hasNext(), "different number of partitions and partition names!");
            semaphore.acquire();
            final String partitionName = nameIterator.next();
            final Properties schema = getPartitionSchema(table, partition);
            final List<HivePartitionKey> partitionKeys = getPartitionKeys(table, partition);

            Path path = new Path(getPartitionLocation(table, partition));
            final Configuration configuration = hdfsEnvironment.getConfiguration(path);
            final InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, false);
            Path partitionPath = hdfsEnvironment.getFileSystemWrapper().wrap(path);

            FileSystem fs = partitionPath.getFileSystem(configuration);
            final LastSplitMarkingQueue markerQueue = new LastSplitMarkingQueue(hiveSplitQueue);

            if (inputFormat instanceof SymlinkTextInputFormat) {
                JobConf jobConf = new JobConf(configuration);
                FileInputFormat.setInputPaths(jobConf, partitionPath);
                InputSplit[] splits = inputFormat.getSplits(jobConf, 0);
                for (InputSplit rawSplit : splits) {
                    FileSplit split = ((SymlinkTextInputFormat.SymlinkTextInputSplit) rawSplit)
                            .getTargetSplit();

                    // get the filesystem for the target path -- it may be a different hdfs instance
                    FileSystem targetFilesystem = split.getPath().getFileSystem(configuration);
                    FileStatus fileStatus = targetFilesystem.getFileStatus(split.getPath());
                    markerQueue.addToQueue(createHiveSplits(partitionName, fileStatus,
                            targetFilesystem.getFileBlockLocations(fileStatus, split.getStart(),
                                    split.getLength()),
                            split.getStart(), split.getLength(), schema, partitionKeys, false));
                }
                markerQueue.finish();
                continue;
            }

            ListenableFuture<Void> partitionFuture = new AsyncRecursiveWalker(fs, suspendingExecutor)
                    .beginWalk(partitionPath, new FileStatusCallback() {
                        @Override
                        public void process(FileStatus file, BlockLocation[] blockLocations) {
                            if (bucket.isPresent()
                                    && !fileMatchesBucket(file.getPath().getName(), bucket.get())) {
                                return;
                            }

                            try {
                                boolean splittable = isSplittable(inputFormat,
                                        file.getPath().getFileSystem(configuration), file.getPath());

                                markerQueue.addToQueue(createHiveSplits(partitionName, file, blockLocations, 0,
                                        file.getLen(), schema, partitionKeys, splittable));
                            } catch (IOException e) {
                                hiveSplitQueue.fail(e);
                            }
                        }
                    });

            // release the semaphore when the partition finishes
            Futures.addCallback(partitionFuture, new FutureCallback<Void>() {
                @Override
                public void onSuccess(Void result) {
                    markerQueue.finish();
                    semaphore.release();
                }

                @Override
                public void onFailure(Throwable t) {
                    markerQueue.finish();
                    semaphore.release();
                }
            });
            futureBuilder.add(partitionFuture);
        }

        // when all partitions finish, mark the queue as finished
        Futures.addCallback(Futures.allAsList(futureBuilder.build()), new FutureCallback<List<Void>>() {
            @Override
            public void onSuccess(List<Void> result) {
                hiveSplitQueue.finished();
            }

            @Override
            public void onFailure(Throwable t) {
                hiveSplitQueue.fail(t);
            }
        });
    } catch (Throwable e) {
        hiveSplitQueue.fail(e);
        Throwables.propagateIfInstanceOf(e, Error.class);
    }
}