Example usage for org.apache.hadoop.fs FileSystem getConf

List of usage examples for org.apache.hadoop.fs FileSystem getConf

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem getConf.

Prototype

@Override
public Configuration getConf()
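
A minimal, self-contained sketch of the method in use (not taken from the project sources under Usage below): obtain a FileSystem, call getConf() to get back the Configuration that backs it, and read settings from it. The property keys and the default value are illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class GetConfExample {
    public static void main(String[] args) throws Exception {
        // Build a FileSystem from a fresh Configuration (loads core-site.xml etc. if present)
        FileSystem fs = FileSystem.get(new Configuration());

        // getConf() returns the Configuration this FileSystem was initialized with
        Configuration conf = fs.getConf();

        // Read settings from it; the keys and default below are illustrative
        System.out.println("fs.defaultFS = " + conf.get("fs.defaultFS"));
        System.out.println("io.file.buffer.size = " + conf.getInt("io.file.buffer.size", 4096));
    }
}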

Usage

From source file:com.uber.hoodie.common.HoodieClientTestUtils.java

License:Apache License

/**
 * Reads the paths under a hoodie dataset out as a DataFrame
 */
public static Dataset<Row> read(JavaSparkContext jsc, String basePath, SQLContext sqlContext, FileSystem fs,
        String... paths) {
    List<String> filteredPaths = new ArrayList<>();
    try {
        HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs.getConf(), basePath, true);
        for (String path : paths) {
            TableFileSystemView.ReadOptimizedView fileSystemView = new HoodieTableFileSystemView(metaClient,
                    metaClient.getCommitsTimeline().filterCompletedInstants(), fs.globStatus(new Path(path)));
            List<HoodieDataFile> latestFiles = fileSystemView.getLatestDataFiles().collect(Collectors.toList());
            for (HoodieDataFile file : latestFiles) {
                filteredPaths.add(file.getPath());
            }
        }
        return sqlContext.read().parquet(filteredPaths.toArray(new String[filteredPaths.size()]));
    } catch (Exception e) {
        throw new HoodieException("Error reading hoodie dataset as a dataframe", e);
    }
}
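
A hypothetical call site for the helper above might look like the following; the JavaSparkContext, SQLContext and basePath setup, and the glob path, are assumptions for illustration rather than part of the original source.

// Hypothetical usage (jsc, sqlContext and basePath assumed to be initialized already)
FileSystem fs = FSUtils.getFs(basePath, jsc.hadoopConfiguration());
Dataset<Row> rows = HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs,
        basePath + "/2016/03/15/*");
rows.show();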

From source file:com.uber.hoodie.common.table.log.AbstractHoodieLogRecordScanner.java

License:Apache License

public AbstractHoodieLogRecordScanner(FileSystem fs, String basePath, List<String> logFilePaths,
        Schema readerSchema, String latestInstantTime, boolean readBlocksLazily, boolean reverseReader,
        int bufferSize) {
    this.readerSchema = readerSchema;
    this.latestInstantTime = latestInstantTime;
    this.hoodieTableMetaClient = new HoodieTableMetaClient(fs.getConf(), basePath);
    // load class from the payload fully qualified class name
    this.payloadClassFQN = this.hoodieTableMetaClient.getTableConfig().getPayloadClass();
    this.totalLogFiles.addAndGet(logFilePaths.size());
    this.logFilePaths = logFilePaths;
    this.readBlocksLazily = readBlocksLazily;
    this.reverseReader = reverseReader;
    this.fs = fs;
    this.bufferSize = bufferSize;
}

From source file:com.uber.hoodie.common.table.log.avro.AvroLogAppender.java

License:Apache License

public AvroLogAppender(HoodieLogAppendConfig config) throws IOException, InterruptedException {
    FileSystem fs = config.getFs();
    this.config = config;
    this.autoFlush = config.isAutoFlush();
    GenericDatumWriter<IndexedRecord> datumWriter = new GenericDatumWriter<>(config.getSchema());
    this.writer = new DataFileWriter<>(datumWriter);
    Path path = config.getLogFile().getPath();

    if (fs.exists(path)) {
        //TODO - check for log corruption and roll over if needed
        log.info(config.getLogFile() + " exists. Appending to existing file");
        // this log path exists, we will append to it
        fs = FileSystem.get(fs.getConf());
        try {
            this.output = fs.append(path, config.getBufferSize());
        } catch (RemoteException e) {
            // this happens when another task executor writing to this file died, or the data node is going down
            if (e.getClassName().equals(AlreadyBeingCreatedException.class.getName())
                    && fs instanceof DistributedFileSystem) {
                log.warn("Trying to recover log on path " + path);
                if (FSUtils.recoverDFSFileLease((DistributedFileSystem) fs, path)) {
                    log.warn("Recovered lease on path " + path);
                    // try again
                    this.output = fs.append(path, config.getBufferSize());
                } else {
                    log.warn("Failed to recover lease on path " + path);
                    throw new HoodieException(e);
                }
            }
        }
        this.writer.appendTo(new AvroFSInput(FileContext.getFileContext(fs.getConf()), path), output);
        // we always want to flush to disk every time an Avro block is written
        this.writer.setFlushOnEveryBlock(true);
    } else {
        log.info(config.getLogFile() + " does not exist. Create a new file");
        this.output = fs.create(path, false, config.getBufferSize(), config.getReplication(),
                config.getBlockSize(), null);
        this.writer.create(config.getSchema(), output);
        this.writer.setFlushOnEveryBlock(true);
        // We need to close the writer to be able to tell the name node that we created this file
        // this.writer.close();
    }
}

From source file:com.uber.hoodie.common.table.log.avro.AvroLogReader.java

License:Apache License

public AvroLogReader(HoodieLogFile file, FileSystem fs, Schema readerSchema) throws IOException {
    GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    datumReader.setExpected(readerSchema);
    final AvroFSInput input = new AvroFSInput(FileContext.getFileContext(fs.getConf()), file.getPath());
    this.reader = (DataFileReader<GenericRecord>) DataFileReader.openReader(input, datumReader);
    this.file = file;
}

From source file:com.uber.hoodie.common.util.AvroUtils.java

License:Apache License

public static List<HoodieRecord<HoodieAvroPayload>> loadFromFile(FileSystem fs, String deltaFilePath,
        Schema expectedSchema) {
    List<HoodieRecord<HoodieAvroPayload>> loadedRecords = Lists.newArrayList();
    Path path = new Path(deltaFilePath);
    try {
        SeekableInput input = new FsInput(path, fs.getConf());
        GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>();
        // Set the expected schema to be the current schema to account for schema evolution
        reader.setExpected(expectedSchema);

        FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, reader);
        for (GenericRecord deltaRecord : fileReader) {
            String key = deltaRecord.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
            String partitionPath = deltaRecord.get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString();
            loadedRecords.add(new HoodieRecord<>(new HoodieKey(key, partitionPath),
                    new HoodieAvroPayload(Optional.of(deltaRecord))));
        }
        fileReader.close(); // also closes underlying FsInput
    } catch (IOException e) {
        throw new HoodieIOException("Could not read avro records from path " + deltaFilePath, e);
    }
    return loadedRecords;
}

From source file:com.uber.hoodie.common.util.FSUtils.java

License:Apache License

public static int getDefaultBufferSize(final FileSystem fs) {
    return fs.getConf().getInt("io.file.buffer.size", 4096);
}
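
The same getConf()-backed lookup works for any Hadoop setting; a short sketch follows, where the basePath variable, the dfs.blocksize key and the fallback value are illustrative rather than taken from the Hudi sources.

// Illustrative: reuse the FileSystem's Configuration for other settings
FileSystem fs = FSUtils.getFs(basePath, new Configuration());
int bufferSize = FSUtils.getDefaultBufferSize(fs);
long blockSize = fs.getConf().getLong("dfs.blocksize", 128 * 1024 * 1024L);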

From source file:com.uber.hoodie.hadoop.HoodieInputFormat.java

License:Apache License

/**
 * Read the table metadata from a data path. This assumes a certain hierarchy of files which should
 * be changed once a better way is figured out to pass in the hoodie meta directory.
 */
protected static HoodieTableMetaClient getTableMetaClient(FileSystem fs, Path dataPath) throws IOException {
    int levels = HoodieHiveUtil.DEFAULT_LEVELS_TO_BASEPATH;
    if (HoodiePartitionMetadata.hasPartitionMetadata(fs, dataPath)) {
        HoodiePartitionMetadata metadata = new HoodiePartitionMetadata(fs, dataPath);
        metadata.readFromFS();
        levels = metadata.getPartitionDepth();
    }
    Path baseDir = HoodieHiveUtil.getNthParent(dataPath, levels);
    LOG.info("Reading hoodie metadata from path " + baseDir.toString());
    return new HoodieTableMetaClient(fs.getConf(), baseDir.toString());
}
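
A hedged sketch of how this helper might be invoked from within the input format; the partition path and the fresh Configuration are illustrative assumptions.

// Illustrative (same class or a subclass, since the helper is protected)
Path dataPath = new Path("/data/hoodie_table/2016/03/15"); // assumed partition path
FileSystem fs = FSUtils.getFs(dataPath.toString(), new Configuration());
HoodieTableMetaClient metaClient = getTableMetaClient(fs, dataPath);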

From source file:com.uber.hoodie.hive.HiveSyncTool.java

License:Apache License

public static void main(String[] args) throws Exception {
    // parse the params
    final HiveSyncConfig cfg = new HiveSyncConfig();
    JCommander cmd = new JCommander(cfg, args);
    if (cfg.help || args.length == 0) {
        cmd.usage();
        System.exit(1);
    }
    FileSystem fs = FSUtils.getFs(cfg.basePath, new Configuration());
    HiveConf hiveConf = new HiveConf();
    hiveConf.addResource(fs.getConf());
    new HiveSyncTool(cfg, hiveConf, fs).syncHoodieTable();
}

From source file:com.uber.hoodie.hive.HoodieHiveClient.java

License:Apache License

public HoodieHiveClient(HiveSyncConfig cfg, HiveConf configuration, FileSystem fs) {
    this.syncConfig = cfg;
    this.fs = fs;
    this.metaClient = new HoodieTableMetaClient(fs.getConf(), cfg.basePath, true);
    this.tableType = metaClient.getTableType();

    LOG.info("Creating hive connection " + cfg.jdbcUrl);
    createHiveConnection();
    try {
        this.client = new HiveMetaStoreClient(configuration);
    } catch (MetaException e) {
        throw new HoodieHiveSyncException("Failed to create HiveMetaStoreClient", e);
    }

    try {
        this.partitionValueExtractor = (PartitionValueExtractor) Class.forName(cfg.partitionValueExtractorClass)
                .newInstance();
    } catch (Exception e) {
        throw new HoodieHiveSyncException(
                "Failed to initialize PartitionValueExtractor class " + cfg.partitionValueExtractorClass, e);
    }

    activeTimeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
}

From source file:com.uber.hoodie.HoodieDataSourceHelpers.java

License:Apache License

/**
 * Obtain all the commits and compactions that have occurred on the timeline, whose instant times
 * could be fed into the datasource options.
 */
public static HoodieTimeline allCompletedCommitsCompactions(FileSystem fs, String basePath) {
    HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs.getConf(), basePath, true);
    if (metaClient.getTableType().equals(HoodieTableType.MERGE_ON_READ)) {
        return metaClient.getActiveTimeline().getTimelineOfActions(
                Sets.newHashSet(HoodieActiveTimeline.COMMIT_ACTION, HoodieActiveTimeline.DELTA_COMMIT_ACTION));
    } else {
        return metaClient.getCommitTimeline().filterCompletedInstants();
    }
}
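
A hedged sketch of how the returned timeline might be consumed, for example to print the instant times that could then be fed to the datasource options; the FileSystem setup and basePath are assumptions for illustration.

// Illustrative usage (basePath assumed): list completed instants on the timeline
FileSystem fs = FSUtils.getFs(basePath, new Configuration());
HoodieTimeline completed = HoodieDataSourceHelpers.allCompletedCommitsCompactions(fs, basePath);
completed.getInstants().forEach(instant -> System.out.println(instant.getTimestamp()));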