List of usage examples for org.apache.hadoop.fs.FileSystem#getConf()
@Override
public Configuration getConf()
From source file:com.uber.hoodie.common.HoodieClientTestUtils.java
License:Apache License
/**
 * Reads the given paths of a hoodie dataset out as a DataFrame.
 *
 * <p>Each entry of {@code paths} is expanded with {@code fs.globStatus(...)}; the files reported by
 * {@code getLatestDataFiles()} for the completed instants of the commits timeline are collected and
 * handed to the parquet reader in one call.
 *
 * @param jsc        Spark context (not referenced by this method)
 * @param basePath   base path used to build the {@code HoodieTableMetaClient}
 * @param sqlContext SQL context used to read the parquet files
 * @param fs         file system whose configuration and glob support are used
 * @param paths      path patterns to expand
 * @return a DataFrame over the collected data files
 * @throws HoodieException wrapping any exception raised while listing or reading
 */
public static Dataset<Row> read(JavaSparkContext jsc, String basePath, SQLContext sqlContext, FileSystem fs,
    String... paths) {
  try {
    List<String> latestFilePaths = new ArrayList<>();
    HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs.getConf(), basePath, true);
    for (String pathPattern : paths) {
      TableFileSystemView.ReadOptimizedView view = new HoodieTableFileSystemView(
          metaClient,
          metaClient.getCommitsTimeline().filterCompletedInstants(),
          fs.globStatus(new Path(pathPattern)));
      view.getLatestDataFiles()
          .map(HoodieDataFile::getPath)
          .forEach(latestFilePaths::add);
    }
    String[] parquetPaths = latestFilePaths.toArray(new String[latestFilePaths.size()]);
    return sqlContext.read().parquet(parquetPaths);
  } catch (Exception e) {
    throw new HoodieException("Error reading hoodie dataset as a dataframe", e);
  }
}
From source file:com.uber.hoodie.common.table.log.AbstractHoodieLogRecordScanner.java
License:Apache License
public AbstractHoodieLogRecordScanner(FileSystem fs, String basePath, List<String> logFilePaths, Schema readerSchema, String latestInstantTime, boolean readBlocksLazily, boolean reverseReader, int bufferSize) { this.readerSchema = readerSchema; this.latestInstantTime = latestInstantTime; this.hoodieTableMetaClient = new HoodieTableMetaClient(fs.getConf(), basePath); // load class from the payload fully qualified class name this.payloadClassFQN = this.hoodieTableMetaClient.getTableConfig().getPayloadClass(); this.totalLogFiles.addAndGet(logFilePaths.size()); this.logFilePaths = logFilePaths; this.readBlocksLazily = readBlocksLazily; this.reverseReader = reverseReader; this.fs = fs; this.bufferSize = bufferSize; }
From source file:com.uber.hoodie.common.table.log.avro.AvroLogAppender.java
License:Apache License
public AvroLogAppender(HoodieLogAppendConfig config) throws IOException, InterruptedException { FileSystem fs = config.getFs(); this.config = config; this.autoFlush = config.isAutoFlush(); GenericDatumWriter<IndexedRecord> datumWriter = new GenericDatumWriter<>(config.getSchema()); this.writer = new DataFileWriter<>(datumWriter); Path path = config.getLogFile().getPath(); if (fs.exists(path)) { //TODO - check for log corruption and roll over if needed log.info(config.getLogFile() + " exists. Appending to existing file"); // this log path exists, we will append to it fs = FileSystem.get(fs.getConf()); try {/*from ww w. j a va2 s. c o m*/ this.output = fs.append(path, config.getBufferSize()); } catch (RemoteException e) { // this happens when either another task executor writing to this file died or data node is going down if (e.getClassName().equals(AlreadyBeingCreatedException.class.getName()) && fs instanceof DistributedFileSystem) { log.warn("Trying to recover log on path " + path); if (FSUtils.recoverDFSFileLease((DistributedFileSystem) fs, path)) { log.warn("Recovered lease on path " + path); // try again this.output = fs.append(path, config.getBufferSize()); } else { log.warn("Failed to recover lease on path " + path); throw new HoodieException(e); } } } this.writer.appendTo(new AvroFSInput(FileContext.getFileContext(fs.getConf()), path), output); // we always want to flush to disk everytime a avro block is written this.writer.setFlushOnEveryBlock(true); } else { log.info(config.getLogFile() + " does not exist. Create a new file"); this.output = fs.create(path, false, config.getBufferSize(), config.getReplication(), config.getBlockSize(), null); this.writer.create(config.getSchema(), output); this.writer.setFlushOnEveryBlock(true); // We need to close the writer to be able to tell the name node that we created this file // this.writer.close(); } }
From source file:com.uber.hoodie.common.table.log.avro.AvroLogReader.java
License:Apache License
/**
 * Opens an Avro data-file reader over {@code file}, resolving records against {@code readerSchema}.
 *
 * <p>If the reader cannot be opened, the input that was already opened on the file is closed
 * before the failure is propagated, so a bad log file does not leak an open stream.
 *
 * @param file         log file to read
 * @param fs           file system whose configuration is used to obtain the {@code FileContext}
 * @param readerSchema schema set as the expected schema on the datum reader
 * @throws IOException if opening the input or the Avro reader fails
 */
public AvroLogReader(HoodieLogFile file, FileSystem fs, Schema readerSchema) throws IOException {
  GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
  datumReader.setExpected(readerSchema);
  final AvroFSInput input = new AvroFSInput(FileContext.getFileContext(fs.getConf()), file.getPath());
  try {
    this.reader = (DataFileReader<GenericRecord>) DataFileReader.openReader(input, datumReader);
  } catch (IOException | RuntimeException e) {
    // Nothing else owns the input yet; release it and keep the original failure as the primary one.
    try {
      input.close();
    } catch (IOException closeFailure) {
      e.addSuppressed(closeFailure);
    }
    throw e;
  }
  this.file = file;
}
From source file:com.uber.hoodie.common.util.AvroUtils.java
License:Apache License
public static List<HoodieRecord<HoodieAvroPayload>> loadFromFile(FileSystem fs, String deltaFilePath, Schema expectedSchema) {// w ww .java2 s .c o m List<HoodieRecord<HoodieAvroPayload>> loadedRecords = Lists.newArrayList(); Path path = new Path(deltaFilePath); try { SeekableInput input = new FsInput(path, fs.getConf()); GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(); // Set the expected schema to be the current schema to account for schema evolution reader.setExpected(expectedSchema); FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, reader); for (GenericRecord deltaRecord : fileReader) { String key = deltaRecord.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(); String partitionPath = deltaRecord.get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString(); loadedRecords.add(new HoodieRecord<>(new HoodieKey(key, partitionPath), new HoodieAvroPayload(Optional.of(deltaRecord)))); } fileReader.close(); // also closes underlying FsInput } catch (IOException e) { throw new HoodieIOException("Could not read avro records from path " + deltaFilePath, e); } return loadedRecords; }
From source file:com.uber.hoodie.common.util.FSUtils.java
License:Apache License
/**
 * Looks up {@code io.file.buffer.size} in the configuration of {@code fs}.
 *
 * @param fs file system whose configuration is consulted
 * @return the configured value, or 4096 when the key is not set
 */
public static int getDefaultBufferSize(final FileSystem fs) {
  final String bufferSizeKey = "io.file.buffer.size";
  final int fallbackSize = 4096;
  return fs.getConf().getInt(bufferSizeKey, fallbackSize);
}
From source file:com.uber.hoodie.hadoop.HoodieInputFormat.java
License:Apache License
/** * Read the table metadata from a data path. This assumes certain hierarchy of files which should * be changed once a better way is figured out to pass in the hoodie meta directory *///from ww w. j av a2s. c om protected static HoodieTableMetaClient getTableMetaClient(FileSystem fs, Path dataPath) throws IOException { int levels = HoodieHiveUtil.DEFAULT_LEVELS_TO_BASEPATH; if (HoodiePartitionMetadata.hasPartitionMetadata(fs, dataPath)) { HoodiePartitionMetadata metadata = new HoodiePartitionMetadata(fs, dataPath); metadata.readFromFS(); levels = metadata.getPartitionDepth(); } Path baseDir = HoodieHiveUtil.getNthParent(dataPath, levels); LOG.info("Reading hoodie metadata from path " + baseDir.toString()); return new HoodieTableMetaClient(fs.getConf(), baseDir.toString()); }
From source file:com.uber.hoodie.hive.HiveSyncTool.java
License:Apache License
public static void main(String[] args) throws Exception { // parse the params final HiveSyncConfig cfg = new HiveSyncConfig(); JCommander cmd = new JCommander(cfg, args); if (cfg.help || args.length == 0) { cmd.usage();//from ww w . j a v a2 s . c o m System.exit(1); } FileSystem fs = FSUtils.getFs(cfg.basePath, new Configuration()); HiveConf hiveConf = new HiveConf(); hiveConf.addResource(fs.getConf()); new HiveSyncTool(cfg, hiveConf, fs).syncHoodieTable(); }
From source file:com.uber.hoodie.hive.HoodieHiveClient.java
License:Apache License
/**
 * Sets up the client: loads the table meta client, opens the hive connection, creates the
 * metastore client and instantiates the configured partition value extractor.
 *
 * @param cfg           sync configuration (base path, JDBC URL, extractor class name)
 * @param configuration hive configuration passed to the metastore client
 * @param fs            file system; its configuration is used to build the meta client
 * @throws HoodieHiveSyncException if the metastore client or the partition value extractor
 *                                 cannot be created
 */
public HoodieHiveClient(HiveSyncConfig cfg, HiveConf configuration, FileSystem fs) {
  this.syncConfig = cfg;
  this.fs = fs;
  this.metaClient = new HoodieTableMetaClient(fs.getConf(), cfg.basePath, true);
  this.tableType = metaClient.getTableType();

  LOG.info("Creating hive connection " + cfg.jdbcUrl);
  createHiveConnection();

  try {
    this.client = new HiveMetaStoreClient(configuration);
  } catch (MetaException e) {
    throw new HoodieHiveSyncException("Failed to create HiveMetaStoreClient", e);
  }

  try {
    Object extractor = Class.forName(cfg.partitionValueExtractorClass).newInstance();
    this.partitionValueExtractor = (PartitionValueExtractor) extractor;
  } catch (Exception e) {
    throw new HoodieHiveSyncException(
        "Failed to initialize PartitionValueExtractor class " + cfg.partitionValueExtractorClass, e);
  }

  activeTimeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
}
From source file:com.uber.hoodie.HoodieDataSourceHelpers.java
License:Apache License
/** * Obtain all the commits, compactions that have occurred on the timeline, whose instant times * could be fed into the datasource options. */// w w w .j a v a2 s .co m public static HoodieTimeline allCompletedCommitsCompactions(FileSystem fs, String basePath) { HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs.getConf(), basePath, true); if (metaClient.getTableType().equals(HoodieTableType.MERGE_ON_READ)) { return metaClient.getActiveTimeline().getTimelineOfActions( Sets.newHashSet(HoodieActiveTimeline.COMMIT_ACTION, HoodieActiveTimeline.DELTA_COMMIT_ACTION)); } else { return metaClient.getCommitTimeline().filterCompletedInstants(); } }