List of usage examples for org.apache.hadoop.fs FileSystem getConf
@Override
public Configuration getConf()
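getConf() returns the Configuration object the FileSystem instance was initialized with. Before the project examples below, here is a minimal sketch (not taken from any of them) showing a typical call; the path and the "io.file.buffer.size" key are only illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetConfExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Resolve a FileSystem for a path; an HDFS URI is assumed here for illustration.
        FileSystem fs = new Path("hdfs:///tmp").getFileSystem(conf);
        // getConf() hands back the Configuration this FileSystem was created with.
        Configuration fsConf = fs.getConf();
        System.out.println(fsConf.getInt("io.file.buffer.size", 4096));
        fs.close();
    }
}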
From source file:com.uber.hoodie.TestMultiFS.java
License:Apache License
@Test
public void readLocalWriteHDFS() throws Exception {
    // Generator of some records to be loaded in.
    HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator();

    // Initialize table and filesystem
    HoodieTableMetaClient.initTableType(jsc.hadoopConfiguration(), dfsBasePath,
        HoodieTableType.valueOf(tableType), tableName, HoodieAvroPayload.class.getName());

    // Create write client to write some records in
    HoodieWriteConfig cfg = getHoodieWriteConfig(dfsBasePath);
    HoodieWriteClient hdfsWriteClient = getHoodieWriteClient(cfg);

    // Write generated data to hdfs (only inserts)
    String readCommitTime = hdfsWriteClient.startCommit();
    logger.info("Starting commit " + readCommitTime);
    List<HoodieRecord> records = dataGen.generateInserts(readCommitTime, 100);
    JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(records, 1);
    hdfsWriteClient.upsert(writeRecords, readCommitTime);

    // Read from hdfs
    FileSystem fs = FSUtils.getFs(dfsBasePath, HoodieTestUtils.getDefaultHadoopConf());
    HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs.getConf(), dfsBasePath);
    HoodieTimeline timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
    Dataset<Row> readRecords = HoodieClientTestUtils.readCommit(dfsBasePath, sqlContext, timeline, readCommitTime);
    assertEquals("Should contain 100 records", readRecords.count(), records.size());

    // Write to local
    HoodieTableMetaClient.initTableType(jsc.hadoopConfiguration(), tablePath,
        HoodieTableType.valueOf(tableType), tableName, HoodieAvroPayload.class.getName());
    HoodieWriteConfig localConfig = getHoodieWriteConfig(tablePath);
    HoodieWriteClient localWriteClient = getHoodieWriteClient(localConfig);

    String writeCommitTime = localWriteClient.startCommit();
    logger.info("Starting write commit " + writeCommitTime);
    List<HoodieRecord> localRecords = dataGen.generateInserts(writeCommitTime, 100);
    JavaRDD<HoodieRecord> localWriteRecords = jsc.parallelize(localRecords, 1);
    logger.info("Writing to path: " + tablePath);
    localWriteClient.upsert(localWriteRecords, writeCommitTime);

    logger.info("Reading from path: " + tablePath);
    fs = FSUtils.getFs(tablePath, HoodieTestUtils.getDefaultHadoopConf());
    metaClient = new HoodieTableMetaClient(fs.getConf(), tablePath);
    timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
    Dataset<Row> localReadRecords = HoodieClientTestUtils.readCommit(tablePath, sqlContext, timeline, writeCommitTime);
    assertEquals("Should contain 100 records", localReadRecords.count(), localRecords.size());

    hdfsWriteClient.close();
    localWriteClient.close();
}
From source file:com.uber.hoodie.utilities.HiveIncrementalPuller.java
License:Apache License
private String scanForCommitTime(FileSystem fs, String targetDataPath) throws IOException {
    if (targetDataPath == null) {
        throw new IllegalArgumentException("Please specify either --fromCommitTime or --targetDataPath");
    }
    if (!fs.exists(new Path(targetDataPath)) || !fs.exists(new Path(targetDataPath + "/.hoodie"))) {
        return "0";
    }
    HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs.getConf(), targetDataPath);
    Optional<HoodieInstant> lastCommit = metadata.getActiveTimeline().getCommitsTimeline()
        .filterCompletedInstants().lastInstant();
    if (lastCommit.isPresent()) {
        return lastCommit.get().getTimestamp();
    }
    return "0";
}
From source file:com.uber.hoodie.utilities.HiveIncrementalPuller.java
License:Apache License
private String getLastCommitTimePulled(FileSystem fs, String sourceTableLocation) throws IOException {
    HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs.getConf(), sourceTableLocation);
    List<String> commitsToSync = metadata.getActiveTimeline().getCommitsTimeline().filterCompletedInstants()
        .findInstantsAfter(config.fromCommitTime, config.maxCommits).getInstants()
        .map(HoodieInstant::getTimestamp).collect(Collectors.toList());
    if (commitsToSync.isEmpty()) {
        log.warn("Nothing to sync. All commits in " + config.sourceTable + " are "
            + metadata.getActiveTimeline().getCommitsTimeline().filterCompletedInstants()
                .getInstants().collect(Collectors.toList())
            + " and from commit time is " + config.fromCommitTime);
        return null;
    }
    log.info("Syncing commits " + commitsToSync);
    return commitsToSync.get(commitsToSync.size() - 1);
}
From source file:com.uber.hoodie.utilities.HoodieSnapshotCopier.java
License:Apache License
public void snapshot(JavaSparkContext jsc, String baseDir, final String outputDir,
    final boolean shouldAssumeDatePartitioning) throws IOException {
    FileSystem fs = FSUtils.getFs(baseDir, jsc.hadoopConfiguration());
    final SerializableConfiguration serConf = new SerializableConfiguration(jsc.hadoopConfiguration());
    final HoodieTableMetaClient tableMetadata = new HoodieTableMetaClient(fs.getConf(), baseDir);
    final TableFileSystemView.ReadOptimizedView fsView = new HoodieTableFileSystemView(tableMetadata,
        tableMetadata.getActiveTimeline().getCommitsTimeline().filterCompletedInstants());

    // Get the latest commit
    Optional<HoodieInstant> latestCommit = tableMetadata.getActiveTimeline().getCommitsTimeline()
        .filterCompletedInstants().lastInstant();
    if (!latestCommit.isPresent()) {
        logger.warn("No commits present. Nothing to snapshot");
        return;
    }
    final String latestCommitTimestamp = latestCommit.get().getTimestamp();
    logger.info(String.format("Starting to snapshot latest version files which are also no-late-than %s.",
        latestCommitTimestamp));

    List<String> partitions = FSUtils.getAllPartitionPaths(fs, baseDir, shouldAssumeDatePartitioning);
    if (partitions.size() > 0) {
        logger.info(String.format("The job needs to copy %d partitions.", partitions.size()));

        // Make sure the output directory is empty
        Path outputPath = new Path(outputDir);
        if (fs.exists(outputPath)) {
            logger.warn(String.format("The output path %s targetBasePath already exists, deleting", outputPath));
            fs.delete(new Path(outputDir), true);
        }

        jsc.parallelize(partitions, partitions.size()).flatMap(partition -> {
            // Only take latest version files <= latestCommit.
            FileSystem fs1 = FSUtils.getFs(baseDir, serConf.get());
            List<Tuple2<String, String>> filePaths = new ArrayList<>();
            Stream<HoodieDataFile> dataFiles = fsView.getLatestDataFilesBeforeOrOn(partition, latestCommitTimestamp);
            dataFiles.forEach(hoodieDataFile -> filePaths.add(new Tuple2<>(partition, hoodieDataFile.getPath())));

            // also need to copy over partition metadata
            Path partitionMetaFile = new Path(new Path(baseDir, partition),
                HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE);
            if (fs1.exists(partitionMetaFile)) {
                filePaths.add(new Tuple2<>(partition, partitionMetaFile.toString()));
            }
            return filePaths.iterator();
        }).foreach(tuple -> {
            String partition = tuple._1();
            Path sourceFilePath = new Path(tuple._2());
            Path toPartitionPath = new Path(outputDir, partition);
            FileSystem ifs = FSUtils.getFs(baseDir, serConf.get());

            if (!ifs.exists(toPartitionPath)) {
                ifs.mkdirs(toPartitionPath);
            }
            FileUtil.copy(ifs, sourceFilePath, ifs, new Path(toPartitionPath, sourceFilePath.getName()), false,
                ifs.getConf());
        });

        // Also copy the .commit files
        logger.info(String.format("Copying .commit files which are no-late-than %s.", latestCommitTimestamp));
        FileStatus[] commitFilesToCopy = fs.listStatus(
            new Path(baseDir + "/" + HoodieTableMetaClient.METAFOLDER_NAME), (commitFilePath) -> {
                if (commitFilePath.getName().equals(HoodieTableConfig.HOODIE_PROPERTIES_FILE)) {
                    return true;
                } else {
                    String commitTime = FSUtils.getCommitFromCommitFile(commitFilePath.getName());
                    return HoodieTimeline.compareTimestamps(commitTime, latestCommitTimestamp,
                        HoodieTimeline.LESSER_OR_EQUAL);
                }
            });
        for (FileStatus commitStatus : commitFilesToCopy) {
            Path targetFilePath = new Path(outputDir + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
                + commitStatus.getPath().getName());
            if (!fs.exists(targetFilePath.getParent())) {
                fs.mkdirs(targetFilePath.getParent());
            }
            if (fs.exists(targetFilePath)) {
                logger.error(String.format("The target output commit file (%s targetBasePath) already exists.",
                    targetFilePath));
            }
            FileUtil.copy(fs, commitStatus.getPath(), fs, targetFilePath, false, fs.getConf());
        }
    } else {
        logger.info("The job has 0 partition to copy.");
    }

    // Create the _SUCCESS tag
    Path successTagPath = new Path(outputDir + "/_SUCCESS");
    if (!fs.exists(successTagPath)) {
        logger.info(String.format("Creating _SUCCESS under targetBasePath: %s", outputDir));
        fs.createNewFile(successTagPath);
    }
}
From source file:com.yahoo.glimmer.util.MapReducePartInputStreamEnumeration.java
License:Open Source License
public MapReducePartInputStreamEnumeration(FileSystem fileSystem, Path srcPath) throws IOException {
    this.fileSystem = fileSystem;

    CompressionCodecFactory factory = new CompressionCodecFactory(fileSystem.getConf());
    codecIfAny = factory.getCodec(srcPath);

    FileStatus srcFileStatus = fileSystem.getFileStatus(srcPath);
    if (srcFileStatus.isDirectory()) {
        // returns FileStatus objects sorted by filename.
        String partFilenamePattern = "part-?-?????";
        if (codecIfAny != null) {
            partFilenamePattern += codecIfAny.getDefaultExtension();
        }
        Path partPathGlob = new Path(srcPath, partFilenamePattern);
        partFileStatuses = fileSystem.globStatus(partPathGlob);
    } else {
        partFileStatuses = new FileStatus[] { srcFileStatus };
    }
}
From source file:com.yata.core.HDFSManager.java
License:Apache License
/**
 * @param hdfsTestDataSourceFile
 * @param hdfsTestDataTargetFile
 * @throws OozieClientException
 *
 * hadoop fs -cp /projects/ddsw/dev/data/backup/dealer_hierarchy/<<DOMAIN_NAME>>/<<FILE_NAME>> /projects/ddsw/dev/data/raw/nas/<<DOMAIN_NAME>>
 */
public void copyHDFSData(String hdfsTestDataSourceFile, String hdfsTestDataTargetFile) throws OozieClientException {
    System.out.println("copyHDFSData@" + className + " : Loading Test Data From :-> " + hdfsTestDataSourceFile
        + " : Into :-> " + hdfsTestDataTargetFile);

    FileSystem hdfs = null;
    Path hdfsTestDataSource = null;
    Path hdfsTestDataTarget = null;

    try {
        hdfs = getHdfsFileSytem();
        System.out.println("copyHDFSData@" + className + " : HDFS :-> " + hdfs);
        System.out.println("copyHDFSData@" + className + " : HDFSHomeDirectory :-> " + hdfs.getHomeDirectory());
        System.out.println("copyHDFSData@" + className + " : HDFS-URI :-> " + hdfs.getUri());
        System.out.println("copyHDFSData@" + className + " : HDFSWorkingDirectory :-> " + hdfs.getWorkingDirectory());
        System.out.println("copyHDFSData@" + className + " : HDFS : " + hdfs + " : Exists :-> "
            + hdfs.exists(hdfs.getHomeDirectory()));

        hdfsTestDataSource = new Path(hdfs.getUri().getPath() + hdfsTestDataSourceFile);
        hdfsTestDataTarget = new Path(hdfs.getUri().getPath() + hdfsTestDataTargetFile);

        System.out.println("copyHDFSData@" + className + " : HDFS TEST DATA : " + hdfsTestDataSource
            + " : Exists :-> " + hdfs.exists(hdfsTestDataSource));
        System.out.println("copyHDFSData@" + className + " : HDFS DOMAIN DATA : " + hdfsTestDataTarget
            + " : Exists :-> " + hdfs.exists(hdfsTestDataTarget));
    } catch (IOException e) {
        e.printStackTrace();
        throw new OozieClientException("ERR_CODE_1218",
            "copyHDFSData@" + className + " : IOException while getting HDFS FileSystem - EXITING...");
    }

    FileUtil hdfsUtil = new FileUtil();
    try {
        hdfsUtil.copy(hdfs, hdfsTestDataSource, hdfs, hdfsTestDataTarget, false, true, hdfs.getConf());

        System.out.println("copyHDFSData@" + className + " : NOW : HDFS TEST DATA : " + hdfsTestDataSource
            + " : Exists :-> " + hdfs.exists(hdfsTestDataSource));
        System.out.println("copyHDFSData@" + className + " : HDFS DOMAIN DATA : " + hdfsTestDataTarget
            + " : Exists :-> " + hdfs.exists(hdfsTestDataTarget));
    } catch (IOException e) {
        e.printStackTrace();
        throw new OozieClientException("ERR_CODE_1218",
            "copyHDFSData@" + className + " : IOException while Copying HDFS Data - EXITING...");
    }

    /*
     * IMPORTANT
     * If the Source Data file on HDFS is not owned by the Hive/Hadoop User, then use the command below to
     * change the permission for Hive/Hadoop User to move/delete the file once processed...
     */
    try {
        hdfs.setPermission(hdfsTestDataTarget,
            new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.READ_EXECUTE));
    } catch (IOException e) {
        e.printStackTrace();
        throw new OozieClientException("ERR_CODE_1218",
            "copyHDFSData@" + className + " : IOException while Changing HDFS File Permissions - EXITING...");
    }
}
From source file:dima.kmeansseq.SequenceFile.java
License:Apache License
/**
 * Construct the preferred type of SequenceFile Writer.
 *
 * @param fs The configured filesystem.
 * @param conf The configuration.
 * @param name The name of the file.
 * @param keyClass The 'key' type.
 * @param valClass The 'value' type.
 * @param compressionType The compression type.
 * @return Returns the handle to the constructed SequenceFile Writer.
 * @throws IOException
 */
public static Writer createWriter(FileSystem fs, Configuration conf, Path name, Class keyClass, Class valClass,
    CompressionType compressionType) throws IOException {
    return createWriter(fs, conf, name, keyClass, valClass,
        fs.getConf().getInt("io.file.buffer.size", 4096), fs.getDefaultReplication(), fs.getDefaultBlockSize(),
        compressionType, new DefaultCodec(), null, new Metadata());
}
From source file:dima.kmeansseq.SequenceFile.java
License:Apache License
/**
 * Construct the preferred type of SequenceFile Writer.
 *
 * @param fs The configured filesystem.
 * @param conf The configuration.
 * @param name The name of the file.
 * @param keyClass The 'key' type.
 * @param valClass The 'value' type.
 * @param compressionType The compression type.
 * @param progress The Progressable object to track progress.
 * @return Returns the handle to the constructed SequenceFile Writer.
 * @throws IOException
 */
public static Writer createWriter(FileSystem fs, Configuration conf, Path name, Class keyClass, Class valClass,
    CompressionType compressionType, Progressable progress) throws IOException {
    return createWriter(fs, conf, name, keyClass, valClass,
        fs.getConf().getInt("io.file.buffer.size", 4096), fs.getDefaultReplication(), fs.getDefaultBlockSize(),
        compressionType, new DefaultCodec(), progress, new Metadata());
}
From source file:dima.kmeansseq.SequenceFile.java
License:Apache License
/**
 * Construct the preferred type of SequenceFile Writer.
 *
 * @param fs The configured filesystem.
 * @param conf The configuration.
 * @param name The name of the file.
 * @param keyClass The 'key' type.
 * @param valClass The 'value' type.
 * @param compressionType The compression type.
 * @param codec The compression codec.
 * @return Returns the handle to the constructed SequenceFile Writer.
 * @throws IOException
 */
public static Writer createWriter(FileSystem fs, Configuration conf, Path name, Class keyClass, Class valClass,
    CompressionType compressionType, CompressionCodec codec) throws IOException {
    return createWriter(fs, conf, name, keyClass, valClass,
        fs.getConf().getInt("io.file.buffer.size", 4096), fs.getDefaultReplication(), fs.getDefaultBlockSize(),
        compressionType, codec, null, new Metadata());
}
From source file:dima.kmeansseq.SequenceFile.java
License:Apache License
/**
 * Construct the preferred type of SequenceFile Writer.
 *
 * @param fs The configured filesystem.
 * @param conf The configuration.
 * @param name The name of the file.
 * @param keyClass The 'key' type.
 * @param valClass The 'value' type.
 * @param compressionType The compression type.
 * @param codec The compression codec.
 * @param progress The Progressable object to track progress.
 * @param metadata The metadata of the file.
 * @return Returns the handle to the constructed SequenceFile Writer.
 * @throws IOException
 */
public static Writer createWriter(FileSystem fs, Configuration conf, Path name, Class keyClass, Class valClass,
    CompressionType compressionType, CompressionCodec codec, Progressable progress, Metadata metadata)
    throws IOException {
    return createWriter(fs, conf, name, keyClass, valClass,
        fs.getConf().getInt("io.file.buffer.size", 4096), fs.getDefaultReplication(), fs.getDefaultBlockSize(),
        compressionType, codec, progress, metadata);
}