Example usage for org.apache.hadoop.fs FileSystem getConf

List of usage examples for org.apache.hadoop.fs FileSystem getConf

Introduction

On this page you can find usage examples for org.apache.hadoop.fs.FileSystem.getConf().

Prototype

@Override
public Configuration getConf()
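
getConf() returns the Configuration that the FileSystem instance was initialized with. Below is a minimal sketch of reading settings back from it; the keys queried are standard Hadoop keys used only for illustration, and the filesystem resolved is whatever the ambient configuration points to.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class GetConfExample {
    public static void main(String[] args) throws Exception {
        // Build a Configuration and obtain the default FileSystem for it.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // getConf() hands back the Configuration the FileSystem was initialized with,
        // so settings can be read from it after the fact.
        Configuration fsConf = fs.getConf();
        System.out.println("fs.defaultFS = " + fsConf.get("fs.defaultFS"));
        System.out.println("io.file.buffer.size = " + fsConf.getInt("io.file.buffer.size", 4096));
    }
}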

Source Link

Usage

From source file:com.uber.hoodie.TestMultiFS.java

License:Apache License

@Test
public void readLocalWriteHDFS() throws Exception {

    // Generator of some records to be loaded in.
    HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator();

    // Initialize table and filesystem
    HoodieTableMetaClient.initTableType(jsc.hadoopConfiguration(), dfsBasePath,
            HoodieTableType.valueOf(tableType), tableName, HoodieAvroPayload.class.getName());

    //Create write client to write some records in
    HoodieWriteConfig cfg = getHoodieWriteConfig(dfsBasePath);
    HoodieWriteClient hdfsWriteClient = getHoodieWriteClient(cfg);

    // Write generated data to hdfs (only inserts)
    String readCommitTime = hdfsWriteClient.startCommit();
    logger.info("Starting commit " + readCommitTime);
    List<HoodieRecord> records = dataGen.generateInserts(readCommitTime, 100);
    JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(records, 1);
    hdfsWriteClient.upsert(writeRecords, readCommitTime);

    // Read from hdfs
    FileSystem fs = FSUtils.getFs(dfsBasePath, HoodieTestUtils.getDefaultHadoopConf());
    HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs.getConf(), dfsBasePath);
    HoodieTimeline timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
    Dataset<Row> readRecords = HoodieClientTestUtils.readCommit(dfsBasePath, sqlContext, timeline,
            readCommitTime);
    assertEquals("Should contain 100 records", readRecords.count(), records.size());

    // Write to local
    HoodieTableMetaClient.initTableType(jsc.hadoopConfiguration(), tablePath,
            HoodieTableType.valueOf(tableType), tableName, HoodieAvroPayload.class.getName());
    HoodieWriteConfig localConfig = getHoodieWriteConfig(tablePath);
    HoodieWriteClient localWriteClient = getHoodieWriteClient(localConfig);

    String writeCommitTime = localWriteClient.startCommit();
    logger.info("Starting write commit " + writeCommitTime);
    List<HoodieRecord> localRecords = dataGen.generateInserts(writeCommitTime, 100);
    JavaRDD<HoodieRecord> localWriteRecords = jsc.parallelize(localRecords, 1);
    logger.info("Writing to path: " + tablePath);
    localWriteClient.upsert(localWriteRecords, writeCommitTime);

    logger.info("Reading from path: " + tablePath);
    fs = FSUtils.getFs(tablePath, HoodieTestUtils.getDefaultHadoopConf());
    metaClient = new HoodieTableMetaClient(fs.getConf(), tablePath);
    timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
    Dataset<Row> localReadRecords = HoodieClientTestUtils.readCommit(tablePath, sqlContext, timeline,
            writeCommitTime);
    assertEquals("Should contain 100 records", localReadRecords.count(), localRecords.size());

    hdfsWriteClient.close();
    localWriteClient.close();
}

From source file:com.uber.hoodie.utilities.HiveIncrementalPuller.java

License:Apache License

private String scanForCommitTime(FileSystem fs, String targetDataPath) throws IOException {
    if (targetDataPath == null) {
        throw new IllegalArgumentException("Please specify either --fromCommitTime or --targetDataPath");
    }
    if (!fs.exists(new Path(targetDataPath)) || !fs.exists(new Path(targetDataPath + "/.hoodie"))) {
        return "0";
    }
    HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs.getConf(), targetDataPath);

    Optional<HoodieInstant> lastCommit = metadata.getActiveTimeline().getCommitsTimeline()
            .filterCompletedInstants().lastInstant();
    if (lastCommit.isPresent()) {
        return lastCommit.get().getTimestamp();
    }
    return "0";
}

From source file:com.uber.hoodie.utilities.HiveIncrementalPuller.java

License:Apache License

private String getLastCommitTimePulled(FileSystem fs, String sourceTableLocation) throws IOException {
    HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs.getConf(), sourceTableLocation);
    List<String> commitsToSync = metadata.getActiveTimeline().getCommitsTimeline().filterCompletedInstants()
            .findInstantsAfter(config.fromCommitTime, config.maxCommits).getInstants()
            .map(HoodieInstant::getTimestamp).collect(Collectors.toList());
    if (commitsToSync.isEmpty()) {
        log.warn("Nothing to sync. All commits in "
                + config.sourceTable + " are " + metadata.getActiveTimeline().getCommitsTimeline()
                        .filterCompletedInstants().getInstants().collect(Collectors.toList())
                + " and from commit time is " + config.fromCommitTime);
        return null;
    }
    log.info("Syncing commits " + commitsToSync);
    return commitsToSync.get(commitsToSync.size() - 1);
}

From source file:com.uber.hoodie.utilities.HoodieSnapshotCopier.java

License:Apache License

public void snapshot(JavaSparkContext jsc, String baseDir, final String outputDir,
        final boolean shouldAssumeDatePartitioning) throws IOException {
    FileSystem fs = FSUtils.getFs(baseDir, jsc.hadoopConfiguration());
    final SerializableConfiguration serConf = new SerializableConfiguration(jsc.hadoopConfiguration());
    final HoodieTableMetaClient tableMetadata = new HoodieTableMetaClient(fs.getConf(), baseDir);
    final TableFileSystemView.ReadOptimizedView fsView = new HoodieTableFileSystemView(tableMetadata,
            tableMetadata.getActiveTimeline().getCommitsTimeline().filterCompletedInstants());
    // Get the latest commit
    Optional<HoodieInstant> latestCommit = tableMetadata.getActiveTimeline().getCommitsTimeline()
            .filterCompletedInstants().lastInstant();
    if (!latestCommit.isPresent()) {
        logger.warn("No commits present. Nothing to snapshot");
        return;
    }
    final String latestCommitTimestamp = latestCommit.get().getTimestamp();
    logger.info(String.format("Starting to snapshot latest version files which are also no-late-than %s.",
            latestCommitTimestamp));

    List<String> partitions = FSUtils.getAllPartitionPaths(fs, baseDir, shouldAssumeDatePartitioning);
    if (partitions.size() > 0) {
        logger.info(String.format("The job needs to copy %d partitions.", partitions.size()));

        // Make sure the output directory is empty
        Path outputPath = new Path(outputDir);
        if (fs.exists(outputPath)) {
            logger.warn(
                    String.format("The output path %s already exists, deleting", outputPath));
            fs.delete(new Path(outputDir), true);
        }

        jsc.parallelize(partitions, partitions.size()).flatMap(partition -> {
            // Only take latest version files <= latestCommit.
            FileSystem fs1 = FSUtils.getFs(baseDir, serConf.get());
            List<Tuple2<String, String>> filePaths = new ArrayList<>();
            Stream<HoodieDataFile> dataFiles = fsView.getLatestDataFilesBeforeOrOn(partition,
                    latestCommitTimestamp);
            dataFiles.forEach(
                    hoodieDataFile -> filePaths.add(new Tuple2<>(partition, hoodieDataFile.getPath())));

            // also need to copy over partition metadata
            Path partitionMetaFile = new Path(new Path(baseDir, partition),
                    HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE);
            if (fs1.exists(partitionMetaFile)) {
                filePaths.add(new Tuple2<>(partition, partitionMetaFile.toString()));
            }

            return filePaths.iterator();
        }).foreach(tuple -> {
            String partition = tuple._1();
            Path sourceFilePath = new Path(tuple._2());
            Path toPartitionPath = new Path(outputDir, partition);
            FileSystem ifs = FSUtils.getFs(baseDir, serConf.get());

            if (!ifs.exists(toPartitionPath)) {
                ifs.mkdirs(toPartitionPath);
            }
            FileUtil.copy(ifs, sourceFilePath, ifs, new Path(toPartitionPath, sourceFilePath.getName()), false,
                    ifs.getConf());
        });

        // Also copy the .commit files
        logger.info(String.format("Copying .commit files which are no-late-than %s.", latestCommitTimestamp));
        FileStatus[] commitFilesToCopy = fs.listStatus(
                new Path(baseDir + "/" + HoodieTableMetaClient.METAFOLDER_NAME), (commitFilePath) -> {
                    if (commitFilePath.getName().equals(HoodieTableConfig.HOODIE_PROPERTIES_FILE)) {
                        return true;
                    } else {
                        String commitTime = FSUtils.getCommitFromCommitFile(commitFilePath.getName());
                        return HoodieTimeline.compareTimestamps(commitTime, latestCommitTimestamp,
                                HoodieTimeline.LESSER_OR_EQUAL);
                    }
                });
        for (FileStatus commitStatus : commitFilesToCopy) {
            Path targetFilePath = new Path(outputDir + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
                    + commitStatus.getPath().getName());
            if (!fs.exists(targetFilePath.getParent())) {
                fs.mkdirs(targetFilePath.getParent());
            }
            if (fs.exists(targetFilePath)) {
                logger.error(String.format("The target output commit file (%s targetBasePath) already exists.",
                        targetFilePath));
            }
            FileUtil.copy(fs, commitStatus.getPath(), fs, targetFilePath, false, fs.getConf());
        }
    } else {
        logger.info("The job has 0 partition to copy.");
    }

    // Create the _SUCCESS tag
    Path successTagPath = new Path(outputDir + "/_SUCCESS");
    if (!fs.exists(successTagPath)) {
        logger.info(String.format("Creating _SUCCESS under targetBasePath: $s", outputDir));
        fs.createNewFile(successTagPath);
    }
}

From source file:com.yahoo.glimmer.util.MapReducePartInputStreamEnumeration.java

License:Open Source License

public MapReducePartInputStreamEnumeration(FileSystem fileSystem, Path srcPath) throws IOException {
    this.fileSystem = fileSystem;

    CompressionCodecFactory factory = new CompressionCodecFactory(fileSystem.getConf());
    codecIfAny = factory.getCodec(srcPath);

    FileStatus srcFileStatus = fileSystem.getFileStatus(srcPath);
    if (srcFileStatus.isDirectory()) {
        // returns FileStatus objects sorted by filename.
        String partFilenamePattern = "part-?-?????";
        if (codecIfAny != null) {
            partFilenamePattern += codecIfAny.getDefaultExtension();
        }
        Path partPathGlob = new Path(srcPath, partFilenamePattern);
        partFileStatuses = fileSystem.globStatus(partPathGlob);
    } else {
        partFileStatuses = new FileStatus[] { srcFileStatus };
    }

}

From source file:com.yata.core.HDFSManager.java

License:Apache License

/**
 *
 * @param hdfsTestDataSourceFile
 * @param hdfsTestDataTargetFile
 * @throws IOException
 *
 * hadoop fs -cp /projects/ddsw/dev/data/backup/dealer_hierarchy/<<DOMAIN_NAME>>/<<FILE_NAME>> /projects/ddsw/dev/data/raw/nas/<<DOMAIN_NAME>>
 */
public void copyHDFSData(String hdfsTestDataSourceFile, String hdfsTestDataTargetFile)
        throws OozieClientException {

    System.out.println("copyHDFSData@" + className + " : Loading Test Data From :-> " + hdfsTestDataSourceFile
            + " : Into :-> " + hdfsTestDataTargetFile);

    FileSystem hdfs = null;
    Path hdfsTestDataSource = null;
    Path hdfsTestDataTarget = null;

    try {

        hdfs = getHdfsFileSytem();

        System.out.println("copyHDFSData@" + className + " : HDFS :-> " + hdfs);

        System.out.println("copyHDFSData@" + className + " : HDFSHomeDirectory :-> " + hdfs.getHomeDirectory());
        System.out.println("copyHDFSData@" + className + " : HDFS-URI :-> " + hdfs.getUri());
        System.out.println(
                "copyHDFSData@" + className + " : HDFSWorkingDirectory :-> " + hdfs.getWorkingDirectory());
        System.out.println("copyHDFSData@" + className + " : HDFS : " + hdfs + " : Exists :-> "
                + hdfs.exists(hdfs.getHomeDirectory()));

        hdfsTestDataSource = new Path(hdfs.getUri().getPath() + hdfsTestDataSourceFile);
        hdfsTestDataTarget = new Path(hdfs.getUri().getPath() + hdfsTestDataTargetFile);

        System.out.println("copyHDFSData@" + className + " : HDFS TEST DATA : " + hdfsTestDataSource
                + " : Exists :-> " + hdfs.exists(hdfsTestDataSource));
        System.out.println("copyHDFSData@" + className + " : HDFS DOMAIN DATA : " + hdfsTestDataTarget
                + " : Exists :-> " + hdfs.exists(hdfsTestDataTarget));

    } catch (IOException e) {

        e.printStackTrace();
        throw new OozieClientException("ERR_CODE_1218",
                "copyHDFSData@" + className + " : IOException while getting HDFS FileSystem - EXITING...");
    }

    FileUtil hdfsUtil = new FileUtil();

    try {

        hdfsUtil.copy(hdfs, hdfsTestDataSource, hdfs, hdfsTestDataTarget, false, true, hdfs.getConf());

        System.out.println("copyHDFSData@" + className + " : NOW : HDFS TEST DATA : " + hdfsTestDataSource
                + " : Exists :-> " + hdfs.exists(hdfsTestDataSource));
        System.out.println("copyHDFSData@" + className + " : HDFS DOMAIN DATA : " + hdfsTestDataTarget
                + " : Exists :-> " + hdfs.exists(hdfsTestDataTarget));

    } catch (IOException e) {

        e.printStackTrace();
        throw new OozieClientException("ERR_CODE_1218",
                "copyHDFSData@" + className + " : IOException while Copying HDFS Data - EXITING...");
    }

    /**
     * IMPORTANT
     * If the Source Data file on HDFS is not owned by the Hive/Hadoop User, then use the command below to
     * change the permission for Hive/Hadoop User to move/delete the file once processed...
     */
    try {

        hdfs.setPermission(hdfsTestDataTarget,
                new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.READ_EXECUTE));
    } catch (IOException e) {

        e.printStackTrace();
        throw new OozieClientException("ERR_CODE_1218", "copyHDFSData@" + className
                + " : IOException while Changing HDFS File Permissions - EXITING...");
    }

}

From source file:dima.kmeansseq.SequenceFile.java

License:Apache License

/**
 * Construct the preferred type of SequenceFile Writer.
 *
 * @param fs
 *            The configured filesystem.
 * @param conf
 *            The configuration.
 * @param name
 *            The name of the file.
 * @param keyClass
 *            The 'key' type.
 * @param valClass
 *            The 'value' type.
 * @param compressionType
 *            The compression type.
 * @return Returns the handle to the constructed SequenceFile Writer.
 * @throws IOException
 */
public static Writer createWriter(FileSystem fs, Configuration conf, Path name, Class keyClass, Class valClass,
        CompressionType compressionType) throws IOException {
    return createWriter(fs, conf, name, keyClass, valClass, fs.getConf().getInt("io.file.buffer.size", 4096),
            fs.getDefaultReplication(), fs.getDefaultBlockSize(), compressionType, new DefaultCodec(), null,
            new Metadata());
}

From source file:dima.kmeansseq.SequenceFile.java

License:Apache License

/**
 * Construct the preferred type of SequenceFile Writer.
 *
 * @param fs
 *            The configured filesystem.
 * @param conf
 *            The configuration.
 * @param name
 *            The name of the file.
 * @param keyClass
 *            The 'key' type.
 * @param valClass
 *            The 'value' type.
 * @param compressionType
 *            The compression type.
 * @param progress
 *            The Progressable object to track progress.
 * @return Returns the handle to the constructed SequenceFile Writer.
 * @throws IOException
 */
public static Writer createWriter(FileSystem fs, Configuration conf, Path name, Class keyClass, Class valClass,
        CompressionType compressionType, Progressable progress) throws IOException {
    return createWriter(fs, conf, name, keyClass, valClass, fs.getConf().getInt("io.file.buffer.size", 4096),
            fs.getDefaultReplication(), fs.getDefaultBlockSize(), compressionType, new DefaultCodec(), progress,
            new Metadata());
}

From source file:dima.kmeansseq.SequenceFile.java

License:Apache License

/**
 * Construct the preferred type of SequenceFile Writer.
 *
 * @param fs
 *            The configured filesystem.
 * @param conf
 *            The configuration.
 * @param name
 *            The name of the file.
 * @param keyClass
 *            The 'key' type.
 * @param valClass
 *            The 'value' type.
 * @param compressionType
 *            The compression type.
 * @param codec
 *            The compression codec.
 * @return Returns the handle to the constructed SequenceFile Writer.
 * @throws IOException
 */
public static Writer createWriter(FileSystem fs, Configuration conf, Path name, Class keyClass, Class valClass,
        CompressionType compressionType, CompressionCodec codec) throws IOException {
    return createWriter(fs, conf, name, keyClass, valClass, fs.getConf().getInt("io.file.buffer.size", 4096),
            fs.getDefaultReplication(), fs.getDefaultBlockSize(), compressionType, codec, null, new Metadata());
}

From source file:dima.kmeansseq.SequenceFile.java

License:Apache License

/**
 * Construct the preferred type of SequenceFile Writer.
 *
 * @param fs
 *            The configured filesystem.
 * @param conf
 *            The configuration.
 * @param name
 *            The name of the file.
 * @param keyClass
 *            The 'key' type.
 * @param valClass
 *            The 'value' type.
 * @param compressionType
 *            The compression type.
 * @param codec
 *            The compression codec.
 * @param progress
 *            The Progressable object to track progress.
 * @param metadata
 *            The metadata of the file.
 * @return Returns the handle to the constructed SequenceFile Writer.
 * @throws IOException
 */
public static Writer createWriter(FileSystem fs, Configuration conf, Path name, Class keyClass, Class valClass,
        CompressionType compressionType, CompressionCodec codec, Progressable progress, Metadata metadata)
        throws IOException {
    return createWriter(fs, conf, name, keyClass, valClass, fs.getConf().getInt("io.file.buffer.size", 4096),
            fs.getDefaultReplication(), fs.getDefaultBlockSize(), compressionType, codec, progress, metadata);
}