List of usage examples for org.apache.hadoop.fs.FileSystem.newInstance
public static FileSystem newInstance(Configuration conf) throws IOException
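Unlike FileSystem.get(Configuration), newInstance returns a new FileSystem object rather than the shared cached one, so the caller owns the instance and should close it when done. A minimal usage sketch of that pattern (the output path is hypothetical):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class NewInstanceExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // newInstance gives this caller its own FileSystem object, so closing it
        // does not interfere with other code that uses the cached instance.
        try (FileSystem fs = FileSystem.newInstance(conf)) {
            Path out = new Path("/tmp/example-output"); // hypothetical path
            if (fs.exists(out)) {
                fs.delete(out, true); // recursive delete
            }
        }
    }
}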
From source file:com.pagerankcalculator.TwitterPageRank.java
public int calculatePagerank(String in, String out, int iteration)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(getConf());
    job.setJobName("[" + TwitterPageRank.AUTHOR + "]: Job#2 Iteration-" + iteration + " Calculating Page Rank");
    job.setJarByClass(TwitterPageRank.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(PageRankCalculationMapper.class);
    job.setReducerClass(PageRankCalculationReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setNumReduceTasks(TwitterPageRank.NUM_REDUCE_TASKS);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    Path inputFilePath = new Path(in);
    Path outputFilePath = new Path(out);

    FileInputFormat.addInputPath(job, inputFilePath);
    FileOutputFormat.setOutputPath(job, outputFilePath);

    FileSystem fs = FileSystem.newInstance(getConf());
    if (fs.exists(outputFilePath)) {
        fs.delete(outputFilePath, true);
    }

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.pagerankcalculator.TwitterPageRank.java
public int sortPagerank(String in, String out)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(getConf());
    job.setJobName("[" + TwitterPageRank.AUTHOR + "]: Job#3 Sorting Page Rank");
    job.setJarByClass(TwitterPageRank.class);

    job.setMapOutputKeyClass(DoubleWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(PageRankSortingMapper.class);
    job.setReducerClass(PageRankSortingReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setNumReduceTasks(1);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    job.setSortComparatorClass(DoubleSortDescComparator.class);

    Path inputFilePath = new Path(in);
    Path outputFilePath = new Path(out);

    FileInputFormat.addInputPath(job, inputFilePath);
    FileOutputFormat.setOutputPath(job, outputFilePath);

    FileSystem fs = FileSystem.newInstance(getConf());
    if (fs.exists(outputFilePath)) {
        fs.delete(outputFilePath, true);
    }

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:de.l3s.concatgz.io.ImmediateOutput.java
License:Open Source License
public ImmediateOutput(TaskInputOutputContext context, boolean flushOnWrite) throws IOException {
    this.context = context;
    this.flushOnWrite = flushOnWrite;
    Configuration conf = context.getConfiguration();
    this.dir = getPath(conf);
    this.fs = FileSystem.newInstance(conf);
    this.bufferSize = conf.getInt("io.file.buffer.size", 4096);
    this.replication = getReplication(conf);
    String idPrefix = getIdPrefix(conf);
    file = "" + context.getTaskAttemptID().getTaskID().getId();
    while (file.length() < 5)
        file = "0" + file;
    if (idPrefix.length() > 0)
        file = idPrefix + "-" + file;
    file = "-" + file;
}
From source file:gobblin.compliance.HivePurgerExtractor.java
License:Apache License
/**
 * @throws IOException
 * @Returns Partition from the partition name.
 */
private Partition getHiveTablePartition(String partitionName, Properties properties) {
    Partition hiveTablePartition = null;
    try {
        properties.setProperty(HivePurgerConfigurationKeys.HIVE_DATASET_WHITELIST,
                getCompleteTableName(partitionName));
        IterableDatasetFinder<HiveDataset> datasetFinder = new HiveDatasetFinder(
                FileSystem.newInstance(HadoopUtils.newConfiguration()), properties);
        Iterator<HiveDataset> hiveDatasetIterator = datasetFinder.getDatasetsIterator();
        Preconditions.checkArgument(hiveDatasetIterator.hasNext(), "Unable to find table to update from");
        HiveDataset hiveDataset = hiveDatasetIterator.next();
        List<Partition> partitions = hiveDataset.getPartitionsFromDataset();
        Preconditions.checkArgument(!partitions.isEmpty(),
                "No partitions found for " + getCompleteTableName(partitionName));
        for (Partition partition : partitions) {
            if (partition.getCompleteName().equals(partitionName)) {
                hiveTablePartition = partition;
            }
        }
    } catch (IOException e) {
        Throwables.propagate(e);
    }
    Preconditions.checkNotNull(hiveTablePartition, "Cannot find the required partition " + partitionName);
    return hiveTablePartition;
}
From source file:gobblin.compliance.HivePurgerSource.java
License:Apache License
@VisibleForTesting
protected void initialize(SourceState state) throws IOException {
    setTimeStamp();
    this.setLowWatermark(state);
    this.maxWorkUnits = state.getPropAsInt(HivePurgerConfigurationKeys.MAX_WORKUNITS_KEY,
            HivePurgerConfigurationKeys.DEFAULT_MAX_WORKUNITS);
    this.maxWorkUnitExecutionAttempts = state.getPropAsInt(
            HivePurgerConfigurationKeys.MAX_WORKUNIT_EXECUTION_ATTEMPTS_KEY,
            HivePurgerConfigurationKeys.DEFAULT_MAX_WORKUNIT_EXECUTION_ATTEMPTS);
    // TODO: Event submitter and metrics will be added later
    this.datasetFinder = new HiveDatasetFinder(FileSystem.newInstance(HadoopUtils.newConfiguration()),
            state.getProperties());
    populateDatasets();
}
From source file:gobblin.compliance.retention.ComplianceRetentionJob.java
License:Apache License
public void initDatasetFinder(Properties properties) throws IOException {
    Preconditions.checkArgument(properties.containsKey(GOBBLIN_COMPLIANCE_DATASET_FINDER_CLASS),
            "Missing required propety " + GOBBLIN_COMPLIANCE_DATASET_FINDER_CLASS);
    String finderClass = properties.getProperty(GOBBLIN_COMPLIANCE_DATASET_FINDER_CLASS);
    this.finder = GobblinConstructorUtils.invokeConstructor(DatasetsFinder.class, finderClass,
            new State(properties));
    Iterator<HiveDataset> datasetsIterator = new HiveDatasetFinder(FileSystem.newInstance(new Configuration()),
            properties).getDatasetsIterator();
    while (datasetsIterator.hasNext()) {
        // Drop partitions from empty tables if property is set, otherwise skip the table
        HiveDataset hiveDataset = datasetsIterator.next();
        List<Partition> partitionsFromDataset = hiveDataset.getPartitionsFromDataset();
        String completeTableName = hiveDataset.getTable().getCompleteName();
        if (!partitionsFromDataset.isEmpty()) {
            this.tableNamesList.add(completeTableName);
            continue;
        }
        if (!Boolean.parseBoolean(properties.getProperty(ComplianceConfigurationKeys.SHOULD_DROP_EMPTY_TABLES,
                ComplianceConfigurationKeys.DEFAULT_SHOULD_DROP_EMPTY_TABLES))) {
            continue;
        }
        if (completeTableName.contains(ComplianceConfigurationKeys.TRASH)
                || completeTableName.contains(ComplianceConfigurationKeys.BACKUP)
                || completeTableName.contains(ComplianceConfigurationKeys.STAGING)) {
            this.tablesToDrop.add(hiveDataset);
        }
    }
}
From source file:io.druid.storage.hdfs.HdfsDataSegmentPusherTest.java
License:Apache License
private void testUsingScheme(final String scheme) throws Exception {
    Configuration conf = new Configuration(true);

    // Create a mock segment on disk
    File segmentDir = tempFolder.newFolder();
    File tmp = new File(segmentDir, "version.bin");

    final byte[] data = new byte[] { 0x0, 0x0, 0x0, 0x1 };
    Files.write(data, tmp);
    final long size = data.length;

    HdfsDataSegmentPusherConfig config = new HdfsDataSegmentPusherConfig();
    final File storageDirectory = tempFolder.newFolder();

    config.setStorageDirectory(
            scheme != null ? StringUtils.format("%s://%s", scheme, storageDirectory.getAbsolutePath())
                    : storageDirectory.getAbsolutePath());
    HdfsDataSegmentPusher pusher = new HdfsDataSegmentPusher(config, conf, new DefaultObjectMapper());

    DataSegment segmentToPush = new DataSegment("foo", new Interval("2015/2016"), "0",
            Maps.<String, Object>newHashMap(), Lists.<String>newArrayList(), Lists.<String>newArrayList(),
            NoneShardSpec.instance(), 0, size);

    DataSegment segment = pusher.push(segmentDir, segmentToPush);

    String indexUri = StringUtils.format("%s/%s/%d_index.zip",
            FileSystem.newInstance(conf).makeQualified(new Path(config.getStorageDirectory())).toUri()
                    .toString(),
            pusher.getStorageDir(segmentToPush), segmentToPush.getShardSpec().getPartitionNum());

    Assert.assertEquals(segmentToPush.getSize(), segment.getSize());
    Assert.assertEquals(segmentToPush, segment);
    Assert.assertEquals(ImmutableMap.of("type", "hdfs", "path", indexUri), segment.getLoadSpec());

    // rename directory after push
    final String segmentPath = pusher.getStorageDir(segment);

    File indexFile = new File(StringUtils.format("%s/%s/%d_index.zip", storageDirectory, segmentPath,
            segment.getShardSpec().getPartitionNum()));
    Assert.assertTrue(indexFile.exists());

    File descriptorFile = new File(StringUtils.format("%s/%s/%d_descriptor.json", storageDirectory, segmentPath,
            segment.getShardSpec().getPartitionNum()));
    Assert.assertTrue(descriptorFile.exists());

    // push twice will fail and temp dir cleaned
    File outDir = new File(StringUtils.format("%s/%s", config.getStorageDirectory(), segmentPath));
    outDir.setReadOnly();
    try {
        pusher.push(segmentDir, segmentToPush);
    } catch (IOException e) {
        Assert.fail("should not throw exception");
    }
}
From source file:io.druid.storage.hdfs.HdfsDataSegmentPusherTest.java
License:Apache License
private void testUsingSchemeForMultipleSegments(final String scheme, final int numberOfSegments)
        throws Exception {
    Configuration conf = new Configuration(true);
    DataSegment[] segments = new DataSegment[numberOfSegments];

    // Create a mock segment on disk
    File segmentDir = tempFolder.newFolder();
    File tmp = new File(segmentDir, "version.bin");

    final byte[] data = new byte[] { 0x0, 0x0, 0x0, 0x1 };
    Files.write(data, tmp);
    final long size = data.length;

    HdfsDataSegmentPusherConfig config = new HdfsDataSegmentPusherConfig();
    final File storageDirectory = tempFolder.newFolder();

    config.setStorageDirectory(
            scheme != null ? StringUtils.format("%s://%s", scheme, storageDirectory.getAbsolutePath())
                    : storageDirectory.getAbsolutePath());
    HdfsDataSegmentPusher pusher = new HdfsDataSegmentPusher(config, conf, new DefaultObjectMapper());

    for (int i = 0; i < numberOfSegments; i++) {
        segments[i] = new DataSegment("foo", new Interval("2015/2016"), "0", Maps.<String, Object>newHashMap(),
                Lists.<String>newArrayList(), Lists.<String>newArrayList(), new NumberedShardSpec(i, i), 0,
                size);
    }

    for (int i = 0; i < numberOfSegments; i++) {
        final DataSegment pushedSegment = pusher.push(segmentDir, segments[i]);

        String indexUri = StringUtils.format("%s/%s/%d_index.zip",
                FileSystem.newInstance(conf).makeQualified(new Path(config.getStorageDirectory())).toUri()
                        .toString(),
                pusher.getStorageDir(segments[i]), segments[i].getShardSpec().getPartitionNum());

        Assert.assertEquals(segments[i].getSize(), pushedSegment.getSize());
        Assert.assertEquals(segments[i], pushedSegment);
        Assert.assertEquals(ImmutableMap.of("type", "hdfs", "path", indexUri), pushedSegment.getLoadSpec());

        // rename directory after push
        String segmentPath = pusher.getStorageDir(pushedSegment);

        File indexFile = new File(StringUtils.format("%s/%s/%d_index.zip", storageDirectory, segmentPath,
                pushedSegment.getShardSpec().getPartitionNum()));
        Assert.assertTrue(indexFile.exists());

        File descriptorFile = new File(StringUtils.format("%s/%s/%d_descriptor.json", storageDirectory,
                segmentPath, pushedSegment.getShardSpec().getPartitionNum()));
        Assert.assertTrue(descriptorFile.exists());

        // read actual data from descriptor file.
        DataSegment fromDescriptorFileDataSegment = objectMapper.readValue(descriptorFile, DataSegment.class);

        Assert.assertEquals(segments[i].getSize(), pushedSegment.getSize());
        Assert.assertEquals(segments[i], pushedSegment);
        Assert.assertEquals(ImmutableMap.of("type", "hdfs", "path", indexUri),
                fromDescriptorFileDataSegment.getLoadSpec());

        // rename directory after push
        segmentPath = pusher.getStorageDir(fromDescriptorFileDataSegment);

        indexFile = new File(StringUtils.format("%s/%s/%d_index.zip", storageDirectory, segmentPath,
                fromDescriptorFileDataSegment.getShardSpec().getPartitionNum()));
        Assert.assertTrue(indexFile.exists());

        // push twice will fail and temp dir cleaned
        File outDir = new File(StringUtils.format("%s/%s", config.getStorageDirectory(), segmentPath));
        outDir.setReadOnly();
        try {
            pusher.push(segmentDir, segments[i]);
        } catch (IOException e) {
            Assert.fail("should not throw exception");
        }
    }
}
From source file:io.hops.experiments.utils.DFSOperationsUtils.java
License:Apache License
public static FileSystem getDFSClient(Configuration conf) throws IOException {
    if (SERVER_LESS_MODE) {
        serverLessModeRandomWait();
        return null;
    }
    FileSystem client = dfsClients.get();
    if (client == null) {
        client = (FileSystem) FileSystem.newInstance(conf);
        dfsClients.set(client);
        System.out.println(Thread.currentThread().getName() + " Creating new client. Total: "
                + dfsClientsCount.incrementAndGet() + " New Client is: " + client);
    } else {
        System.out.println("Reusing Existing Client " + client);
    }
    return client;
}
From source file:io.warp10.continuum.store.HFileStats.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    CacheConfig cacheConf = new CacheConfig(conf);

    FileSystem fs = FileSystem.newInstance(conf);

    FileStatus[] pathes = fs.globStatus(new Path(args[0]));

    long bytes = 0L;
    long cells = 0L;

    for (FileStatus status : pathes) {
        try {
            HFile.Reader reader = HFile.createReader(fs, status.getPath(), cacheConf, conf);
            bytes += reader.length();
            cells += reader.getEntries();
            System.out.println(
                    status.getPath() + " >>> " + reader.length() + " bytes " + reader.getEntries() + " cells");
            reader.close();
        } catch (Exception e) {
            continue;
        }
    }

    System.out.println(
            "TOTAL: " + cells + " cells " + bytes + " bytes " + (bytes / (double) cells) + " bytes/cell");

    long ts = System.currentTimeMillis();

    System.out.println(ts * 1000 + "// hbase.bytes{} " + bytes);
    System.out.println(ts * 1000 + "// hbase.datapoints{} " + cells);
}