List of usage examples for org.apache.hadoop.fs.FileSystem.newInstance
public static FileSystem newInstance(Configuration conf) throws IOException
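Unlike FileSystem.get(Configuration), newInstance returns a new FileSystem object rather than the shared cached one, so the caller owns the instance and should close it when done. A minimal usage sketch of that pattern (the output path is hypothetical):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class NewInstanceExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // newInstance gives this caller its own FileSystem object, so closing it
        // does not interfere with other code that uses the cached instance.
        try (FileSystem fs = FileSystem.newInstance(conf)) {
            Path out = new Path("/tmp/example-output"); // hypothetical path
            if (fs.exists(out)) {
                fs.delete(out, true); // recursive delete
            }
        }
    }
}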
From source file:com.pagerankcalculator.TwitterPageRank.java
public int calculatePagerank(String in, String out, int iteration)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(getConf());
    job.setJobName("[" + TwitterPageRank.AUTHOR + "]: Job#2 Iteration-" + iteration + " Calculating Page Rank");
    job.setJarByClass(TwitterPageRank.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(PageRankCalculationMapper.class);
    job.setReducerClass(PageRankCalculationReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setNumReduceTasks(TwitterPageRank.NUM_REDUCE_TASKS);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    Path inputFilePath = new Path(in);
    Path outputFilePath = new Path(out);

    FileInputFormat.addInputPath(job, inputFilePath);
    FileOutputFormat.setOutputPath(job, outputFilePath);

    FileSystem fs = FileSystem.newInstance(getConf());
    if (fs.exists(outputFilePath)) {
        fs.delete(outputFilePath, true);
    }

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.pagerankcalculator.TwitterPageRank.java
public int sortPagerank(String in, String out)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(getConf());
    job.setJobName("[" + TwitterPageRank.AUTHOR + "]: Job#3 Sorting Page Rank");
    job.setJarByClass(TwitterPageRank.class);

    job.setMapOutputKeyClass(DoubleWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(PageRankSortingMapper.class);
    job.setReducerClass(PageRankSortingReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setNumReduceTasks(1);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    job.setSortComparatorClass(DoubleSortDescComparator.class);

    Path inputFilePath = new Path(in);
    Path outputFilePath = new Path(out);

    FileInputFormat.addInputPath(job, inputFilePath);
    FileOutputFormat.setOutputPath(job, outputFilePath);

    FileSystem fs = FileSystem.newInstance(getConf());
    if (fs.exists(outputFilePath)) {
        fs.delete(outputFilePath, true);
    }

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:de.l3s.concatgz.io.ImmediateOutput.java
License:Open Source License
public ImmediateOutput(TaskInputOutputContext context, boolean flushOnWrite) throws IOException {
    this.context = context;
    this.flushOnWrite = flushOnWrite;
    Configuration conf = context.getConfiguration();
    this.dir = getPath(conf);
    this.fs = FileSystem.newInstance(conf);
    this.bufferSize = conf.getInt("io.file.buffer.size", 4096);
    this.replication = getReplication(conf);
    String idPrefix = getIdPrefix(conf);
    file = "" + context.getTaskAttemptID().getTaskID().getId();
    while (file.length() < 5)
        file = "0" + file;
    if (idPrefix.length() > 0)
        file = idPrefix + "-" + file;
    file = "-" + file;
}
From source file:gobblin.compliance.HivePurgerExtractor.java
License:Apache License
/**
 * @throws IOException
 * @Returns Partition from the partition name.
 */
private Partition getHiveTablePartition(String partitionName, Properties properties) {
    Partition hiveTablePartition = null;
    try {
        properties.setProperty(HivePurgerConfigurationKeys.HIVE_DATASET_WHITELIST,
                getCompleteTableName(partitionName));
        IterableDatasetFinder<HiveDataset> datasetFinder = new HiveDatasetFinder(
                FileSystem.newInstance(HadoopUtils.newConfiguration()), properties);
        Iterator<HiveDataset> hiveDatasetIterator = datasetFinder.getDatasetsIterator();
        Preconditions.checkArgument(hiveDatasetIterator.hasNext(), "Unable to find table to update from");
        HiveDataset hiveDataset = hiveDatasetIterator.next();
        List<Partition> partitions = hiveDataset.getPartitionsFromDataset();
        Preconditions.checkArgument(!partitions.isEmpty(),
                "No partitions found for " + getCompleteTableName(partitionName));
        for (Partition partition : partitions) {
            if (partition.getCompleteName().equals(partitionName)) {
                hiveTablePartition = partition;
            }
        }
    } catch (IOException e) {
        Throwables.propagate(e);
    }
    Preconditions.checkNotNull(hiveTablePartition, "Cannot find the required partition " + partitionName);
    return hiveTablePartition;
}
From source file:gobblin.compliance.HivePurgerSource.java
License:Apache License
@VisibleForTesting
protected void initialize(SourceState state) throws IOException {
    setTimeStamp();
    this.setLowWatermark(state);
    this.maxWorkUnits = state.getPropAsInt(HivePurgerConfigurationKeys.MAX_WORKUNITS_KEY,
            HivePurgerConfigurationKeys.DEFAULT_MAX_WORKUNITS);
    this.maxWorkUnitExecutionAttempts = state.getPropAsInt(
            HivePurgerConfigurationKeys.MAX_WORKUNIT_EXECUTION_ATTEMPTS_KEY,
            HivePurgerConfigurationKeys.DEFAULT_MAX_WORKUNIT_EXECUTION_ATTEMPTS);
    // TODO: Event submitter and metrics will be added later
    this.datasetFinder = new HiveDatasetFinder(FileSystem.newInstance(HadoopUtils.newConfiguration()),
            state.getProperties());
    populateDatasets();
}
From source file:gobblin.compliance.retention.ComplianceRetentionJob.java
License:Apache License
public void initDatasetFinder(Properties properties) throws IOException {
    Preconditions.checkArgument(properties.containsKey(GOBBLIN_COMPLIANCE_DATASET_FINDER_CLASS),
            "Missing required propety " + GOBBLIN_COMPLIANCE_DATASET_FINDER_CLASS);
    String finderClass = properties.getProperty(GOBBLIN_COMPLIANCE_DATASET_FINDER_CLASS);
    this.finder = GobblinConstructorUtils.invokeConstructor(DatasetsFinder.class, finderClass,
            new State(properties));
    Iterator<HiveDataset> datasetsIterator = new HiveDatasetFinder(FileSystem.newInstance(new Configuration()),
            properties).getDatasetsIterator();
    while (datasetsIterator.hasNext()) {
        // Drop partitions from empty tables if property is set, otherwise skip the table
        HiveDataset hiveDataset = datasetsIterator.next();
        List<Partition> partitionsFromDataset = hiveDataset.getPartitionsFromDataset();
        String completeTableName = hiveDataset.getTable().getCompleteName();
        if (!partitionsFromDataset.isEmpty()) {
            this.tableNamesList.add(completeTableName);
            continue;
        }
        if (!Boolean.parseBoolean(properties.getProperty(ComplianceConfigurationKeys.SHOULD_DROP_EMPTY_TABLES,
                ComplianceConfigurationKeys.DEFAULT_SHOULD_DROP_EMPTY_TABLES))) {
            continue;
        }
        if (completeTableName.contains(ComplianceConfigurationKeys.TRASH)
                || completeTableName.contains(ComplianceConfigurationKeys.BACKUP)
                || completeTableName.contains(ComplianceConfigurationKeys.STAGING)) {
            this.tablesToDrop.add(hiveDataset);
        }
    }
}
From source file:io.druid.storage.hdfs.HdfsDataSegmentPusherTest.java
License:Apache License
private void testUsingScheme(final String scheme) throws Exception {
    Configuration conf = new Configuration(true);

    // Create a mock segment on disk
    File segmentDir = tempFolder.newFolder();
    File tmp = new File(segmentDir, "version.bin");

    final byte[] data = new byte[] { 0x0, 0x0, 0x0, 0x1 };
    Files.write(data, tmp);
    final long size = data.length;

    HdfsDataSegmentPusherConfig config = new HdfsDataSegmentPusherConfig();
    final File storageDirectory = tempFolder.newFolder();

    config.setStorageDirectory(
            scheme != null ? StringUtils.format("%s://%s", scheme, storageDirectory.getAbsolutePath())
                    : storageDirectory.getAbsolutePath());
    HdfsDataSegmentPusher pusher = new HdfsDataSegmentPusher(config, conf, new DefaultObjectMapper());

    DataSegment segmentToPush = new DataSegment("foo", new Interval("2015/2016"), "0",
            Maps.<String, Object>newHashMap(), Lists.<String>newArrayList(), Lists.<String>newArrayList(),
            NoneShardSpec.instance(), 0, size);

    DataSegment segment = pusher.push(segmentDir, segmentToPush);

    String indexUri = StringUtils.format("%s/%s/%d_index.zip",
            FileSystem.newInstance(conf).makeQualified(new Path(config.getStorageDirectory())).toUri()
                    .toString(),
            pusher.getStorageDir(segmentToPush), segmentToPush.getShardSpec().getPartitionNum());

    Assert.assertEquals(segmentToPush.getSize(), segment.getSize());
    Assert.assertEquals(segmentToPush, segment);
    Assert.assertEquals(ImmutableMap.of("type", "hdfs", "path", indexUri), segment.getLoadSpec());

    // rename directory after push
    final String segmentPath = pusher.getStorageDir(segment);

    File indexFile = new File(StringUtils.format("%s/%s/%d_index.zip", storageDirectory, segmentPath,
            segment.getShardSpec().getPartitionNum()));
    Assert.assertTrue(indexFile.exists());

    File descriptorFile = new File(StringUtils.format("%s/%s/%d_descriptor.json", storageDirectory, segmentPath,
            segment.getShardSpec().getPartitionNum()));
    Assert.assertTrue(descriptorFile.exists());

    // push twice will fail and temp dir cleaned
    File outDir = new File(StringUtils.format("%s/%s", config.getStorageDirectory(), segmentPath));
    outDir.setReadOnly();
    try {
        pusher.push(segmentDir, segmentToPush);
    } catch (IOException e) {
        Assert.fail("should not throw exception");
    }
}
From source file:io.druid.storage.hdfs.HdfsDataSegmentPusherTest.java
License:Apache License
private void testUsingSchemeForMultipleSegments(final String scheme, final int numberOfSegments)
        throws Exception {
    Configuration conf = new Configuration(true);
    DataSegment[] segments = new DataSegment[numberOfSegments];

    // Create a mock segment on disk
    File segmentDir = tempFolder.newFolder();
    File tmp = new File(segmentDir, "version.bin");

    final byte[] data = new byte[] { 0x0, 0x0, 0x0, 0x1 };
    Files.write(data, tmp);
    final long size = data.length;

    HdfsDataSegmentPusherConfig config = new HdfsDataSegmentPusherConfig();
    final File storageDirectory = tempFolder.newFolder();

    config.setStorageDirectory(
            scheme != null ? StringUtils.format("%s://%s", scheme, storageDirectory.getAbsolutePath())
                    : storageDirectory.getAbsolutePath());
    HdfsDataSegmentPusher pusher = new HdfsDataSegmentPusher(config, conf, new DefaultObjectMapper());

    for (int i = 0; i < numberOfSegments; i++) {
        segments[i] = new DataSegment("foo", new Interval("2015/2016"), "0", Maps.<String, Object>newHashMap(),
                Lists.<String>newArrayList(), Lists.<String>newArrayList(), new NumberedShardSpec(i, i), 0,
                size);
    }

    for (int i = 0; i < numberOfSegments; i++) {
        final DataSegment pushedSegment = pusher.push(segmentDir, segments[i]);

        String indexUri = StringUtils.format("%s/%s/%d_index.zip",
                FileSystem.newInstance(conf).makeQualified(new Path(config.getStorageDirectory())).toUri()
                        .toString(),
                pusher.getStorageDir(segments[i]), segments[i].getShardSpec().getPartitionNum());

        Assert.assertEquals(segments[i].getSize(), pushedSegment.getSize());
        Assert.assertEquals(segments[i], pushedSegment);
        Assert.assertEquals(ImmutableMap.of("type", "hdfs", "path", indexUri), pushedSegment.getLoadSpec());

        // rename directory after push
        String segmentPath = pusher.getStorageDir(pushedSegment);

        File indexFile = new File(StringUtils.format("%s/%s/%d_index.zip", storageDirectory, segmentPath,
                pushedSegment.getShardSpec().getPartitionNum()));
        Assert.assertTrue(indexFile.exists());

        File descriptorFile = new File(StringUtils.format("%s/%s/%d_descriptor.json", storageDirectory,
                segmentPath, pushedSegment.getShardSpec().getPartitionNum()));
        Assert.assertTrue(descriptorFile.exists());

        // read actual data from descriptor file.
        DataSegment fromDescriptorFileDataSegment = objectMapper.readValue(descriptorFile, DataSegment.class);

        Assert.assertEquals(segments[i].getSize(), pushedSegment.getSize());
        Assert.assertEquals(segments[i], pushedSegment);
        Assert.assertEquals(ImmutableMap.of("type", "hdfs", "path", indexUri),
                fromDescriptorFileDataSegment.getLoadSpec());

        // rename directory after push
        segmentPath = pusher.getStorageDir(fromDescriptorFileDataSegment);

        indexFile = new File(StringUtils.format("%s/%s/%d_index.zip", storageDirectory, segmentPath,
                fromDescriptorFileDataSegment.getShardSpec().getPartitionNum()));
        Assert.assertTrue(indexFile.exists());

        // push twice will fail and temp dir cleaned
        File outDir = new File(StringUtils.format("%s/%s", config.getStorageDirectory(), segmentPath));
        outDir.setReadOnly();
        try {
            pusher.push(segmentDir, segments[i]);
        } catch (IOException e) {
            Assert.fail("should not throw exception");
        }
    }
}
From source file:io.hops.experiments.utils.DFSOperationsUtils.java
License:Apache License
public static FileSystem getDFSClient(Configuration conf) throws IOException {
    if (SERVER_LESS_MODE) {
        serverLessModeRandomWait();
        return null;
    }
    FileSystem client = dfsClients.get();
    if (client == null) {
        client = (FileSystem) FileSystem.newInstance(conf);
        dfsClients.set(client);
        System.out.println(Thread.currentThread().getName() + " Creating new client. Total: "
                + dfsClientsCount.incrementAndGet() + " New Client is: " + client);
    } else {
        System.out.println("Reusing Existing Client " + client);
    }
    return client;
}
From source file:io.warp10.continuum.store.HFileStats.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    CacheConfig cacheConf = new CacheConfig(conf);

    FileSystem fs = FileSystem.newInstance(conf);

    FileStatus[] pathes = fs.globStatus(new Path(args[0]));

    long bytes = 0L;
    long cells = 0L;

    for (FileStatus status : pathes) {
        try {
            HFile.Reader reader = HFile.createReader(fs, status.getPath(), cacheConf, conf);
            bytes += reader.length();
            cells += reader.getEntries();
            System.out.println(
                    status.getPath() + " >>> " + reader.length() + " bytes " + reader.getEntries() + " cells");
            reader.close();
        } catch (Exception e) {
            continue;
        }
    }

    System.out.println(
            "TOTAL: " + cells + " cells " + bytes + " bytes " + (bytes / (double) cells) + " bytes/cell");

    long ts = System.currentTimeMillis();

    System.out.println(ts * 1000 + "// hbase.bytes{} " + bytes);
    System.out.println(ts * 1000 + "// hbase.datapoints{} " + cells);
}