List of usage examples for org.apache.hadoop.fs.Path SEPARATOR
String SEPARATOR — the path-separator string ("/") used when composing Hadoop Path strings.
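Path.SEPARATOR is the URI-style separator that Hadoop's Path class uses regardless of the local platform's File.separator, which makes it the safe choice when assembling path strings by hand. A minimal sketch of the pattern the examples below repeat (the class name, user directory, and job id here are made up for illustration):

import org.apache.hadoop.fs.Path;

public class SeparatorDemo {
    public static void main(String[] args) {
        // Join components with Path.SEPARATOR ("/") rather than File.separator,
        // which would be "\" on Windows and produce an invalid Hadoop path.
        String staging = ".staging" + Path.SEPARATOR + "job_0001";
        Path p = new Path("/user" + Path.SEPARATOR + "alice", staging);
        System.out.println(p); // prints /user/alice/.staging/job_0001
    }
}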
From source file:HBaseBloomFilterSemiJoinSystemTest.java
License:Apache License
@Test
public void testBloomFilterSemiJoinDirectly()
        throws IOException, InterruptedException, NoSuchFieldException, IllegalAccessException {
    NavigableMap<ByteBuffer, ListMultimap<ByteBuffer, BloomFilter>> regionIndex =
            NonAggregatingRegionObserver.buildIndex("test_table", util.getConfiguration(),
                    util.getTestFileSystem(),
                    new Path(util.getDefaultRootDirPath() + Path.SEPARATOR + "test_table"));
    assertSame("Unexpected number of regions.", 3, regionIndex.size());

    NavigableMap<HRegionInfo, ServerName> regions = table.getRegionLocations();
    for (Map.Entry<HRegionInfo, ServerName> entry : regions.entrySet()) {
        LOG.info("Using Region: " + entry.getKey() + " Server: " + entry.getValue());
    }

    for (Map.Entry<ByteBuffer, ListMultimap<ByteBuffer, BloomFilter>> entry : regionIndex.entrySet()) {
        assertSame("Unexpected number of HFiles.", 1,
                entry.getValue().get(ByteBuffer.wrap(ROW_COLBF_CF)).size());
    }

    CompoundBloomFilterBase bfEntryCreator = new CompoundBloomFilterBase();
    double falsePositivesCounter = 0.0;
    for (int i = 0; i < NUM_ROWS; i++) {
        byte[] key1 = toBytes("aaa" + i);
        byte[] key2 = toBytes("bbb" + i);
        byte[] key3 = toBytes("ccc" + i);
        // note: as written this assertion is vacuous; it checks only that the
        // concatenated message string is non-null
        assertNotNull("Could not find a region for key: " + new String(key2));

        // creates bbbXaaaX bf entry keys that must match region0 [bbb0, bbb999]
        // ROW_COL BF [bbb0aaa0, bbb999aaa999]
        byte[] bfMatchKey = bfEntryCreator.createBloomKey(key2, 0, key2.length, key1, 0, key1.length);
        BloomFilter bfMatch = bloomFilterForRowCol(regionIndex, key2);
        // bloom filters never return false negatives
        assertTrue("Unexpected result from the bloom filter: " + new String(bfMatchKey),
                bfMatch.contains(bfMatchKey, 0, bfMatchKey.length, null));

        // creates bbbXcccX bf entry keys that don't exist in region2 [bbb0, bbb999]
        // ROW_COL BF [bbb0aaa0, bbb999aaa999] but will match the index and therefore
        // may produce false positives
        byte[] bfNoMatchKeyFalsePositives =
                bfEntryCreator.createBloomKey(key2, 0, key2.length, key3, 0, key3.length);
        BloomFilter bfNoMatchFalsePositives = bloomFilterForRowCol(regionIndex, key2);
        if (bfNoMatchFalsePositives.contains(bfNoMatchKeyFalsePositives, 0,
                bfNoMatchKeyFalsePositives.length, null)) {
            falsePositivesCounter++;
        }

        // only for rows in the lower half whose index starts with a digit below 5
        if (i <= NUM_ROWS / 2 && Integer.parseInt((i + "").charAt(0) + "") < 5) {
            // creates cccXaaaX bf entry keys that don't exist in region3 [ccc0, ccc999]
            // ROW_COL BF [ccc0bbb0, ccc999bbb999] but won't match the index
            // (and therefore won't produce false positives)
            byte[] bfNoMatchKeyNoFalsePositives =
                    bfEntryCreator.createBloomKey(key3, 0, key3.length, key1, 0, key1.length);
            BloomFilter bfNoMatchNoFalsePositives = bloomFilterForRowCol(regionIndex, key3);
            assertFalse("Unexpected result from the bloom filter: " + new String(bfNoMatchKeyNoFalsePositives),
                    bfNoMatchNoFalsePositives.contains(bfNoMatchKeyNoFalsePositives, 0,
                            bfNoMatchKeyNoFalsePositives.length, null));
        }
    }

    double falsePositiveRate = falsePositivesCounter / NUM_ROWS;
    LOG.info("False positive rate: {}", falsePositiveRate);
    assertTrue("Unexpectedly high percentage of false positives: " + falsePositiveRate,
            falsePositiveRate < 0.1);
}
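In this test, Path.SEPARATOR only joins the HBase root directory with the table name to locate the table's files for buildIndex. The Bloom keys themselves come from CompoundBloomFilterBase.createBloomKey, which (in the HBase versions this test appears to target) concatenates a row key with a column qualifier, matching the per-HFile ROW_COL Bloom filters that the index stores.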
From source file:be.uantwerpen.adrem.hadoop.util.Tools.java
License:Apache License
public static String createPath(String... parts) {
    StringBuilder path = new StringBuilder();
    for (String part : parts) {
        path.append(part);
        path.append(Path.SEPARATOR);
    }
    // drop the trailing separator
    return path.substring(0, path.length() - 1);
}
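For example, createPath("tmp", "fim", "output") returns "tmp/fim/output". Note that the final substring call assumes at least one part was passed; with no arguments, path.length() - 1 is -1 and the call throws StringIndexOutOfBoundsException.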
From source file:be.uantwerpen.adrem.hadoop.util.Tools.java
License:Apache License
public static void cleanupSubdirsExcept(String dir, Collection<String> toKeep) {
    Path path = new Path(dir);
    try {
        for (FileStatus fs : path.getFileSystem(new Configuration()).listStatus(path)) {
            String[] sp = fs.getPath().toString().split(Path.SEPARATOR);
            String filename = sp[sp.length - 1];
            if (toKeep.contains(filename)) {
                cleanDirs(fs.getPath().toString() + Path.SEPARATOR + "_SUCCESS");
                continue;
            }
            cleanDirs(fs.getPath().toString());
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:cascading.flow.tez.Hadoop2TezFlow.java
License:Open Source License
private String createStagingRoot() {
    return ".staging" + Path.SEPARATOR + getID();
}
From source file:cascading.flow.tez.planner.Hadoop2TezFlowStepJob.java
License:Open Source License
String createStepStagingPath() {
    String result = "";

    if (HadoopUtil.isLocal(jobConfiguration))
        result = jobConfiguration.get("hadoop.tmp.dir") + Path.SEPARATOR;

    String flowStagingPath = ((Hadoop2TezFlow) flowStep.getFlow()).getFlowStagingPath();

    return result + flowStagingPath + Path.SEPARATOR + flowStep.getID();
}
From source file:cascading.tap.hadoop.Hadoop18TapUtil.java
License:Open Source License
private static Path getTaskOutputPath(JobConf conf) {
    String taskId = conf.get("mapred.task.id");

    Path p = new Path(FileOutputFormat.getOutputPath(conf),
            TEMPORARY_PATH + Path.SEPARATOR + "_" + taskId);

    try {
        FileSystem fs = p.getFileSystem(conf);
        return p.makeQualified(fs);
    } catch (IOException ie) {
        return p;
    }
}
From source file:cascading.tap.hadoop.util.Hadoop18TapUtil.java
License:Open Source License
private static Path getTaskOutputPath(Configuration conf) {
    String taskId = conf.get("mapred.task.id", conf.get("mapreduce.task.id"));

    Path p = new Path(FileOutputFormat.getOutputPath(asJobConfInstance(conf)),
            TEMPORARY_PATH + Path.SEPARATOR + "_" + taskId);

    try {
        FileSystem fs = p.getFileSystem(conf);
        return p.makeQualified(fs);
    } catch (IOException ie) {
        return p;
    }
}
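This second variant differs from the JobConf version above in two ways: it accepts a plain Configuration (converted with asJobConfInstance for FileOutputFormat), and it falls back to the newer "mapreduce.task.id" property when the legacy "mapred.task.id" key is unset.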
From source file:cc.slda.AnnotateDocuments.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg()
            .withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg()
            .withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg()
            .withDescription("number of reducers").create(NUM_REDUCERS));
    options.addOption(OptionBuilder.withArgName(PCUTOFF).hasArg()
            .withDescription("probability of topic assignment").create(PCUTOFF));
    options.addOption(OptionBuilder.withArgName(INDEX).hasArg()
            .withDescription("path to data directory containing term and title indices").create(INDEX));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(INDEX)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String indexPath = cmdline.getOptionValue(INDEX);
    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS)
            ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1;

    float cutoff = 0.9f;
    if (cmdline.hasOption(PCUTOFF)) {
        cutoff = Float.parseFloat(cmdline.getOptionValue(PCUTOFF));
    }

    LOG.info("Tool: " + AnnotateDocuments.class.getSimpleName());
    LOG.info(" - indices path: " + indexPath);
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - number of reducers: " + reduceTasks);
    LOG.info(" - log(probCutoff): " + Math.log(cutoff));

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    Job job = Job.getInstance(conf);
    job.setJobName(AnnotateDocuments.class.getSimpleName());
    job.setJarByClass(AnnotateDocuments.class);

    String termIndex = indexPath + Path.SEPARATOR + TERM;
    String titleIndex = indexPath + Path.SEPARATOR + TITLE;

    Path termIndexPath = new Path(termIndex);
    Path titleIndexPath = new Path(titleIndex);

    Preconditions.checkArgument(fs.exists(termIndexPath), "Missing term index files... " + termIndexPath);
    DistributedCache.addCacheFile(termIndexPath.toUri(), job.getConfiguration());
    Preconditions.checkArgument(fs.exists(titleIndexPath), "Missing title index files... " + titleIndexPath);
    DistributedCache.addCacheFile(titleIndexPath.toUri(), job.getConfiguration());

    job.setNumReduceTasks(reduceTasks);
    // set the cutoff on the job's configuration: Job.getInstance copies conf,
    // so mutating conf after this point would not reach the tasks
    job.getConfiguration().setFloat(PCUTOFF, cutoff);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(HMapSIW.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(HMapSIW.class);

    job.setMapperClass(MyMapper.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}
From source file:ch.sentric.hbase.coprocessor.LoadWithTableDescriptorExample.java
License:Apache License
public static void main(String[] args) throws IOException {
    Configuration conf = HBaseConfiguration.create();
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path(fs.getUri() + Path.SEPARATOR + "coprocessor-1.0-SNAPSHOT.jar");

    HTableDescriptor htd = new HTableDescriptor("testtable");
    htd.addFamily(new HColumnDescriptor("colfam1"));
    // coprocessor attribute format: <jar path>|<coprocessor class>|<priority>
    htd.setValue("COPROCESSOR$1", path.toString() + "|"
            + ProspectiveSearchRegionObserver.class.getCanonicalName() + "|"
            + Coprocessor.PRIORITY_USER);

    HBaseAdmin admin = new HBaseAdmin(conf);
    admin.createTable(htd);

    System.out.println(admin.getTableDescriptor(Bytes.toBytes("testtable")));
}
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputCommitter.java
License:Apache License
@Override
public void commitJob(JobContext context) throws IOException {
    Configuration configuration = context.getConfiguration();
    MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(configuration);
    BasicMapReduceTaskContext taskContext = classLoader.getTaskContextProvider().get(this.taskContext);

    String outputDatasetName = configuration.get(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET);
    PartitionedFileSet outputDataset = taskContext.getDataset(outputDatasetName);
    Partitioning partitioning = outputDataset.getPartitioning();

    Set<PartitionKey> partitionsToAdd = new HashSet<>();
    Set<String> relativePaths = new HashSet<>();

    // Go over all files in the temporary directory and keep track of partitions to add for them
    FileStatus[] allCommittedTaskPaths = getAllCommittedTaskPaths(context);
    for (FileStatus committedTaskPath : allCommittedTaskPaths) {
        FileSystem fs = committedTaskPath.getPath().getFileSystem(configuration);
        RemoteIterator<LocatedFileStatus> fileIter = fs.listFiles(committedTaskPath.getPath(), true);
        while (fileIter.hasNext()) {
            Path path = fileIter.next().getPath();
            String relativePath = getRelative(committedTaskPath.getPath(), path);

            int lastPathSepIdx = relativePath.lastIndexOf(Path.SEPARATOR);
            if (lastPathSepIdx == -1) {
                // this shouldn't happen because each relative path should consist of at least
                // one partition key and the output file name
                LOG.warn("Skipping path '{}'. Its relative path '{}' has fewer than two parts",
                        path, relativePath);
                continue;
            }

            // relativePath = "../key1/key2/part-m-00000"
            // relativeDir  = "../key1/key2"
            // fileName     = "part-m-00000"
            String relativeDir = relativePath.substring(0, lastPathSepIdx);
            String fileName = relativePath.substring(lastPathSepIdx + 1);

            Path finalDir = new Path(FileOutputFormat.getOutputPath(context), relativeDir);
            Path finalPath = new Path(finalDir, fileName);
            if (fs.exists(finalPath)) {
                throw new FileAlreadyExistsException("Final output path " + finalPath + " already exists");
            }

            PartitionKey partitionKey = getPartitionKey(partitioning, relativeDir);
            partitionsToAdd.add(partitionKey);
            relativePaths.add(relativeDir);
        }
    }

    // We need to copy to the parent of the FileOutputFormat's outputDir, since we added a
    // _temporary_jobId suffix to the original outputDir.
    Path finalOutput = FileOutputFormat.getOutputPath(context);
    FileSystem fs = finalOutput.getFileSystem(configuration);
    for (FileStatus stat : getAllCommittedTaskPaths(context)) {
        mergePaths(fs, stat, finalOutput);
    }

    // compute the metadata to be written to every output partition
    Map<String, String> metadata = ConfigurationUtil.getNamedConfigurations(
            this.taskContext.getConfiguration(),
            PartitionedFileSetArguments.OUTPUT_PARTITION_METADATA_PREFIX);

    // create all the necessary partitions
    for (PartitionKey partitionKey : partitionsToAdd) {
        PartitionOutput partitionOutput = outputDataset.getPartitionOutput(partitionKey);
        partitionOutput.setMetadata(metadata);
        partitionOutput.addPartition();
    }

    // close the TaskContext, which flushes dataset operations
    try {
        taskContext.flushOperations();
    } catch (Exception e) {
        Throwables.propagateIfPossible(e, IOException.class);
        throw new IOException(e);
    }

    // delete the job-specific _temporary folder and create a _done file in the o/p folder
    cleanupJob(context);

    // mark all the final output paths with a _SUCCESS file, if configured to do so (default = true)
    if (configuration.getBoolean(SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, true)) {
        for (String relativePath : relativePaths) {
            Path pathToMark = new Path(finalOutput, relativePath);
            Path markerPath = new Path(pathToMark, SUCCEEDED_FILE_NAME);
            fs.createNewFile(markerPath);
        }
    }
}
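Path.SEPARATOR does the key work in the first loop: lastIndexOf(Path.SEPARATOR) splits each file's relative output path into its partition directory (e.g. "key1/key2") and file name, and the partition directory is then decoded back into a PartitionKey and reused at the end to place the per-directory _SUCCESS markers.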