List of usage examples for org.apache.hadoop.fs.Path.toString()
@Override
public String toString()
From source file:com.asakusafw.windgate.hadoopfs.ssh.WindGateHadoopGetTest.java
License:Apache License
/** * Run as simulation mode./*from w ww . java 2s . c o m*/ * @throws Exception if failed */ @Test public void simulated() throws Exception { RuntimeContext.set(RuntimeContext.DEFAULT.mode(ExecutionMode.SIMULATION)); Path testing = new Path(PREFIX, "testing"); put(testing, "Hello, world!"); ByteArrayOutputStream buffer = new ByteArrayOutputStream(); int result = new WindGateHadoopGet(conf).execute(buffer, testing.toString()); assertThat(result, is(0)); Map<String, String> contents = get(buffer.toByteArray()); assertThat(contents.toString(), contents.size(), is(0)); }
From source file:com.asakusafw.windgate.hadoopfs.ssh.WindGateHadoopGetTest.java
License:Apache License
/** * Attemts to get missing files in simulation mode. * @throws Exception if failed/* w w w .ja v a2 s. c om*/ */ @Test public void missing_sim() throws Exception { RuntimeContext.set(RuntimeContext.DEFAULT.mode(ExecutionMode.SIMULATION)); Path testing = new Path(PREFIX, "testing"); ByteArrayOutputStream buffer = new ByteArrayOutputStream(); int result = new WindGateHadoopGet(conf).execute(buffer, testing.toString()); assertThat(result, is(0)); }
From source file:com.asakusafw.windgate.hadoopfs.ssh.WindGateHadoopPutTest.java
License:Apache License
/**
 * Invokes the put command with a non-empty argument list.
 *
 * <p>The command is expected to report failure (a non-zero exit status) when
 * it is given a path argument.
 *
 * @throws Exception if failed
 */
@Test
public void arguments() throws Exception {
    Path target = new Path(PREFIX, "testing");

    // Build the serialized file list that is fed to the command as its input.
    ByteArrayOutputStream serialized = new ByteArrayOutputStream();
    try (FileList.Writer listWriter = FileList.createWriter(serialized)) {
        put(listWriter, target, "Hello, world!");
    }

    ByteArrayInputStream input = new ByteArrayInputStream(serialized.toByteArray());
    WindGateHadoopPut command = new WindGateHadoopPut(conf);
    int exitStatus = command.execute(input, target.toString());

    assertThat(exitStatus, is(not(0)));
}
From source file:com.benchmark.mapred.PiEstimator.java
License:Apache License
/** * Run a map/reduce job for estimating Pi. * * @return the estimated value of Pi/*www .j ava2 s .c o m*/ */ public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException { //setup job conf jobConf.setJobName(PiEstimator.class.getSimpleName()); jobConf.setInputFormat(SequenceFileInputFormat.class); jobConf.setOutputKeyClass(BooleanWritable.class); jobConf.setOutputValueClass(LongWritable.class); jobConf.setOutputFormat(SequenceFileOutputFormat.class); jobConf.setMapperClass(PiMapper.class); jobConf.setNumMapTasks(numMaps); jobConf.setReducerClass(PiReducer.class); jobConf.setNumReduceTasks(1); // turn off speculative execution, because DFS doesn't handle // multiple writers to the same file. jobConf.setSpeculativeExecution(false); //setup input/output directories //final Path inDir = new Path(TMP_DIR, "in"); final Path inDir = new Path("/home/hadoop1/tmp_dir", "in"); System.out.println("inDir =" + inDir.toString()); //final Path outDir = new Path(TMP_DIR, "out"); final Path outDir = new Path("/home/hadoop1/tmp_dir", "out"); System.out.println("outDir =" + outDir.toString()); FileInputFormat.setInputPaths(jobConf, inDir); FileOutputFormat.setOutputPath(jobConf, outDir); final FileSystem fs = FileSystem.get(jobConf); if (fs.exists(TMP_DIR)) { throw new IOException( "Tmp directory " + fs.makeQualified(TMP_DIR) + " already exists. 
Please remove it first."); } if (!fs.mkdirs(inDir)) { throw new IOException("Cannot create input directory " + inDir); } try { //generate an input file for each map task for (int i = 0; i < numMaps; ++i) { final Path file = new Path(inDir, "part" + i); final LongWritable offset = new LongWritable(i * numPoints); final LongWritable size = new LongWritable(numPoints); final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, file, LongWritable.class, LongWritable.class, CompressionType.NONE); try { writer.append(offset, size); } finally { writer.close(); } System.out.println("Wrote input for Map #" + i); } //start a map/reduce job System.out.println("Starting Job"); final long startTime = System.currentTimeMillis(); JobClient.runJob(jobConf); final double duration = (System.currentTimeMillis() - startTime) / 1000.0; System.out.println("Job Finished in " + duration + " seconds"); //read outputs Path inFile = new Path(outDir, "reduce-out"); LongWritable numInside = new LongWritable(); LongWritable numOutside = new LongWritable(); SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf); try { reader.next(numInside, numOutside); } finally { reader.close(); } //compute estimated value return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get())) .divide(BigDecimal.valueOf(numMaps)).divide(BigDecimal.valueOf(numPoints)); } finally { fs.delete(TMP_DIR, true); } }
From source file:com.benchmark.mapred.Sort.java
License:Apache License
/** * The main driver for sort program.//from w ww .ja v a2 s. c om * Invoke this method to submit the map/reduce job. * @throws IOException When there is communication problems with the * job tracker. */ public int run(String[] args) throws Exception { JobConf jobConf = new JobConf(getConf(), Sort.class); jobConf.setJobName("sorter"); jobConf.setMapperClass(IdentityMapper.class); jobConf.setReducerClass(IdentityReducer.class); JobClient client = new JobClient(jobConf); ClusterStatus cluster = client.getClusterStatus(); int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9); String sort_reduces = jobConf.get("test.sort.reduces_per_host"); if (sort_reduces != null) { num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces); } Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class; Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class; Class<? extends WritableComparable> outputKeyClass = BytesWritable.class; Class<? extends Writable> outputValueClass = BytesWritable.class; List<String> otherArgs = new ArrayList<String>(); InputSampler.Sampler<K, V> sampler = null; for (int i = 0; i < args.length; ++i) { try { if ("-m".equals(args[i])) { jobConf.setNumMapTasks(Integer.parseInt(args[++i])); } else if ("-r".equals(args[i])) { num_reduces = Integer.parseInt(args[++i]); } else if ("-inFormat".equals(args[i])) { inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class); } else if ("-outFormat".equals(args[i])) { outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class); } else if ("-outKey".equals(args[i])) { outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class); } else if ("-outValue".equals(args[i])) { outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class); } else if ("-totalOrder".equals(args[i])) { double pcnt = Double.parseDouble(args[++i]); int numSamples = Integer.parseInt(args[++i]); int maxSplits = 
Integer.parseInt(args[++i]); if (0 >= maxSplits) maxSplits = Integer.MAX_VALUE; sampler = new InputSampler.RandomSampler<K, V>(pcnt, numSamples, maxSplits); } else { otherArgs.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); // exits } } // Set user-supplied (possibly default) job configs jobConf.setNumReduceTasks(num_reduces); jobConf.setInputFormat(inputFormatClass); jobConf.setOutputFormat(outputFormatClass); jobConf.setOutputKeyClass(outputKeyClass); jobConf.setOutputValueClass(outputValueClass); // Make sure there are exactly 2 parameters left. if (otherArgs.size() != 2) { System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 2."); return printUsage(); } FileInputFormat.setInputPaths(jobConf, otherArgs.get(0)); FileOutputFormat.setOutputPath(jobConf, new Path(otherArgs.get(1))); if (sampler != null) { System.out.println("Sampling input to effect total-order sort..."); jobConf.setPartitionerClass(TotalOrderPartitioner.class); Path inputDir = FileInputFormat.getInputPaths(jobConf)[0]; inputDir = inputDir.makeQualified(inputDir.getFileSystem(jobConf)); Path partitionFile = new Path(inputDir, "_sortPartitioning"); TotalOrderPartitioner.setPartitionFile(jobConf, partitionFile); InputSampler.<K, V>writePartitionFile(jobConf, sampler); URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning"); DistributedCache.addCacheFile(partitionUri, jobConf); DistributedCache.createSymlink(jobConf); } System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from " + FileInputFormat.getInputPaths(jobConf)[0] + " into " + FileOutputFormat.getOutputPath(jobConf) + " with " + num_reduces + " reduces."); Date startTime = new Date(); System.out.println("Job 
started: " + startTime); jobResult = JobClient.runJob(jobConf); Date end_time = new Date(); System.out.println("Job ended: " + end_time); System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds."); return 0; }
From source file:com.benchmark.mapred.terasort.TeraSort.java
License:Apache License
/**
 * Configures and runs the TeraSort job.
 *
 * <p>Expects exactly three arguments: the input directory, the output
 * directory and the number of reduce tasks. The partition file is written
 * into the input directory and registered in the distributed cache.
 *
 * @param args {@code <input dir> <output dir> <number of reduces>}
 * @return {@code 0} after the job has run, {@code 2} if the number of
 *         arguments is wrong
 * @throws Exception if the job cannot be configured or run
 */
public int run(String[] args) throws Exception {
    LOG.info("starting");

    // Validate before any args[...] access; previously the error was only
    // printed and execution went on to fail with an
    // ArrayIndexOutOfBoundsException.
    if (args.length != 3) {
        System.out.println("ERROR: Wrong number of parameters: " + args.length + " instead of 3.");
        return 2;
    }

    JobConf job = (JobConf) getConf();

    Path inputDir = new Path(args[0]);
    inputDir = inputDir.makeQualified(inputDir.getFileSystem(job));
    Path partitionFile = new Path(inputDir, TeraInputFormat.PARTITION_FILENAME);
    URI partitionUri = new URI(partitionFile.toString() + "#" + TeraInputFormat.PARTITION_FILENAME);

    TeraInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setJobName("TeraSort");
    job.setJarByClass(TeraSort.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormat(TeraInputFormat.class);
    job.setOutputFormat(TeraOutputFormat.class);
    job.setPartitionerClass(TotalOrderPartitioner.class);
    job.setNumReduceTasks(Integer.parseInt(args[2]));

    TeraInputFormat.writePartitionFile(job, partitionFile);
    DistributedCache.addCacheFile(partitionUri, job);
    DistributedCache.createSymlink(job);

    job.setInt("dfs.replication", 1);
    TeraOutputFormat.setFinalSync(job, true);

    Date startIteration = new Date();
    JobClient.runJob(job);
    Date endIteration = new Date();
    System.out.println(
            "The iteration took " + (endIteration.getTime() - startIteration.getTime()) / 1000 + " seconds.");

    LOG.info("done");
    return 0;
}
From source file:com.bianfeng.bfas.hive.io.RealtimeInputFormat2.java
License:Apache License
/** * Set the array of {@link Path}s as the list of inputs * for the map-reduce job./*from w ww .j a v a2 s. c o m*/ * * @param conf Configuration of the job. * @param inputPaths the {@link Path}s of the input directories/files * for the map-reduce job. */ public static void setInputPaths(JobConf conf, Path... inputPaths) { Path path = new Path(conf.getWorkingDirectory(), inputPaths[0]); StringBuffer str = new StringBuffer(StringUtils.escapeString(path.toString())); for (int i = 1; i < inputPaths.length; i++) { str.append(StringUtils.COMMA_STR); path = new Path(conf.getWorkingDirectory(), inputPaths[i]); str.append(StringUtils.escapeString(path.toString())); } conf.set("mapred.input.dir", str.toString()); }
From source file:com.bianfeng.bfas.hive.io.RealtimeInputFormat2.java
License:Apache License
/** * Add a {@link Path} to the list of inputs for the map-reduce job. * /*from ww w . j a v a 2 s .c om*/ * @param conf The configuration of the job * @param path {@link Path} to be added to the list of inputs for * the map-reduce job. */ public static void addInputPath(JobConf conf, Path path) { path = new Path(conf.getWorkingDirectory(), path); String dirStr = StringUtils.escapeString(path.toString()); String dirs = conf.get("mapred.input.dir"); conf.set("mapred.input.dir", dirs == null ? dirStr : dirs + StringUtils.COMMA_STR + dirStr); }
From source file:com.bigjob.Client.java
License:Apache License
private void addToLocalResources(FileSystem fs, String fileSrcPath, String fileDstPath, int appId, Map<String, LocalResource> localResources, String resources) throws IOException { String suffix = appName + "/" + appId + "/" + fileDstPath; Path dst = new Path(fs.getHomeDirectory(), suffix); LOG.debug("HDFS Destination for Script: " + dst.toString()); if (fileSrcPath == null) { FSDataOutputStream ostream = null; try {//ww w . jav a 2 s . c om ostream = FileSystem.create(fs, dst, new FsPermission((short) 0710)); ostream.writeUTF(resources); } finally { IOUtils.closeQuietly(ostream); } } else { fs.copyFromLocalFile(new Path(fileSrcPath), dst); } FileStatus scFileStatus = fs.getFileStatus(dst); LocalResource scRsrc = LocalResource.newInstance(ConverterUtils.getYarnUrlFromURI(dst.toUri()), LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, scFileStatus.getLen(), scFileStatus.getModificationTime()); localResources.put(fileDstPath, scRsrc); }
From source file:com.bizosys.unstructured.StopwordAndSynonymAnalyzer.java
License:Apache License
public void load() throws IOException { InputStream stopwordStream = null; InputStream synonumStream = null; Configuration hsearchConf = HSearchConfig.getInstance().getConfiguration(); String filenameSynonum = hsearchConf.get("synonyms.file.location", "synonyms.txt"); String filenameStopword = hsearchConf.get("stopword.file.location", "stopwords.txt"); isLowerCaseEnabled = hsearchConf.getBoolean("lucene.analysis.lowercasefilter", true); isAccentFilterEnabled = hsearchConf.getBoolean("lucene.analysis.accentfilter", true); isSnoballStemEnabled = hsearchConf.getBoolean("lucene.analysis.snowballfilter", true); isStopFilterEnabled = hsearchConf.getBoolean("lucene.analysis.stopfilter", true); if (null != stopwords) return;// w w w . j a va 2 s . c o m org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration(); FileSystem fs = FileSystem.get(conf); if (null != fs) { /** * STOPWORD */ Path stopPath = new Path(filenameStopword); if (fs.exists(stopPath)) { if (DEBUG_ENABLED) IdSearchLog.l.debug("Loading Stopword file from HDFS :" + stopPath.toString()); stopwordStream = fs.open(stopPath); } else { IdSearchLog.l.fatal("Stopword file not available in HDFS :" + stopPath.toString()); } /** * SYNONUM */ Path synPath = new Path(filenameSynonum); if (fs.exists(synPath)) { synonumStream = fs.open(synPath); if (DEBUG_ENABLED) IdSearchLog.l.debug("Loading synonym file from HDFS :" + filenameSynonum.toString()); } else { IdSearchLog.l.fatal("Synonym file not available in HDFS :" + filenameSynonum.toString()); IdSearchLog.l.fatal("Working Directory :" + fs.getWorkingDirectory().getName()); } } ClassLoader classLoader = null; if (null == stopwordStream || null == synonumStream) { classLoader = Thread.currentThread().getContextClassLoader(); } if (null == stopwordStream) { URL stopUrl = classLoader.getResource(filenameStopword); if (null != stopUrl) { String stopFile = stopUrl.getPath(); if (null != stopFile) { File stopwordFile = new File(stopFile); if 
(stopwordFile.exists() && stopwordFile.canRead()) { stopwordStream = new FileInputStream(stopwordFile); if (DEBUG_ENABLED) IdSearchLog.l .debug("Loading Stopword file from Local :" + stopwordFile.getAbsolutePath()); } else { IdSearchLog.l.fatal("Stopword file not available at :" + stopwordFile.getAbsolutePath()); IdSearchLog.l.fatal("Working Directory :" + fs.getHomeDirectory().getName()); } } else { if (DEBUG_ENABLED) IdSearchLog.l.debug("Ignoring Stopwords > " + filenameStopword); } } } if (null == synonumStream) { URL synUrl = classLoader.getResource(filenameSynonum); if (null != synUrl) { String synFileName = synUrl.getPath(); if (null != synFileName) { File synFile = new File(synFileName); if (synFile.exists() && synFile.canRead()) { synonumStream = new FileInputStream(synFile); if (DEBUG_ENABLED) IdSearchLog.l.debug("Loading Synonum file from Local :" + synFile.getAbsolutePath()); } else { if (DEBUG_ENABLED) IdSearchLog.l.debug("Synonum file not available at :" + synFile.getAbsolutePath()); } } else { if (DEBUG_ENABLED) IdSearchLog.l.debug("Ignoring Synonyms > " + filenameSynonum); } } } load(stopwordStream, synonumStream); }