List of usage examples for org.apache.hadoop.fs.FileSystem.makeQualified
public Path makeQualified(Path path)
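Before the full examples, a minimal sketch of the common pattern (the path and configuration here are placeholders, not taken from any of the projects below): obtain the FileSystem that owns a path, then call makeQualified so the path carries an explicit scheme and authority and resolves against that filesystem's working directory.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeQualifiedSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path input = new Path("data/matrix"); // relative, scheme-less path (placeholder)
        // Get the filesystem that owns this path, then qualify the path against
        // the filesystem's URI and working directory, e.g.
        // hdfs://namenode:8020/user/alice/data/matrix
        FileSystem fs = FileSystem.get(input.toUri(), conf);
        Path qualified = fs.makeQualified(input);
        System.out.println(qualified);
    }
}

The examples below all qualify their paths this way before handing them to FileInputFormat/FileOutputFormat or YARN, so downstream code cannot misresolve them against a different default filesystem.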
From source file:com.twitter.algebra.nmf.ReindexerJob.java
License:Apache License
public Job run(Configuration conf, Path matrixInputPath, Path matrixOutputPath)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf = new Configuration(conf);
    conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", "\t");
    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJarByClass(ReindexerJob.class);
    job.setJobName(ReindexerJob.class.getSimpleName() + "-" + matrixOutputPath.getName());

    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);

    FileInputFormat.addInputPath(job, matrixInputPath);
    job.setInputFormatClass(KeyValueTextInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setReducerClass(MyReducer.class);
    // this makes the reindexing very slow but is necessary to have total order
    job.setNumReduceTasks(1);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(IntWritable.class);
    job.submit();
    boolean res = job.waitForCompletion(true);
    if (!res)
        throw new IOException("Job failed!");
    return job;
}
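A hypothetical driver invocation of this runner (the paths below are placeholders; run() blocks until the job completes and returns the finished Job):

Configuration conf = new Configuration();
ReindexerJob reindexer = new ReindexerJob();
Job job = reindexer.run(conf,
        new Path("/tmp/nmf/matrix-in"),   // placeholder input path
        new Path("/tmp/nmf/matrix-out")); // placeholder output path
System.out.println("Reindexing finished: " + job.getJobID());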
From source file:com.twitter.algebra.nmf.RowSquareSumJob.java
License:Apache License
public void run(Configuration conf, Path matrixInputPath, Path matrixOutputPath, int aRows)
        throws IOException, InterruptedException, ClassNotFoundException {
    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJarByClass(RowSquareSumJob.class);
    job.setJobName(RowSquareSumJob.class.getSimpleName() + "-" + matrixOutputPath.getName());

    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);

    FileInputFormat.addInputPath(job, matrixInputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);

    int numReducers = 1;
    job.setNumReduceTasks(numReducers);
    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.setMapperClass(SumMapper.class);
    job.setCombinerClass(MergeVectorsReducer.class);
    job.setReducerClass(MergeVectorsReducer.class);
    // RowPartitioner.setPartitioner(job, RowPartitioner.IntRowPartitioner.class, aRows);
    job.submit();
    boolean res = job.waitForCompletion(true);
    if (!res)
        throw new IOException("Job failed!");
}
From source file:com.twitter.algebra.nmf.SampleColsJob.java
License:Apache License
public void run(Configuration conf, Path matrixInputPath, int cols, Path matrixOutputPath,
        float sampleRate) throws IOException, InterruptedException, ClassNotFoundException {
    conf = new Configuration(conf);
    conf.setFloat(SAMPLERATE, sampleRate);
    conf.setInt(COLS, cols);
    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    NMFCommon.setNumberOfMapSlots(conf, fs, matrixInputPath, "samplecol");

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJarByClass(SampleColsJob.class);
    job.setJobName(SampleColsJob.class.getSimpleName() + "-" + matrixOutputPath.getName());

    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);

    FileInputFormat.addInputPath(job, matrixInputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);
    job.setMapperClass(MyMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.submit();
    boolean res = job.waitForCompletion(true);
    if (!res)
        throw new IOException("Job failed!");
}
From source file:com.twitter.algebra.nmf.SampleRowsJob.java
License:Apache License
public void run(Configuration conf, Path matrixInputPath, Path matrixOutputPath, float sampleRate)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf = new Configuration(conf);
    conf.setFloat(SAMPLERATE, sampleRate);
    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    NMFCommon.setNumberOfMapSlots(conf, fs, matrixInputPath, "samplerows");

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJarByClass(SampleRowsJob.class);
    job.setJobName(SampleRowsJob.class.getSimpleName() + "-" + matrixOutputPath.getName());

    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);

    FileInputFormat.addInputPath(job, matrixInputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);
    job.setMapperClass(MyMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.submit();
    boolean res = job.waitForCompletion(true);
    if (!res)
        throw new IOException("Job failed!");
}
From source file:com.twitter.algebra.nmf.XtXJob.java
License:Apache License
public void run(Configuration conf, Path matrixInputPath, int numCols, String xmPath,
        Path matrixOutputPath) throws IOException, InterruptedException, ClassNotFoundException {
    conf = new Configuration(conf);
    conf.setInt(MATRIXCOLS, numCols);
    // conf.set(XMPATH, xmPath);
    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    NMFCommon.setNumberOfMapSlots(conf, fs, new Path[] { matrixInputPath }, "xtx");

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJobName("XtXJob-" + matrixOutputPath.getName());
    job.setJarByClass(XtXJob.class);

    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);

    FileInputFormat.addInputPath(job, matrixInputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);

    int numReducers = NMFCommon.getNumberOfReduceSlots(conf, "xtx");
    job.setNumReduceTasks(numReducers);
    // ensures total order (when used with {@link MatrixOutputFormat})
    RowPartitioner.setPartitioner(job, RowPartitioner.IntRowPartitioner.class, numCols);

    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.submit();
    job.waitForCompletion(true);
}
From source file:com.twitter.algebra.TransposeJob.java
License:Apache License
/**
 * Perform transpose of A, where A refers to the path that contains a matrix
 * in {@link SequenceFileInputFormat}.
 *
 * @param conf
 *          the initial configuration
 * @param matrixInputPath
 *          the path to the input files that we process
 * @param matrixOutputPath
 *          the path of the resulting transpose matrix
 * @param numInputRows
 *          number of rows of the input matrix
 * @param numInputCols
 *          number of columns of the input matrix
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public void run(Configuration conf, Path matrixInputPath, Path matrixOutputPath,
        int numInputRows, int numInputCols)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf.setInt(NUM_ORIG_ROWS_KEY, numInputRows);
    conf.setInt(RowPartitioner.TOTAL_KEYS, numInputCols);
    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    NMFCommon.setNumberOfMapSlots(conf, fs, matrixInputPath, "transpose");

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    job.setJarByClass(TransposeJob.class);
    job.setJobName(TransposeJob.class.getSimpleName());

    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);

    FileInputFormat.addInputPath(job, matrixInputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);
    job.setMapperClass(TransposeMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);

    int numReducers = NMFCommon.getNumberOfReduceSlots(conf, "transpose");
    job.setNumReduceTasks(numReducers);
    RowPartitioner.setPartitioner(job, RowPartitioner.IntRowPartitioner.class, numInputCols);

    job.setCombinerClass(MergeVectorsCombiner.class);
    job.setReducerClass(MergeVectorsReducer.class);
    job.setOutputFormatClass(MatrixOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.submit();
    boolean res = job.waitForCompletion(true);
    if (!res)
        throw new IOException("Job failed!");
}
From source file:com.twitter.pig.backend.hadoop.executionengine.tez.TezJobControlCompiler.java
License:Apache License
/**
 * The method that creates the Job corresponding to a MapReduceOper.
 * The assumption is that every MapReduceOper will have a load and a store.
 * The JobConf removes the load operator and serializes the input filespec
 * so that PigInputFormat can take over the creation of splits. It also
 * removes the store operator and serializes the output filespec so that
 * PigOutputFormat can take over record writing. The remaining portion of
 * the map plan and reduce plans are serialized and stored for the
 * PigMapReduce or PigMapOnly objects to take over the actual running of
 * the plans. The Mapper & Reducer classes and the required key value
 * formats are set. Checks if this is a map only job and uses PigMapOnly
 * class as the mapper and uses PigMapReduce otherwise. If it is a Map
 * Reduce job, it is bound to have a package operator. Remove it from the
 * reduce plan and serialize it so that the PigMapReduce class can use it
 * to package the indexed tuples received by the reducer.
 * @param dag - The TezOperPlan for which the job is required
 * @param conf - the TezConfiguration from which the job configuration is built
 * @param pigContext - The PigContext passed on from execution engine
 * @return TezJob corresponding to dag
 * @throws JobCreationException
 */
@SuppressWarnings({ "unchecked", "deprecation" })
private TezJob getJob(TezOperPlan dag, TezConfiguration conf, PigContext pigContext)
        throws JobCreationException {
    org.apache.hadoop.mapreduce.Job nwJob = null;
    try {
        nwJob = new org.apache.hadoop.mapreduce.Job(conf);
    } catch (Exception e) {
        throw new JobCreationException(e);
    }

    try {
        String buffPercent = conf.get("mapred.job.reduce.markreset.buffer.percent");
        if (buffPercent == null || Double.parseDouble(buffPercent) <= 0) {
            log.info("mapred.job.reduce.markreset.buffer.percent is not set, set to default 0.3");
            conf.set("mapred.job.reduce.markreset.buffer.percent", "0.3");
        } else {
            log.info("mapred.job.reduce.markreset.buffer.percent is set to "
                    + conf.get("mapred.job.reduce.markreset.buffer.percent"));
        }

        // Convert mapred.output.* to output.compression.*, see PIG-1791
        if ("true".equals(conf.get("mapred.output.compress"))) {
            conf.set("output.compression.enabled", "true");
            String codec = conf.get("mapred.output.compression.codec");
            if (codec == null) {
                throw new JobCreationException(
                        "'mapred.output.compress' is set but no value is specified for 'mapred.output.compression.codec'.");
            } else {
                conf.set("output.compression.codec", codec);
            }
        }

        // If the user specified the job name using the -D switch, Pig won't reset the name.
        if (System.getProperty("mapred.job.name") == null
                && pigContext.getProperties().getProperty(PigContext.JOB_NAME) != null) {
            conf.set("mapreduce.job.name", pigContext.getProperties().getProperty(PigContext.JOB_NAME));
        }

        if (pigContext.getProperties().getProperty(PigContext.JOB_PRIORITY) != null) {
            // If the job priority was set, attempt to get the corresponding enum value
            // and set the hadoop job priority.
            String jobPriority = pigContext.getProperties().getProperty(PigContext.JOB_PRIORITY).toUpperCase();
            try {
                // Allow arbitrary case; the Hadoop job priorities are all upper case.
                conf.set("mapred.job.priority", JobPriority.valueOf(jobPriority).toString());
            } catch (IllegalArgumentException e) {
                StringBuffer sb = new StringBuffer("The job priority must be one of [");
                JobPriority[] priorities = JobPriority.values();
                for (int i = 0; i < priorities.length; ++i) {
                    if (i > 0)
                        sb.append(", ");
                    sb.append(priorities[i]);
                }
                sb.append("]. You specified [" + jobPriority + "]");
                throw new JobCreationException(sb.toString());
            }
        }

        nwJob.setInputFormatClass(PigInputFormat.class);
        nwJob.setOutputFormatClass(PigOutputFormat.class);
        conf.setClass("mapreduce.job.inputformat.class", PigInputFormat.class, InputFormat.class);
        conf.setClass("mapreduce.job.outputformat.class", PigOutputFormat.class, OutputFormat.class);

        // tmp file compression setups
        if (Utils.tmpFileCompression(pigContext)) {
            conf.setBoolean("pig.tmpfilecompression", true);
            conf.set("pig.tmpfilecompression.codec", Utils.tmpFileCompressionCodec(pigContext));
        }

        // It's a hack to set distributed cache file for hadoop 23. Once MiniMRCluster
        // does not require a local jar on a fixed location, this can be removed.
        if (pigContext.getExecType() == ExecType.MAPREDUCE) {
            String newfiles = conf.get("alternative.mapreduce.job.cache.files");
            if (newfiles != null) {
                String files = conf.get("mapreduce.job.cache.files");
                conf.set("mapreduce.job.cache.files",
                        files == null ? newfiles : files + "," + newfiles);
            }
        }

        // Serialize the UDF specific context info.
        UDFContext.getUDFContext().serialize(conf);

        FileSystem remoteFs = FileSystem.get(conf);
        ApplicationId appId = tezClient.createApplication();
        Path remoteStagingDir = remoteFs.makeQualified(new Path(
                conf.get(TezConfiguration.TEZ_AM_STAGING_DIR, TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT),
                appId.toString()));
        tezClient.ensureExists(remoteStagingDir);

        DAG tezDag = createDAG(plan, remoteFs, conf, appId, remoteStagingDir);

        Map<String, LocalResource> amLocalResources = new HashMap<String, LocalResource>();
        amLocalResources.put("pig-tez.jar",
                tezDag.getVertices().get(0).getTaskLocalResources().get("pig-tez.jar"));
        amLocalResources.put("dag_job.jar",
                tezDag.getVertices().get(0).getTaskLocalResources().get("dag_job.jar"));

        return new TezJob(conf, appId, tezDag, remoteStagingDir, null, null, null, null, amLocalResources);
    } catch (Exception e) {
        int errCode = 2017;
        String msg = "Internal error creating job configuration.";
        throw new JobCreationException(msg, errCode, PigException.BUG, e);
    }
}
From source file:com.twitter.pig.backend.hadoop.executionengine.tez.TezJobControlCompiler.java
License:Apache License
public DAG createDAG(TezOperPlan tezPlan, FileSystem remoteFs, TezConfiguration conf,
        ApplicationId appId, Path remoteStagingDir) throws IOException, YarnException {
    DAG dag = new DAG("MRRSleepJob");

    Map<String, LocalResource> commonLocalResources = new HashMap<String, LocalResource>();

    if (!pigContext.inIllustrator && pigContext.getExecType() != ExecType.TEZ_LOCAL) {
        // Setup the DistributedCache for this job
        for (URL extraJar : pigContext.extraJars) {
            TezJobControlCompiler.putJarOnClassPathThroughDistributedCache(pigContext, conf, extraJar);
        }

        // Create the jar of all functions and classes required
        File submitJarFile = File.createTempFile("Job", ".jar");
        // ensure the job jar is deleted on exit
        submitJarFile.deleteOnExit();
        FileOutputStream fos = new FileOutputStream(submitJarFile);
        try {
            JarManager.createJar(fos, new HashSet<String>(), pigContext);
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }

        // Upload the job jar to the staging dir and register it as a local resource
        Path remoteJarPath = remoteFs.makeQualified(new Path(remoteStagingDir, "dag_job.jar"));
        remoteFs.copyFromLocalFile(new Path(submitJarFile.getAbsolutePath()), remoteJarPath);
        FileStatus jarFileStatus = remoteFs.getFileStatus(remoteJarPath);
        LocalResource dagJarLocalRsrc = LocalResource.newInstance(
                ConverterUtils.getYarnUrlFromPath(remoteJarPath), LocalResourceType.FILE,
                LocalResourceVisibility.APPLICATION, jarFileStatus.getLen(),
                jarFileStatus.getModificationTime());
        commonLocalResources.put("dag_job.jar", dagJarLocalRsrc);

        Path remoteTezJarPath = remoteFs.makeQualified(new Path(remoteStagingDir, "pig-tez.jar"));
        remoteFs.copyFromLocalFile(new Path("pig-tez.jar"), remoteTezJarPath);
        FileStatus tezJarFileStatus = remoteFs.getFileStatus(remoteTezJarPath);
        LocalResource tezJarLocalRsrc = LocalResource.newInstance(
                ConverterUtils.getYarnUrlFromPath(remoteTezJarPath), LocalResourceType.FILE,
                LocalResourceVisibility.APPLICATION, tezJarFileStatus.getLen(),
                tezJarFileStatus.getModificationTime());
        commonLocalResources.put("pig-tez.jar", tezJarLocalRsrc);

        // Start setting the JobConf properties
        conf.set("mapred.jar", submitJarFile.getPath());
    }

    Hashtable<TezOperator, Pair<Vertex, Configuration>> vertexMap =
            new Hashtable<TezOperator, Pair<Vertex, Configuration>>();
    List<TezOperator> operators = tezPlan.getRoots();

    // add settings for pig statistics
    String setScriptProp = conf.get(ScriptState.INSERT_ENABLED, "true");
    ScriptState ss = null;
    if (setScriptProp.equalsIgnoreCase("true")) {
        ss = ScriptState.get();
    }

    // Walk the plan level by level, creating a vertex per operator and
    // wiring edges from each operator's predecessors.
    while (operators != null && operators.size() != 0) {
        List<TezOperator> successors = new ArrayList<TezOperator>();
        for (TezOperator oper : operators) {
            Configuration operConf = oper.configure(pigContext, conf);
            List<TezOperator> predecessors = plan.getPredecessors(oper);
            if (predecessors != null && predecessors.size() != 0) {
                MultiStageMRConfToTezTranslator.translateVertexConfToTez(operConf,
                        vertexMap.get(predecessors.get(0)).second);
            } else {
                MultiStageMRConfToTezTranslator.translateVertexConfToTez(operConf, null);
            }
            List<TezOperator> operSuccessors = tezPlan.getSuccessors(oper);
            if (operSuccessors != null) {
                successors.addAll(operSuccessors);
            }
            MRHelpers.doJobClientMagic(operConf);
            Vertex operVertex = new Vertex(oper.name(),
                    new ProcessorDescriptor(oper.getProcessor(),
                            MRHelpers.createUserPayloadFromConf(operConf)),
                    oper.getParallelism(), MRHelpers.getMapResource(operConf));
            oper.configureVertex(operVertex, operConf, commonLocalResources, remoteStagingDir);
            dag.addVertex(operVertex);
            if (predecessors != null) {
                for (TezOperator predecessor : predecessors) {
                    dag.addEdge(new Edge(vertexMap.get(predecessor).first, operVertex,
                            tezPlan.getEdgeProperty(predecessor, oper)));
                }
            }
            vertexMap.put(oper, new Pair<Vertex, Configuration>(operVertex, operConf));
        }
        operators = successors;
    }
    return dag;
}
From source file:com.yahoo.storm.yarn.Util.java
License:Open Source License
static LocalResource newYarnAppResource(FileSystem fs, Path path, LocalResourceType type,
        LocalResourceVisibility vis) throws IOException {
    Path qualified = fs.makeQualified(path);
    FileStatus status = fs.getFileStatus(qualified);
    LocalResource resource = Records.newRecord(LocalResource.class);
    resource.setType(type);
    resource.setVisibility(vis);
    resource.setResource(ConverterUtils.getYarnUrlFromPath(qualified));
    resource.setTimestamp(status.getModificationTime());
    resource.setSize(status.getLen());
    return resource;
}
From source file:com.yss.util.YarnUtil.java
License:Open Source License
public static LocalResource newYarnAppResource(FileSystem fs, Path path, LocalResourceType type,
        LocalResourceVisibility vis) throws IOException {
    Path qualified = fs.makeQualified(path);
    FileStatus status = fs.getFileStatus(qualified);
    LocalResource resource = Records.newRecord(LocalResource.class);
    resource.setType(type);
    resource.setVisibility(vis);
    resource.setResource(ConverterUtils.getYarnUrlFromPath(qualified));
    resource.setTimestamp(status.getModificationTime());
    resource.setSize(status.getLen());
    return resource;
}
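A hypothetical call site for this helper (the jar path and the localResources map are placeholders, assuming the jar was already uploaded to the cluster filesystem). Qualifying the path inside the helper ensures the YARN resource URL carries an explicit scheme and authority:

Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
LocalResource appJar = YarnUtil.newYarnAppResource(fs,
        new Path("/apps/myapp/app.jar"), // placeholder HDFS path
        LocalResourceType.FILE,
        LocalResourceVisibility.APPLICATION);
localResources.put("app.jar", appJar);
// localResources can then be attached to a ContainerLaunchContext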