List of usage examples for org.apache.hadoop.fs.Path#makeQualified(FileSystem)
@Deprecated
public Path makeQualified(FileSystem fs)
From source file:org.apache.sqoop.mapreduce.MergeJob.java
License:Apache License
public boolean runMergeJob() throws IOException { Configuration conf = options.getConf(); Job job = createJob(conf);//from w w w . j a va 2s .c o m String userClassName = options.getClassName(); if (null == userClassName) { // Shouldn't get here. throw new IOException("Record class name not specified with " + "--class-name."); } // Set the external jar to use for the job. String existingJar = options.getExistingJarName(); if (existingJar != null) { // User explicitly identified a jar path. LOG.debug("Setting job jar to user-specified jar: " + existingJar); job.getConfiguration().set("mapred.jar", existingJar); } else { // Infer it from the location of the specified class, if it's on the // classpath. try { Class<? extends Object> userClass = conf.getClassByName(userClassName); if (null != userClass) { String userJar = Jars.getJarPathForClass(userClass); LOG.debug("Setting job jar based on user class " + userClassName + ": " + userJar); job.getConfiguration().set("mapred.jar", userJar); } else { LOG.warn("Specified class " + userClassName + " is not in a jar. 
" + "MapReduce may not find the class"); } } catch (ClassNotFoundException cnfe) { throw new IOException(cnfe); } } try { Path oldPath = new Path(options.getMergeOldPath()); Path newPath = new Path(options.getMergeNewPath()); Configuration jobConf = job.getConfiguration(); FileSystem fs = FileSystem.get(jobConf); oldPath = oldPath.makeQualified(fs); newPath = newPath.makeQualified(fs); propagateOptionsToJob(job); FileInputFormat.addInputPath(job, oldPath); FileInputFormat.addInputPath(job, newPath); jobConf.set(MERGE_OLD_PATH_KEY, oldPath.toString()); jobConf.set(MERGE_NEW_PATH_KEY, newPath.toString()); jobConf.set(MERGE_KEY_COL_KEY, options.getMergeKeyCol()); jobConf.set(MERGE_SQOOP_RECORD_KEY, userClassName); FileOutputFormat.setOutputPath(job, new Path(options.getTargetDir())); if (ExportJobBase.isSequenceFiles(jobConf, newPath)) { job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapperClass(MergeRecordMapper.class); } else { job.setMapperClass(MergeTextMapper.class); job.setOutputFormatClass(RawKeyTextOutputFormat.class); } jobConf.set("mapred.output.key.class", userClassName); job.setOutputValueClass(NullWritable.class); job.setReducerClass(MergeReducer.class); // Set the intermediate data types. job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(MergeRecord.class); // Make sure Sqoop and anything else we need is on the classpath. cacheJars(job, null); setJob(job); return this.runJob(job); } catch (InterruptedException ie) { throw new IOException(ie); } catch (ClassNotFoundException cnfe) { throw new IOException(cnfe); } }
From source file:org.apache.sqoop.mapreduce.odps.HdfsOdpsImportJob.java
License:Apache License
/**
 * Resolves the job's input location.
 *
 * @return the export directory from the context options, qualified against the FileSystem
 *     obtained from this job's configuration
 * @throws IOException if the FileSystem cannot be obtained
 */
protected Path getInputPath() throws IOException {
    String exportDir = context.getOptions().getExportDir();
    Path unqualified = new Path(exportDir);
    FileSystem fileSystem = FileSystem.get(options.getConf());
    return unqualified.makeQualified(fileSystem);
}
From source file:org.apache.sysml.runtime.matrix.mapred.CSVAssignRowIDMapper.java
License:Apache License
@Override @SuppressWarnings("deprecation") public void configure(JobConf job) { byte thisIndex; try {//from w w w. j a va 2s . co m //it doesn't make sense to have repeated file names in the input, since this is for reblock thisIndex = MRJobConfiguration.getInputMatrixIndexesInMapper(job).get(0); outKey.set(thisIndex); Path thisPath = new Path(job.get(MRConfigurationNames.MR_MAP_INPUT_FILE)); FileSystem fs = IOUtilFunctions.getFileSystem(thisPath, job); thisPath = thisPath.makeQualified(fs); filename = thisPath.toString(); String[] strs = job.getStrings(CSVReblockMR.SMALLEST_FILE_NAME_PER_INPUT); Path headerPath = new Path(strs[thisIndex]).makeQualified(fs); headerFile = headerPath.toString().equals(filename); CSVReblockInstruction[] reblockInstructions = MRJobConfiguration.getCSVReblockInstructions(job); for (CSVReblockInstruction ins : reblockInstructions) if (ins.input == thisIndex) { delim = Pattern.quote(ins.delim); ignoreFirstLine = ins.hasHeader; break; } } catch (Exception e) { throw new RuntimeException(e); } }
From source file:org.apache.sysml.runtime.matrix.mapred.CSVReblockMapper.java
License:Apache License
@Override @SuppressWarnings("deprecation") public void configure(JobConf job) { super.configure(job); //get the number colums per block //load the offset mapping byte matrixIndex = representativeMatrixes.get(0); try {//from w w w .ja va2 s. c o m Path thisPath = new Path(job.get(MRConfigurationNames.MR_MAP_INPUT_FILE)); FileSystem fs = IOUtilFunctions.getFileSystem(thisPath, job); thisPath = thisPath.makeQualified(fs); String filename = thisPath.toString(); Path headerPath = new Path(job.getStrings(CSVReblockMR.SMALLEST_FILE_NAME_PER_INPUT)[matrixIndex]) .makeQualified(fs); if (headerPath.toString().equals(filename)) headerFile = true; ByteWritable key = new ByteWritable(); OffsetCount value = new OffsetCount(); Path p = new Path(job.get(CSVReblockMR.ROWID_FILE_NAME)); SequenceFile.Reader reader = null; try { reader = new SequenceFile.Reader(fs, p, job); while (reader.next(key, value)) { if (key.get() == matrixIndex && filename.equals(value.filename)) offsetMap.put(value.fileOffset, value.count); } } finally { IOUtilFunctions.closeSilently(reader); } } catch (IOException e) { throw new RuntimeException(e); } CSVReblockInstruction ins = csv_reblock_instructions.get(0).get(0); _delim = ins.delim; ignoreFirstLine = ins.hasHeader; idxRow = new IndexedBlockRow(); int maxBclen = 0; for (ArrayList<CSVReblockInstruction> insv : csv_reblock_instructions) for (CSVReblockInstruction in : insv) { if (maxBclen < in.bclen) maxBclen = in.bclen; } //always dense since common csv usecase idxRow.getRow().data.reset(1, maxBclen, false); }
From source file:org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration.java
License:Apache License
/**
 * Determines which configured input matrices the current map input file belongs to.
 *
 * <p>A matrix matches when its qualified path equals either the current input file or that
 * file's parent directory. The index reported for a match comes from the configured index
 * list when present, otherwise it is the matrix's position in the input list.
 *
 * @param job the job configuration to read from
 * @return the indexes of all matching input matrices (possibly empty)
 * @throws IOException if the FileSystem for the input file cannot be obtained
 * @throws RuntimeException if more than {@code Byte.MAX_VALUE} matrices are configured
 */
public static ArrayList<Byte> getInputMatrixIndexesInMapper(JobConf job) throws IOException {
    String[] matrixDirs = job.getStrings(INPUT_MATRICIES_DIRS_CONFIG);
    String indexConfig = job.get(MAPFUNC_INPUT_MATRICIES_INDEXES_CONFIG);

    byte[] matrixIndexes;
    if (indexConfig != null && !indexConfig.isEmpty()) {
        // NOTE(review): nothing here checks that the number of configured indexes matches
        // the number of matrices indexed in the matching loop below -- confirm callers
        // always keep the two lists the same length.
        String[] tokens = indexConfig.split(Instruction.INSTRUCTION_DELIM);
        matrixIndexes = new byte[tokens.length];
        for (int k = 0; k < tokens.length; k++) {
            matrixIndexes[k] = Byte.parseByte(tokens[k]);
        }
    } else {
        // no explicit mapping: each matrix is identified by its position
        matrixIndexes = new byte[matrixDirs.length];
        for (int k = 0; k < matrixIndexes.length; k++) {
            matrixIndexes[k] = (byte) k;
        }
    }

    if (matrixDirs.length > Byte.MAX_VALUE) {
        throw new RuntimeException("number of matrices is too large > " + Byte.MAX_VALUE);
    }

    // round-trip every directory name through Path before comparing
    for (int k = 0; k < matrixDirs.length; k++) {
        matrixDirs[k] = new Path(matrixDirs[k]).toString();
    }

    Path inputFile = new Path(job.get(MRConfigurationNames.MR_MAP_INPUT_FILE));
    FileSystem fs = IOUtilFunctions.getFileSystem(inputFile, job);
    inputFile = inputFile.makeQualified(fs);
    Path inputDir = inputFile.getParent().makeQualified(fs);

    ArrayList<Byte> matches = new ArrayList<>();
    for (int k = 0; k < matrixDirs.length; k++) {
        Path candidate = new Path(matrixDirs[k]).makeQualified(fs);
        boolean isInputFile = inputFile.toUri().equals(candidate.toUri());
        if (isInputFile || inputDir.toUri().equals(candidate.toUri())) {
            matches.add(matrixIndexes[k]);
        }
    }
    return matches;
}
From source file:org.apache.sysml.runtime.matrix.SortMR.java
License:Apache License
@SuppressWarnings({ "unchecked", "rawtypes" }) public static JobReturn runJob(MRJobInstruction inst, String input, InputInfo inputInfo, long rlen, long clen, int brlen, int bclen, String combineInst, String sortInst, int numReducers, int replication, String output, OutputInfo outputInfo, boolean valueIsWeight) throws Exception { boolean sortIndexes = getSortInstructionType(sortInst) == SortKeys.OperationTypes.Indexes; String tmpOutput = sortIndexes ? MRJobConfiguration.constructTempOutputFilename() : output; JobConf job = new JobConf(SortMR.class); job.setJobName("SortMR"); //setup partition file String pfname = MRJobConfiguration.setUpSortPartitionFilename(job); Path partitionFile = new Path(pfname); URI partitionUri = new URI(partitionFile.toString()); //setup input/output paths Path inputDir = new Path(input); inputDir = inputDir.makeQualified(inputDir.getFileSystem(job)); FileInputFormat.setInputPaths(job, inputDir); Path outpath = new Path(tmpOutput); FileOutputFormat.setOutputPath(job, outpath); MapReduceTool.deleteFileIfExistOnHDFS(outpath, job); //set number of reducers (1 if local mode) if (!InfrastructureAnalyzer.isLocalMode(job)) { MRJobConfiguration.setNumReducers(job, numReducers, numReducers); //ensure partition size <= 10M records to avoid scalability bottlenecks //on cp-side qpick instructions for quantile/iqm/median (~128MB) if (!(getSortInstructionType(sortInst) == SortKeys.OperationTypes.Indexes)) job.setNumReduceTasks((int) Math.max(job.getNumReduceTasks(), rlen / 10000000)); } else //in case of local mode job.setNumReduceTasks(1);/*from ww w.j a v a 2 s.c o m*/ //setup input/output format job.setInputFormat(SamplingSortMRInputFormat.class); SamplingSortMRInputFormat.setTargetKeyValueClasses(job, (Class<? 
extends WritableComparable>) outputInfo.outputKeyClass, outputInfo.outputValueClass); //setup instructions and meta information if (combineInst != null && !combineInst.trim().isEmpty()) job.set(COMBINE_INSTRUCTION, combineInst); job.set(SORT_INSTRUCTION, sortInst); job.setBoolean(VALUE_IS_WEIGHT, valueIsWeight); boolean desc = getSortInstructionDescending(sortInst); job.setBoolean(SORT_DECREASING, desc); MRJobConfiguration.setBlockSize(job, (byte) 0, brlen, bclen); MRJobConfiguration.setInputInfo(job, (byte) 0, inputInfo, brlen, bclen, ConvertTarget.CELL); int partitionWith0 = SamplingSortMRInputFormat.writePartitionFile(job, partitionFile); //setup mapper/reducer/partitioner/output classes if (getSortInstructionType(sortInst) == SortKeys.OperationTypes.Indexes) { MRJobConfiguration.setInputInfo(job, (byte) 0, inputInfo, brlen, bclen, ConvertTarget.CELL); job.setOutputFormat(OutputInfo.BinaryBlockOutputInfo.outputFormatClass); job.setMapperClass(IndexSortMapper.class); job.setReducerClass(IndexSortReducer.class); job.setMapOutputKeyClass(!desc ? 
IndexSortComparable.class : IndexSortComparableDesc.class); job.setMapOutputValueClass(LongWritable.class); job.setOutputKeyClass(MatrixIndexes.class); job.setOutputValueClass(MatrixBlock.class); } else { //default case: SORT w/wo weights MRJobConfiguration.setInputInfo(job, (byte) 0, inputInfo, brlen, bclen, ConvertTarget.CELL); job.setOutputFormat(CompactOutputFormat.class); job.setMapperClass(ValueSortMapper.class); job.setReducerClass(ValueSortReducer.class); job.setOutputKeyClass(outputInfo.outputKeyClass); //double job.setOutputValueClass(outputInfo.outputValueClass); //int } job.setPartitionerClass(TotalOrderPartitioner.class); //setup distributed cache DistributedCache.addCacheFile(partitionUri, job); DistributedCache.createSymlink(job); //setup replication factor job.setInt(MRConfigurationNames.DFS_REPLICATION, replication); //set up custom map/reduce configurations DMLConfig config = ConfigurationManager.getDMLConfig(); MRJobConfiguration.setupCustomMRConfigurations(job, config); MatrixCharacteristics[] s = new MatrixCharacteristics[1]; s[0] = new MatrixCharacteristics(rlen, clen, brlen, bclen); // Print the complete instruction if (LOG.isTraceEnabled()) inst.printCompleteMRJobInstruction(s); //set unique working dir MRJobConfiguration.setUniqueWorkingDir(job); //run mr job RunningJob runjob = JobClient.runJob(job); Group group = runjob.getCounters().getGroup(NUM_VALUES_PREFIX); numReducers = job.getNumReduceTasks(); //process final meta data long[] counts = new long[numReducers]; long total = 0; for (int i = 0; i < numReducers; i++) { counts[i] = group.getCounter(Integer.toString(i)); total += counts[i]; } //add missing 0s back to the results long missing0s = 0; if (total < rlen * clen) { if (partitionWith0 < 0) throw new RuntimeException("no partition contains 0, which is wrong!"); missing0s = rlen * clen - total; counts[partitionWith0] += missing0s; } else partitionWith0 = -1; if (sortIndexes) { //run builtin job for shifting partially sorted blocks 
according to global offsets //we do this in this custom form since it would not fit into the current structure //of systemml to output two intermediates (partially sorted data, offsets) out of a //single SortKeys lop boolean success = runjob.isSuccessful(); if (success) { success = runStitchupJob(tmpOutput, rlen, clen, brlen, bclen, counts, numReducers, replication, output); } MapReduceTool.deleteFileIfExistOnHDFS(tmpOutput); MapReduceTool.deleteFileIfExistOnHDFS(pfname); return new JobReturn(s[0], OutputInfo.BinaryBlockOutputInfo, success); } else { MapReduceTool.deleteFileIfExistOnHDFS(pfname); return new JobReturn(s[0], counts, partitionWith0, missing0s, runjob.isSuccessful()); } }
From source file:org.apache.sysml.yarn.DMLYarnClient.java
License:Apache License
/**
 * Writes the DML config and DML script into the given HDFS working directory and copies the
 * application jar there, remembering the resulting qualified locations in
 * {@code _hdfsDMLConfig}, {@code _hdfsDMLScript} and {@code _hdfsJarFile}.
 *
 * <p>The jar is taken from {@code getLocalJarFileNameFromEnvConst()} when that returns a
 * name; otherwise the code source location of this class is used, repackaged via
 * {@code createJar} if it is a directory.
 *
 * @param yconf the YARN configuration used to obtain the file systems
 * @param hdfsWD the HDFS working directory to copy into
 */
@SuppressWarnings("deprecation")
private void copyResourcesToHdfsWorkingDir(YarnConfiguration yconf, String hdfsWD)
        throws ParseException, IOException, DMLRuntimeException, InterruptedException {
    Path configFile = new Path(hdfsWD, DML_CONFIG_NAME);
    FileSystem fs = IOUtilFunctions.getFileSystem(configFile, yconf);

    // create working directory
    MapReduceTool.createDirIfNotExistOnHDFS(configFile, DMLConfig.DEFAULT_SHARED_DIR_PERMISSION);

    // serialize the dml config to HDFS file
    // NOTE: we do not modify and ship the absolute scratch space path of the current user
    // because this might result in permission issues if the app master is run with a different user
    // (runtime plan migration during resource reoptimizations now needs to use qualified names
    // for shipping/reading intermediates) TODO modify resource reoptimizer on prototype integration.
    // NOTE(review): DataOutputStream.writeBytes keeps only the low byte of each char, so
    // non-ASCII content would not round-trip -- confirm config/script are ASCII-only.
    try (FSDataOutputStream configOut = fs.create(configFile, true)) {
        configOut.writeBytes(_dmlConfig.serializeDMLConfig() + "\n");
    }
    _hdfsDMLConfig = configFile.makeQualified(fs).toString();
    LOG.debug("DML config written to HDFS file: " + _hdfsDMLConfig + "");

    // serialize the dml script to HDFS file
    Path scriptFile = new Path(hdfsWD, DML_SCRIPT_NAME);
    try (FSDataOutputStream scriptOut = fs.create(scriptFile, true)) {
        scriptOut.writeBytes(_dmlScript);
    }
    _hdfsDMLScript = scriptFile.makeQualified(fs).toString();
    LOG.debug("DML script written to HDFS file: " + _hdfsDMLScript + "");

    // copy local jar file to HDFS (try to get the original jar filename)
    String jarName = getLocalJarFileNameFromEnvConst();
    if (jarName == null) {
        // get location of unpacked jar classes and repackage (if required)
        String codeLocation = DMLYarnClient.class.getProtectionDomain().getCodeSource().getLocation().getPath()
                .toString();
        // a directory means we were called w/ an unpacked jar, which needs to be repackaged;
        // otherwise we were called w/ a jar and can ship it as is
        jarName = new File(codeLocation).isDirectory() ? createJar(codeLocation) : codeLocation;
    }
    Path localJar = new Path(jarName);
    Path hdfsJar = new Path(hdfsWD, localJar.getName());
    FileUtil.copy(FileSystem.getLocal(yconf), localJar, fs, hdfsJar, false, true, yconf);
    _hdfsJarFile = hdfsJar.makeQualified(fs).toString();
    LOG.debug(
            "Jar file copied from local file: " + localJar.toString() + " to HDFS file: " + hdfsJar.toString());
}
From source file:org.apache.tajo.storage.s3.SmallBlockS3FileSystem.java
License:Apache License
/**
 * Lists the status of a path in the S3 block store.
 *
 * @param f the path to list
 * @return a single-element array for a file, or one status per sub-path for a directory
 * @throws FileNotFoundException if no inode is stored for the path
 * @throws IOException if the store cannot be read
 */
@Override
public FileStatus[] listStatus(Path f) throws IOException {
    Path absolutePath = makeAbsolute(f);
    INode node = store.retrieveINode(absolutePath);
    if (node == null) {
        throw new FileNotFoundException("File " + f + " does not exist.");
    }

    if (node.isFile()) {
        FileStatus self = new S3FileStatus(f.makeQualified(this), node);
        return new FileStatus[] { self };
    }

    ArrayList<FileStatus> children = new ArrayList<FileStatus>();
    for (Path child : store.listSubPaths(absolutePath)) {
        Path qualifiedChild = child.makeQualified(this);
        children.add(getFileStatus(qualifiedChild));
    }
    return children.toArray(new FileStatus[children.size()]);
}
From source file:org.apache.tajo.storage.s3.SmallBlockS3FileSystem.java
License:Apache License
/** * FileStatus for S3 file systems./* www . j a v a 2s . c o m*/ */ @Override public FileStatus getFileStatus(Path f) throws IOException { INode inode = store.retrieveINode(makeAbsolute(f)); if (inode == null) { throw new FileNotFoundException(f + ": No such file or directory."); } return new S3FileStatus(f.makeQualified(this), inode); }
From source file:org.apache.tajo.storage.thirdparty.parquet.ParquetFileWriter.java
License:Apache License
/** * writes a _metadata file/*from w w w. j a v a2s . com*/ * @param configuration the configuration to use to get the FileSystem * @param outputPath the directory to write the _metadata file to * @param footers the list of footers to merge * @throws java.io.IOException */ public static void writeMetadataFile(Configuration configuration, Path outputPath, List<Footer> footers) throws IOException { Path metaDataPath = new Path(outputPath, PARQUET_METADATA_FILE); FileSystem fs = outputPath.getFileSystem(configuration); outputPath = outputPath.makeQualified(fs); FSDataOutputStream metadata = fs.create(metaDataPath); metadata.write(MAGIC); ParquetMetadata metadataFooter = mergeFooters(outputPath, footers); serializeFooter(metadataFooter, metadata); metadata.close(); }