List of usage examples for org.apache.hadoop.mapred JobConf get
public String get(String name)
Returns the value of the name property, or null if no such property exists.
From source file:com.ibm.bi.dml.runtime.matrix.data.hadoopfix.MultipleInputs.java
License:Apache License
/** * Retrieves a map of {@link Path}s to the {@link Mapper} class that * should be used for them./*from w w w. j a v a 2 s . com*/ * * @param conf The confuration of the job * @see #addInputPath(JobConf, Path, Class, Class) * @return A map of paths to mappers for the job */ @SuppressWarnings("unchecked") static Map<Path, Class<? extends Mapper>> getMapperTypeMap(JobConf conf) { if (conf.get("mapred.input.dir.mappers") == null) { return Collections.emptyMap(); } Map<Path, Class<? extends Mapper>> m = new HashMap<Path, Class<? extends Mapper>>(); String[] pathMappings = conf.get("mapred.input.dir.mappers").split(","); for (String pathMapping : pathMappings) { String[] split = pathMapping.split(";"); Class<? extends Mapper> mapClass; try { mapClass = (Class<? extends Mapper>) conf.getClassByName(split[1]); } catch (ClassNotFoundException e) { throw new RuntimeException(e); } m.put(new Path(split[0]), mapClass); } return m; }
From source file:com.ibm.bi.dml.runtime.matrix.mapred.CSVAssignRowIDMapper.java
License:Open Source License
@Override @SuppressWarnings("deprecation") public void configure(JobConf job) { byte thisIndex; try {/*ww w. j a v a 2 s . co m*/ //it doesn't make sense to have repeated file names in the input, since this is for reblock thisIndex = MRJobConfiguration.getInputMatrixIndexesInMapper(job).get(0); outKey.set(thisIndex); FileSystem fs = FileSystem.get(job); Path thisPath = new Path(job.get("map.input.file")).makeQualified(fs); filename = thisPath.toString(); String[] strs = job.getStrings(CSVReblockMR.SMALLEST_FILE_NAME_PER_INPUT); Path headerPath = new Path(strs[thisIndex]).makeQualified(fs); if (headerPath.toString().equals(filename)) headerFile = true; } catch (IOException e) { throw new RuntimeException(e); } try { CSVReblockInstruction[] reblockInstructions = MRJobConfiguration.getCSVReblockInstructions(job); for (CSVReblockInstruction ins : reblockInstructions) { if (ins.input == thisIndex) { delim = Pattern.quote(ins.delim); ignoreFirstLine = ins.hasHeader; break; } } } catch (DMLUnsupportedOperationException e) { throw new RuntimeException(e); } catch (DMLRuntimeException e) { throw new RuntimeException(e); } // load properties relevant to transform try { boolean omit = job.getBoolean(MRJobConfiguration.TF_TRANSFORM, false); if (omit) _agents = new TfUtils(job, true); } catch (IOException e) { throw new RuntimeException(e); } catch (JSONException e) { throw new RuntimeException(e); } }
From source file:com.ibm.bi.dml.runtime.matrix.mapred.CSVReblockMapper.java
License:Open Source License
@Override @SuppressWarnings("deprecation") public void configure(JobConf job) { super.configure(job); //get the number colums per block //load the offset mapping byte matrixIndex = representativeMatrixes.get(0); try {//from w w w . j a va 2 s . co m FileSystem fs = FileSystem.get(job); Path thisPath = new Path(job.get("map.input.file")).makeQualified(fs); String filename = thisPath.toString(); Path headerPath = new Path(job.getStrings(CSVReblockMR.SMALLEST_FILE_NAME_PER_INPUT)[matrixIndex]) .makeQualified(fs); if (headerPath.toString().equals(filename)) headerFile = true; ByteWritable key = new ByteWritable(); OffsetCount value = new OffsetCount(); Path p = new Path(job.get(CSVReblockMR.ROWID_FILE_NAME)); SequenceFile.Reader reader = new SequenceFile.Reader(fs, p, job); while (reader.next(key, value)) { if (key.get() == matrixIndex && filename.equals(value.filename)) offsetMap.put(value.fileOffset, value.count); } reader.close(); } catch (IOException e) { throw new RuntimeException(e); } CSVReblockInstruction ins = csv_reblock_instructions.get(0).get(0); _delim = ins.delim; ignoreFirstLine = ins.hasHeader; idxRow = new IndexedBlockRow(); int maxBclen = 0; for (ArrayList<CSVReblockInstruction> insv : csv_reblock_instructions) for (CSVReblockInstruction in : insv) { if (maxBclen < in.bclen) maxBclen = in.bclen; } //always dense since common csv usecase idxRow.getRow().data.reset(1, maxBclen, false); }
From source file:com.ibm.bi.dml.runtime.matrix.mapred.GMRMapper.java
License:Open Source License
public void configure(JobConf job) { super.configure(job); mapperID = job.get("mapred.task.id"); dimsUnknownFilePrefix = job.get("dims.unknown.file.prefix"); _filterEmptyInputBlocks = allowsFilterEmptyInputBlocks(); //assign the temporay vairables try {//from ww w . j av a 2 s .c om // System.out.println(valueClass.getName()); // System.out.println(MatrixCell.class.getName()); if (job.getMapOutputValueClass().equals(TaggedMatrixPackedCell.class)) taggedValueBuffer = TaggedMatrixValue.createObject(MatrixPackedCell.class); else taggedValueBuffer = TaggedMatrixValue.createObject(valueClass); } catch (Exception e) { throw new RuntimeException(e); } //decide whether it is a maponly job mapOnlyJob = (job.getNumReduceTasks() <= 0); if (!mapOnlyJob) return; //get the indexes of the final output matrices resultIndexes = MRJobConfiguration.getResultIndexes(job); resultDimsUnknown = MRJobConfiguration.getResultDimsUnknown(job); //initialize SystemML Counters (defined in MRJobConfiguration) resultsNonZeros = new long[resultIndexes.length]; resultsMaxRowDims = new long[resultIndexes.length]; resultsMaxColDims = new long[resultIndexes.length]; tagMapping = new HashMap<Byte, ArrayList<Integer>>(); for (int i = 0; i < resultIndexes.length; i++) { byte output = resultIndexes[i]; ArrayList<Integer> vec = tagMapping.get(output); if (vec == null) { vec = new ArrayList<Integer>(); tagMapping.put(output, vec); } vec.add(i); } //for map only job, get the map output converters collectFinalMultipleOutputs = MRJobConfiguration.getMultipleConvertedOutputs(job); }
From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java
License:Open Source License
public static final int getJVMMaxMemSize(JobConf job) { String str = job.get("mapred.child.java.opts"); int start = str.indexOf("-Xmx"); if (start < 0) return 209715200; //default 200MB str = str.substring(start + 4);//from w w w . ja v a 2 s .c o m int i = 0; for (; i < str.length() && str.charAt(i) <= '9' && str.charAt(i) >= '0'; i++) ; int ret = Integer.parseInt(str.substring(0, i)); if (i >= str.length()) return ret; switch (str.charAt(i)) { case 'k': case 'K': ret = ret * 1024; break; case 'm': case 'M': ret = ret * 1048576; break; case 'g': case 'G': ret = ret * 1073741824; break; default: } return ret; }
From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java
License:Open Source License
/**
 * Returns the value of the "mapred.local.dir" property of the given job.
 *
 * @param job the job configuration
 * @return the configured value, or {@code null} if the property is not set
 */
public static String getLocalWorkingDirPrefix(JobConf job) {
    final String localDir = job.get("mapred.local.dir");
    return localDir;
}
From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java
License:Open Source License
/**
 * Returns the value of the "mapred.system.dir" property of the given job.
 *
 * @param job the job configuration
 * @return the configured value, or {@code null} if the property is not set
 */
public static String getSystemWorkingDirPrefix(JobConf job) {
    final String systemDir = job.get("mapred.system.dir");
    return systemDir;
}
From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java
License:Open Source License
/**
 * Returns the value of the "mapreduce.jobtracker.staging.root.dir" property
 * of the given job.
 *
 * @param job the job configuration
 * @return the configured value, or {@code null} if the property is not set
 */
public static String getStagingWorkingDirPrefix(JobConf job) {
    final String stagingDir = job.get("mapreduce.jobtracker.staging.root.dir");
    return stagingDir;
}
From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java
License:Open Source License
/**
 * Reads the instruction string stored under {@code INSTRUCTIONS_IN_REDUCER_CONFIG}
 * and parses it with {@code MRInstructionParser.parseMixedInstructions}.
 *
 * @param job the job configuration
 * @return the parsed instructions
 * @throws DMLUnsupportedOperationException propagated from the parser
 * @throws DMLRuntimeException propagated from the parser
 */
public static MRInstruction[] getInstructionsInReducer(JobConf job)
        throws DMLUnsupportedOperationException, DMLRuntimeException {
    return MRInstructionParser.parseMixedInstructions(job.get(INSTRUCTIONS_IN_REDUCER_CONFIG));
}
From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java
License:Open Source License
/**
 * Reads the instruction string stored under {@code REBLOCK_INSTRUCTIONS_CONFIG}
 * and parses it with {@code MRInstructionParser.parseReblockInstructions}.
 *
 * @param job the job configuration
 * @return the parsed reblock instructions
 * @throws DMLUnsupportedOperationException propagated from the parser
 * @throws DMLRuntimeException propagated from the parser
 */
public static ReblockInstruction[] getReblockInstructions(JobConf job)
        throws DMLUnsupportedOperationException, DMLRuntimeException {
    return MRInstructionParser.parseReblockInstructions(job.get(REBLOCK_INSTRUCTIONS_CONFIG));
}