List of usage examples for org.apache.hadoop.mapred JobConf get

public String get(String name)

Parameter: name - the property name.
Returns: the value of the name property, or null if no such property exists.
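Before the source-file examples, a minimal self-contained sketch of the call pattern they all share: read a string property from the JobConf and handle the missing-key case explicitly. The property names here are hypothetical placeholders for illustration, not keys used by any source file below.

    import org.apache.hadoop.mapred.JobConf;

    public class JobConfGetExample {
        public static void main(String[] args) {
            JobConf job = new JobConf();
            job.set("my.example.property", "someValue"); // hypothetical key, for illustration only

            // get(name) returns null when the property is unset,
            // so callers must null-check (as the examples below do)
            String value = job.get("my.example.property");
            if (value == null || value.isEmpty())
                System.out.println("property not set");
            else
                System.out.println("property = " + value);

            // the two-argument overload returns a default instead of null
            System.out.println(job.get("missing.property", "fallback")); // prints "fallback"
        }
    }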
From source file: com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java
License: Open Source License
public static byte[] getResultDimsUnknown(JobConf job) {
    String str = job.get(RESULT_DIMS_UNKNOWN_CONFIG);
    if (str == null || str.isEmpty())
        return null;
    String[] istrs = str.split(Instruction.INSTRUCTION_DELIM);
    return stringArrayToByteArray(istrs);
}
From source file: com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java
License: Open Source License
public static byte[] getIntermediateMatrixIndexes(JobConf job) {
    String str = job.get(INTERMEDIATE_INDEXES_CONFIG);
    if (str == null || str.isEmpty())
        return null;
    String[] istrs = str.split(Instruction.INSTRUCTION_DELIM);
    return stringArrayToByteArray(istrs);
}
From source file: com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java
License: Open Source License
public static String getDistCacheInputIndices(JobConf job) {
    return job.get(DISTCACHE_INPUT_INDICES);
}
From source file: com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java
License: Open Source License
public static String getDistCacheInputPaths(JobConf job) {
    return job.get(DISTCACHE_INPUT_PATHS);
}
From source file: com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java
License: Open Source License
public static PDataPartitionFormat[] getInputPartitionFormats(JobConf job) {
    return MRJobConfiguration.csv2PFormat(job.get(PARTITIONING_OUTPUT_FORMAT_CONFIG));
}
From source file: com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java
License: Open Source License
public static String getSortPartitionFilename(JobConf job) {
    return job.get(SORT_PARTITION_FILENAME);
}
From source file: com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java
License: Open Source License
public static String getSystemMLLocalTmpDir(JobConf job) {
    return job.get(SYSTEMML_LOCAL_TMP_DIR);
}
From source file: com.ibm.bi.dml.runtime.matrix.mapred.ReduceBase.java
License: Open Source License
public void configure(JobConf job) {
    super.configure(job);
    reducerID = job.get("mapred.task.id");
    dimsUnknownFilePrefix = job.get("dims.unknown.file.prefix");

    //get the indexes of the final output matrices
    resultIndexes = MRJobConfiguration.getResultIndexes(job);
    resultDimsUnknown = MRJobConfiguration.getResultDimsUnknown(job);

    //initialize SystemML Counters (defined in MRJobConfiguration)
    resultsNonZeros = new long[resultIndexes.length];
    resultsMaxRowDims = new long[resultIndexes.length];
    resultsMaxColDims = new long[resultIndexes.length];

    collectFinalMultipleOutputs = MRJobConfiguration.getMultipleConvertedOutputs(job);

    //parse aggregate operations
    AggregateInstruction[] agg_insts = null;
    try {
        agg_insts = MRJobConfiguration.getAggregateInstructions(job);

        //parse unary and binary operations
        MRInstruction[] tmp = MRJobConfiguration.getInstructionsInReducer(job);
        if (tmp != null) {
            mixed_instructions = new ArrayList<MRInstruction>();
            Collections.addAll(mixed_instructions, tmp);
        }
    } catch (DMLUnsupportedOperationException e) {
        throw new RuntimeException(e);
    } catch (DMLRuntimeException e) {
        throw new RuntimeException(e);
    }

    //load data from distributed cache (if required, reuse if jvm_reuse)
    try {
        setupDistCacheFiles(job);
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }

    //reorganize the aggregate instructions, so that they are all associated with each input
    if (agg_insts != null) {
        for (AggregateInstruction ins : agg_insts) {
            //associate instruction to its input
            ArrayList<AggregateInstruction> vec = agg_instructions.get(ins.input);
            if (vec == null) {
                vec = new ArrayList<AggregateInstruction>();
                agg_instructions.put(ins.input, vec);
            }
            vec.add(ins);

            if (ins.input == ins.output)
                continue;

            //need to add new aggregate instructions so that partial aggregation can be applied
            //this is important for the combiner on the reducer side
            AggregateInstruction partialIns = new AggregateInstruction(ins.getOperator(), ins.output,
                    ins.output, ins.toString());
            vec = agg_instructions.get(partialIns.input);
            if (vec == null) {
                vec = new ArrayList<AggregateInstruction>();
                agg_instructions.put(partialIns.input, vec);
            }
            vec.add(partialIns);
        }
    }
}
From source file: com.ibm.bi.dml.runtime.matrix.sort.IndexSortStitchupMapper.java
License: Open Source License
@Override
public void configure(JobConf job) {
    super.configure(job);
    _offsets = parseOffsets(job.get(SortMR.SORT_INDEXES_OFFSETS));
    _rlen = MRJobConfiguration.getNumRows(job, (byte) 0);
    _brlen = MRJobConfiguration.getNumRowsPerBlock(job, (byte) 0);
    _tmpIx = new MatrixIndexes();
    _tmpBlk = new MatrixBlock((int) _brlen, 1, false);
}
From source file: com.ibm.bi.dml.runtime.transform.ApplyTfBBMapper.java
License: Open Source License
@Override
public void configure(JobConf job) {
    super.configure(job);
    try {
        _partFileWithHeader = TfUtils.isPartFileWithHeader(job);
        tfmapper = new TfUtils(job);
        tfmapper.loadTfMetadata(job, true);

        // Load relevant information for CSV Reblock
        ByteWritable key = new ByteWritable();
        OffsetCount value = new OffsetCount();
        Path p = new Path(job.get(CSVReblockMR.ROWID_FILE_NAME));

        FileSystem fs = FileSystem.get(job);
        Path thisPath = new Path(job.get("map.input.file")).makeQualified(fs);
        String thisfile = thisPath.toString();

        SequenceFile.Reader reader = new SequenceFile.Reader(fs, p, job);
        while (reader.next(key, value)) {
            // "key" needn't be checked since the offset file has information about a single CSV input (the raw data file)
            if (thisfile.equals(value.filename))
                offsetMap.put(value.fileOffset, value.count);
        }
        reader.close();

        idxRow = new CSVReblockMapper.IndexedBlockRow();
        int maxBclen = 0;
        for (ArrayList<CSVReblockInstruction> insv : csv_reblock_instructions)
            for (CSVReblockInstruction in : insv) {
                if (maxBclen < in.bclen)
                    maxBclen = in.bclen;
            }

        //always dense since common csv usecase
        idxRow.getRow().data.reset(1, maxBclen, false);
    } catch (IOException e) {
        throw new RuntimeException(e);
    } catch (JSONException e) {
        throw new RuntimeException(e);
    }
}