Usage examples for the collect method of org.apache.hadoop.mapred.OutputCollector
void collect(K key, V value) throws IOException;
From source file:org.apache.parquet.cascading.ParquetValueScheme.java
License:Apache License
@SuppressWarnings("unchecked") @Override//ww w . j av a 2 s . co m public void sink(FlowProcess<JobConf> fp, SinkCall<Object[], OutputCollector> sc) throws IOException { TupleEntry tuple = sc.getOutgoingEntry(); if (tuple.size() != 1) { throw new RuntimeException( "ParquetValueScheme expects tuples with an arity of exactly 1, but found " + tuple.getFields()); } T value = (T) tuple.getObject(0); OutputCollector output = sc.getOutput(); output.collect(null, value); }
From source file:org.apache.pig.test.utils.datagen.DataGenMapper.java
License:Apache License
public void map(LongWritable key, Text value, OutputCollector<String, String> output, Reporter reporter) throws IOException { int initialSize = colSpecs.size() * 50; if (!hasInput) { long numRows = Long.parseLong(value.toString().trim()); // dg.numRows = numRows; //TODO?? for (int i = 0; i < numRows; i++) { StringWriter str = new StringWriter(initialSize); PrintWriter pw = new PrintWriter(str); writer.writeLine(pw);/*from ww w .jav a 2 s . com*/ output.collect(null, str.toString()); if ((i + 1) % 10000 == 0) { reporter.progress(); reporter.setStatus("" + (i + 1) + " tuples generated."); } } } else { StringWriter str = new StringWriter(initialSize); PrintWriter pw = new PrintWriter(str); pw.write(value.toString()); writer.writeLine(pw); output.collect(null, str.toString()); } }
From source file:org.apache.sysml.runtime.matrix.mapred.CSVReblockMapper.java
License:Apache License
/**
 * Cuts one parsed CSV row into column blocks of width {@code bclen} and emits every block
 * through {@code out}, reusing {@code row} as the output buffer.
 *
 * <p>Full-width blocks are emitted first; a narrower trailing block is emitted when
 * {@code cells.length} is not a multiple of {@code bclen}. Cells that are {@code null} or
 * empty are replaced by {@code fillValue}. In full-width blocks the empty-field check is
 * delegated to {@code IOUtilFunctions.checkAndRaiseErrorCSVEmptyField}; in the trailing block
 * a {@link RuntimeException} is thrown directly when {@code fill} is false.
 *
 * @param row reusable buffer holding the output indexes and the block row
 * @param cells the cell strings of the current CSV line
 * @param rowOffset row offset added to {@code num} to locate the line
 * @param num position of the line relative to {@code rowOffset}
 * @param outTag tag stored in the output indexes
 * @param brlen number of rows per block
 * @param bclen number of columns per block
 * @param fill whether empty cells may be replaced by {@code fillValue}
 * @param fillValue replacement for empty cells
 * @param out collector receiving one (indexes, block row) pair per column block
 * @return the same {@code row} instance that was passed in
 * @throws IOException if the collector fails
 */
public static IndexedBlockRow processRow(IndexedBlockRow row, String[] cells, long rowOffset, long num,
        byte outTag, int brlen, int bclen, boolean fill, double fillValue,
        OutputCollector<TaggedFirstSecondIndexes, BlockRow> out) throws IOException {
    row.getIndexes().setTag(outTag);

    // Row position handed to the block-index and cell-in-block helpers.
    final long globalRow = rowOffset + num + 1;
    final long blockRowIndex = UtilFunctions.computeBlockIndex(globalRow, brlen);
    row.getRow().indexInBlock = UtilFunctions.computeCellInBlock(globalRow, brlen);

    // Emit all full-width column blocks.
    final int fullBlocks = cells.length / bclen;
    int offset = 0;
    long blockCol = 0;
    while (blockCol < fullBlocks) {
        row.getRow().data.reset(1, bclen);
        row.getIndexes().setIndexes(blockRowIndex, blockCol + 1);

        for (int j = 0; j < bclen; j++) {
            final String cell = cells[offset + j];
            if (cell != null && !cell.isEmpty()) {
                row.getRow().data.appendValue(0, j, UtilFunctions.parseToDouble(cell));
            } else {
                IOUtilFunctions.checkAndRaiseErrorCSVEmptyField(null, fill, true);
                row.getRow().data.appendValue(0, j, fillValue);
            }
        }

        out.collect(row.getIndexes(), row.getRow());
        offset += bclen;
        blockCol++;
    }

    // The indexes are advanced to the trailing block even when that block turns out to be empty.
    row.getIndexes().setIndexes(blockRowIndex, blockCol + 1);
    final int tailWidth = cells.length % bclen;
    if (tailWidth == 0) {
        return row;
    }

    // Emit the narrower trailing block.
    row.getRow().data.reset(1, tailWidth);
    for (int j = 0; j < tailWidth; j++) {
        final String cell = cells[offset + j];
        if (cell != null && !cell.isEmpty()) {
            row.getRow().data.appendValue(0, j, UtilFunctions.parseToDouble(cell));
        } else {
            if (!fill) {
                throw new RuntimeException(
                        "Empty fields found in the input delimited file. Use \"fill\" option to read delimited files with empty fields.");
            }
            row.getRow().data.appendValue(0, j, fillValue);
        }
    }
    out.collect(row.getIndexes(), row.getRow());

    return row;
}
From source file:org.apache.sysml.runtime.matrix.mapred.CSVWriteMapper.java
License:Apache License
@Override public void map(Writable rawKey, Writable rawValue, OutputCollector<TaggedFirstSecondIndexes, MatrixBlock> out, Reporter reporter) throws IOException { long start = System.currentTimeMillis(); //for each represenattive matrix, read the record and apply instructions for (int i = 0; i < representativeMatrixes.size(); i++) { //convert the record into the right format for the representatice matrix inputConverter.setBlockSize(brlens[i], bclens[i]); inputConverter.convert(rawKey, rawValue); byte thisMatrix = representativeMatrixes.get(i); //apply unary instructions on the converted indexes and values while (inputConverter.hasNext()) { Pair<MatrixIndexes, MatrixBlock> pair = inputConverter.next(); MatrixIndexes indexes = pair.getKey(); MatrixBlock value = pair.getValue(); outIndexes.setIndexes(indexes.getRowIndex(), indexes.getColumnIndex()); ArrayList<Byte> outputs = inputOutputMap.get(thisMatrix); for (byte output : outputs) { outIndexes.setTag(output); out.collect(outIndexes, value); //LOG.info("Mapper output: "+outIndexes+", "+value+", tag: "+output); }/*w w w . j av a2 s. co m*/ } } reporter.incrCounter(Counters.MAP_TIME, System.currentTimeMillis() - start); }
From source file:org.apache.sysml.runtime.matrix.mapred.FrameReblockBuffer.java
License:Apache License
/** * //from www . ja v a2s . c o m * @param out * @param key * @param block * @throws IOException */ private static void outputBlock(OutputCollector<Long, Writable> out, Long key, FrameBlock block) throws IOException { //skip output of unassigned blocks if (key == -1) return; //output block out.collect(key, block); }
From source file:org.apache.sysml.runtime.matrix.mapred.GMRMapper.java
License:Apache License
protected void processMapOutputToReducerForGMR(int index, TaggedMatrixValue taggedValueBuffer, OutputCollector<Writable, Writable> out) throws IOException { for (byte output : outputIndexes.get(index)) { ArrayList<IndexedMatrixValue> results = cachedValues.get(output); if (results == null) continue; for (IndexedMatrixValue result : results) { if (result == null) continue; //prepare tagged output value //(special case for conversion from matrixcell to taggedmatrixpackedcell, e.g., ctable) if (valueClass.equals(MatrixCell.class)) taggedValueBuffer.getBaseObject().copy(result.getValue()); else/* w w w . j av a 2 s. c o m*/ taggedValueBuffer.setBaseObject(result.getValue()); taggedValueBuffer.setTag(output); //collect output (exactly once) out.collect(result.getIndexes(), taggedValueBuffer); } } }
From source file:org.apache.sysml.runtime.matrix.mapred.GroupedAggMRCombiner.java
License:Apache License
@Override public void reduce(TaggedMatrixIndexes key, Iterator<WeightedCell> values, OutputCollector<TaggedMatrixIndexes, WeightedCell> out, Reporter reporter) throws IOException { long start = System.currentTimeMillis(); //get aggregate operator GroupedAggregateInstruction ins = grpaggInstructions.get(key.getTag()); Operator op = ins.getOperator();// w ww. j a v a 2 s . co m boolean isPartialAgg = true; //combine iterator to single value try { if (op instanceof CMOperator) //everything except sum { if (((CMOperator) op).isPartialAggregateOperator()) { cmObj.reset(); CM lcmFn = cmFn.get(key.getTag()); //partial aggregate cm operator while (values.hasNext()) { WeightedCell value = values.next(); lcmFn.execute(cmObj, value.getValue(), value.getWeight()); } outCell.setValue(cmObj.getRequiredPartialResult(op)); outCell.setWeight(cmObj.getWeight()); } else //forward tuples to reducer { isPartialAgg = false; while (values.hasNext()) out.collect(key, values.next()); } } else if (op instanceof AggregateOperator) //sum { AggregateOperator aggop = (AggregateOperator) op; if (aggop.correctionExists) { KahanObject buffer = new KahanObject(aggop.initialValue, 0); KahanPlus.getKahanPlusFnObject(); //partial aggregate with correction while (values.hasNext()) { WeightedCell value = values.next(); aggop.increOp.fn.execute(buffer, value.getValue() * value.getWeight()); } outCell.setValue(buffer._sum); outCell.setWeight(1); } else //no correction { double v = aggop.initialValue; //partial aggregate without correction while (values.hasNext()) { WeightedCell value = values.next(); v = aggop.increOp.fn.execute(v, value.getValue() * value.getWeight()); } outCell.setValue(v); outCell.setWeight(1); } } else throw new IOException("Unsupported operator in instruction: " + ins); } catch (Exception ex) { throw new IOException(ex); } //collect the output (to reducer) if (isPartialAgg) out.collect(key, outCell); reporter.incrCounter(Counters.COMBINE_OR_REDUCE_TIME, System.currentTimeMillis() - 
start); }
From source file:org.apache.sysml.runtime.matrix.mapred.GroupedAggMRMapper.java
License:Apache License
@Override public void map(MatrixIndexes key, MatrixValue value, OutputCollector<TaggedMatrixIndexes, WeightedCell> out, Reporter reporter) throws IOException { for (int k = 0; k < representativeMatrixes.size(); k++) for (GroupedAggregateInstruction ins : groupAgg_instructions.get(k)) { //set the tag once for the block outKey.setTag(ins.output);//from w w w . ja v a 2 s . c om //get block and unroll into weighted cells //(it will be in dense format) MatrixBlock block = (MatrixBlock) value; int rlen = block.getNumRows(); int clen = block.getNumColumns(); if (!ins.hasWeights()) //w/o weights (input vector or matrix) { long coloff = (key.getColumnIndex() - 1) * ins.getBclen(); //local pre-aggregation for sum w/ known output dimensions if (ins.getOperator() instanceof AggregateOperator && ins.getNGroups() > 0 && OptimizerUtils.isValidCPDimensions(ins.getNGroups(), block.getNumColumns() - 1)) { try { MatrixBlock group = block.sliceOperations(0, block.getNumRows() - 1, block.getNumColumns() - 1, block.getNumColumns() - 1, new MatrixBlock()); MatrixBlock target = block.sliceOperations(0, block.getNumRows() - 1, 0, block.getNumColumns() - 2, new MatrixBlock()); MatrixBlock tmp = group.groupedAggOperations(target, null, new MatrixBlock(), ins.getNGroups(), ins.getOperator()); for (int i = 0; i < tmp.getNumRows(); i++) { for (int j = 0; j < tmp.getNumColumns(); j++) { double tmpval = tmp.quickGetValue(i, j); if (tmpval != 0) { outKeyValue.setIndexes(i + 1, coloff + j + 1); outValue.setValue(tmpval); outValue.setWeight(1); out.collect(outKey, outValue); } } } } catch (Exception ex) { throw new IOException(ex); } } //general case without pre-aggregation else { for (int r = 0; r < rlen; r++) { int group = (int) block.quickGetValue(r, clen - 1); for (int c = 0; c < clen - 1; c++) { outKeyValue.setIndexes(group, coloff + c + 1); outValue.setValue(block.quickGetValue(r, c)); outValue.setWeight(1); out.collect(outKey, outValue); } } } } else //w/ weights (input vector) { for (int r 
= 0; r < rlen; r++) { outKeyValue.setIndexes((int) block.quickGetValue(r, 1), 1); outValue.setValue(block.quickGetValue(r, 0)); outValue.setWeight(block.quickGetValue(r, 2)); out.collect(outKey, outValue); } } } }
From source file:org.apache.sysml.runtime.matrix.mapred.ReblockBuffer.java
License:Apache License
public void flushBuffer(byte index, OutputCollector<Writable, Writable> out) throws IOException { if (_count == 0) return;//from w w w .j a va2 s.c o m //Step 1) sort reblock buffer (blockwise, no in-block sorting!) Arrays.sort(_buff, 0, _count, new ReblockBufferComparator()); //Step 2) scan for number of created blocks long numBlocks = 0; //number of blocks in buffer long cbi = -1, cbj = -1; //current block indexes for (int i = 0; i < _count; i++) { long bi = UtilFunctions.computeBlockIndex(_buff[i][0], _brlen); long bj = UtilFunctions.computeBlockIndex(_buff[i][1], _bclen); //switch to next block if (bi != cbi || bj != cbj) { cbi = bi; cbj = bj; numBlocks++; } } //Step 3) decide on intermediate representation (for entire buffer) //decision based on binarycell vs binaryblock_ultrasparse (worstcase) long blockedSize = 16 * numBlocks + 16 * _count; //<long,long>,#<int,int,double> long cellSize = 24 * _count; //#<long,long>,<double> boolean blocked = (blockedSize <= cellSize); //Step 4) output blocks / binary cell (one-at-a-time) TaggedAdaptivePartialBlock outTVal = new TaggedAdaptivePartialBlock(); AdaptivePartialBlock outVal = new AdaptivePartialBlock(); MatrixIndexes tmpIx = new MatrixIndexes(); outTVal.setTag(index); outTVal.setBaseObject(outVal); //setup wrapper writables if (blocked) //output binaryblock { //create intermediate blocks boolean sparse = MatrixBlock.evalSparseFormatInMemory(_brlen, _bclen, _count / numBlocks); MatrixBlock tmpBlock = new MatrixBlock(); //put values into block and output cbi = -1; cbj = -1; //current block indexes for (int i = 0; i < _count; i++) { long bi = UtilFunctions.computeBlockIndex(_buff[i][0], _brlen); long bj = UtilFunctions.computeBlockIndex(_buff[i][1], _bclen); //output block and switch to next index pair if (bi != cbi || bj != cbj) { outputBlock(out, tmpIx, outTVal, tmpBlock); cbi = bi; cbj = bj; tmpIx.setIndexes(bi, bj); tmpBlock.reset(Math.min(_brlen, (int) (_rlen - (bi - 1) * _brlen)), Math.min(_bclen, (int) (_clen 
- (bj - 1) * _bclen)), sparse); } int ci = UtilFunctions.computeCellInBlock(_buff[i][0], _brlen); int cj = UtilFunctions.computeCellInBlock(_buff[i][1], _bclen); double tmp = Double.longBitsToDouble(_buff[i][2]); tmpBlock.appendValue(ci, cj, tmp); } //output last block outputBlock(out, tmpIx, outTVal, tmpBlock); } else //output binarycell { PartialBlock tmpVal = new PartialBlock(); outVal.set(tmpVal); for (int i = 0; i < _count; i++) { long bi = UtilFunctions.computeBlockIndex(_buff[i][0], _brlen); long bj = UtilFunctions.computeBlockIndex(_buff[i][1], _bclen); int ci = UtilFunctions.computeCellInBlock(_buff[i][0], _brlen); int cj = UtilFunctions.computeCellInBlock(_buff[i][1], _bclen); double tmp = Double.longBitsToDouble(_buff[i][2]); tmpIx.setIndexes(bi, bj); tmpVal.set(ci, cj, tmp); //in outVal, in outTVal out.collect(tmpIx, outTVal); } } _count = 0; }
From source file:org.apache.sysml.runtime.matrix.mapred.ReblockBuffer.java
License:Apache License
private static void outputBlock(OutputCollector<Writable, Writable> out, MatrixIndexes key, TaggedAdaptivePartialBlock value, MatrixBlock block) throws IOException { //skip output of unassigned blocks if (key.getRowIndex() == -1 || key.getColumnIndex() == -1) return;//from www . ja v a 2s . c o m //sort sparse rows due to blockwise buffer sort and append if (block.isInSparseFormat()) block.sortSparseRows(); //output block value.getBaseObject().set(block); out.collect(key, value); }