Example usage for org.apache.hadoop.mapred.OutputCollector.collect

Introduction

This page collects example usages of the org.apache.hadoop.mapred.OutputCollector.collect method.

Prototype

void collect(K key, V value) throws IOException;

Source Link

Document

Adds a key/value pair to the output.

Usage

From source file:org.apache.parquet.cascading.ParquetValueScheme.java

License:Apache License

/**
 * Writes the single value carried by the outgoing tuple to the sink's output collector.
 *
 * <p>The outgoing tuple must contain exactly one field. That field's object is emitted
 * with a {@code null} key.
 *
 * @param fp the flow process (not used by this method)
 * @param sc the sink call that supplies the outgoing tuple entry and the output collector
 * @throws IOException if the collector fails while accepting the value
 * @throws RuntimeException if the outgoing tuple does not have exactly one field
 */
@SuppressWarnings("unchecked")
@Override
public void sink(FlowProcess<JobConf> fp, SinkCall<Object[], OutputCollector> sc) throws IOException {
    final TupleEntry outgoing = sc.getOutgoingEntry();

    // Reject anything that is not a one-field tuple before touching the collector.
    final boolean singleField = outgoing.size() == 1;
    if (!singleField) {
        throw new RuntimeException(
                "ParquetValueScheme expects tuples with an arity of exactly 1, but found " + outgoing.getFields());
    }

    final T payload = (T) outgoing.getObject(0);
    sc.getOutput().collect(null, payload);
}

From source file:org.apache.pig.test.utils.datagen.DataGenMapper.java

License:Apache License

/**
 * Emits generated lines to {@code output}, always with a {@code null} key.
 *
 * <p>When {@code hasInput} is false, {@code value} is parsed as a row count and that many
 * lines are produced by {@code writer.writeLine}. Otherwise the text of {@code value} is
 * written first and {@code writer.writeLine} is invoked once on the same buffer, so a
 * single line is emitted per input record.
 *
 * @param key the input key (not used by this method)
 * @param value either the number of rows to generate (no-input mode) or the input line
 * @param output collector receiving the produced lines
 * @param reporter used to signal progress and status every 10000 generated rows
 * @throws IOException if the collector fails
 * @throws NumberFormatException if, in no-input mode, {@code value} is not a valid long
 */
public void map(LongWritable key, Text value, OutputCollector<String, String> output, Reporter reporter)
        throws IOException {
    // Initial capacity hint for the per-line buffer, scaled by the number of column specs.
    int initialSize = colSpecs.size() * 50;

    if (!hasInput) {
        long numRows = Long.parseLong(value.toString().trim());
        // TODO (carried over from the original): unclear whether dg.numRows should be set here.

        // The counter must be a long: numRows is a long, and an int counter would wrap
        // to a negative value past Integer.MAX_VALUE and never reach the bound.
        for (long i = 0; i < numRows; i++) {
            StringWriter str = new StringWriter(initialSize);
            PrintWriter pw = new PrintWriter(str);
            writer.writeLine(pw);
            output.collect(null, str.toString());

            long generated = i + 1;
            if (generated % 10000 == 0) {
                reporter.progress();
                reporter.setStatus(generated + " tuples generated.");
            }
        }
    } else {
        StringWriter str = new StringWriter(initialSize);
        PrintWriter pw = new PrintWriter(str);
        // The input text goes first; writer.writeLine then continues on the same buffer.
        pw.write(value.toString());
        writer.writeLine(pw);
        output.collect(null, str.toString());
    }
}

From source file:org.apache.sysml.runtime.matrix.mapred.CSVReblockMapper.java

License:Apache License

/**
 * Cuts one parsed CSV row into column chunks of width {@code bclen} and emits each chunk.
 *
 * <p>The row's tag, block row index and in-block row position are set first. Every full
 * chunk of {@code bclen} cells is then loaded into the row buffer and collected under
 * block column {@code 1, 2, ...}. A trailing partial chunk, if any, is collected last.
 * Cells that are {@code null} or empty are replaced by {@code fillValue}: in full chunks
 * after calling {@code IOUtilFunctions.checkAndRaiseErrorCSVEmptyField(null, fill, true)},
 * in the trailing chunk only when {@code fill} is true.
 *
 * @param row reusable row holder whose indexes and data are overwritten
 * @param cells the cell strings of the current row
 * @param rowOffset offset added to {@code num} to obtain the global row position
 * @param num row number relative to {@code rowOffset}
 * @param outTag tag stored on the emitted indexes
 * @param brlen block row length used to derive the block row index
 * @param bclen block column length, i.e. the chunk width
 * @param fill whether empty cells may be replaced by {@code fillValue}
 * @param fillValue replacement for empty cells
 * @param out collector receiving one (indexes, row) pair per chunk
 * @return the same {@code row} instance that was passed in
 * @throws IOException if the collector fails
 * @throws RuntimeException if the trailing chunk has an empty cell and {@code fill} is false
 */
public static IndexedBlockRow processRow(IndexedBlockRow row, String[] cells, long rowOffset, long num,
        byte outTag, int brlen, int bclen, boolean fill, double fillValue,
        OutputCollector<TaggedFirstSecondIndexes, BlockRow> out) throws IOException {
    row.getIndexes().setTag(outTag);

    final long globalRow = rowOffset + num + 1;
    final long blockRowIx = UtilFunctions.computeBlockIndex(globalRow, brlen);
    row.getRow().indexInBlock = UtilFunctions.computeCellInBlock(globalRow, brlen);

    // full-width chunks
    final int fullChunks = cells.length / bclen;
    int offset = 0;
    long chunk = 0;
    while (chunk < fullChunks) {
        row.getRow().data.reset(1, bclen);
        row.getIndexes().setIndexes(blockRowIx, chunk + 1);
        for (int c = 0; c < bclen; c++) {
            final String cell = cells[offset + c];
            if (cell != null && !cell.isEmpty()) {
                row.getRow().data.appendValue(0, c, UtilFunctions.parseToDouble(cell));
            } else {
                IOUtilFunctions.checkAndRaiseErrorCSVEmptyField(null, fill, true);
                row.getRow().data.appendValue(0, c, fillValue);
            }
        }
        out.collect(row.getIndexes(), row.getRow());
        offset += bclen;
        chunk++;
    }

    // the indexes are advanced to the next block column even when no partial chunk follows
    row.getIndexes().setIndexes(blockRowIx, chunk + 1);

    final int tailWidth = cells.length % bclen;
    if (tailWidth == 0) {
        return row;
    }

    // trailing partial chunk
    row.getRow().data.reset(1, tailWidth);
    for (int c = 0; c < tailWidth; c++) {
        final String cell = cells[offset + c];
        if (cell != null && !cell.isEmpty()) {
            row.getRow().data.appendValue(0, c, UtilFunctions.parseToDouble(cell));
        } else {
            if (!fill) {
                throw new RuntimeException(
                        "Empty fields found in the input delimited file. Use \"fill\" option to read delimited files with empty fields.");
            }
            row.getRow().data.appendValue(0, c, fillValue);
        }
    }
    out.collect(row.getIndexes(), row.getRow());
    return row;
}

From source file:org.apache.sysml.runtime.matrix.mapred.CSVWriteMapper.java

License:Apache License

/**
 * Converts the raw record once per representative matrix and forwards every resulting
 * block to each output tag mapped to that matrix.
 *
 * <p>The elapsed wall-clock time is added to the {@code Counters.MAP_TIME} counter.
 *
 * @param rawKey raw input key handed to the input converter
 * @param rawValue raw input value handed to the input converter
 * @param out collector receiving (tagged indexes, block) pairs
 * @param reporter used to record the time spent in this call
 * @throws IOException if the collector fails
 */
@Override
public void map(Writable rawKey, Writable rawValue, OutputCollector<TaggedFirstSecondIndexes, MatrixBlock> out,
        Reporter reporter) throws IOException {
    final long startMillis = System.currentTimeMillis();

    // for each representative matrix, read the record and emit it under every mapped tag
    for (int m = 0; m < representativeMatrixes.size(); m++) {
        // convert the record into the right format for the representative matrix
        inputConverter.setBlockSize(brlens[m], bclens[m]);
        inputConverter.convert(rawKey, rawValue);

        final byte matrixTag = representativeMatrixes.get(m);

        while (inputConverter.hasNext()) {
            final Pair<MatrixIndexes, MatrixBlock> converted = inputConverter.next();
            final MatrixIndexes srcIx = converted.getKey();
            final MatrixBlock blk = converted.getValue();

            // the shared output key is reused; only its tag changes per target
            outIndexes.setIndexes(srcIx.getRowIndex(), srcIx.getColumnIndex());
            final ArrayList<Byte> targets = inputOutputMap.get(matrixTag);
            for (byte target : targets) {
                outIndexes.setTag(target);
                out.collect(outIndexes, blk);
            }
        }
    }

    final long elapsedMillis = System.currentTimeMillis() - startMillis;
    reporter.incrCounter(Counters.MAP_TIME, elapsedMillis);
}

From source file:org.apache.sysml.runtime.matrix.mapred.FrameReblockBuffer.java

License:Apache License

/**
 * Emits a frame block under the given key, unless the key marks the block as unassigned.
 *
 * @param out collector that receives the key/block pair
 * @param key block key; the value {@code -1} marks an unassigned block, which is skipped
 * @param block frame block to emit
 * @throws IOException if the collector fails
 */
private static void outputBlock(OutputCollector<Long, Writable> out, Long key, FrameBlock block)
        throws IOException {
    // only assigned blocks (key other than -1) are written
    if (key != -1) {
        out.collect(key, block);
    }
}

From source file:org.apache.sysml.runtime.matrix.mapred.GMRMapper.java

License:Apache License

/**
 * Sends the cached results of every output registered for {@code index} to the reducer.
 *
 * <p>Each non-null cached value is wrapped in {@code taggedValueBuffer}, tagged with its
 * output index, and collected under the value's own indexes. Outputs without cached
 * results and {@code null} entries are skipped.
 *
 * @param index position in {@code outputIndexes} selecting which outputs to emit
 * @param taggedValueBuffer reusable wrapper that carries the value and its tag
 * @param out collector receiving (indexes, tagged value) pairs
 * @throws IOException if the collector fails
 */
protected void processMapOutputToReducerForGMR(int index, TaggedMatrixValue taggedValueBuffer,
        OutputCollector<Writable, Writable> out) throws IOException {
    for (byte tag : outputIndexes.get(index)) {
        final ArrayList<IndexedMatrixValue> cached = cachedValues.get(tag);
        if (cached != null) {
            for (IndexedMatrixValue entry : cached) {
                if (entry != null) {
                    // prepare the tagged output value; for MatrixCell the value is copied
                    // into the existing base object instead of replacing it
                    // (conversion from matrixcell to taggedmatrixpackedcell, e.g., ctable)
                    if (valueClass.equals(MatrixCell.class)) {
                        taggedValueBuffer.getBaseObject().copy(entry.getValue());
                    } else {
                        taggedValueBuffer.setBaseObject(entry.getValue());
                    }
                    taggedValueBuffer.setTag(tag);

                    // each cached value is collected exactly once
                    out.collect(entry.getIndexes(), taggedValueBuffer);
                }
            }
        }
    }
}

From source file:org.apache.sysml.runtime.matrix.mapred.GroupedAggMRCombiner.java

License:Apache License

/**
 * Combines all weighted cells of one key into a single partial aggregate where possible.
 *
 * <p>The instruction is looked up by the key's tag. Its operator decides the strategy:
 * <ul>
 *   <li>a {@code CMOperator} that supports partial aggregation folds all cells into
 *       {@code cmObj} and emits its required partial result and weight;</li>
 *   <li>any other {@code CMOperator} forwards the cells unchanged;</li>
 *   <li>an {@code AggregateOperator} accumulates {@code value * weight}, with or without
 *       a correction buffer, and emits the total with weight 1.</li>
 * </ul>
 * The elapsed time is added to {@code Counters.COMBINE_OR_REDUCE_TIME}.
 *
 * @param key tagged indexes shared by all incoming cells
 * @param values the weighted cells to combine
 * @param out collector receiving either the combined cell or the forwarded cells
 * @param reporter used to record the time spent in this call
 * @throws IOException if the operator is unsupported, or wrapping any exception raised
 *         while combining
 */
@Override
public void reduce(TaggedMatrixIndexes key, Iterator<WeightedCell> values,
        OutputCollector<TaggedMatrixIndexes, WeightedCell> out, Reporter reporter) throws IOException {
    final long startMillis = System.currentTimeMillis();

    // look up the aggregate operator for this key's tag
    GroupedAggregateInstruction ins = grpaggInstructions.get(key.getTag());
    Operator op = ins.getOperator();
    boolean emitCombined = true;

    // collapse the iterator into a single value
    try {
        if (op instanceof CMOperator) {
            // everything except sum
            if (!((CMOperator) op).isPartialAggregateOperator()) {
                // no partial aggregation possible: pass the tuples through to the reducer
                emitCombined = false;
                while (values.hasNext()) {
                    out.collect(key, values.next());
                }
            } else {
                cmObj.reset();
                CM lcmFn = cmFn.get(key.getTag());

                // partial aggregate cm operator
                while (values.hasNext()) {
                    WeightedCell cell = values.next();
                    lcmFn.execute(cmObj, cell.getValue(), cell.getWeight());
                }

                outCell.setValue(cmObj.getRequiredPartialResult(op));
                outCell.setWeight(cmObj.getWeight());
            }
        } else if (op instanceof AggregateOperator) {
            // sum
            AggregateOperator aggop = (AggregateOperator) op;

            if (!aggop.correctionExists) {
                // plain running total, no correction term
                double total = aggop.initialValue;
                while (values.hasNext()) {
                    WeightedCell cell = values.next();
                    total = aggop.increOp.fn.execute(total, cell.getValue() * cell.getWeight());
                }

                outCell.setValue(total);
                outCell.setWeight(1);
            } else {
                KahanObject buffer = new KahanObject(aggop.initialValue, 0);

                // NOTE(review): the result of this call is discarded; kept as in the original
                KahanPlus.getKahanPlusFnObject();

                // partial aggregate with correction
                while (values.hasNext()) {
                    WeightedCell cell = values.next();
                    aggop.increOp.fn.execute(buffer, cell.getValue() * cell.getWeight());
                }

                outCell.setValue(buffer._sum);
                outCell.setWeight(1);
            }
        } else {
            throw new IOException("Unsupported operator in instruction: " + ins);
        }
    } catch (Exception ex) {
        throw new IOException(ex);
    }

    // hand the combined cell to the reducer unless the tuples were forwarded individually
    if (emitCombined) {
        out.collect(key, outCell);
    }

    final long elapsedMillis = System.currentTimeMillis() - startMillis;
    reporter.incrCounter(Counters.COMBINE_OR_REDUCE_TIME, elapsedMillis);
}

From source file:org.apache.sysml.runtime.matrix.mapred.GroupedAggMRMapper.java

License:Apache License

@Override
public void map(MatrixIndexes key, MatrixValue value, OutputCollector<TaggedMatrixIndexes, WeightedCell> out,
        Reporter reporter) throws IOException {
    for (int k = 0; k < representativeMatrixes.size(); k++)
        for (GroupedAggregateInstruction ins : groupAgg_instructions.get(k)) {
            //set the tag once for the block
            outKey.setTag(ins.output);//from  w  w w . ja v a  2  s  . c om

            //get block and unroll into weighted cells
            //(it will be in dense format)
            MatrixBlock block = (MatrixBlock) value;

            int rlen = block.getNumRows();
            int clen = block.getNumColumns();
            if (!ins.hasWeights()) //w/o weights (input vector or matrix)
            {
                long coloff = (key.getColumnIndex() - 1) * ins.getBclen();

                //local pre-aggregation for sum w/ known output dimensions
                if (ins.getOperator() instanceof AggregateOperator && ins.getNGroups() > 0
                        && OptimizerUtils.isValidCPDimensions(ins.getNGroups(), block.getNumColumns() - 1)) {
                    try {
                        MatrixBlock group = block.sliceOperations(0, block.getNumRows() - 1,
                                block.getNumColumns() - 1, block.getNumColumns() - 1, new MatrixBlock());
                        MatrixBlock target = block.sliceOperations(0, block.getNumRows() - 1, 0,
                                block.getNumColumns() - 2, new MatrixBlock());

                        MatrixBlock tmp = group.groupedAggOperations(target, null, new MatrixBlock(),
                                ins.getNGroups(), ins.getOperator());

                        for (int i = 0; i < tmp.getNumRows(); i++) {
                            for (int j = 0; j < tmp.getNumColumns(); j++) {
                                double tmpval = tmp.quickGetValue(i, j);
                                if (tmpval != 0) {
                                    outKeyValue.setIndexes(i + 1, coloff + j + 1);
                                    outValue.setValue(tmpval);
                                    outValue.setWeight(1);
                                    out.collect(outKey, outValue);
                                }
                            }
                        }
                    } catch (Exception ex) {
                        throw new IOException(ex);
                    }
                }
                //general case without pre-aggregation
                else {
                    for (int r = 0; r < rlen; r++) {
                        int group = (int) block.quickGetValue(r, clen - 1);
                        for (int c = 0; c < clen - 1; c++) {
                            outKeyValue.setIndexes(group, coloff + c + 1);
                            outValue.setValue(block.quickGetValue(r, c));
                            outValue.setWeight(1);
                            out.collect(outKey, outValue);
                        }
                    }
                }
            } else //w/ weights (input vector)
            {
                for (int r = 0; r < rlen; r++) {
                    outKeyValue.setIndexes((int) block.quickGetValue(r, 1), 1);
                    outValue.setValue(block.quickGetValue(r, 0));
                    outValue.setWeight(block.quickGetValue(r, 2));
                    out.collect(outKey, outValue);
                }
            }
        }
}

From source file:org.apache.sysml.runtime.matrix.mapred.ReblockBuffer.java

License:Apache License

/**
 * Writes all buffered cells to the collector and empties the buffer.
 *
 * <p>The first {@code _count} entries of {@code _buff} are sorted, the number of distinct
 * blocks they touch is counted, and a size estimate then decides whether the cells are
 * emitted grouped into matrix blocks or one cell at a time. {@code _count} is reset to 0
 * at the end. Nothing happens when the buffer is empty.
 *
 * @param index tag set on every emitted value
 * @param out collector receiving (matrix indexes, tagged partial block) pairs
 * @throws IOException if the collector fails
 */
public void flushBuffer(byte index, OutputCollector<Writable, Writable> out) throws IOException {
    //nothing buffered, nothing to flush
    if (_count == 0)
        return;

    //Step 1) sort reblock buffer (blockwise, no in-block sorting!)
    Arrays.sort(_buff, 0, _count, new ReblockBufferComparator());

    //Step 2) scan for number of created blocks
    //(each entry holds row position in [0] and column position in [1];
    //a change of either block index starts a new block)
    long numBlocks = 0; //number of blocks in buffer
    long cbi = -1, cbj = -1; //current block indexes
    for (int i = 0; i < _count; i++) {
        long bi = UtilFunctions.computeBlockIndex(_buff[i][0], _brlen);
        long bj = UtilFunctions.computeBlockIndex(_buff[i][1], _bclen);

        //switch to next block
        if (bi != cbi || bj != cbj) {
            cbi = bi;
            cbj = bj;
            numBlocks++;
        }
    }

    //Step 3) decide on intermediate representation (for entire buffer)
    //decision based on binarycell vs binaryblock_ultrasparse (worstcase)
    long blockedSize = 16 * numBlocks + 16 * _count; //<long,long>,#<int,int,double>
    long cellSize = 24 * _count; //#<long,long>,<double>
    boolean blocked = (blockedSize <= cellSize);

    //Step 4) output blocks / binary cell (one-at-a-time)
    //(the same wrapper objects are reused for every collect call)
    TaggedAdaptivePartialBlock outTVal = new TaggedAdaptivePartialBlock();
    AdaptivePartialBlock outVal = new AdaptivePartialBlock();
    MatrixIndexes tmpIx = new MatrixIndexes();
    outTVal.setTag(index);
    outTVal.setBaseObject(outVal); //setup wrapper writables
    if (blocked) //output binaryblock
    {
        //create intermediate blocks
        //(sparsity decision uses the average number of buffered cells per block)
        boolean sparse = MatrixBlock.evalSparseFormatInMemory(_brlen, _bclen, _count / numBlocks);
        MatrixBlock tmpBlock = new MatrixBlock();

        //put values into block and output
        cbi = -1;
        cbj = -1; //current block indexes
        for (int i = 0; i < _count; i++) {
            long bi = UtilFunctions.computeBlockIndex(_buff[i][0], _brlen);
            long bj = UtilFunctions.computeBlockIndex(_buff[i][1], _bclen);

            //output block and switch to next index pair
            //(on the very first switch outputBlock is called before tmpIx is assigned;
            //outputBlock is expected to skip that call - see its unassigned-key check)
            if (bi != cbi || bj != cbj) {
                outputBlock(out, tmpIx, outTVal, tmpBlock);
                cbi = bi;
                cbj = bj;
                tmpIx.setIndexes(bi, bj);
                //block dimensions are capped by what remains of _rlen/_clen
                //(presumably the overall matrix dimensions - TODO confirm)
                tmpBlock.reset(Math.min(_brlen, (int) (_rlen - (bi - 1) * _brlen)),
                        Math.min(_bclen, (int) (_clen - (bj - 1) * _bclen)), sparse);
            }

            int ci = UtilFunctions.computeCellInBlock(_buff[i][0], _brlen);
            int cj = UtilFunctions.computeCellInBlock(_buff[i][1], _bclen);
            //entry [2] stores the cell value as raw long bits
            double tmp = Double.longBitsToDouble(_buff[i][2]);
            tmpBlock.appendValue(ci, cj, tmp);
        }

        //output last block 
        outputBlock(out, tmpIx, outTVal, tmpBlock);
    } else //output binarycell
    {
        PartialBlock tmpVal = new PartialBlock();
        outVal.set(tmpVal);
        for (int i = 0; i < _count; i++) {
            long bi = UtilFunctions.computeBlockIndex(_buff[i][0], _brlen);
            long bj = UtilFunctions.computeBlockIndex(_buff[i][1], _bclen);
            int ci = UtilFunctions.computeCellInBlock(_buff[i][0], _brlen);
            int cj = UtilFunctions.computeCellInBlock(_buff[i][1], _bclen);
            //entry [2] stores the cell value as raw long bits
            double tmp = Double.longBitsToDouble(_buff[i][2]);
            tmpIx.setIndexes(bi, bj);
            tmpVal.set(ci, cj, tmp); //in outVal, in outTVal
            out.collect(tmpIx, outTVal);
        }
    }

    //mark the buffer as empty
    _count = 0;
}

From source file:org.apache.sysml.runtime.matrix.mapred.ReblockBuffer.java

License:Apache License

/**
 * Emits a matrix block under the given key, unless the key is still unassigned.
 *
 * <p>A key whose row or column index is {@code -1} is treated as unassigned and nothing is
 * emitted. Blocks in sparse format have their sparse rows sorted before being wrapped in
 * {@code value} and collected.
 *
 * @param out collector receiving the (key, value) pair
 * @param key indexes of the block
 * @param value reusable tagged wrapper whose base object is set to {@code block}
 * @param block the block to emit
 * @throws IOException if the collector fails
 */
private static void outputBlock(OutputCollector<Writable, Writable> out, MatrixIndexes key,
        TaggedAdaptivePartialBlock value, MatrixBlock block) throws IOException {
    // a key with a -1 row or column index has not been assigned to a block yet
    final boolean assigned = key.getRowIndex() != -1 && key.getColumnIndex() != -1;
    if (assigned) {
        // the buffer is sorted blockwise only and values are appended, so sparse rows
        // still need sorting
        if (block.isInSparseFormat()) {
            block.sortSparseRows();
        }

        // wrap and emit the block
        value.getBaseObject().set(block);
        out.collect(key, value);
    }
}