Example usage for org.apache.hadoop.mapred OutputCollector collect

List of usage examples for org.apache.hadoop.mapred OutputCollector collect

Introduction

On this page you can find example usages of org.apache.hadoop.mapred.OutputCollector.collect.

Prototype

void collect(K key, V value) throws IOException;

Source Link

Document

Adds a key/value pair to the output.

Usage

From source file:org.apache.parquet.cascading.ParquetValueScheme.java

License:Apache License

/**
 * Emits the single field of the outgoing tuple through the sink's collector.
 *
 * <p>The outgoing entry must contain exactly one field; its value is cast to {@code T} and
 * collected with a {@code null} key.
 *
 * @param fp the flow process (not read here)
 * @param sc the sink call providing the outgoing entry and the output collector
 * @throws IOException if the collector fails
 * @throws RuntimeException if the outgoing tuple does not have exactly one field
 */
@SuppressWarnings("unchecked")
@Override
public void sink(FlowProcess<JobConf> fp, SinkCall<Object[], OutputCollector> sc) throws IOException {
    final TupleEntry outgoing = sc.getOutgoingEntry();

    final boolean singleField = outgoing.size() == 1;
    if (!singleField) {
        final String message = "ParquetValueScheme expects tuples with an arity of exactly 1, but found "
                + outgoing.getFields();
        throw new RuntimeException(message);
    }

    // only the value is meaningful for this scheme, so the key is passed as null
    final T record = (T) outgoing.getObject(0);
    final OutputCollector collector = sc.getOutput();
    collector.collect(null, record);
}

From source file:org.apache.pig.test.utils.datagen.DataGenMapper.java

License:Apache License

/**
 * Emits generated lines through {@code output}, always with a {@code null} key.
 *
 * <p>When the {@code hasInput} field is false, {@code value} is parsed as the number of rows to
 * generate and one line is produced per row via {@code writer.writeLine}. Otherwise the text of
 * {@code value} is written first, {@code writer.writeLine} is applied to the same buffer, and a
 * single record is emitted.
 *
 * @param key      input key (not read)
 * @param value    the row count (no-input mode) or the text written ahead of the generated line
 * @param output   collector receiving the generated lines
 * @param reporter receives a progress call and a status message every 10000 generated rows
 * @throws IOException           propagated from the collector or the other calls in the body
 * @throws NumberFormatException if, in no-input mode, {@code value} is not a valid {@code long}
 */
public void map(LongWritable key, Text value, OutputCollector<String, String> output, Reporter reporter)
        throws IOException {
    // initial capacity hint for each line buffer: 50 characters per column spec
    int initialSize = colSpecs.size() * 50;

    if (!hasInput) {
        long numRows = Long.parseLong(value.toString().trim());
        // dg.numRows = numRows; //TODO??

        // The counter is a long on purpose: numRows is a long, and an int counter would wrap
        // around before reaching a bound above Integer.MAX_VALUE, so the loop would never end.
        for (long i = 0; i < numRows; i++) {
            StringWriter str = new StringWriter(initialSize);
            PrintWriter pw = new PrintWriter(str);
            writer.writeLine(pw);
            output.collect(null, str.toString());

            if ((i + 1) % 10000 == 0) {
                reporter.progress();
                reporter.setStatus("" + (i + 1) + " tuples generated.");
            }
        }
    } else {
        StringWriter str = new StringWriter(initialSize);
        PrintWriter pw = new PrintWriter(str);
        pw.write(value.toString());
        writer.writeLine(pw);
        output.collect(null, str.toString());
    }
}

From source file:org.apache.sysml.runtime.matrix.mapred.CSVReblockMapper.java

License:Apache License

/**
 * Cuts one row of CSV cells into column blocks of width {@code bclen} and emits each block
 * through {@code out}, reusing {@code row} as the output buffer.
 *
 * <p>Every full block of {@code bclen} cells is emitted first; a trailing block holding the
 * remaining {@code cells.length % bclen} cells is emitted only if that remainder is non-zero.
 * Cells that are {@code null} or empty are replaced by {@code fillValue} when {@code fill} is
 * set.
 *
 * @param row       reusable indexes/row buffer that is filled and emitted for every block
 * @param cells     the cell strings of the row
 * @param rowOffset added to {@code num + 1} to form the row position given to the index helpers
 * @param num       added to {@code rowOffset + 1} to form the row position given to the index helpers
 * @param outTag    tag set on the output indexes
 * @param brlen     row block size passed to the index helpers
 * @param bclen     number of columns per emitted block
 * @param fill      whether empty cells may be replaced by {@code fillValue}
 * @param fillValue replacement value for empty cells
 * @param out       collector receiving the (indexes, row) pairs
 * @return the same {@code row} instance that was passed in
 * @throws IOException      propagated from the collector or the empty-field check
 * @throws RuntimeException if the trailing block contains an empty cell and {@code fill} is false
 */
public static IndexedBlockRow processRow(IndexedBlockRow row, String[] cells, long rowOffset, long num,
        byte outTag, int brlen, int bclen, boolean fill, double fillValue,
        OutputCollector<TaggedFirstSecondIndexes, BlockRow> out) throws IOException {
    int offset = 0; // position in cells of the first cell of the current block
    row.getIndexes().setTag(outTag);
    long rowIndex = UtilFunctions.computeBlockIndex(rowOffset + num + 1, brlen);
    row.getRow().indexInBlock = UtilFunctions.computeCellInBlock(rowOffset + num + 1, brlen);

    // emit all column blocks that are completely filled
    long col = 0;
    final int fullBlocks = cells.length / bclen;
    while (col < fullBlocks) {
        row.getRow().data.reset(1, bclen);
        row.getIndexes().setIndexes(rowIndex, col + 1);
        for (int k = 0; k < bclen; k++) {
            final String cell = cells[k + offset];
            if (cell == null || cell.isEmpty()) {
                IOUtilFunctions.checkAndRaiseErrorCSVEmptyField(null, fill, true);
                row.getRow().data.appendValue(0, k, fillValue);
            } else {
                row.getRow().data.appendValue(0, k, UtilFunctions.parseToDouble(cell));
            }
        }
        out.collect(row.getIndexes(), row.getRow());
        offset += bclen;
        col++;
    }

    // the indexes are advanced to the trailing block even when that block turns out to be empty
    row.getIndexes().setIndexes(rowIndex, col + 1);
    final int remaining = cells.length % bclen;
    if (remaining == 0) {
        return row;
    }

    // emit the trailing, partially filled column block
    row.getRow().data.reset(1, remaining);
    for (int k = 0; k < remaining; k++) {
        final String cell = cells[k + offset];
        if (cell == null || cell.isEmpty()) {
            if (!fill) {
                throw new RuntimeException(
                        "Empty fields found in the input delimited file. Use \"fill\" option to read delimited files with empty fields.");
            }
            row.getRow().data.appendValue(0, k, fillValue);
        } else {
            row.getRow().data.appendValue(0, k, UtilFunctions.parseToDouble(cell));
        }
    }
    out.collect(row.getIndexes(), row.getRow());
    return row;
}

From source file:org.apache.sysml.runtime.matrix.mapred.CSVWriteMapper.java

License:Apache License

/**
 * Converts the raw input record once per representative matrix and forwards every converted
 * block to all output tags mapped to that matrix.
 *
 * @param rawKey   raw input key handed to the input converter
 * @param rawValue raw input value handed to the input converter
 * @param out      collector receiving (tagged indexes, block) pairs
 * @param reporter receives the elapsed time of this call on the {@code MAP_TIME} counter
 * @throws IOException propagated from the collector or the converter
 */
@Override
public void map(Writable rawKey, Writable rawValue, OutputCollector<TaggedFirstSecondIndexes, MatrixBlock> out,
        Reporter reporter) throws IOException {
    final long startTime = System.currentTimeMillis();

    for (int pos = 0; pos < representativeMatrixes.size(); pos++) {
        // bring the record into the block format of this representative matrix
        inputConverter.setBlockSize(brlens[pos], bclens[pos]);
        inputConverter.convert(rawKey, rawValue);

        byte inputTag = representativeMatrixes.get(pos);

        // forward each converted block once per output tag registered for this input
        while (inputConverter.hasNext()) {
            Pair<MatrixIndexes, MatrixBlock> converted = inputConverter.next();
            MatrixIndexes blockIndexes = converted.getKey();
            MatrixBlock block = converted.getValue();

            outIndexes.setIndexes(blockIndexes.getRowIndex(), blockIndexes.getColumnIndex());
            ArrayList<Byte> outputTags = inputOutputMap.get(inputTag);
            for (byte outputTag : outputTags) {
                outIndexes.setTag(outputTag);
                out.collect(outIndexes, block);
            }
        }
    }

    final long elapsed = System.currentTimeMillis() - startTime;
    reporter.incrCounter(Counters.MAP_TIME, elapsed);
}

From source file:org.apache.sysml.runtime.matrix.mapred.FrameReblockBuffer.java

License:Apache License

/**
 * Emits a frame block under the given key unless the key is the {@code -1} placeholder used
 * for unassigned blocks.
 *
 * @param out   collector receiving the key/block pair
 * @param key   block key; {@code -1} means the block is unassigned and nothing is emitted
 * @param block frame block to emit
 * @throws IOException propagated from the collector
 */
private static void outputBlock(OutputCollector<Long, Writable> out, Long key, FrameBlock block)
        throws IOException {
    //only assigned blocks are forwarded
    if (key != -1) {
        out.collect(key, block);
    }
}

From source file:org.apache.sysml.runtime.matrix.mapred.GMRMapper.java

License:Apache License

/**
 * Emits all cached results belonging to the outputs registered for {@code index}, wrapping each
 * value in the reusable tagged buffer.
 *
 * @param index             position into {@code outputIndexes} selecting the output tags to emit
 * @param taggedValueBuffer reusable tagged wrapper that is filled and collected for every result
 * @param out               collector receiving (indexes, tagged value) pairs
 * @throws IOException propagated from the collector
 */
protected void processMapOutputToReducerForGMR(int index, TaggedMatrixValue taggedValueBuffer,
        OutputCollector<Writable, Writable> out) throws IOException {
    for (byte tag : outputIndexes.get(index)) {
        ArrayList<IndexedMatrixValue> cached = cachedValues.get(tag);
        if (cached != null) {
            for (IndexedMatrixValue entry : cached) {
                if (entry != null) {
                    //fill the tagged wrapper: matrix cells are copied into the existing base
                    //object (e.g., ctable), every other value type replaces the base object
                    if (valueClass.equals(MatrixCell.class)) {
                        taggedValueBuffer.getBaseObject().copy(entry.getValue());
                    } else {
                        taggedValueBuffer.setBaseObject(entry.getValue());
                    }
                    taggedValueBuffer.setTag(tag);

                    //each result is collected exactly once
                    out.collect(entry.getIndexes(), taggedValueBuffer);
                }
            }
        }
    }
}

From source file:org.apache.sysml.runtime.matrix.mapred.GroupedAggMRCombiner.java

License:Apache License

/**
 * Combines all weighted cells of one key into a single partial aggregate, or forwards them
 * unchanged when the instruction's operator cannot be partially aggregated.
 *
 * <p>The operator is looked up via the key's tag. Partially aggregatable {@code CMOperator}s
 * and all {@code AggregateOperator}s produce one output cell; any other {@code CMOperator}
 * passes its input cells straight through.
 *
 * @param key      tagged indexes identifying the group and the instruction
 * @param values   weighted cells to combine
 * @param out      collector receiving the combined (or forwarded) cells
 * @param reporter receives the elapsed time on the {@code COMBINE_OR_REDUCE_TIME} counter
 * @throws IOException for an unsupported operator, and as a wrapper around any exception
 *                     raised while combining
 */
@Override
public void reduce(TaggedMatrixIndexes key, Iterator<WeightedCell> values,
        OutputCollector<TaggedMatrixIndexes, WeightedCell> out, Reporter reporter) throws IOException {
    final long startTime = System.currentTimeMillis();

    //look up the aggregate operator for this key's tag
    GroupedAggregateInstruction instruction = grpaggInstructions.get(key.getTag());
    Operator operator = instruction.getOperator();
    boolean emitCombined = true;

    try {
        if (operator instanceof CMOperator) {
            //everything except sum
            if (((CMOperator) operator).isPartialAggregateOperator()) {
                cmObj.reset();
                CM moment = cmFn.get(key.getTag());

                //fold all cells into the shared cm object
                while (values.hasNext()) {
                    WeightedCell cell = values.next();
                    moment.execute(cmObj, cell.getValue(), cell.getWeight());
                }

                outCell.setValue(cmObj.getRequiredPartialResult(operator));
                outCell.setWeight(cmObj.getWeight());
            } else {
                //no partial aggregation possible: pass the tuples on to the reducer
                emitCombined = false;
                while (values.hasNext()) {
                    out.collect(key, values.next());
                }
            }
        } else if (operator instanceof AggregateOperator) {
            //sum
            AggregateOperator aggregate = (AggregateOperator) operator;

            if (aggregate.correctionExists) {
                KahanObject kahan = new KahanObject(aggregate.initialValue, 0);

                KahanPlus.getKahanPlusFnObject();

                //weighted sum with correction
                while (values.hasNext()) {
                    WeightedCell cell = values.next();
                    aggregate.increOp.fn.execute(kahan, cell.getValue() * cell.getWeight());
                }

                outCell.setValue(kahan._sum);
                outCell.setWeight(1);
            } else {
                double sum = aggregate.initialValue;

                //weighted sum without correction
                while (values.hasNext()) {
                    WeightedCell cell = values.next();
                    sum = aggregate.increOp.fn.execute(sum, cell.getValue() * cell.getWeight());
                }

                outCell.setValue(sum);
                outCell.setWeight(1);
            }
        } else {
            throw new IOException("Unsupported operator in instruction: " + instruction);
        }
    } catch (Exception ex) {
        throw new IOException(ex);
    }

    //hand the combined cell to the reducer (forwarded tuples were already collected)
    if (emitCombined) {
        out.collect(key, outCell);
    }

    final long elapsed = System.currentTimeMillis() - startTime;
    reporter.incrCounter(Counters.COMBINE_OR_REDUCE_TIME, elapsed);
}

From source file:org.apache.sysml.runtime.matrix.mapred.GroupedAggMRMapper.java

License:Apache License

/**
 * Unrolls a matrix block into weighted cells for every grouped-aggregate instruction and emits
 * them under the shared, tagged output key.
 *
 * <p>Without weights, the last column of the block holds the group and the remaining columns
 * hold the values; a sum with a known number of groups is pre-aggregated locally and only
 * non-zero results are emitted. With weights, column 0 holds the value, column 1 the group and
 * column 2 the weight.
 *
 * @param key      indexes of the incoming block; the column index determines the column offset
 * @param value    the incoming block, cast to {@code MatrixBlock}
 * @param out      collector receiving (tagged indexes, weighted cell) pairs
 * @param reporter not used by this method
 * @throws IOException propagated from the collector, and as a wrapper around any exception
 *                     raised during local pre-aggregation
 */
@Override
public void map(MatrixIndexes key, MatrixValue value, OutputCollector<TaggedMatrixIndexes, WeightedCell> out,
        Reporter reporter) throws IOException {
    for (int pos = 0; pos < representativeMatrixes.size(); pos++) {
        for (GroupedAggregateInstruction instr : groupAgg_instructions.get(pos)) {
            //the tag is fixed for all cells produced from this block
            outKey.setTag(instr.output);

            //the block is unrolled cell by cell (it will be in dense format)
            MatrixBlock block = (MatrixBlock) value;
            int rlen = block.getNumRows();
            int clen = block.getNumColumns();

            if (instr.hasWeights()) {
                //w/ weights (input vector): columns are value, group, weight
                for (int r = 0; r < rlen; r++) {
                    outKeyValue.setIndexes((int) block.quickGetValue(r, 1), 1);
                    outValue.setValue(block.quickGetValue(r, 0));
                    outValue.setWeight(block.quickGetValue(r, 2));
                    out.collect(outKey, outValue);
                }
                continue;
            }

            //w/o weights (input vector or matrix)
            long coloff = (key.getColumnIndex() - 1) * instr.getBclen();

            boolean preAggregate = instr.getOperator() instanceof AggregateOperator && instr.getNGroups() > 0
                    && OptimizerUtils.isValidCPDimensions(instr.getNGroups(), block.getNumColumns() - 1);

            if (preAggregate) {
                //local pre-aggregation for sum w/ known output dimensions
                try {
                    MatrixBlock group = block.sliceOperations(0, block.getNumRows() - 1,
                            block.getNumColumns() - 1, block.getNumColumns() - 1, new MatrixBlock());
                    MatrixBlock target = block.sliceOperations(0, block.getNumRows() - 1, 0,
                            block.getNumColumns() - 2, new MatrixBlock());

                    MatrixBlock aggregated = group.groupedAggOperations(target, null, new MatrixBlock(),
                            instr.getNGroups(), instr.getOperator());

                    //only non-zero aggregates are emitted
                    for (int i = 0; i < aggregated.getNumRows(); i++) {
                        for (int j = 0; j < aggregated.getNumColumns(); j++) {
                            double cellValue = aggregated.quickGetValue(i, j);
                            if (cellValue != 0) {
                                outKeyValue.setIndexes(i + 1, coloff + j + 1);
                                outValue.setValue(cellValue);
                                outValue.setWeight(1);
                                out.collect(outKey, outValue);
                            }
                        }
                    }
                } catch (Exception ex) {
                    throw new IOException(ex);
                }
            } else {
                //general case without pre-aggregation: one weighted cell per value
                for (int r = 0; r < rlen; r++) {
                    int group = (int) block.quickGetValue(r, clen - 1);
                    for (int c = 0; c < clen - 1; c++) {
                        outKeyValue.setIndexes(group, coloff + c + 1);
                        outValue.setValue(block.quickGetValue(r, c));
                        outValue.setWeight(1);
                        out.collect(outKey, outValue);
                    }
                }
            }
        }
    }
}

From source file:org.apache.sysml.runtime.matrix.mapred.ReblockBuffer.java

License:Apache License

/**
 * Flushes all buffered cells to {@code out} and resets the buffer.
 *
 * <p>Each buffer entry holds a row index, a column index and the raw long bits of a double
 * value. The entries are sorted blockwise, then emitted either as whole matrix blocks or as
 * individual cells, whichever the size estimate in step 3 finds smaller for the entire buffer.
 * Nothing is emitted when the buffer is empty.
 *
 * @param index tag set on every emitted value
 * @param out   collector receiving (matrix indexes, tagged partial block) pairs
 * @throws IOException propagated from the collector
 */
public void flushBuffer(byte index, OutputCollector<Writable, Writable> out) throws IOException {
    if (_count == 0)
        return;

    //Step 1) sort reblock buffer (blockwise, no in-block sorting!)
    Arrays.sort(_buff, 0, _count, new ReblockBufferComparator());

    //Step 2) scan for number of created blocks
    long numBlocks = 0; //number of blocks in buffer
    long cbi = -1, cbj = -1; //current block indexes
    for (int i = 0; i < _count; i++) {
        long bi = UtilFunctions.computeBlockIndex(_buff[i][0], _brlen);
        long bj = UtilFunctions.computeBlockIndex(_buff[i][1], _bclen);

        //switch to next block
        if (bi != cbi || bj != cbj) {
            cbi = bi;
            cbj = bj;
            numBlocks++;
        }
    }

    //Step 3) decide on intermediate representation (for entire buffer)
    //decision based on binarycell vs binaryblock_ultrasparse (worstcase)
    //(long literals force 64-bit arithmetic: _count is an int, so 16 * _count and 24 * _count
    //would otherwise overflow for large buffers before being widened to long)
    long blockedSize = 16 * numBlocks + 16L * _count; //<long,long>,#<int,int,double>
    long cellSize = 24L * _count; //#<long,long>,<double>
    boolean blocked = (blockedSize <= cellSize);

    //Step 4) output blocks / binary cell (one-at-a-time)
    TaggedAdaptivePartialBlock outTVal = new TaggedAdaptivePartialBlock();
    AdaptivePartialBlock outVal = new AdaptivePartialBlock();
    MatrixIndexes tmpIx = new MatrixIndexes();
    outTVal.setTag(index);
    outTVal.setBaseObject(outVal); //setup wrapper writables
    if (blocked) //output binaryblock
    {
        //create intermediate blocks (sparsity is estimated from the average cells per block)
        boolean sparse = MatrixBlock.evalSparseFormatInMemory(_brlen, _bclen, _count / numBlocks);
        MatrixBlock tmpBlock = new MatrixBlock();

        //put values into block and output
        cbi = -1;
        cbj = -1; //current block indexes
        for (int i = 0; i < _count; i++) {
            long bi = UtilFunctions.computeBlockIndex(_buff[i][0], _brlen);
            long bj = UtilFunctions.computeBlockIndex(_buff[i][1], _bclen);

            //output block and switch to next index pair
            if (bi != cbi || bj != cbj) {
                outputBlock(out, tmpIx, outTVal, tmpBlock);
                cbi = bi;
                cbj = bj;
                tmpIx.setIndexes(bi, bj);
                //boundary blocks are clipped to the remaining rows/columns of the matrix
                tmpBlock.reset(Math.min(_brlen, (int) (_rlen - (bi - 1) * _brlen)),
                        Math.min(_bclen, (int) (_clen - (bj - 1) * _bclen)), sparse);
            }

            int ci = UtilFunctions.computeCellInBlock(_buff[i][0], _brlen);
            int cj = UtilFunctions.computeCellInBlock(_buff[i][1], _bclen);
            double tmp = Double.longBitsToDouble(_buff[i][2]);
            tmpBlock.appendValue(ci, cj, tmp);
        }

        //output last block 
        outputBlock(out, tmpIx, outTVal, tmpBlock);
    } else //output binarycell
    {
        PartialBlock tmpVal = new PartialBlock();
        outVal.set(tmpVal);
        for (int i = 0; i < _count; i++) {
            long bi = UtilFunctions.computeBlockIndex(_buff[i][0], _brlen);
            long bj = UtilFunctions.computeBlockIndex(_buff[i][1], _bclen);
            int ci = UtilFunctions.computeCellInBlock(_buff[i][0], _brlen);
            int cj = UtilFunctions.computeCellInBlock(_buff[i][1], _bclen);
            double tmp = Double.longBitsToDouble(_buff[i][2]);
            tmpIx.setIndexes(bi, bj);
            tmpVal.set(ci, cj, tmp); //in outVal, in outTVal
            out.collect(tmpIx, outTVal);
        }
    }

    //mark the buffer as empty so it can be refilled
    _count = 0;
}

From source file:org.apache.sysml.runtime.matrix.mapred.ReblockBuffer.java

License:Apache License

/**
 * Wraps a matrix block into the tagged value and emits it under the given key, unless the key
 * still carries a {@code -1} row or column index (unassigned block).
 *
 * @param out   collector receiving the key/value pair
 * @param key   block indexes; a row or column index of {@code -1} suppresses the output
 * @param value tagged wrapper whose base object is set to {@code block} before emitting
 * @param block block to emit; its sparse rows are sorted first if it is in sparse format
 * @throws IOException propagated from the collector
 */
private static void outputBlock(OutputCollector<Writable, Writable> out, MatrixIndexes key,
        TaggedAdaptivePartialBlock value, MatrixBlock block) throws IOException {
    final boolean unassigned = key.getRowIndex() == -1 || key.getColumnIndex() == -1;
    if (!unassigned) {
        //the buffer is only sorted blockwise and values are appended, so sparse rows
        //have to be sorted before the block is handed on
        if (block.isInSparseFormat()) {
            block.sortSparseRows();
        }

        value.getBaseObject().set(block);
        out.collect(key, value);
    }
}