Example usage for org.apache.hadoop.io LongWritable get

Introduction

On this page you can find example usage of org.apache.hadoop.io.LongWritable.get().

Prototype

public long get() 

Document

Return the value of this LongWritable.
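
As a quick, self-contained illustration (not taken from any of the projects below), the following minimal sketch shows how get() unwraps the primitive long held by a LongWritable; the class name is made up for this example.

import org.apache.hadoop.io.LongWritable;

public class LongWritableGetExample {
    public static void main(String[] args) {
        // wrap a primitive long in a Hadoop Writable
        LongWritable writable = new LongWritable(42L);

        // get() returns the wrapped primitive value
        long value = writable.get();
        System.out.println("value = " + value); // prints: value = 42

        // set() replaces the wrapped value; a later get() reflects the change
        writable.set(7L);
        System.out.println("value = " + writable.get()); // prints: value = 7
    }
}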

Usage

From source file: org.apache.pirk.responder.wideskies.mapreduce.FinalResponseReducer.java

License: Apache License

@Override
public void reduce(LongWritable colNum, Iterable<Text> colVals, Context ctx)
        throws IOException, InterruptedException {
    logger.debug("Processing reducer for colNum = " + colNum.toString());
    ctx.getCounter(MRStats.NUM_COLUMNS).increment(1);

    BigInteger column = null;
    for (Text val : colVals) // there is only one column value
    {
        column = new BigInteger(val.toString());
        logger.debug("colNum = " + (int) colNum.get() + " column = " + column.toString());
    }
    response.addElement((int) colNum.get(), column);
}

From source file: org.apache.rya.prospector.plans.impl.CountPlan.java

License: Apache License

@Override
public Collection<Map.Entry<IntermediateProspect, LongWritable>> combine(final IntermediateProspect prospect,
        final Iterable<LongWritable> counts) {
    long sum = 0;
    for (final LongWritable count : counts) {
        sum += count.get();
    }
    return Collections
            .singleton(new CustomEntry<IntermediateProspect, LongWritable>(prospect, new LongWritable(sum)));
}

From source file: org.apache.rya.prospector.plans.impl.CountPlan.java

License: Apache License

@Override
public void reduce(final IntermediateProspect prospect, final Iterable<LongWritable> counts,
        final Date timestamp, final Reducer.Context context) throws IOException, InterruptedException {
    long sum = 0;
    for (final LongWritable count : counts) {
        sum += count.get();
    }

    final String indexType = prospect.getTripleValueType().getIndexType();

    // not sure if this is the best idea..
    if ((sum >= 0) || indexType.equals(TripleValueType.PREDICATE.getIndexType())) {
        final Mutation m = new Mutation(indexType + DELIM + prospect.getData() + DELIM
                + ProspectorUtils.getReverseIndexDateTime(timestamp));

        final String dataType = prospect.getDataType();
        final ColumnVisibility visibility = new ColumnVisibility(prospect.getVisibility());
        final Value sumValue = new Value(("" + sum).getBytes(StandardCharsets.UTF_8));
        m.put(COUNT, prospect.getDataType(), visibility, timestamp.getTime(), sumValue);

        context.write(null, m);
    }
}

From source file: org.apache.sysml.runtime.controlprogram.parfor.RemoteDPParForMR.java

License: Apache License

/**
 * Result file contains hierarchy of workerID-resultvar(incl filename). We deduplicate
 * on the workerID. Without JVM reuse each task refers to a unique workerID, so we
 * will not find any duplicates. With JVM reuse, however, each slot refers to a workerID, 
 * and there are duplicate filenames due to partial aggregation and overwrite of fname 
 * (the RemoteParWorkerMapper ensures uniqueness of those files independent of the 
 * runtime implementation).
 * 
 * @param job job configuration
 * @param fname file name
 * @return array of local variable maps
 * @throws DMLRuntimeException if DMLRuntimeException occurs
 * @throws IOException if IOException occurs
 */
@SuppressWarnings("deprecation")
public static LocalVariableMap[] readResultFile(JobConf job, String fname)
        throws DMLRuntimeException, IOException {
    HashMap<Long, LocalVariableMap> tmp = new HashMap<>();

    Path path = new Path(fname);
    FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
    LongWritable key = new LongWritable(); //workerID
    Text value = new Text(); //serialized var header (incl filename)

    int countAll = 0;
    for (Path lpath : IOUtilFunctions.getSequenceFilePaths(fs, path)) {
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);
        try {
            while (reader.next(key, value)) {
                if (!tmp.containsKey(key.get()))
                    tmp.put(key.get(), new LocalVariableMap());
                Object[] dat = ProgramConverter.parseDataObject(value.toString());
                tmp.get(key.get()).put((String) dat[0], (Data) dat[1]);
                countAll++;
            }
        } finally {
            IOUtilFunctions.closeSilently(reader);
        }
    }

    LOG.debug("Num remote worker results (before deduplication): " + countAll);
    LOG.debug("Num remote worker results: " + tmp.size());

    //create return array
    return tmp.values().toArray(new LocalVariableMap[0]);
}

From source file: org.apache.sysml.runtime.controlprogram.parfor.RemoteDPParWorkerReducer.java

License: Apache License

@Override
public void reduce(LongWritable key, Iterator<Writable> valueList, OutputCollector<Writable, Writable> out,
        Reporter reporter) throws IOException {
    //cache collector/reporter (for write in close)
    _out = out;
    _report = reporter;

    //collect input partition
    if (_info == OutputInfo.BinaryBlockOutputInfo)
        _partition = collectBinaryBlock(valueList);
    else
        _partition = collectBinaryCellInput(valueList);

    //execute program
    LOG.trace("execute RemoteDPParWorkerReducer " + _stringID + " (" + _workerID + ")");
    try {
        //update in-memory matrix partition
        MatrixObject mo = _ec.getMatrixObject(_inputVar);
        mo.setInMemoryPartition(_partition);

        //create tasks for input data
        Task lTask = new Task(_iterVar, TaskType.SET);
        lTask.addIteration(new IntObject(key.get()));

        //execute program
        executeTask(lTask);
    } catch (Exception ex) {
        throw new IOException("ParFOR: Failed to execute task.", ex);
    }

    //statistic maintenance (after final export)
    RemoteParForUtils.incrementParForMRCounters(_report, 1, 1);
}

From source file: org.apache.sysml.runtime.controlprogram.parfor.RemoteParForMR.java

License: Apache License

/**
 * Result file contains hierarchy of workerID-resultvar(incl filename). We deduplicate
 * on the workerID. Without JVM reuse each task refers to a unique workerID, so we
 * will not find any duplicates. With JVM reuse, however, each slot refers to a workerID, 
 * and there are duplicate filenames due to partial aggregation and overwrite of fname 
 * (the RemoteParWorkerMapper ensures uniqueness of those files independent of the 
 * runtime implementation).
 * 
 * @param job job configuration
 * @param fname file name
 * @return array of local variable maps
 * @throws DMLRuntimeException if DMLRuntimeException occurs
 * @throws IOException if IOException occurs
 */
@SuppressWarnings("deprecation")
public static LocalVariableMap[] readResultFile(JobConf job, String fname)
        throws DMLRuntimeException, IOException {
    HashMap<Long, LocalVariableMap> tmp = new HashMap<>();

    Path path = new Path(fname);
    FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
    LongWritable key = new LongWritable(); //workerID
    Text value = new Text(); //serialized var header (incl filename)

    int countAll = 0;
    for (Path lpath : IOUtilFunctions.getSequenceFilePaths(fs, path)) {
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);
        try {
            while (reader.next(key, value)) {
                //System.out.println("key="+key.get()+", value="+value.toString());
                if (!tmp.containsKey(key.get()))
                    tmp.put(key.get(), new LocalVariableMap());
                Object[] dat = ProgramConverter.parseDataObject(value.toString());
                tmp.get(key.get()).put((String) dat[0], (Data) dat[1]);
                countAll++;
            }
        } finally {
            IOUtilFunctions.closeSilently(reader);
        }
    }

    LOG.debug("Num remote worker results (before deduplication): " + countAll);
    LOG.debug("Num remote worker results: " + tmp.size());

    //create return array
    return tmp.values().toArray(new LocalVariableMap[0]);
}

From source file: org.apache.sysml.runtime.io.FrameReaderBinaryBlock.java

License: Apache License

@SuppressWarnings({ "deprecation" })
protected static void readBinaryBlockFrameFromSequenceFile(Path path, JobConf job, FileSystem fs,
        FrameBlock dest) throws IOException, DMLRuntimeException {
    int rlen = dest.getNumRows();
    int clen = dest.getNumColumns();

    //directly read from sequence files (individual partfiles)
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, job);
    LongWritable key = new LongWritable(-1L);
    FrameBlock value = new FrameBlock();

    try {
        while (reader.next(key, value)) {
            int row_offset = (int) (key.get() - 1);
            int rows = value.getNumRows();
            int cols = value.getNumColumns();

            if (rows == 0 || cols == 0) //Empty block, ignore it.
                continue;

            //bound check per block
            if (row_offset + rows < 0 || row_offset + rows > rlen) {
                throw new IOException("Frame block [" + (row_offset + 1) + ":" + (row_offset + rows) + "," + ":"
                        + "] " + "out of overall frame range [1:" + rlen + ",1:" + clen + "].");
            }

            //copy block into target frame, incl meta on first
            dest.copy(row_offset, row_offset + rows - 1, 0, cols - 1, value);
            if (row_offset == 0)
                dest.setColumnMetadata(value.getColumnMetadata());
        }
    } finally {
        IOUtilFunctions.closeSilently(reader);
    }
}

From source file: org.apache.sysml.runtime.matrix.mapred.CSVAssignRowIDMapper.java

License: Apache License

@Override
public void map(LongWritable key, Text value, OutputCollector<ByteWritable, OffsetCount> out, Reporter report)
        throws IOException {
    if (first) {
        first = false;
        fileOffset = key.get();
        outCache = out;
    }

    //getting the number of columns
    if (key.get() == 0 && headerFile) {
        if (!ignoreFirstLine) {
            report.incrCounter(CSVReblockMR.NUM_COLS_IN_MATRIX, outKey.toString(),
                    value.toString().split(delim, -1).length);
            num++;
        } else
            realFirstLine = true;
    } else {
        if (realFirstLine) {
            report.incrCounter(CSVReblockMR.NUM_COLS_IN_MATRIX, outKey.toString(),
                    value.toString().split(delim, -1).length);
            realFirstLine = false;
        }
        num++;
    }
}

From source file: org.apache.tez.mapreduce.input.TestMultiMRInput.java

License: Apache License

public static LinkedHashMap<LongWritable, Text> createInputData(FileSystem fs, Path workDir, JobConf job,
        String filename, long startKey, long numKeys) throws IOException {
    LinkedHashMap<LongWritable, Text> data = new LinkedHashMap<LongWritable, Text>();
    Path file = new Path(workDir, filename);
    LOG.info("Generating data at path: " + file);
    // create a file with length entries
    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, file, LongWritable.class, Text.class);
    try {
        Random r = new Random(System.currentTimeMillis());
        LongWritable key = new LongWritable();
        Text value = new Text();
        for (long i = startKey; i < numKeys; i++) {
            key.set(i);
            value.set(Integer.toString(r.nextInt(10000)));
            data.put(new LongWritable(key.get()), new Text(value.toString()));
            writer.append(key, value);
            LOG.info("<k, v> : <" + key.get() + ", " + value + ">");
        }
    } finally {
        writer.close();
    }
    return data;
}

From source file: org.apache.tez.mapreduce.processor.map.TestMapProcessor.java

License: Apache License

@Test(timeout = 5000)
public void testMapProcessor() throws Exception {
    String dagName = "mrdag0";
    String vertexName = MultiStageMRConfigUtil.getInitialMapVertexName();
    JobConf jobConf = new JobConf(defaultConf);
    setUpJobConf(jobConf);

    MRHelpers.translateMRConfToTez(jobConf);
    jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);

    jobConf.setBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS, false);

    jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR,
            new Path(workDir, "localized-resources").toUri().toString());

    Path mapInput = new Path(workDir, "map0");

    MapUtils.generateInputSplit(localFs, workDir, jobConf, mapInput);

    InputSpec mapInputSpec = new InputSpec("NullSrcVertex",
            InputDescriptor.create(MRInputLegacy.class.getName())
                    .setUserPayload(UserPayload.create(ByteBuffer.wrap(MRRuntimeProtos.MRInputUserPayloadProto
                            .newBuilder().setConfigurationBytes(TezUtils.createByteStringFromConf(jobConf))
                            .build().toByteArray()))),
            1);
    OutputSpec mapOutputSpec = new OutputSpec("NullDestVertex",
            OutputDescriptor.create(OrderedPartitionedKVOutput.class.getName())
                    .setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)),
            1);

    LogicalIOProcessorRuntimeTask task = MapUtils.createLogicalTask(localFs, workDir, jobConf, 0,
            new Path(workDir, "map0"), new TestUmbilical(), dagName, vertexName,
            Collections.singletonList(mapInputSpec), Collections.singletonList(mapOutputSpec));

    task.initialize();
    task.run();
    task.close();

    OutputContext outputContext = task.getOutputContexts().iterator().next();
    TezTaskOutput mapOutputs = new TezTaskOutputFiles(jobConf, outputContext.getUniqueIdentifier());

    // TODO NEWTEZ FIXME OutputCommitter verification
    //    MRTask mrTask = (MRTask)t.getProcessor();
    //    Assert.assertEquals(TezNullOutputCommitter.class.getName(), mrTask
    //        .getCommitter().getClass().getName());
    //    t.close();

    Path mapOutputFile = getMapOutputFile(jobConf, outputContext);
    LOG.info("mapOutputFile = " + mapOutputFile);
    IFile.Reader reader = new IFile.Reader(localFs, mapOutputFile, null, null, null, false, 0, -1);
    LongWritable key = new LongWritable();
    Text value = new Text();
    DataInputBuffer keyBuf = new DataInputBuffer();
    DataInputBuffer valueBuf = new DataInputBuffer();
    long prev = Long.MIN_VALUE;
    while (reader.nextRawKey(keyBuf)) {
        reader.nextRawValue(valueBuf);
        key.readFields(keyBuf);
        value.readFields(valueBuf);
        if (prev != Long.MIN_VALUE) {
            assert (prev <= key.get());
            prev = key.get();
        }
        LOG.info("key = " + key.get() + "; value = " + value);
    }
    reader.close();
}