List of usage examples for org.apache.hadoop.io LongWritable get
public long get()
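Returns the primitive long wrapped by this Writable. Before the project examples below, a minimal self-contained sketch (illustrative only, not taken from any of the projects listed):

    import org.apache.hadoop.io.LongWritable;

    public class LongWritableGetDemo {
        public static void main(String[] args) {
            LongWritable w = new LongWritable(42L); // wrap a primitive long
            long raw = w.get();                     // unwrap it again
            System.out.println(raw);                // prints 42

            w.set(7L);                              // Writables are mutable,
            System.out.println(w.get());            // so get() now returns 7
        }
    }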
From source file: org.apache.pirk.responder.wideskies.mapreduce.FinalResponseReducer.java
License: Apache License
@Override
public void reduce(LongWritable colNum, Iterable<Text> colVals, Context ctx)
        throws IOException, InterruptedException {
    logger.debug("Processing reducer for colNum = " + colNum.toString());
    ctx.getCounter(MRStats.NUM_COLUMNS).increment(1);

    BigInteger column = null;
    for (Text val : colVals) // there is only one column value
    {
        column = new BigInteger(val.toString());
        logger.debug("colNum = " + (int) colNum.get() + " column = " + column.toString());
    }
    response.addElement((int) colNum.get(), column);
}
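Note that this reducer narrows the result of colNum.get() from long to int with a plain cast. A hedged variant (illustrative, not from the Pirk sources) that fails fast instead of silently truncating when the value does not fit in an int:

    long colNum = 4_000_000_000L;        // does not fit in an int
    int unsafe = (int) colNum;           // silently wraps to a negative value
    int safe = Math.toIntExact(colNum);  // throws ArithmeticException on overflow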
From source file: org.apache.rya.prospector.plans.impl.CountPlan.java
License: Apache License
@Override
public Collection<Map.Entry<IntermediateProspect, LongWritable>> combine(final IntermediateProspect prospect,
        final Iterable<LongWritable> counts) {
    long sum = 0;
    for (final LongWritable count : counts) {
        sum += count.get();
    }
    return Collections
            .singleton(new CustomEntry<IntermediateProspect, LongWritable>(prospect, new LongWritable(sum)));
}
From source file: org.apache.rya.prospector.plans.impl.CountPlan.java
License: Apache License
@Override
public void reduce(final IntermediateProspect prospect, final Iterable<LongWritable> counts,
        final Date timestamp, final Reducer.Context context) throws IOException, InterruptedException {
    long sum = 0;
    for (final LongWritable count : counts) {
        sum += count.get();
    }

    final String indexType = prospect.getTripleValueType().getIndexType();

    // not sure if this is the best idea..
    if ((sum >= 0) || indexType.equals(TripleValueType.PREDICATE.getIndexType())) {
        final Mutation m = new Mutation(indexType + DELIM + prospect.getData() + DELIM
                + ProspectorUtils.getReverseIndexDateTime(timestamp));
        final String dataType = prospect.getDataType();
        final ColumnVisibility visibility = new ColumnVisibility(prospect.getVisibility());
        final Value sumValue = new Value(("" + sum).getBytes(StandardCharsets.UTF_8));
        m.put(COUNT, prospect.getDataType(), visibility, timestamp.getTime(), sumValue);
        context.write(null, m);
    }
}
From source file: org.apache.sysml.runtime.controlprogram.parfor.RemoteDPParForMR.java
License: Apache License
/**
 * Result file contains hierarchy of workerID-resultvar(incl filename). We deduplicate
 * on the workerID. Without JVM reuse each task refers to a unique workerID, so we
 * will not find any duplicates. With JVM reuse, however, each slot refers to a workerID,
 * and there are duplicate filenames due to partial aggregation and overwrite of fname
 * (the RemoteParWorkerMapper ensures uniqueness of those files independent of the
 * runtime implementation).
 *
 * @param job job configuration
 * @param fname file name
 * @return array of local variable maps
 * @throws DMLRuntimeException if DMLRuntimeException occurs
 * @throws IOException if IOException occurs
 */
@SuppressWarnings("deprecation")
public static LocalVariableMap[] readResultFile(JobConf job, String fname)
        throws DMLRuntimeException, IOException {
    HashMap<Long, LocalVariableMap> tmp = new HashMap<>();

    Path path = new Path(fname);
    FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
    LongWritable key = new LongWritable(); //workerID
    Text value = new Text();               //serialized var header (incl filename)

    int countAll = 0;
    for (Path lpath : IOUtilFunctions.getSequenceFilePaths(fs, path)) {
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);
        try {
            while (reader.next(key, value)) {
                if (!tmp.containsKey(key.get()))
                    tmp.put(key.get(), new LocalVariableMap());
                Object[] dat = ProgramConverter.parseDataObject(value.toString());
                tmp.get(key.get()).put((String) dat[0], (Data) dat[1]);
                countAll++;
            }
        } finally {
            IOUtilFunctions.closeSilently(reader);
        }
    }

    LOG.debug("Num remote worker results (before deduplication): " + countAll);
    LOG.debug("Num remote worker results: " + tmp.size());

    //create return array
    return tmp.values().toArray(new LocalVariableMap[0]);
}
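The method above reads SequenceFiles through the deprecated Reader(FileSystem, Path, Configuration) constructor, hence the @SuppressWarnings. A sketch of the same key/value loop against the option-based reader API of Hadoop 2.x (illustrative, not from the SystemML sources):

    SequenceFile.Reader reader = new SequenceFile.Reader(job, SequenceFile.Reader.file(lpath));
    try {
        LongWritable key = new LongWritable(); // workerID, as above
        Text value = new Text();
        while (reader.next(key, value)) {
            long workerID = key.get();         // unbox for use as a map key
            // ... parse and store value as in readResultFile ...
        }
    } finally {
        reader.close();
    }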
From source file: org.apache.sysml.runtime.controlprogram.parfor.RemoteDPParWorkerReducer.java
License: Apache License
@Override
public void reduce(LongWritable key, Iterator<Writable> valueList, OutputCollector<Writable, Writable> out,
        Reporter reporter) throws IOException {
    //cache collector/reporter (for write in close)
    _out = out;
    _report = reporter;

    //collect input partition
    if (_info == OutputInfo.BinaryBlockOutputInfo)
        _partition = collectBinaryBlock(valueList);
    else
        _partition = collectBinaryCellInput(valueList);

    //execute program
    LOG.trace("execute RemoteDPParWorkerReducer " + _stringID + " (" + _workerID + ")");
    try {
        //update in-memory matrix partition
        MatrixObject mo = _ec.getMatrixObject(_inputVar);
        mo.setInMemoryPartition(_partition);

        //create tasks for input data
        Task lTask = new Task(_iterVar, TaskType.SET);
        lTask.addIteration(new IntObject(key.get()));

        //execute program
        executeTask(lTask);
    } catch (Exception ex) {
        throw new IOException("ParFOR: Failed to execute task.", ex);
    }

    //statistic maintenance (after final export)
    RemoteParForUtils.incrementParForMRCounters(_report, 1, 1);
}
From source file: org.apache.sysml.runtime.controlprogram.parfor.RemoteParForMR.java
License: Apache License
/**
 * Result file contains hierarchy of workerID-resultvar(incl filename). We deduplicate
 * on the workerID. Without JVM reuse each task refers to a unique workerID, so we
 * will not find any duplicates. With JVM reuse, however, each slot refers to a workerID,
 * and there are duplicate filenames due to partial aggregation and overwrite of fname
 * (the RemoteParWorkerMapper ensures uniqueness of those files independent of the
 * runtime implementation).
 *
 * @param job job configuration
 * @param fname file name
 * @return array of local variable maps
 * @throws DMLRuntimeException if DMLRuntimeException occurs
 * @throws IOException if IOException occurs
 */
@SuppressWarnings("deprecation")
public static LocalVariableMap[] readResultFile(JobConf job, String fname)
        throws DMLRuntimeException, IOException {
    HashMap<Long, LocalVariableMap> tmp = new HashMap<>();

    Path path = new Path(fname);
    FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
    LongWritable key = new LongWritable(); //workerID
    Text value = new Text();               //serialized var header (incl filename)

    int countAll = 0;
    for (Path lpath : IOUtilFunctions.getSequenceFilePaths(fs, path)) {
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);
        try {
            while (reader.next(key, value)) {
                //System.out.println("key="+key.get()+", value="+value.toString());
                if (!tmp.containsKey(key.get()))
                    tmp.put(key.get(), new LocalVariableMap());
                Object[] dat = ProgramConverter.parseDataObject(value.toString());
                tmp.get(key.get()).put((String) dat[0], (Data) dat[1]);
                countAll++;
            }
        } finally {
            IOUtilFunctions.closeSilently(reader);
        }
    }

    LOG.debug("Num remote worker results (before deduplication): " + countAll);
    LOG.debug("Num remote worker results: " + tmp.size());

    //create return array
    return tmp.values().toArray(new LocalVariableMap[0]);
}
From source file: org.apache.sysml.runtime.io.FrameReaderBinaryBlock.java
License: Apache License
@SuppressWarnings({ "deprecation" }) protected static void readBinaryBlockFrameFromSequenceFile(Path path, JobConf job, FileSystem fs, FrameBlock dest) throws IOException, DMLRuntimeException { int rlen = dest.getNumRows(); int clen = dest.getNumColumns(); //directly read from sequence files (individual partfiles) SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, job); LongWritable key = new LongWritable(-1L); FrameBlock value = new FrameBlock(); try {//www . ja v a 2 s. co m while (reader.next(key, value)) { int row_offset = (int) (key.get() - 1); int rows = value.getNumRows(); int cols = value.getNumColumns(); if (rows == 0 || cols == 0) //Empty block, ignore it. continue; //bound check per block if (row_offset + rows < 0 || row_offset + rows > rlen) { throw new IOException("Frame block [" + (row_offset + 1) + ":" + (row_offset + rows) + "," + ":" + "] " + "out of overall frame range [1:" + rlen + ",1:" + clen + "]."); } //copy block into target frame, incl meta on first dest.copy(row_offset, row_offset + rows - 1, 0, cols - 1, value); if (row_offset == 0) dest.setColumnMetadata(value.getColumnMetadata()); } } finally { IOUtilFunctions.closeSilently(reader); } }
From source file: org.apache.sysml.runtime.matrix.mapred.CSVAssignRowIDMapper.java
License: Apache License
@Override
public void map(LongWritable key, Text value, OutputCollector<ByteWritable, OffsetCount> out, Reporter report)
        throws IOException {
    if (first) {
        first = false;
        fileOffset = key.get();
        outCache = out;
    }

    //getting the number of columns
    if (key.get() == 0 && headerFile) {
        if (!ignoreFirstLine) {
            report.incrCounter(CSVReblockMR.NUM_COLS_IN_MATRIX, outKey.toString(),
                    value.toString().split(delim, -1).length);
            num++;
        } else
            realFirstLine = true;
    } else {
        if (realFirstLine) {
            report.incrCounter(CSVReblockMR.NUM_COLS_IN_MATRIX, outKey.toString(),
                    value.toString().split(delim, -1).length);
            realFirstLine = false;
        }
        num++;
    }
}
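The key.get() == 0 test works because, with text input, the LongWritable key a mapper receives is the byte offset of the line within the file; offset zero therefore marks the file's first line. A minimal mapper using the same idea to skip a CSV header (illustrative, not from the SystemML sources; assumes TextInputFormat and a single header line at the start of the file):

    import java.io.IOException;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    public class SkipHeaderMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            if (key.get() == 0) // byte offset 0 => first line of the file
                return;         // drop the header row
            context.write(value, NullWritable.get());
        }
    }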
From source file: org.apache.tez.mapreduce.input.TestMultiMRInput.java
License: Apache License
public static LinkedHashMap<LongWritable, Text> createInputData(FileSystem fs, Path workDir, JobConf job,
        String filename, long startKey, long numKeys) throws IOException {
    LinkedHashMap<LongWritable, Text> data = new LinkedHashMap<LongWritable, Text>();
    Path file = new Path(workDir, filename);
    LOG.info("Generating data at path: " + file);

    // create a file with length entries
    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, file, LongWritable.class, Text.class);
    try {
        Random r = new Random(System.currentTimeMillis());
        LongWritable key = new LongWritable();
        Text value = new Text();
        for (long i = startKey; i < numKeys; i++) {
            key.set(i);
            value.set(Integer.toString(r.nextInt(10000)));
            data.put(new LongWritable(key.get()), new Text(value.toString()));
            writer.append(key, value);
            LOG.info("<k, v> : <" + key.get() + ", " + value + ">");
        }
    } finally {
        writer.close();
    }
    return data;
}
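Worth noting in the loop above: key and value are reused across iterations, so each map entry is built from fresh copies via new LongWritable(key.get()) and new Text(value.toString()). A short sketch (illustrative, not from the Tez sources) of why the copy matters:

    // BROKEN: every element aliases the same reused Writable,
    // so all three entries end up reporting the last value set (2)
    List<LongWritable> broken = new ArrayList<>();
    LongWritable reused = new LongWritable();
    for (long i = 0; i < 3; i++) {
        reused.set(i);
        broken.add(reused);
    }

    // CORRECT: snapshot the primitive with get() and rewrap it
    List<LongWritable> copied = new ArrayList<>();
    for (long i = 0; i < 3; i++) {
        reused.set(i);
        copied.add(new LongWritable(reused.get())); // 0, 1, 2 as expected
    }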
From source file: org.apache.tez.mapreduce.processor.map.TestMapProcessor.java
License: Apache License
@Test(timeout = 5000)
public void testMapProcessor() throws Exception {
    String dagName = "mrdag0";
    String vertexName = MultiStageMRConfigUtil.getInitialMapVertexName();
    JobConf jobConf = new JobConf(defaultConf);
    setUpJobConf(jobConf);

    MRHelpers.translateMRConfToTez(jobConf);
    jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
    jobConf.setBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS, false);
    jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR,
            new Path(workDir, "localized-resources").toUri().toString());

    Path mapInput = new Path(workDir, "map0");
    MapUtils.generateInputSplit(localFs, workDir, jobConf, mapInput);

    InputSpec mapInputSpec = new InputSpec("NullSrcVertex",
            InputDescriptor.create(MRInputLegacy.class.getName())
                    .setUserPayload(UserPayload.create(ByteBuffer.wrap(MRRuntimeProtos.MRInputUserPayloadProto
                            .newBuilder().setConfigurationBytes(TezUtils.createByteStringFromConf(jobConf))
                            .build().toByteArray()))),
            1);
    OutputSpec mapOutputSpec = new OutputSpec("NullDestVertex",
            OutputDescriptor.create(OrderedPartitionedKVOutput.class.getName())
                    .setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)),
            1);

    LogicalIOProcessorRuntimeTask task = MapUtils.createLogicalTask(localFs, workDir, jobConf, 0,
            new Path(workDir, "map0"), new TestUmbilical(), dagName, vertexName,
            Collections.singletonList(mapInputSpec), Collections.singletonList(mapOutputSpec));

    task.initialize();
    task.run();
    task.close();

    OutputContext outputContext = task.getOutputContexts().iterator().next();
    TezTaskOutput mapOutputs = new TezTaskOutputFiles(jobConf, outputContext.getUniqueIdentifier());

    // TODO NEWTEZ FIXME OutputCommitter verification
    // MRTask mrTask = (MRTask)t.getProcessor();
    // Assert.assertEquals(TezNullOutputCommitter.class.getName(), mrTask
    //     .getCommitter().getClass().getName());
    // t.close();

    Path mapOutputFile = getMapOutputFile(jobConf, outputContext);
    LOG.info("mapOutputFile = " + mapOutputFile);
    IFile.Reader reader = new IFile.Reader(localFs, mapOutputFile, null, null, null, false, 0, -1);
    LongWritable key = new LongWritable();
    Text value = new Text();
    DataInputBuffer keyBuf = new DataInputBuffer();
    DataInputBuffer valueBuf = new DataInputBuffer();
    long prev = Long.MIN_VALUE;
    while (reader.nextRawKey(keyBuf)) {
        reader.nextRawValue(valueBuf);
        key.readFields(keyBuf);
        value.readFields(valueBuf);
        if (prev != Long.MIN_VALUE) {
            assert (prev <= key.get());
        }
        prev = key.get(); // track the previous key so the sort-order assert actually runs
        LOG.info("key = " + key.get() + "; value = " + value);
    }
    reader.close();
}