List of usage examples for the org.apache.hadoop.io.LongWritable constructor
public LongWritable(long value)
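Before the project examples below, here is a minimal, self-contained sketch of constructing and using a LongWritable on its own. The value 42 and the in-memory stream round-trip are illustrative only and are not taken from any of the source files listed here:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import org.apache.hadoop.io.LongWritable;

public class LongWritableDemo {
    public static void main(String[] args) throws Exception {
        // Construct with an initial value, then read and update it.
        LongWritable counter = new LongWritable(42L); // illustrative value
        System.out.println(counter.get());            // prints 42
        counter.set(counter.get() + 1);               // now holds 43

        // LongWritable is a Writable: it serializes itself to a DataOutput...
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        counter.write(new DataOutputStream(bytes));

        // ...and repopulates itself from a DataInput.
        LongWritable copy = new LongWritable();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(copy.get());               // prints 43
    }
}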
From source file:com.ML_Hadoop.K_meansClustering.K_meansClusteringMap.java
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
    FloatWritable[] temp = new FloatWritable[feature_size + 1];
    for (int i = 0; i < number_of_clusters; i++) {
        temp[0] = new FloatWritable(num_of_members_in_a_cluster[i]);
        for (int j = 1; j < feature_size + 1; j++) {
            temp[j] = new FloatWritable(sum_of_members_in_a_cluster.get(i)[j - 1]);
        }
        context.write(new LongWritable(i), new FloatArrayWritable(temp));
    }
}
From source file:com.ML_Hadoop.MultipleLinearRegression.MultipleLinearRegressionMap.java
@Override // used as an in-mapper combiner to aggregate data before shuffling
protected void cleanup(Context context) throws IOException, InterruptedException {
    // aggregate results from this map task and then send them to the reducers
    Float[] temp = new Float[theta.length + 1];
    for (int i = 0; i < temp.length; i++)
        temp[i] = 0.0f;
    for (int i = 0; i < prediction_error.size(); i++)                 // iterates over rows
        for (int j = 0; j < prediction_error.get(i).size(); j++) {    // iterates over columns
            temp[j] += prediction_error.get(i).get(j);
        }
    for (int i = 0; i < temp.length; i++)
        context.write(new LongWritable(i), new FloatWritable(temp[i]));
}
From source file:com.ML_Hadoop.NaiveBayesClassifier_Continuous_Features.NaiveBayesClassifierMap_Continuous_Features.java
@Override // used as an in-mapper combiner to aggregate data before shuffling
protected void cleanup(Context context) throws IOException, InterruptedException {
    Float[] sigma_x2 = new Float[number_of_features];
    Float[] sigma_x = new Float[number_of_features];
    Float[] mu_x_local = new Float[number_of_features];
    Float[] num_x_local = new Float[number_of_features];
    MapWritable[] map_output = new MapWritable[number_of_features];

    // All arrays must be initialized before use.
    for (int class_id = 0; class_id < number_of_classes; class_id++) {
        for (int i = 0; i < number_of_features; i++) {
            map_output[i] = new MapWritable(); // this is how to initialize a MapWritable[]
            sigma_x2[i] = 0.0f;
            sigma_x[i] = 0.0f;
            mu_x_local[i] = 0.0f;
            num_x_local[i] = 0.0f;
        }

        for (int member_id_in_a_class_id = 0;
                member_id_in_a_class_id < num_of_members_in_each_class[class_id];
                member_id_in_a_class_id++) {
            for (int feature_id_in_a_member_id = 0;
                    feature_id_in_a_member_id < number_of_features;
                    feature_id_in_a_member_id++) {
                float feature_value = features_probabilities.get(class_id)
                        .get(member_id_in_a_class_id)[feature_id_in_a_member_id];
                sigma_x[feature_id_in_a_member_id] += feature_value;
                sigma_x2[feature_id_in_a_member_id] += feature_value * feature_value;
            }
        }

        for (int feature_id_in_a_member_id = 0;
                feature_id_in_a_member_id < number_of_features;
                feature_id_in_a_member_id++) {
            num_x_local[feature_id_in_a_member_id] = (float) num_of_members_in_each_class[class_id];
            if (num_x_local[feature_id_in_a_member_id] == 0)
                mu_x_local[feature_id_in_a_member_id] = 0.0f;
            else
                mu_x_local[feature_id_in_a_member_id] = sigma_x[feature_id_in_a_member_id]
                        / num_x_local[feature_id_in_a_member_id];
        }

        for (int feature_id_in_a_member_id = 0;
                feature_id_in_a_member_id < number_of_features;
                feature_id_in_a_member_id++) {
            // Keys of a MapWritable must be Writable (e.g., new Text("...")); a plain String is wrong.
            // Values must also be Writable, e.g., FloatWritable.
            map_output[feature_id_in_a_member_id].put(new Text("sigma_x"),
                    new FloatWritable(sigma_x[feature_id_in_a_member_id]));
            map_output[feature_id_in_a_member_id].put(new Text("sigma_x2"),
                    new FloatWritable(sigma_x2[feature_id_in_a_member_id]));
            map_output[feature_id_in_a_member_id].put(new Text("mu_x_local"),
                    new FloatWritable(mu_x_local[feature_id_in_a_member_id]));
            map_output[feature_id_in_a_member_id].put(new Text("num_x_local"),
                    new FloatWritable(num_x_local[feature_id_in_a_member_id]));
        }

        context.write(new LongWritable(class_id), new MapArrayWritable(map_output));
    }
}
From source file:com.nikoo28.excel.mapreduce.ExcelRecordReader.java
License:Apache License
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    if (key == null) {
        // First call: emit the first line with key 0.
        key = new LongWritable(0);
        value = new Text(strArrayofLines[0]);
    } else {
        if (key.get() < (this.strArrayofLines.length - 1)) {
            long pos = key.get();
            key.set(pos + 1);
            value.set(this.strArrayofLines[(int) (pos + 1)]);
        } else {
            return false;
        }
    }
    return key != null && value != null;
}
From source file:com.ostor.dedup.hadoop.BinaryRecordReader.java
License:Open Source License
public LongWritable createKey() {
    return new LongWritable(start);
}
From source file:com.phantom.hadoop.examples.QuasiMonteCarlo.java
License:Apache License
/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimatePi(int numMaps, long numPoints, Path tmpDir, Configuration conf)
        throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(conf);
    // setup job conf
    job.setJobName(QuasiMonteCarlo.class.getSimpleName());
    job.setJarByClass(QuasiMonteCarlo.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setOutputKeyClass(BooleanWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(QmcMapper.class);
    job.setReducerClass(QmcReducer.class);
    job.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    job.setSpeculativeExecution(false);

    // setup input/output directories
    final Path inDir = new Path(tmpDir, "in");
    final Path outDir = new Path(tmpDir, "out");
    FileInputFormat.setInputPaths(job, inDir);
    FileOutputFormat.setOutputPath(job, outDir);

    final FileSystem fs = FileSystem.get(conf);
    if (fs.exists(tmpDir)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(tmpDir) + " already exists. Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file,
                    LongWritable.class, LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        // start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        job.waitForCompletion(true);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        // compute estimated value
        final BigDecimal numTotal = BigDecimal.valueOf(numMaps).multiply(BigDecimal.valueOf(numPoints));
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
                .divide(numTotal, RoundingMode.HALF_UP);
    } finally {
        fs.delete(tmpDir, true);
    }
}
From source file:com.pinterest.hdfsbackup.distcp.DistCp.java
License:Apache License
/**
 * Initialize DFSCopyFileMapper specific job-configuration.
 * @param conf : The dfs/mapred configuration.
 * @param jobConf : The handle to the jobConf object to be initialized.
 * @param args Arguments
 */
private static void setup(Configuration conf, JobConf jobConf, final Arguments args) throws IOException {
    jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());

    // set boolean values
    final boolean update = args.flags.contains(Options.UPDATE);
    final boolean overwrite = !update && args.flags.contains(Options.OVERWRITE);
    jobConf.setBoolean(Options.UPDATE.propertyname, update);
    jobConf.setBoolean(Options.OVERWRITE.propertyname, overwrite);
    jobConf.setBoolean(Options.IGNORE_READ_FAILURES.propertyname,
            args.flags.contains(Options.IGNORE_READ_FAILURES));
    jobConf.setBoolean(Options.PRESERVE_STATUS.propertyname,
            args.flags.contains(Options.PRESERVE_STATUS));

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobConf);
    Path jobDirectory = new Path(jClient.getSystemDir(), NAME + "_" + randomId);
    jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

    FileSystem dstfs = args.dst.getFileSystem(conf);
    boolean dstExists = dstfs.exists(args.dst);
    boolean dstIsDir = false;
    if (dstExists) {
        dstIsDir = dstfs.getFileStatus(args.dst).isDir();
    }

    // default logPath
    Path logPath = args.log;
    if (logPath == null) {
        String filename = "_distcp_logs_" + randomId;
        if (!dstExists || !dstIsDir) {
            Path parent = args.dst.getParent();
            if (!dstfs.exists(parent)) {
                dstfs.mkdirs(parent);
            }
            logPath = new Path(parent, filename);
        } else {
            logPath = new Path(args.dst, filename);
        }
    }
    FileOutputFormat.setOutputPath(jobConf, logPath);

    // create src list, dst list
    FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

    Path srcfilelist = new Path(jobDirectory, "_distcp_src_files");
    jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
    SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist,
            LongWritable.class, FilePair.class, SequenceFile.CompressionType.NONE);

    Path dstfilelist = new Path(jobDirectory, "_distcp_dst_files");
    SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist,
            Text.class, Text.class, SequenceFile.CompressionType.NONE);

    Path dstdirlist = new Path(jobDirectory, "_distcp_dst_dirs");
    jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
    SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist,
            Text.class, FilePair.class, SequenceFile.CompressionType.NONE);

    // handle the case where the destination directory doesn't exist
    // and we've only a single src directory OR we're updating/overwriting
    // the contents of the destination directory.
    final boolean special = (args.srcs.size() == 1 && !dstExists) || update || overwrite;
    int srcCount = 0, cnsyncf = 0, dirsyn = 0;
    long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
    try {
        for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
            final Path src = srcItr.next();
            FileSystem srcfs = src.getFileSystem(conf);
            FileStatus srcfilestat = srcfs.getFileStatus(src);
            Path root = special && srcfilestat.isDir() ? src : src.getParent();
            if (srcfilestat.isDir()) {
                ++srcCount;
            }

            Stack<FileStatus> pathstack = new Stack<FileStatus>();
            for (pathstack.push(srcfilestat); !pathstack.empty();) {
                FileStatus cur = pathstack.pop();
                FileStatus[] children = srcfs.listStatus(cur.getPath());
                for (int i = 0; i < children.length; i++) {
                    boolean skipfile = false;
                    final FileStatus child = children[i];
                    final String dst = makeRelative(root, child.getPath());
                    ++srcCount;

                    if (child.isDir()) {
                        pathstack.push(child);
                    } else {
                        // skip file if the src and the dst files are the same.
                        skipfile = update && sameFile(srcfs, child, dstfs, new Path(args.dst, dst));
                        // skip file if it exceeds the file limit or size limit
                        skipfile |= fileCount == args.filelimit
                                || byteCount + child.getLen() > args.sizelimit;

                        if (!skipfile) {
                            ++fileCount;
                            byteCount += child.getLen();

                            if (LOG.isTraceEnabled()) {
                                LOG.trace("adding file " + child.getPath());
                            }

                            ++cnsyncf;
                            cbsyncs += child.getLen();
                            if (cnsyncf > SYNC_FILE_MAX || cbsyncs > BYTES_PER_MAP) {
                                src_writer.sync();
                                dst_writer.sync();
                                cnsyncf = 0;
                                cbsyncs = 0L;
                            }
                        }
                    }

                    if (!skipfile) {
                        src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()),
                                new FilePair(child, dst));
                    }

                    dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
                }

                if (cur.isDir()) {
                    String dst = makeRelative(root, cur.getPath());
                    dir_writer.append(new Text(dst), new FilePair(cur, dst));
                    if (++dirsyn > SYNC_FILE_MAX) {
                        dirsyn = 0;
                        dir_writer.sync();
                    }
                }
            }
        }
    } finally {
        checkAndClose(src_writer);
        checkAndClose(dst_writer);
        checkAndClose(dir_writer);
    }

    FileStatus dststatus = null;
    try {
        dststatus = dstfs.getFileStatus(args.dst);
    } catch (FileNotFoundException fnfe) {
        LOG.info(args.dst + " does not exist.");
    }

    // create dest path dir if copying > 1 file
    if (dststatus == null) {
        if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
            throw new IOException("Failed to create" + args.dst);
        }
    }

    final Path sorted = new Path(jobDirectory, "_distcp_sorted");
    checkDuplication(jobfs, dstfilelist, sorted, conf);

    if (dststatus != null && args.flags.contains(Options.DELETE)) {
        deleteNonexisting(dstfs, dststatus, sorted, jobfs, jobDirectory, jobConf, conf);
    }

    Path tmpDir = new Path(
            (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst,
            "_distcp_tmp_" + randomId);
    jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());
    LOG.info("srcCount=" + srcCount);
    jobConf.setInt(SRC_COUNT_LABEL, srcCount);
    jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
    setMapCount(byteCount, jobConf);
}
From source file:com.pinterest.secor.io.impl.SequenceFileReaderWriter.java
License:Apache License
@Override
public void write(KeyValue keyValue) throws IOException {
    LongWritable writeableKey = new LongWritable(keyValue.getKey());
    BytesWritable writeableValue = new BytesWritable(keyValue.getValue());
    this.mWriter.append(writeableKey, writeableValue);
}
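For context, a hedged sketch of how LongWritable/BytesWritable pairs like the ones written above could be read back with the standard SequenceFile API. This companion reader is illustrative only, it is not part of the Secor source, and the file path is a hypothetical placeholder:

// Illustrative only: reads LongWritable keys and BytesWritable values
// from a sequence file such as the one written above.
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
Path path = new Path("/tmp/example.seq"); // hypothetical path
SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
try {
    LongWritable key = new LongWritable();
    BytesWritable value = new BytesWritable();
    while (reader.next(key, value)) {
        System.out.println("offset=" + key.get() + " payloadBytes=" + value.getLength());
    }
} finally {
    reader.close();
}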
From source file:com.pinterest.secor.storage.seqfile.HadoopSequenceFileWriter.java
License:Apache License
@Override
public void append(final ParsedMessage message) throws IOException {
    LongWritable key = new LongWritable(message.getOffset());
    BytesWritable value = new BytesWritable(message.getPayload());
    mBackedWriter.append(key, value);
}
From source file:com.pinterest.secor.writer.MessageWriter.java
License:Apache License
public void write(ParsedMessage message) throws IOException {
    adjustOffset(message);
    TopicPartition topicPartition = new TopicPartition(message.getTopic(), message.getKafkaPartition());
    long offset = mOffsetTracker.getAdjustedCommittedOffsetCount(topicPartition);
    String localPrefix = mConfig.getLocalPath() + '/' + IdUtil.getLocalMessageDir();
    LogFilePath path = new LogFilePath(localPrefix, mConfig.getGeneration(), offset, message, mFileExtension);
    LongWritable key = new LongWritable(message.getOffset());
    BytesWritable value = new BytesWritable(message.getPayload());
    SequenceFile.Writer writer = mFileRegistry.getOrCreateWriter(path, mCodec);
    writer.append(key, value);
    LOG.debug("appended message " + message + " to file " + path.getLogFilePath()
            + ". File length " + writer.getLength());
}