List of usage examples for the org.apache.hadoop.io.LongWritable constructor
public LongWritable(long value)
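Before the project examples below, here is a minimal, self-contained sketch of constructing and using a LongWritable on its own. The value 42 and the in-memory stream round-trip are illustrative only and are not taken from any of the source files listed here:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import org.apache.hadoop.io.LongWritable;

public class LongWritableDemo {
    public static void main(String[] args) throws Exception {
        // Construct with an initial value, then read and update it.
        LongWritable counter = new LongWritable(42L); // illustrative value
        System.out.println(counter.get());            // prints 42
        counter.set(counter.get() + 1);               // now holds 43

        // LongWritable is a Writable: it serializes itself to a DataOutput...
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        counter.write(new DataOutputStream(bytes));

        // ...and repopulates itself from a DataInput.
        LongWritable copy = new LongWritable();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(copy.get());               // prints 43
    }
}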
From source file:com.ML_Hadoop.K_meansClustering.K_meansClusteringMap.java
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
    FloatWritable[] temp = new FloatWritable[feature_size + 1];
    for (int i = 0; i < number_of_clusters; i++) {
        temp[0] = new FloatWritable(num_of_members_in_a_cluster[i]);
        for (int j = 1; j < feature_size + 1; j++) {
            temp[j] = new FloatWritable(sum_of_members_in_a_cluster.get(i)[j - 1]);
        }
        context.write(new LongWritable(i), new FloatArrayWritable(temp));
    }
}
From source file:com.ML_Hadoop.MultipleLinearRegression.MultipleLinearRegressionMap.java
@Override // used as an in-mapper combiner to aggregate data before shuffling
protected void cleanup(Context context) throws IOException, InterruptedException {
    // aggregate results from this map task and then send them to the reducers
    Float[] temp = new Float[theta.length + 1];
    for (int i = 0; i < temp.length; i++)
        temp[i] = 0.0f;
    for (int i = 0; i < prediction_error.size(); i++)                 // iterates over rows
        for (int j = 0; j < prediction_error.get(i).size(); j++) {    // iterates over columns
            temp[j] += prediction_error.get(i).get(j);
        }
    for (int i = 0; i < temp.length; i++)
        context.write(new LongWritable(i), new FloatWritable(temp[i]));
}
From source file:com.ML_Hadoop.NaiveBayesClassifier_Continuous_Features.NaiveBayesClassifierMap_Continuous_Features.java
@Override // used as an in-mapper combiner to aggregate data before shuffling
protected void cleanup(Context context) throws IOException, InterruptedException {
    Float[] sigma_x2 = new Float[number_of_features];
    Float[] sigma_x = new Float[number_of_features];
    Float[] mu_x_local = new Float[number_of_features];
    Float[] num_x_local = new Float[number_of_features];
    MapWritable[] map_output = new MapWritable[number_of_features];

    // All arrays must be initialized before use.
    for (int class_id = 0; class_id < number_of_classes; class_id++) {
        for (int i = 0; i < number_of_features; i++) {
            map_output[i] = new MapWritable(); // this is how to initialize a MapWritable[]
            sigma_x2[i] = 0.0f;
            sigma_x[i] = 0.0f;
            mu_x_local[i] = 0.0f;
            num_x_local[i] = 0.0f;
        }

        for (int member_id_in_a_class_id = 0;
                member_id_in_a_class_id < num_of_members_in_each_class[class_id];
                member_id_in_a_class_id++) {
            for (int feature_id_in_a_member_id = 0;
                    feature_id_in_a_member_id < number_of_features;
                    feature_id_in_a_member_id++) {
                float feature_value = features_probabilities.get(class_id)
                        .get(member_id_in_a_class_id)[feature_id_in_a_member_id];
                sigma_x[feature_id_in_a_member_id] += feature_value;
                sigma_x2[feature_id_in_a_member_id] += feature_value * feature_value;
            }
        }

        for (int feature_id_in_a_member_id = 0;
                feature_id_in_a_member_id < number_of_features;
                feature_id_in_a_member_id++) {
            num_x_local[feature_id_in_a_member_id] = (float) num_of_members_in_each_class[class_id];
            if (num_x_local[feature_id_in_a_member_id] == 0)
                mu_x_local[feature_id_in_a_member_id] = 0.0f;
            else
                mu_x_local[feature_id_in_a_member_id] = sigma_x[feature_id_in_a_member_id]
                        / num_x_local[feature_id_in_a_member_id];
        }

        for (int feature_id_in_a_member_id = 0;
                feature_id_in_a_member_id < number_of_features;
                feature_id_in_a_member_id++) {
            // Keys of a MapWritable must be Writable (e.g., new Text("...")); a plain String is wrong.
            // Values must also be Writable, e.g., FloatWritable.
            map_output[feature_id_in_a_member_id].put(new Text("sigma_x"),
                    new FloatWritable(sigma_x[feature_id_in_a_member_id]));
            map_output[feature_id_in_a_member_id].put(new Text("sigma_x2"),
                    new FloatWritable(sigma_x2[feature_id_in_a_member_id]));
            map_output[feature_id_in_a_member_id].put(new Text("mu_x_local"),
                    new FloatWritable(mu_x_local[feature_id_in_a_member_id]));
            map_output[feature_id_in_a_member_id].put(new Text("num_x_local"),
                    new FloatWritable(num_x_local[feature_id_in_a_member_id]));
        }

        context.write(new LongWritable(class_id), new MapArrayWritable(map_output));
    }
}
From source file:com.nikoo28.excel.mapreduce.ExcelRecordReader.java
License:Apache License
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    if (key == null) {
        // First call: emit the first line with key 0.
        key = new LongWritable(0);
        value = new Text(strArrayofLines[0]);
    } else {
        if (key.get() < (this.strArrayofLines.length - 1)) {
            long pos = key.get();
            key.set(pos + 1);
            value.set(this.strArrayofLines[(int) (pos + 1)]);
        } else {
            return false;
        }
    }
    return key != null && value != null;
}
From source file:com.ostor.dedup.hadoop.BinaryRecordReader.java
License:Open Source License
public LongWritable createKey() {
    return new LongWritable(start);
}
From source file:com.phantom.hadoop.examples.QuasiMonteCarlo.java
License:Apache License
/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimatePi(int numMaps, long numPoints, Path tmpDir, Configuration conf)
        throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(conf);
    // setup job conf
    job.setJobName(QuasiMonteCarlo.class.getSimpleName());
    job.setJarByClass(QuasiMonteCarlo.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setOutputKeyClass(BooleanWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(QmcMapper.class);
    job.setReducerClass(QmcReducer.class);
    job.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    job.setSpeculativeExecution(false);

    // setup input/output directories
    final Path inDir = new Path(tmpDir, "in");
    final Path outDir = new Path(tmpDir, "out");
    FileInputFormat.setInputPaths(job, inDir);
    FileOutputFormat.setOutputPath(job, outDir);

    final FileSystem fs = FileSystem.get(conf);
    if (fs.exists(tmpDir)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(tmpDir) + " already exists. Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file,
                    LongWritable.class, LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        // start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        job.waitForCompletion(true);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        // compute estimated value
        final BigDecimal numTotal = BigDecimal.valueOf(numMaps).multiply(BigDecimal.valueOf(numPoints));
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
                .divide(numTotal, RoundingMode.HALF_UP);
    } finally {
        fs.delete(tmpDir, true);
    }
}
From source file:com.pinterest.hdfsbackup.distcp.DistCp.java
License:Apache License
/**
 * Initialize DFSCopyFileMapper specific job-configuration.
 * @param conf : The dfs/mapred configuration.
 * @param jobConf : The handle to the jobConf object to be initialized.
 * @param args Arguments
 */
private static void setup(Configuration conf, JobConf jobConf, final Arguments args) throws IOException {
    jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());

    // set boolean values
    final boolean update = args.flags.contains(Options.UPDATE);
    final boolean overwrite = !update && args.flags.contains(Options.OVERWRITE);
    jobConf.setBoolean(Options.UPDATE.propertyname, update);
    jobConf.setBoolean(Options.OVERWRITE.propertyname, overwrite);
    jobConf.setBoolean(Options.IGNORE_READ_FAILURES.propertyname,
            args.flags.contains(Options.IGNORE_READ_FAILURES));
    jobConf.setBoolean(Options.PRESERVE_STATUS.propertyname,
            args.flags.contains(Options.PRESERVE_STATUS));

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobConf);
    Path jobDirectory = new Path(jClient.getSystemDir(), NAME + "_" + randomId);
    jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

    FileSystem dstfs = args.dst.getFileSystem(conf);
    boolean dstExists = dstfs.exists(args.dst);
    boolean dstIsDir = false;
    if (dstExists) {
        dstIsDir = dstfs.getFileStatus(args.dst).isDir();
    }

    // default logPath
    Path logPath = args.log;
    if (logPath == null) {
        String filename = "_distcp_logs_" + randomId;
        if (!dstExists || !dstIsDir) {
            Path parent = args.dst.getParent();
            if (!dstfs.exists(parent)) {
                dstfs.mkdirs(parent);
            }
            logPath = new Path(parent, filename);
        } else {
            logPath = new Path(args.dst, filename);
        }
    }
    FileOutputFormat.setOutputPath(jobConf, logPath);

    // create src list, dst list
    FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

    Path srcfilelist = new Path(jobDirectory, "_distcp_src_files");
    jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
    SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist,
            LongWritable.class, FilePair.class, SequenceFile.CompressionType.NONE);

    Path dstfilelist = new Path(jobDirectory, "_distcp_dst_files");
    SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist,
            Text.class, Text.class, SequenceFile.CompressionType.NONE);

    Path dstdirlist = new Path(jobDirectory, "_distcp_dst_dirs");
    jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
    SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist,
            Text.class, FilePair.class, SequenceFile.CompressionType.NONE);

    // handle the case where the destination directory doesn't exist
    // and we've only a single src directory OR we're updating/overwriting
    // the contents of the destination directory.
    final boolean special = (args.srcs.size() == 1 && !dstExists) || update || overwrite;
    int srcCount = 0, cnsyncf = 0, dirsyn = 0;
    long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
    try {
        for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
            final Path src = srcItr.next();
            FileSystem srcfs = src.getFileSystem(conf);
            FileStatus srcfilestat = srcfs.getFileStatus(src);
            Path root = special && srcfilestat.isDir() ? src : src.getParent();
            if (srcfilestat.isDir()) {
                ++srcCount;
            }

            Stack<FileStatus> pathstack = new Stack<FileStatus>();
            for (pathstack.push(srcfilestat); !pathstack.empty();) {
                FileStatus cur = pathstack.pop();
                FileStatus[] children = srcfs.listStatus(cur.getPath());
                for (int i = 0; i < children.length; i++) {
                    boolean skipfile = false;
                    final FileStatus child = children[i];
                    final String dst = makeRelative(root, child.getPath());
                    ++srcCount;

                    if (child.isDir()) {
                        pathstack.push(child);
                    } else {
                        // skip file if the src and the dst files are the same.
                        skipfile = update && sameFile(srcfs, child, dstfs, new Path(args.dst, dst));
                        // skip file if it exceeds the file limit or size limit
                        skipfile |= fileCount == args.filelimit
                                || byteCount + child.getLen() > args.sizelimit;

                        if (!skipfile) {
                            ++fileCount;
                            byteCount += child.getLen();

                            if (LOG.isTraceEnabled()) {
                                LOG.trace("adding file " + child.getPath());
                            }

                            ++cnsyncf;
                            cbsyncs += child.getLen();
                            if (cnsyncf > SYNC_FILE_MAX || cbsyncs > BYTES_PER_MAP) {
                                src_writer.sync();
                                dst_writer.sync();
                                cnsyncf = 0;
                                cbsyncs = 0L;
                            }
                        }
                    }

                    if (!skipfile) {
                        src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()),
                                new FilePair(child, dst));
                    }

                    dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
                }

                if (cur.isDir()) {
                    String dst = makeRelative(root, cur.getPath());
                    dir_writer.append(new Text(dst), new FilePair(cur, dst));
                    if (++dirsyn > SYNC_FILE_MAX) {
                        dirsyn = 0;
                        dir_writer.sync();
                    }
                }
            }
        }
    } finally {
        checkAndClose(src_writer);
        checkAndClose(dst_writer);
        checkAndClose(dir_writer);
    }

    FileStatus dststatus = null;
    try {
        dststatus = dstfs.getFileStatus(args.dst);
    } catch (FileNotFoundException fnfe) {
        LOG.info(args.dst + " does not exist.");
    }

    // create dest path dir if copying > 1 file
    if (dststatus == null) {
        if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
            throw new IOException("Failed to create" + args.dst);
        }
    }

    final Path sorted = new Path(jobDirectory, "_distcp_sorted");
    checkDuplication(jobfs, dstfilelist, sorted, conf);

    if (dststatus != null && args.flags.contains(Options.DELETE)) {
        deleteNonexisting(dstfs, dststatus, sorted, jobfs, jobDirectory, jobConf, conf);
    }

    Path tmpDir = new Path(
            (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst,
            "_distcp_tmp_" + randomId);
    jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());
    LOG.info("srcCount=" + srcCount);
    jobConf.setInt(SRC_COUNT_LABEL, srcCount);
    jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
    setMapCount(byteCount, jobConf);
}
From source file:com.pinterest.secor.io.impl.SequenceFileReaderWriter.java
License:Apache License
@Override
public void write(KeyValue keyValue) throws IOException {
    LongWritable writeableKey = new LongWritable(keyValue.getKey());
    BytesWritable writeableValue = new BytesWritable(keyValue.getValue());
    this.mWriter.append(writeableKey, writeableValue);
}
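For context, a hedged sketch of how LongWritable/BytesWritable pairs like the ones written above could be read back with the standard SequenceFile API. This companion reader is illustrative only, it is not part of the Secor source, and the file path is a hypothetical placeholder:

// Illustrative only: reads LongWritable keys and BytesWritable values
// from a sequence file such as the one written above.
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
Path path = new Path("/tmp/example.seq"); // hypothetical path
SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
try {
    LongWritable key = new LongWritable();
    BytesWritable value = new BytesWritable();
    while (reader.next(key, value)) {
        System.out.println("offset=" + key.get() + " payloadBytes=" + value.getLength());
    }
} finally {
    reader.close();
}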
From source file:com.pinterest.secor.storage.seqfile.HadoopSequenceFileWriter.java
License:Apache License
@Override
public void append(final ParsedMessage message) throws IOException {
    LongWritable key = new LongWritable(message.getOffset());
    BytesWritable value = new BytesWritable(message.getPayload());
    mBackedWriter.append(key, value);
}
From source file:com.pinterest.secor.writer.MessageWriter.java
License:Apache License
public void write(ParsedMessage message) throws IOException {
    adjustOffset(message);
    TopicPartition topicPartition = new TopicPartition(message.getTopic(), message.getKafkaPartition());
    long offset = mOffsetTracker.getAdjustedCommittedOffsetCount(topicPartition);
    String localPrefix = mConfig.getLocalPath() + '/' + IdUtil.getLocalMessageDir();
    LogFilePath path = new LogFilePath(localPrefix, mConfig.getGeneration(), offset, message, mFileExtension);
    LongWritable key = new LongWritable(message.getOffset());
    BytesWritable value = new BytesWritable(message.getPayload());
    SequenceFile.Writer writer = mFileRegistry.getOrCreateWriter(path, mCodec);
    writer.append(key, value);
    LOG.debug("appended message " + message + " to file " + path.getLogFilePath()
            + ". File length " + writer.getLength());
}