Example usage for org.apache.hadoop.io LongWritable LongWritable(long)

Introduction

This page lists usage examples for the org.apache.hadoop.io.LongWritable constructor LongWritable(long).

Prototype

public LongWritable(long value) 
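
Before the full examples below, here is a minimal, self-contained sketch (with hypothetical values) of what the constructor does: it wraps a primitive long in a mutable, Hadoop-serializable box whose value can be read back with get() or replaced with set().

import org.apache.hadoop.io.LongWritable;

public class LongWritableExample {
    public static void main(String[] args) {
        // Wrap a primitive long using the LongWritable(long value) constructor.
        LongWritable offset = new LongWritable(42L);
        System.out.println(offset.get());   // prints 42

        // Writables are mutable, so the same instance can be reused.
        offset.set(1024L);
        System.out.println(offset.get());   // prints 1024
    }
}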

Usage

From source file:com.ML_Hadoop.K_meansClustering.K_meansClusteringMap.java

@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
    FloatWritable[] temp = new FloatWritable[feature_size + 1];

    for (int i = 0; i < number_of_clusters; i++) {
        temp[0] = new FloatWritable(num_of_members_in_a_cluster[i]);
        for (int j = 1; j < feature_size + 1; j++) {
            temp[j] = new FloatWritable(sum_of_members_in_a_cluster.get(i)[j - 1]);
        }
        context.write(new LongWritable(i), new FloatArrayWritable(temp));
    }
}

From source file:com.ML_Hadoop.MultipleLinearRegression.MultipleLinearRegressionMap.java

@Override // acts as an in-mapper combiner, aggregating data before the shuffle
protected void cleanup(Context context) throws IOException, InterruptedException {
    // aggregate results from the same map and then send to reducers
    Float[] temp = new Float[theta.length + 1];
    for (int i = 0; i < temp.length; i++)
        temp[i] = 0.0f;

    for (int i = 0; i < prediction_error.size(); i++) // iterates on rows
        for (int j = 0; j < prediction_error.get(i).size(); j++) { // iterates on columns
            temp[j] += prediction_error.get(i).get(j);
        }

    for (int i = 0; i < temp.length; i++)
        context.write(new LongWritable(i), new FloatWritable(temp[i]));
}

From source file:com.ML_Hadoop.NaiveBayesClassifier_Continuous_Features.NaiveBayesClassifierMap_Continuous_Features.java

@Override // acts as an in-mapper combiner, aggregating data before the shuffle
protected void cleanup(Context context) throws IOException, InterruptedException {
    //features_probabilities.put(class_id, features);
    Float[] sigma_x2 = new Float[number_of_features];
    Float[] sigma_x = new Float[number_of_features];
    Float[] mu_x_local = new Float[number_of_features];
    Float[] num_x_local = new Float[number_of_features];
    MapWritable[] map_output = new MapWritable[number_of_features];

    // All arrays must be initialized before use.
    for (int class_id = 0; class_id < number_of_classes; class_id++) {
        for (int i = 0; i < number_of_features; i++) {
            map_output[i] = new MapWritable(); // initialize each MapWritable element
            sigma_x2[i] = 0.0f;
            sigma_x[i] = 0.0f;
            mu_x_local[i] = 0.0f;
            num_x_local[i] = 0.0f;
        }
        for (int member_id_in_a_class_id = 0; member_id_in_a_class_id < num_of_members_in_each_class[class_id]; member_id_in_a_class_id++) {
            for (int feature_id_in_a_member_id = 0; feature_id_in_a_member_id < number_of_features; feature_id_in_a_member_id++) {
                sigma_x[feature_id_in_a_member_id] += (features_probabilities.get(class_id)
                        .get(member_id_in_a_class_id))[feature_id_in_a_member_id];
                sigma_x2[feature_id_in_a_member_id] += (features_probabilities.get(class_id)
                        .get(member_id_in_a_class_id))[feature_id_in_a_member_id]
                        * ((features_probabilities.get(class_id)
                                .get(member_id_in_a_class_id))[feature_id_in_a_member_id]);
            }
        }
        for (int feature_id_in_a_member_id = 0; feature_id_in_a_member_id < number_of_features; feature_id_in_a_member_id++) {
            num_x_local[feature_id_in_a_member_id] = (float) num_of_members_in_each_class[class_id];
            if (num_x_local[feature_id_in_a_member_id] == 0)
                mu_x_local[feature_id_in_a_member_id] = 0.0f;
            else
                mu_x_local[feature_id_in_a_member_id] = sigma_x[feature_id_in_a_member_id]
                        / num_x_local[feature_id_in_a_member_id];
        }

        for (int feature_id_in_a_member_id = 0; feature_id_in_a_member_id < number_of_features; feature_id_in_a_member_id++) {
            // Map keys must be Writable (e.g., new Text("...")); a plain String is not allowed.
            // Map values must also be Writable, e.g., FloatWritable.
            map_output[feature_id_in_a_member_id].put(new Text("sigma_x"),
                    new FloatWritable(sigma_x[feature_id_in_a_member_id]));
            map_output[feature_id_in_a_member_id].put(new Text("sigma_x2"),
                    new FloatWritable(sigma_x2[feature_id_in_a_member_id]));
            map_output[feature_id_in_a_member_id].put(new Text("mu_x_local"),
                    new FloatWritable(mu_x_local[feature_id_in_a_member_id]));
            map_output[feature_id_in_a_member_id].put(new Text("num_x_local"),
                    new FloatWritable(num_x_local[feature_id_in_a_member_id]));
        }

        context.write(new LongWritable(class_id), new MapArrayWritable(map_output));
    }

}

From source file:com.nikoo28.excel.mapreduce.ExcelRecordReader.java

License:Apache License

@Override
public boolean nextKeyValue() throws IOException, InterruptedException {

    if (key == null) {
        key = new LongWritable(0);
        value = new Text(strArrayofLines[0]);

    } else {

        if (key.get() < (this.strArrayofLines.length - 1)) {
            long pos = key.get();

            key.set(pos + 1);
            value.set(this.strArrayofLines[(int) (pos + 1)]);
        } else {
            return false;
        }

    }

    return key != null && value != null;

}

From source file:com.ostor.dedup.hadoop.BinaryRecordReader.java

License:Open Source License

public LongWritable createKey() {
    return new LongWritable(start);
}

From source file:com.phantom.hadoop.examples.QuasiMonteCarlo.java

License:Apache License

/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimatePi(int numMaps, long numPoints, Path tmpDir, Configuration conf)
        throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(conf);
    // setup job conf
    job.setJobName(QuasiMonteCarlo.class.getSimpleName());
    job.setJarByClass(QuasiMonteCarlo.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setOutputKeyClass(BooleanWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(QmcMapper.class);

    job.setReducerClass(QmcReducer.class);
    job.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    job.setSpeculativeExecution(false);

    // setup input/output directories
    final Path inDir = new Path(tmpDir, "in");
    final Path outDir = new Path(tmpDir, "out");
    FileInputFormat.setInputPaths(job, inDir);
    FileOutputFormat.setOutputPath(job, outDir);

    final FileSystem fs = FileSystem.get(conf);
    if (fs.exists(tmpDir)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(tmpDir) + " already exists.  Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, LongWritable.class,
                    LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        // start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        job.waitForCompletion(true);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        // compute estimated value
        final BigDecimal numTotal = BigDecimal.valueOf(numMaps).multiply(BigDecimal.valueOf(numPoints));
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get())).divide(numTotal,
                RoundingMode.HALF_UP);
    } finally {
        fs.delete(tmpDir, true);
    }
}

From source file:com.pinterest.hdfsbackup.distcp.DistCp.java

License:Apache License

/**
 * Initialize DFSCopyFileMapper specific job-configuration.
 * @param conf : The dfs/mapred configuration.
 * @param jobConf : The handle to the jobConf object to be initialized.
 * @param args Arguments
 */
private static void setup(Configuration conf, JobConf jobConf, final Arguments args) throws IOException {
    jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());

    //set boolean values
    final boolean update = args.flags.contains(Options.UPDATE);
    final boolean overwrite = !update && args.flags.contains(Options.OVERWRITE);
    jobConf.setBoolean(Options.UPDATE.propertyname, update);
    jobConf.setBoolean(Options.OVERWRITE.propertyname, overwrite);
    jobConf.setBoolean(Options.IGNORE_READ_FAILURES.propertyname,
            args.flags.contains(Options.IGNORE_READ_FAILURES));
    jobConf.setBoolean(Options.PRESERVE_STATUS.propertyname, args.flags.contains(Options.PRESERVE_STATUS));

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobConf);
    Path jobDirectory = new Path(jClient.getSystemDir(), NAME + "_" + randomId);
    jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

    FileSystem dstfs = args.dst.getFileSystem(conf);
    boolean dstExists = dstfs.exists(args.dst);
    boolean dstIsDir = false;
    if (dstExists) {
        dstIsDir = dstfs.getFileStatus(args.dst).isDir();
    }

    // default logPath
    Path logPath = args.log;
    if (logPath == null) {
        String filename = "_distcp_logs_" + randomId;
        if (!dstExists || !dstIsDir) {
            Path parent = args.dst.getParent();
            if (!dstfs.exists(parent)) {
                dstfs.mkdirs(parent);
            }
            logPath = new Path(parent, filename);
        } else {
            logPath = new Path(args.dst, filename);
        }
    }
    FileOutputFormat.setOutputPath(jobConf, logPath);

    // create src list, dst list
    FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

    Path srcfilelist = new Path(jobDirectory, "_distcp_src_files");
    jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
    SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist, LongWritable.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    Path dstfilelist = new Path(jobDirectory, "_distcp_dst_files");
    SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist, Text.class,
            Text.class, SequenceFile.CompressionType.NONE);

    Path dstdirlist = new Path(jobDirectory, "_distcp_dst_dirs");
    jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
    SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist, Text.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    // handle the case where the destination directory doesn't exist
    // and we've only a single src directory OR we're updating/overwriting
    // the contents of the destination directory.
    final boolean special = (args.srcs.size() == 1 && !dstExists) || update || overwrite;
    int srcCount = 0, cnsyncf = 0, dirsyn = 0;
    long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
    try {
        for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
            final Path src = srcItr.next();
            FileSystem srcfs = src.getFileSystem(conf);
            FileStatus srcfilestat = srcfs.getFileStatus(src);
            Path root = special && srcfilestat.isDir() ? src : src.getParent();
            if (srcfilestat.isDir()) {
                ++srcCount;
            }

            Stack<FileStatus> pathstack = new Stack<FileStatus>();
            for (pathstack.push(srcfilestat); !pathstack.empty();) {
                FileStatus cur = pathstack.pop();
                FileStatus[] children = srcfs.listStatus(cur.getPath());
                for (int i = 0; i < children.length; i++) {
                    boolean skipfile = false;
                    final FileStatus child = children[i];
                    final String dst = makeRelative(root, child.getPath());
                    ++srcCount;

                    if (child.isDir()) {
                        pathstack.push(child);
                    } else {
                        // skip the file if the src and dst files are the same
                        skipfile = update && sameFile(srcfs, child, dstfs, new Path(args.dst, dst));
                        // skip the file if it exceeds the file limit or size limit
                        skipfile |= fileCount == args.filelimit || byteCount + child.getLen() > args.sizelimit;

                        if (!skipfile) {
                            ++fileCount;
                            byteCount += child.getLen();

                            if (LOG.isTraceEnabled()) {
                                LOG.trace("adding file " + child.getPath());
                            }

                            ++cnsyncf;
                            cbsyncs += child.getLen();
                            if (cnsyncf > SYNC_FILE_MAX || cbsyncs > BYTES_PER_MAP) {
                                src_writer.sync();
                                dst_writer.sync();
                                cnsyncf = 0;
                                cbsyncs = 0L;
                            }
                        }
                    }

                    if (!skipfile) {
                        src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()),
                                new FilePair(child, dst));
                    }

                    dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
                }

                if (cur.isDir()) {
                    String dst = makeRelative(root, cur.getPath());
                    dir_writer.append(new Text(dst), new FilePair(cur, dst));
                    if (++dirsyn > SYNC_FILE_MAX) {
                        dirsyn = 0;
                        dir_writer.sync();
                    }
                }
            }
        }
    } finally {
        checkAndClose(src_writer);
        checkAndClose(dst_writer);
        checkAndClose(dir_writer);
    }

    FileStatus dststatus = null;
    try {
        dststatus = dstfs.getFileStatus(args.dst);
    } catch (FileNotFoundException fnfe) {
        LOG.info(args.dst + " does not exist.");
    }

    // create dest path dir if copying > 1 file
    if (dststatus == null) {
        if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
            throw new IOException("Failed to create" + args.dst);
        }
    }

    final Path sorted = new Path(jobDirectory, "_distcp_sorted");
    checkDuplication(jobfs, dstfilelist, sorted, conf);

    if (dststatus != null && args.flags.contains(Options.DELETE)) {
        deleteNonexisting(dstfs, dststatus, sorted, jobfs, jobDirectory, jobConf, conf);
    }

    Path tmpDir = new Path(
            (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst,
            "_distcp_tmp_" + randomId);
    jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());
    LOG.info("srcCount=" + srcCount);
    jobConf.setInt(SRC_COUNT_LABEL, srcCount);
    jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
    setMapCount(byteCount, jobConf);
}

From source file:com.pinterest.secor.io.impl.SequenceFileReaderWriter.java

License:Apache License

@Override
public void write(KeyValue keyValue) throws IOException {
    LongWritable writeableKey = new LongWritable(keyValue.getKey());
    BytesWritable writeableValue = new BytesWritable(keyValue.getValue());
    this.mWriter.append(writeableKey, writeableValue);
}

From source file:com.pinterest.secor.storage.seqfile.HadoopSequenceFileWriter.java

License:Apache License

@Override
public void append(final ParsedMessage message) throws IOException {
    LongWritable key = new LongWritable(message.getOffset());
    BytesWritable value = new BytesWritable(message.getPayload());
    mBackedWriter.append(key, value);
}

From source file:com.pinterest.secor.writer.MessageWriter.java

License:Apache License

public void write(ParsedMessage message) throws IOException {
    adjustOffset(message);
    TopicPartition topicPartition = new TopicPartition(message.getTopic(), message.getKafkaPartition());
    long offset = mOffsetTracker.getAdjustedCommittedOffsetCount(topicPartition);
    String localPrefix = mConfig.getLocalPath() + '/' + IdUtil.getLocalMessageDir();
    LogFilePath path = new LogFilePath(localPrefix, mConfig.getGeneration(), offset, message, mFileExtension);
    LongWritable key = new LongWritable(message.getOffset());
    BytesWritable value = new BytesWritable(message.getPayload());
    SequenceFile.Writer writer;
    writer = mFileRegistry.getOrCreateWriter(path, mCodec);
    writer.append(key, value);
    LOG.debug("appended message " + message + " to file " + path.getLogFilePath() + ".  File length "
            + writer.getLength());
}