Example usage for org.apache.hadoop.io DoubleWritable DoubleWritable

Introduction

On this page you can find example usage for the org.apache.hadoop.io.DoubleWritable no-argument constructor, DoubleWritable().

Prototype

public DoubleWritable() 
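
A minimal, hypothetical sketch of this constructor in use (not taken from any of the source files below): the no-argument constructor creates a writable holding 0.0, which is typically filled in later with set(double) and reused across records.

import org.apache.hadoop.io.DoubleWritable;

public class DoubleWritableSketch {
    public static void main(String[] args) {
        // new DoubleWritable() starts at 0.0; the value is usually assigned later.
        DoubleWritable dw = new DoubleWritable();
        dw.set(3.14);
        System.out.println(dw.get());                            // 3.14
        System.out.println(dw.compareTo(new DoubleWritable()));  // positive: 3.14 > 0.0
    }
}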

Usage

From source file:org.apache.mahout.utils.vectors.text.DictionaryVectorizer.java

License:Apache License

/**
 * Create Term Frequency (Tf) Vectors from the input set of documents in {@link SequenceFile} format. This
 * tries to cap the maximum memory used by the feature chunk per node by splitting the process across
 * multiple map/reduce passes.
 * 
 * @param input
 *          input directory of the documents in {@link SequenceFile} format
 * @param output
 *          output directory where {@link org.apache.mahout.math.RandomAccessSparseVector}'s of the document
 *          are generated
 * @param minSupport
 *          the minimum frequency of the feature in the entire corpus to be considered for inclusion in the
 *          sparse vector
 * @param maxNGramSize
 *          1 = unigram, 2 = unigram and bigram, 3 = unigram, bigram and trigram
 * @param minLLRValue
 *          minimum value of the log-likelihood ratio used to prune ngrams
 * @param chunkSizeInMegabytes
 *          the size in MB of the feature => id chunk to be kept in memory at each node during the
 *          Map/Reduce stage. It is recommended that you calculate this from the number of cores and the
 *          free memory available per node. Say you have 2 cores and around 1 GB of extra memory to spare;
 *          we recommend a split size of around 400-500 MB so that two simultaneous reducers can create
 *          partial vectors without thrashing the system due to increased swapping
 * @throws IOException
 * @throws ClassNotFoundException 
 * @throws InterruptedException 
 */
public static void createTermFrequencyVectors(Path input, Path output, Configuration baseConf, int minSupport,
        int maxNGramSize, float minLLRValue, int numReducers, int chunkSizeInMegabytes,
        boolean sequentialAccess) throws IOException, InterruptedException, ClassNotFoundException {
    if (chunkSizeInMegabytes < MIN_CHUNKSIZE) {
        chunkSizeInMegabytes = MIN_CHUNKSIZE;
    } else if (chunkSizeInMegabytes > MAX_CHUNKSIZE) { // 10GB
        chunkSizeInMegabytes = MAX_CHUNKSIZE;
    }
    if (minSupport < 0) {
        minSupport = DEFAULT_MIN_SUPPORT;
    }

    Path dictionaryJobPath = new Path(output, DICTIONARY_JOB_FOLDER);

    int[] maxTermDimension = new int[1];
    List<Path> dictionaryChunks;
    if (maxNGramSize == 1) {
        startWordCounting(input, dictionaryJobPath, minSupport);
        dictionaryChunks = createDictionaryChunks(minSupport, dictionaryJobPath, output, chunkSizeInMegabytes,
                new LongWritable(), maxTermDimension);
    } else {
        CollocDriver.generateAllGrams(input, dictionaryJobPath, baseConf, maxNGramSize, minSupport, minLLRValue,
                numReducers);
        dictionaryChunks = createDictionaryChunks(minSupport,
                new Path(new Path(output, DICTIONARY_JOB_FOLDER), CollocDriver.NGRAM_OUTPUT_DIRECTORY), output,
                chunkSizeInMegabytes, new DoubleWritable(), maxTermDimension);
    }

    int partialVectorIndex = 0;
    List<Path> partialVectorPaths = new ArrayList<Path>();
    for (Path dictionaryChunk : dictionaryChunks) {
        Path partialVectorOutputPath = new Path(output, VECTOR_OUTPUT_FOLDER + partialVectorIndex++);
        partialVectorPaths.add(partialVectorOutputPath);
        makePartialVectors(input, maxNGramSize, dictionaryChunk, partialVectorOutputPath, maxTermDimension[0],
                sequentialAccess, numReducers);
    }

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(partialVectorPaths.get(0).toUri(), conf);

    Path outputDir = new Path(output, DOCUMENT_VECTOR_OUTPUT_FOLDER);
    if (dictionaryChunks.size() > 1) {
        PartialVectorMerger.mergePartialVectors(partialVectorPaths, outputDir, -1, maxTermDimension[0],
                sequentialAccess, numReducers);
        HadoopUtil.deletePaths(partialVectorPaths, fs);
    } else {
        Path singlePartialVectorOutputPath = partialVectorPaths.get(0);
        fs.delete(outputDir, true);
        fs.rename(singlePartialVectorOutputPath, outputDir);
    }
}
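
Based on the javadoc above, a hypothetical invocation might look like the following. The paths and tuning values are illustrative assumptions, not taken from the source: 2 selects unigrams and bigrams, 50.0f is an assumed LLR pruning threshold, and 100 is an assumed chunk size in MB for roughly 1 GB of free memory per node.

Configuration conf = new Configuration();
DictionaryVectorizer.createTermFrequencyVectors(
        new Path("tokenized-documents"),  // input SequenceFile directory (assumed path)
        new Path("tf-vectors"),           // output directory (assumed path)
        conf,                             // baseConf
        2,                                // minSupport
        2,                                // maxNGramSize: unigrams and bigrams
        50.0f,                            // minLLRValue
        1,                                // numReducers
        100,                              // chunkSizeInMegabytes
        false);                           // sequentialAccess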

From source file:org.apache.orc.mapred.OrcMapredRecordReader.java

License:Apache License

static DoubleWritable nextDouble(ColumnVector vector, int row, Object previous) {
    if (vector.isRepeating) {
        row = 0;
    }
    if (vector.noNulls || !vector.isNull[row]) {
        DoubleWritable result;
        if (previous == null || previous.getClass() != DoubleWritable.class) {
            result = new DoubleWritable();
        } else {
            result = (DoubleWritable) previous;
        }
        result.set(((DoubleColumnVector) vector).vector[row]);
        return result;
    } else {
        return null;
    }
}
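
The previous parameter lets the reader recycle one DoubleWritable across rows instead of allocating a new object per row. A minimal, self-contained sketch of the same reuse pattern, using a hypothetical helper that is not part of the ORC API:

import org.apache.hadoop.io.DoubleWritable;

public class ReusePatternSketch {
    // Reuse the previous DoubleWritable when possible; otherwise allocate a new one.
    static DoubleWritable toWritable(double value, Object previous) {
        DoubleWritable result = (previous instanceof DoubleWritable)
                ? (DoubleWritable) previous
                : new DoubleWritable();
        result.set(value);
        return result;
    }

    public static void main(String[] args) {
        Object previous = null;
        for (double v : new double[] { 1.0, 2.5, 3.75 }) {
            previous = toWritable(v, previous); // only the first iteration allocates
            System.out.println(((DoubleWritable) previous).get());
        }
    }
}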

From source file:org.apache.orc.mapred.OrcStruct.java

License:Apache License

public static WritableComparable createValue(TypeDescription type) {
    switch (type.getCategory()) {
    case BOOLEAN:
        return new BooleanWritable();
    case BYTE:
        return new ByteWritable();
    case SHORT:
        return new ShortWritable();
    case INT:
        return new IntWritable();
    case LONG:
        return new LongWritable();
    case FLOAT:
        return new FloatWritable();
    case DOUBLE:
        return new DoubleWritable();
    case BINARY:
        return new BytesWritable();
    case CHAR:
    case VARCHAR:
    case STRING:
        return new Text();
    case DATE:
        return new DateWritable();
    case TIMESTAMP:
        return new OrcTimestamp();
    case DECIMAL:
        return new HiveDecimalWritable();
    case STRUCT: {
        OrcStruct result = new OrcStruct(type);
        int c = 0;
        for (TypeDescription child : type.getChildren()) {
            result.setFieldValue(c++, createValue(child));
        }
        return result;
    }
    case UNION:
        return new OrcUnion(type);
    case LIST:
        return new OrcList(type);
    case MAP:
        return new OrcMap(type);
    default:
        throw new IllegalArgumentException("Unknown type " + type);
    }
}
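
A hypothetical call sketch for the factory above: for a schema with double columns, each DOUBLE child is pre-populated with a fresh DoubleWritable (the schema string and field index are illustrative only):

// Illustrative schema; TypeDescription.fromString parses an ORC type string.
TypeDescription schema = TypeDescription.fromString("struct<x:double,y:double>");
OrcStruct row = (OrcStruct) OrcStruct.createValue(schema);
// Field 0 was created by createValue(child), i.e. new DoubleWritable() for DOUBLE.
((DoubleWritable) row.getFieldValue(0)).set(1.5);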

From source file:org.apache.sysml.runtime.util.MapReduceTool.java

License:Apache License

public static double[] pickValueWeight(String dir, MetaDataNumItemsByEachReducer metadata, double p,
        boolean average) throws IOException {
    long[] counts = metadata.getNumItemsArray();
    long[] ranges = new long[counts.length];
    ranges[0] = counts[0];
    for (int i = 1; i < counts.length; i++)
        ranges[i] = ranges[i - 1] + counts[i];

    long total = ranges[ranges.length - 1];

    // do averaging only if it is asked for; and sum_wt is even
    average = average && (total % 2 == 0);

    int currentPart = 0;
    double cum_weight = 0;
    long pos = (long) Math.ceil(total * p);
    while (ranges[currentPart] < pos) {
        currentPart++;
        cum_weight += ranges[currentPart];
    }
    int offset;
    if (currentPart > 0)
        offset = (int) (pos - ranges[currentPart - 1] - 1);
    else
        offset = (int) pos - 1;

    Path path = new Path(dir);
    FileSystem fs = IOUtilFunctions.getFileSystem(path);
    FileStatus[] files = fs.listStatus(path);
    Path fileToRead = null;
    for (FileStatus file : files)
        if (file.getPath().toString().endsWith(Integer.toString(currentPart))) {
            fileToRead = file.getPath();
            break;
        }

    if (fileToRead == null)
        throw new RuntimeException("cannot read partition " + currentPart);

    int buffsz = 64 * 1024;
    DoubleWritable readKey = new DoubleWritable();
    IntWritable readValue = new IntWritable();
    FSDataInputStream currentStream = null;
    double ret = -1;
    try {
        currentStream = fs.open(fileToRead, buffsz);

        boolean contain0s = false;
        long numZeros = 0;
        if (currentPart == metadata.getPartitionOfZero()) {
            contain0s = true;
            numZeros = metadata.getNumberOfZero();
        }
        ReadWithZeros reader = new ReadWithZeros(currentStream, contain0s, numZeros);

        int numRead = 0;
        while (numRead <= offset) {
            reader.readNextKeyValuePairs(readKey, readValue);
            numRead += readValue.get();
            cum_weight += readValue.get();
        }

        ret = readKey.get();
        if (average) {
            if (numRead <= offset + 1) {
                reader.readNextKeyValuePairs(readKey, readValue);
                cum_weight += readValue.get();
                ret = (ret + readKey.get()) / 2;
            }
        }
    } finally {
        IOUtilFunctions.closeSilently(currentStream);
    }
    return new double[] { ret, (average ? -1 : readValue.get()), (average ? -1 : cum_weight) };
}

From source file:org.kiji.examples.wikipediarank.RankRedistributor.java

License:Apache License

/** {@inheritDoc} */
@Override
public void setup(GathererContext<Text, DoubleWritable> context) throws IOException {
    super.setup(context);
    mLink = new Text();
    mRank = new DoubleWritable();
}

From source file:org.mrgeo.pdf.TriangularDistributionPdfCurve.java

License:Apache License

public void writePdfCurve(Path output, Configuration conf) throws IOException {
    FileSystem fs = output.getFileSystem(conf);
    Path outputFile = new Path(output, "part-r-00000");
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, outputFile, DoubleWritable.class,
            DoubleWritable.class);

    DoubleWritable key = new DoubleWritable();
    DoubleWritable value = new DoubleWritable();

    double resolution = (_max - _min) / _bin;
    double binNumber = _min;
    for (int i = 0; i < _bin; i++) {
        double likelihood = getLikelihood(binNumber);
        key.set(binNumber);
        value.set(likelihood);
        writer.append(key, value);
        binNumber += resolution;
    }

    writer.close();
}

From source file:org.mrgeo.pdf.TriangularDistributionPdfCurve.java

License:Apache License

private void _computeCurve(Path[] pdfFiles, Configuration conf) throws IOException {
    SequenceFile.Reader r = null;
    _likelihoods = new double[(int) _bin];
    int index = 0;
    try {
        // Loop through each of the output files from the reduce to process all of
        // the PDF histogram bins
        for (Path pdfFile : pdfFiles) {
            // ignore all the non-part files
            if (!pdfFile.getName().startsWith("part")) {
                continue;
            }
            r = new SequenceFile.Reader(pdfFile.getFileSystem(conf), pdfFile, conf);
            DoubleWritable key = new DoubleWritable();
            DoubleWritable value = new DoubleWritable();
            while (r.next(key, value)) {
                _likelihoods[index] = value.get();
                index++;
            }
        }
    } finally {
        IOUtils.closeStream(r);
    }
}

From source file:org.pentaho.hadoop.mapreduce.converter.converters.KettleTypeToDoubleWritableConverter.java

License:Apache License

@Override
public DoubleWritable convert(ValueMetaInterface meta, Object obj) throws TypeConversionException {
    try {
        DoubleWritable result = new DoubleWritable();
        result.set(meta.getNumber(obj));
        return result;
    } catch (KettleValueException ex) {
        throw new TypeConversionException(BaseMessages.getString(TypeConverterFactory.class, "ErrorConverting",
                DoubleWritable.class.getSimpleName(), obj), ex);
    }
}

From source file:org.shaf.core.util.IOUtils.java

License:Apache License

/**
 * Reads an {@link Object} of the specified type from the {@link DataInput}.
 *
 * @param cls
 *            the type of the reading object.
 * @param in
 *            the data input stream.
 * @return the read object.
 * @throws IOException
 *             if I/O error occurs.
 */
public static final Object readObject(Class<?> cls, DataInput in) throws IOException {
    try {
        if (cls == null) {
            throw new IOException("Reading class is not defined: null.");
        } else if (ClassUtils.isBoolean(cls)) {
            BooleanWritable obj = new BooleanWritable();
            obj.readFields(in);
            return obj.get();
        } else if (ClassUtils.isByte(cls)) {
            ByteWritable obj = new ByteWritable();
            obj.readFields(in);
            return obj.get();
        } else if (ClassUtils.isShort(cls)) {
            ShortWritable obj = new ShortWritable();
            obj.readFields(in);
            return obj.get();
        } else if (ClassUtils.isInteger(cls)) {
            IntWritable obj = new IntWritable();
            obj.readFields(in);
            return obj.get();
        } else if (ClassUtils.isLong(cls)) {
            LongWritable obj = new LongWritable();
            obj.readFields(in);
            return obj.get();
        } else if (ClassUtils.isFloat(cls)) {
            FloatWritable obj = new FloatWritable();
            obj.readFields(in);
            return obj.get();
        } else if (ClassUtils.isDouble(cls)) {
            DoubleWritable obj = new DoubleWritable();
            obj.readFields(in);
            return obj.get();
        } else if (ClassUtils.isString(cls)) {
            return Text.readString(in);
        } else if (ClassUtils.isEnum(cls)) {
            IntWritable obj = new IntWritable();
            obj.readFields(in);
            return cls.getEnumConstants()[obj.get()];
        } else if (ClassUtils.isArray(cls)) {
            int length = (int) readObject(int.class, in);
            Object array = Array.newInstance(cls.getComponentType(), length);
            for (int j = 0; j < length; j++) {
                Object a = readObject(cls.getComponentType(), in);
                Array.set(array, j, a);
            }
            return array;
        } else {
            Object obj = cls.newInstance();
            ((Writable) obj).readFields(in);
            return obj;
        }
    } catch (IllegalArgumentException | InstantiationException | IllegalAccessException exc) {
        throw new IOException(exc);
    }
}
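
A hypothetical round-trip sketch using the method above together with the writeObject counterpart exercised in the test below; it assumes writeObject serializes a double as a DoubleWritable, which is what that test relies on:

public static void doubleRoundTrip() throws IOException {
    try (ByteArrayOutputStream baos = new ByteArrayOutputStream();
            DataOutputStream out = new DataOutputStream(baos)) {
        IOUtils.writeObject(123.456, out); // assumed counterpart, used in IOUtilsTest below
        try (DataInputStream in = new DataInputStream(new ByteArrayInputStream(baos.toByteArray()))) {
            double restored = (double) IOUtils.readObject(double.class, in); // read back via DoubleWritable
            System.out.println(restored); // 123.456
        }
    }
}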

From source file:org.shaf.core.util.IOUtilsTest.java

License:Apache License

/**
 * Test writing of {@code double} value.
 */
@Test
public void testWriteDouble() {
    byte[] buf = null;

    double value = 123.456;
    try (ByteArrayOutputStream baos = new ByteArrayOutputStream();
            DataOutputStream out = new DataOutputStream(baos);) {
        IOUtils.writeObject(value, out);
        buf = baos.toByteArray();
    } catch (IOException exc) {
        fail(exc.getMessage());
    }

    try (ByteArrayInputStream bais = new ByteArrayInputStream(buf);
            DataInputStream in = new DataInputStream(bais);) {
        DoubleWritable probe = new DoubleWritable();
        probe.readFields(in);
        assertEquals(value, probe.get(), 0.0001);
    } catch (IOException exc) {
        fail(exc.getMessage());
    }
}