List of usage examples for the org.apache.hadoop.io.DoubleWritable constructor
public DoubleWritable(double value)
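Before the examples from real projects, a minimal self-contained sketch (not taken from any of the source files below) of what this constructor provides: it wraps a primitive double in Hadoop's Writable serialization contract.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.io.DoubleWritable;

public class DoubleWritableRoundTrip {
    public static void main(String[] args) throws IOException {
        // Wrap a primitive double; get()/set() expose the boxed value.
        DoubleWritable dw = new DoubleWritable(3.14);

        // Serialize it the way the MapReduce framework does between tasks.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        dw.write(new DataOutputStream(bytes));

        // Deserialize into a fresh instance and confirm the round trip.
        DoubleWritable copy = new DoubleWritable();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(copy.get());         // 3.14
        System.out.println(dw.compareTo(copy)); // 0
    }
}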
From source file:org.apache.mahout.math.hadoop.similarity.VectorDistanceMapper.java
License:Apache License
@Override
protected void map(WritableComparable<?> key, VectorWritable value, Context context)
    throws IOException, InterruptedException {
  String keyName;
  Vector valVec = value.get();
  if (valVec instanceof NamedVector) {
    keyName = ((NamedVector) valVec).getName();
  } else {
    keyName = key.toString();
  }
  for (NamedVector seedVector : seedVectors) {
    double distance = measure.distance(seedVector, valVec);
    if (!usesThreshold || distance <= maxDistance) {
      StringTuple outKey = new StringTuple();
      outKey.add(seedVector.getName());
      outKey.add(keyName);
      context.write(outKey, new DoubleWritable(distance));
    }
  }
}
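The value wrapped in the DoubleWritable above comes from a pluggable DistanceMeasure. A standalone sketch of that call, assuming Mahout's EuclideanDistanceMeasure and DenseVector (these names are not part of the mapper source above):

import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;

public class DistanceDemo {
    public static void main(String[] args) {
        Vector a = new DenseVector(new double[] { 1.0, 2.0 });
        Vector b = new DenseVector(new double[] { 4.0, 6.0 });
        // Euclidean distance = sqrt(3^2 + 4^2) = 5.0
        System.out.println(new EuclideanDistanceMeasure().distance(a, b));
    }
}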
From source file:org.apache.mahout.math.hadoop.stats.BasicStatsTest.java
License:Apache License
private void produceTestData(Path input) throws Exception {
  FileSystem fs = FileSystem.get(input.toUri(), conf);
  SequenceFile.Writer writer =
      new SequenceFile.Writer(fs, conf, input, IntWritable.class, DoubleWritable.class);
  //Random random = new MersenneTwisterRNG();
  //Normal normal = new Normal(5, 3, random);
  //for (int i = 0; i < 10000; i++) {
  //  writer.append(new IntWritable(i), new DoubleWritable((long) normal.nextDouble()));
  //}
  int i = 0;
  writer.append(new IntWritable(i++), new DoubleWritable(7));
  writer.append(new IntWritable(i++), new DoubleWritable(9));
  writer.append(new IntWritable(i++), new DoubleWritable(9));
  writer.append(new IntWritable(i++), new DoubleWritable(10));
  writer.append(new IntWritable(i++), new DoubleWritable(10));
  writer.append(new IntWritable(i++), new DoubleWritable(10));
  writer.append(new IntWritable(i++), new DoubleWritable(10));
  writer.append(new IntWritable(i++), new DoubleWritable(11));
  writer.append(new IntWritable(i++), new DoubleWritable(11));
  writer.append(new IntWritable(i++), new DoubleWritable(13));
  writer.close();
}
From source file:org.apache.mahout.math.hadoop.stats.BasicStatsTest.java
License:Apache License
@Test
public void testStdDev2() throws Exception {
  Path input = getTestTempFilePath("stdDev/counts.file");
  Path output = getTestTempFilePath("stdDev/output.file");
  FileSystem fs = FileSystem.get(input.toUri(), conf);
  SequenceFile.Writer writer =
      new SequenceFile.Writer(fs, conf, input, IntWritable.class, DoubleWritable.class);
  Random random = RandomUtils.getRandom();
  Normal normal = new Normal(5, 3, random);
  for (int i = 0; i < 1000000; i++) {
    writer.append(new IntWritable(i), new DoubleWritable(normal.nextInt()));
  }
  writer.close();
  double v = BasicStats.stdDev(input, output, conf);
  assertEquals(3, v, 0.02);
}
From source file:org.apache.mahout.math.hadoop.stats.StandardDeviationCalculatorMapper.java
License:Apache License
@Override
protected void map(IntWritable key, Writable value, Context context)
    throws IOException, InterruptedException {
  if (key.get() == -1) {
    return;
  }
  // Kind of ugly, but such is life
  double df = Double.NaN;
  if (value instanceof LongWritable) {
    df = ((LongWritable) value).get();
  } else if (value instanceof DoubleWritable) {
    df = ((DoubleWritable) value).get();
  }
  if (!Double.isNaN(df)) {
    // For calculating the sum of squares
    context.write(SUM_OF_SQUARES, new DoubleWritable(df * df));
    context.write(SUM, new DoubleWritable(df));
    // For calculating the total number of entries
    context.write(TOTAL_COUNT, new DoubleWritable(1));
  }
}
From source file:org.apache.mahout.math.hadoop.stats.StandardDeviationCalculatorReducer.java
License:Apache License
@Override
protected void reduce(IntWritable key, Iterable<DoubleWritable> values, Context context)
    throws IOException, InterruptedException {
  double sum = 0.0;
  for (DoubleWritable value : values) {
    sum += value.get();
  }
  context.write(key, new DoubleWritable(sum));
}
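The mapper and reducer above only produce three running aggregates keyed by SUM_OF_SQUARES, SUM, and TOTAL_COUNT; the standard deviation itself is derived afterwards. A hedged sketch of one standard formulation of that final step (Mahout's BasicStats may differ in detail):

// Combine the three reduced aggregates into a sample standard deviation,
// using the identity: var = (sumOfSquares - sum^2 / n) / (n - 1).
static double stdDevFromAggregates(double sum, double sumOfSquares, double n) {
  double variance = (sumOfSquares - (sum * sum) / n) / (n - 1);
  return Math.sqrt(variance);
}
// e.g. for the ten values written in produceTestData above (sum = 100,
// sumOfSquares = 1022, n = 10) this yields sqrt(22/9) ≈ 1.563.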
From source file:org.apache.mahout.utils.nlp.collocations.llr.LLRReducer.java
License:Apache License
/**
 * Perform the LLR calculation. Input is k:ngram:ngramFreq, v:(h_|t_)subgram:subgramFreq,
 * N = ngram total.
 *
 * Each ngram has two subgrams, a head and a tail, referred to as A and B respectively below.
 *
 *  A+ B: number of times a+b appear together: ngramFreq
 *  A+!B: number of times A appears without B: hSubgramFreq - ngramFreq
 * !A+ B: number of times B appears without A: tSubgramFreq - ngramFreq
 * !A+!B: number of times neither A nor B appears (in that order):
 *        N - (subgramFreqA + subgramFreqB - ngramFreq)
 */
@Override
protected void reduce(Gram ngram, Iterable<Gram> values, Context context)
    throws IOException, InterruptedException {
  int[] gramFreq = { -1, -1 };
  if (ngram.getType() == Gram.Type.UNIGRAM && emitUnigrams) {
    DoubleWritable dd = new DoubleWritable(ngram.getFrequency());
    Text t = new Text(ngram.getString());
    context.write(t, dd);
    return;
  }
  // FIXME: better way to handle errors? Wouldn't an exception thrown here
  // cause hadoop to re-try the job?
  String[] gram = new String[2];
  for (Gram value : values) {
    int pos = value.getType() == Gram.Type.HEAD ? 0 : 1;
    if (gramFreq[pos] != -1) {
      log.warn("Extra {} for {}, skipping", value.getType(), ngram);
      if (value.getType() == Gram.Type.HEAD) {
        context.getCounter(Skipped.EXTRA_HEAD).increment(1);
      } else {
        context.getCounter(Skipped.EXTRA_TAIL).increment(1);
      }
      return;
    }
    gram[pos] = value.getString();
    gramFreq[pos] = value.getFrequency();
  }
  if (gramFreq[0] == -1) {
    log.warn("Missing head for {}, skipping.", ngram);
    context.getCounter(Skipped.MISSING_HEAD).increment(1);
    return;
  } else if (gramFreq[1] == -1) {
    log.warn("Missing tail for {}, skipping", ngram);
    context.getCounter(Skipped.MISSING_TAIL).increment(1);
    return;
  }
  int k11 = ngram.getFrequency();               /* a&b   */
  int k12 = gramFreq[0] - ngram.getFrequency(); /* a&!b  */
  int k21 = gramFreq[1] - ngram.getFrequency(); /* b&!a  */
  int k22 = (int) (ngramTotal - (gramFreq[0] + gramFreq[1] - ngram.getFrequency())); /* !a&!b */
  try {
    double llr = ll.logLikelihoodRatio(k11, k12, k21, k22);
    if (llr < minLLRValue) {
      context.getCounter(Skipped.LESS_THAN_MIN_LLR).increment(1);
      return;
    }
    DoubleWritable dd = new DoubleWritable(llr);
    Text t = new Text(ngram.getString());
    context.write(t, dd);
  } catch (IllegalArgumentException ex) {
    context.getCounter(Skipped.LLR_CALCULATION_ERROR).increment(1);
    log.error("Problem calculating LLR ratio: " + ex.getMessage());
    log.error("NGram: " + ngram);
    log.error("HEAD: " + gram[0] + ':' + gramFreq[0]);
    log.error("TAIL: " + gram[1] + ':' + gramFreq[1]);
    log.error("k11: " + k11 + " k12: " + k12 + " k21: " + k21 + " k22: " + k22);
  }
}
From source file:org.apache.mahout.vectorizer.collocations.llr.LLRReducer.java
License:Apache License
/**
 * Perform the LLR calculation. Input is k:ngram:ngramFreq, v:(h_|t_)subgram:subgramFreq,
 * N = ngram total.
 *
 * Each ngram has two subgrams, a head and a tail, referred to as A and B respectively below.
 *
 *  A+ B: number of times a+b appear together: ngramFreq
 *  A+!B: number of times A appears without B: hSubgramFreq - ngramFreq
 * !A+ B: number of times B appears without A: tSubgramFreq - ngramFreq
 * !A+!B: number of times neither A nor B appears (in that order):
 *        N - (subgramFreqA + subgramFreqB - ngramFreq)
 */
@Override
protected void reduce(Gram ngram, Iterable<Gram> values, Context context)
    throws IOException, InterruptedException {
  int[] gramFreq = { -1, -1 };
  if (ngram.getType() == Gram.Type.UNIGRAM && emitUnigrams) {
    DoubleWritable dd = new DoubleWritable(ngram.getFrequency());
    Text t = new Text(ngram.getString());
    context.write(t, dd);
    return;
  }
  // TODO better way to handle errors? Wouldn't an exception thrown here
  // cause hadoop to re-try the job?
  String[] gram = new String[2];
  for (Gram value : values) {
    int pos = value.getType() == Gram.Type.HEAD ? 0 : 1;
    if (gramFreq[pos] != -1) {
      log.warn("Extra {} for {}, skipping", value.getType(), ngram);
      if (value.getType() == Gram.Type.HEAD) {
        context.getCounter(Skipped.EXTRA_HEAD).increment(1);
      } else {
        context.getCounter(Skipped.EXTRA_TAIL).increment(1);
      }
      return;
    }
    gram[pos] = value.getString();
    gramFreq[pos] = value.getFrequency();
  }
  if (gramFreq[0] == -1) {
    log.warn("Missing head for {}, skipping.", ngram);
    context.getCounter(Skipped.MISSING_HEAD).increment(1);
    return;
  }
  if (gramFreq[1] == -1) {
    log.warn("Missing tail for {}, skipping", ngram);
    context.getCounter(Skipped.MISSING_TAIL).increment(1);
    return;
  }
  long k11 = ngram.getFrequency();               /* a&b   */
  long k12 = gramFreq[0] - ngram.getFrequency(); /* a&!b  */
  long k21 = gramFreq[1] - ngram.getFrequency(); /* b&!a  */
  long k22 = ngramTotal - (gramFreq[0] + gramFreq[1] - ngram.getFrequency()); /* !a&!b */
  double llr;
  try {
    llr = ll.logLikelihoodRatio(k11, k12, k21, k22);
  } catch (IllegalArgumentException ex) {
    context.getCounter(Skipped.LLR_CALCULATION_ERROR).increment(1);
    log.warn("Problem calculating LLR ratio for ngram {}, HEAD {}:{}, TAIL {}:{}, "
        + "k11/k12/k21/k22: {}/{}/{}/{}",
        ngram, gram[0], gramFreq[0], gram[1], gramFreq[1], k11, k12, k21, k22, ex);
    return;
  }
  if (llr < minLLRValue) {
    context.getCounter(Skipped.LESS_THAN_MIN_LLR).increment(1);
  } else {
    context.write(new Text(ngram.getString()), new DoubleWritable(llr));
  }
}
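To make the k11/k12/k21/k22 bookkeeping concrete, here is a hedged, standalone illustration with made-up counts, using Mahout's org.apache.mahout.math.stats.LogLikelihood (the same computation the reducers above delegate to via ll.logLikelihoodRatio):

import org.apache.mahout.math.stats.LogLikelihood;

public class LlrDemo {
    public static void main(String[] args) {
        // Hypothetical counts: the bigram A+B occurs 100 times, the head
        // subgram A 1000 times, the tail subgram B 160 times, N = 1,000,000.
        long ngramFreq = 100, headFreq = 1000, tailFreq = 160, n = 1000000;
        long k11 = ngramFreq;                             // A and B together
        long k12 = headFreq - ngramFreq;                  // A without B
        long k21 = tailFreq - ngramFreq;                  // B without A
        long k22 = n - (headFreq + tailFreq - ngramFreq); // neither A nor B
        // A large ratio means A and B co-occur far more often than chance predicts.
        System.out.println(LogLikelihood.logLikelihoodRatio(k11, k12, k21, k22));
    }
}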
From source file:org.apache.nifi.processors.hive.TestConvertAvroToORC.java
License:Apache License
@Test
public void test_onTrigger_nested_complex_record() throws Exception {
  Map<String, List<Double>> mapData1 = new TreeMap<String, List<Double>>() {
    {
      put("key1", Arrays.asList(1.0, 2.0));
      put("key2", Arrays.asList(3.0, 4.0));
    }
  };
  Map<String, String> arrayMap11 = new TreeMap<String, String>() {
    {
      put("key1", "v1");
      put("key2", "v2");
    }
  };
  Map<String, String> arrayMap12 = new TreeMap<String, String>() {
    {
      put("key3", "v3");
      put("key4", "v4");
    }
  };

  GenericData.Record record = TestNiFiOrcUtils.buildNestedComplexAvroRecord(mapData1,
      Arrays.asList(arrayMap11, arrayMap12));
  DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
  DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  fileWriter.create(record.getSchema(), out);
  fileWriter.append(record);

  // Put another record in
  Map<String, List<Double>> mapData2 = new TreeMap<String, List<Double>>() {
    {
      put("key1", Arrays.asList(-1.0, -2.0));
      put("key2", Arrays.asList(-3.0, -4.0));
    }
  };
  Map<String, String> arrayMap21 = new TreeMap<String, String>() {
    {
      put("key1", "v-1");
      put("key2", "v-2");
    }
  };
  Map<String, String> arrayMap22 = new TreeMap<String, String>() {
    {
      put("key3", "v-3");
      put("key4", "v-4");
    }
  };

  record = TestNiFiOrcUtils.buildNestedComplexAvroRecord(mapData2,
      Arrays.asList(arrayMap21, arrayMap22));
  fileWriter.append(record);
  fileWriter.flush();
  fileWriter.close();
  out.close();

  Map<String, String> attributes = new HashMap<String, String>() {
    {
      put(CoreAttributes.FILENAME.key(), "test");
    }
  };
  runner.enqueue(out.toByteArray(), attributes);
  runner.run();

  runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);

  // Write the flow file out to disk, since the ORC Reader needs a path
  MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
  assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS nested_complex_record "
      + "(myMapOfArray MAP<STRING, ARRAY<DOUBLE>>, myArrayOfMap ARRAY<MAP<STRING, STRING>>)"
      + " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
  assertEquals("2", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
  assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
  byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
  FileOutputStream fos = new FileOutputStream("target/test1.orc");
  fos.write(resultContents);
  fos.flush();
  fos.close();

  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.getLocal(conf);
  Reader reader = OrcFile.createReader(new Path("target/test1.orc"),
      OrcFile.readerOptions(conf).filesystem(fs));
  RecordReader rows = reader.rows();
  Object o = rows.next(null);
  assertNotNull(o);
  assertTrue(o instanceof OrcStruct);
  TypeInfo resultSchema = TestNiFiOrcUtils.buildNestedComplexOrcSchema();
  StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema);

  // check values
  Object myMapOfArray = inspector.getStructFieldData(o, inspector.getStructFieldRef("myMapOfArray"));
  assertTrue(myMapOfArray instanceof Map);
  Map map = (Map) myMapOfArray;
  Object mapValue = map.get(new Text("key1"));
  assertNotNull(mapValue);
  assertTrue(mapValue instanceof List);
  assertEquals(Arrays.asList(new DoubleWritable(1.0), new DoubleWritable(2.0)), mapValue);

  Object myArrayOfMap = inspector.getStructFieldData(o, inspector.getStructFieldRef("myArrayOfMap"));
  assertTrue(myArrayOfMap instanceof List);
  List list = (List) myArrayOfMap;
  Object el0 = list.get(0);
  assertNotNull(el0);
  assertTrue(el0 instanceof Map);
  assertEquals(new Text("v1"), ((Map) el0).get(new Text("key1")));
}
From source file:org.apache.orc.mapred.TestOrcFileEvolution.java
License:Apache License
private WritableComparable assembleRecord(TypeDescription type, Object row) {
  if (row == null) {
    return null;
  }
  switch (type.getCategory()) {
  case STRUCT:
    OrcStruct structResult = new OrcStruct(type);
    for (int i = 0; i < structResult.getNumFields(); i++) {
      List<TypeDescription> childTypes = type.getChildren();
      structResult.setFieldValue(i, assembleRecord(childTypes.get(i), ((List<Object>) row).get(i)));
    }
    return structResult;
  case LIST:
    OrcList<WritableComparable> listResult = new OrcList<>(type);
    TypeDescription elemType = type.getChildren().get(0);
    List<Object> elems = (List<Object>) row;
    for (int i = 0; i < elems.size(); i++) {
      listResult.add(assembleRecord(elemType, elems.get(i)));
    }
    return listResult;
  case MAP:
    OrcMap<WritableComparable, WritableComparable> mapResult = new OrcMap<>(type);
    TypeDescription keyType = type.getChildren().get(0);
    TypeDescription valueType = type.getChildren().get(1);
    for (Map.Entry<Object, Object> entry : ((Map<Object, Object>) row).entrySet()) {
      mapResult.put(assembleRecord(keyType, entry.getKey()),
          assembleRecord(valueType, entry.getValue()));
    }
    return mapResult;
  case INT:
    return new IntWritable((Integer) row);
  case DOUBLE:
    return new DoubleWritable((Double) row);
  case STRING:
    return new Text((String) row);
  default:
    throw new UnsupportedOperationException(
        String.format("Not expecting to have a field of type %s in unit tests", type.getCategory()));
  }
}
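A hedged fragment (hypothetical schema and data, meant to live inside the same test class) showing the row convention assembleRecord consumes: structs as List<Object>, maps as Map<Object, Object>, and leaves as boxed primitives:

// Builds an OrcStruct holding IntWritable(42), DoubleWritable(2.5), Text("hi").
TypeDescription schema = TypeDescription.fromString("struct<i:int,d:double,s:string>");
WritableComparable rec = assembleRecord(schema, Arrays.asList(42, 2.5, "hi"));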
From source file:org.apache.orc.mapred.TestOrcOutputFormat.java
License:Apache License
@Test
public void testAllTypes() throws Exception {
  conf.set("mapreduce.task.attempt.id", "attempt_20160101_0001_m_000001_0");
  conf.setOutputCommitter(NullOutputCommitter.class);
  final String typeStr = "struct<b1:binary,b2:boolean,b3:tinyint,"
      + "c:char(10),d1:date,d2:decimal(20,5),d3:double,fff:float,int:int,"
      + "l:array<bigint>,map:map<smallint,string>,"
      + "str:struct<u:uniontype<timestamp,varchar(100)>>,ts:timestamp>";
  OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, typeStr);
  FileOutputFormat.setOutputPath(conf, workDir);
  TypeDescription type = TypeDescription.fromString(typeStr);

  // build a row object
  OrcStruct row = (OrcStruct) OrcStruct.createValue(type);
  ((BytesWritable) row.getFieldValue(0)).set(new byte[] { 1, 2, 3, 4 }, 0, 4);
  ((BooleanWritable) row.getFieldValue(1)).set(true);
  ((ByteWritable) row.getFieldValue(2)).set((byte) 23);
  ((Text) row.getFieldValue(3)).set("aaabbbcccddd");
  SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
  ((DateWritable) row.getFieldValue(4))
      .set(DateWritable.millisToDays(format.parse("2016-04-01").getTime()));
  ((HiveDecimalWritable) row.getFieldValue(5)).set(new HiveDecimalWritable("1.23"));
  ((DoubleWritable) row.getFieldValue(6)).set(1.5);
  ((FloatWritable) row.getFieldValue(7)).set(4.5f);
  ((IntWritable) row.getFieldValue(8)).set(31415);
  OrcList<LongWritable> longList = (OrcList<LongWritable>) row.getFieldValue(9);
  longList.add(new LongWritable(123));
  longList.add(new LongWritable(456));
  OrcMap<ShortWritable, Text> map = (OrcMap<ShortWritable, Text>) row.getFieldValue(10);
  map.put(new ShortWritable((short) 1000), new Text("aaaa"));
  map.put(new ShortWritable((short) 123), new Text("bbbb"));
  OrcStruct struct = (OrcStruct) row.getFieldValue(11);
  OrcUnion union = (OrcUnion) struct.getFieldValue(0);
  union.set((byte) 1, new Text("abcde"));
  ((OrcTimestamp) row.getFieldValue(12)).set("1996-12-11 15:00:00");

  NullWritable nada = NullWritable.get();
  RecordWriter<NullWritable, OrcStruct> writer =
      new OrcOutputFormat<OrcStruct>().getRecordWriter(fs, conf, "all.orc", Reporter.NULL);
  for (int r = 0; r < 10; ++r) {
    row.setFieldValue(8, new IntWritable(r * 10));
    writer.write(nada, row);
  }
  union.set((byte) 0, new OrcTimestamp("2011-12-25 12:34:56"));
  for (int r = 0; r < 10; ++r) {
    row.setFieldValue(8, new IntWritable(r * 10 + 100));
    writer.write(nada, row);
  }
  OrcStruct row2 = new OrcStruct(type);
  writer.write(nada, row2);
  row.setFieldValue(8, new IntWritable(210));
  writer.write(nada, row);
  writer.close(Reporter.NULL);

  FileSplit split = new FileSplit(new Path(workDir, "all.orc"), 0, 100000, new String[0]);
  RecordReader<NullWritable, OrcStruct> reader =
      new OrcInputFormat<OrcStruct>().getRecordReader(split, conf, Reporter.NULL);
  nada = reader.createKey();
  row = reader.createValue();
  for (int r = 0; r < 22; ++r) {
    assertEquals(true, reader.next(nada, row));
    if (r == 20) {
      for (int c = 0; c < 12; ++c) {
        assertEquals(null, row.getFieldValue(c));
      }
    } else {
      assertEquals(new BytesWritable(new byte[] { 1, 2, 3, 4 }), row.getFieldValue(0));
      assertEquals(new BooleanWritable(true), row.getFieldValue(1));
      assertEquals(new ByteWritable((byte) 23), row.getFieldValue(2));
      assertEquals(new Text("aaabbbcccd"), row.getFieldValue(3));
      assertEquals(new DateWritable(DateWritable.millisToDays(format.parse("2016-04-01").getTime())),
          row.getFieldValue(4));
      assertEquals(new HiveDecimalWritable("1.23"), row.getFieldValue(5));
      assertEquals(new DoubleWritable(1.5), row.getFieldValue(6));
      assertEquals(new FloatWritable(4.5f), row.getFieldValue(7));
      assertEquals(new IntWritable(r * 10), row.getFieldValue(8));
      assertEquals(longList, row.getFieldValue(9));
      assertEquals(map, row.getFieldValue(10));
      if (r < 10) {
        union.set((byte) 1, new Text("abcde"));
      } else {
        union.set((byte) 0, new OrcTimestamp("2011-12-25 12:34:56"));
      }
      assertEquals("row " + r, struct, row.getFieldValue(11));
      assertEquals("row " + r, new OrcTimestamp("1996-12-11 15:00:00"), row.getFieldValue(12));
    }
  }
  assertEquals(false, reader.next(nada, row));
}