Example usage for org.apache.hadoop.io DoubleWritable DoubleWritable

Introduction

This page collects example usages of the org.apache.hadoop.io.DoubleWritable constructor DoubleWritable(double value), drawn from open-source Apache projects.

Prototype

public DoubleWritable(double value) 
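
Before the project examples below, a minimal, self-contained sketch of what the value constructor gives you; the numbers and class name are made up for illustration:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

import org.apache.hadoop.io.DoubleWritable;

public class DoubleWritableSketch {
    public static void main(String[] args) throws IOException {
        // Construct with an initial value instead of calling set() afterwards.
        DoubleWritable dw = new DoubleWritable(3.14);
        System.out.println(dw.get());                                    // 3.14
        System.out.println(dw.compareTo(new DoubleWritable(2.71)) > 0);  // true

        // Writable round trip: serialize, then read back into a fresh instance.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        dw.write(new DataOutputStream(bytes));
        DoubleWritable copy = new DoubleWritable();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(copy.equals(dw));                             // true
    }
}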

Usage

From source file:org.apache.mahout.math.hadoop.similarity.VectorDistanceMapper.java

License:Apache License

@Override
protected void map(WritableComparable<?> key, VectorWritable value, Context context)
        throws IOException, InterruptedException {
    String keyName;
    Vector valVec = value.get();
    if (valVec instanceof NamedVector) {
        keyName = ((NamedVector) valVec).getName();
    } else {
        keyName = key.toString();
    }

    for (NamedVector seedVector : seedVectors) {
        double distance = measure.distance(seedVector, valVec);
        if (!usesThreshold || distance <= maxDistance) {
            StringTuple outKey = new StringTuple();
            outKey.add(seedVector.getName());
            outKey.add(keyName);
            context.write(outKey, new DoubleWritable(distance));
        }
    }
}

From source file:org.apache.mahout.math.hadoop.stats.BasicStatsTest.java

License:Apache License

private void produceTestData(Path input) throws Exception {
    FileSystem fs = FileSystem.get(input.toUri(), conf);
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, input, IntWritable.class,
            DoubleWritable.class);
    //Random random = new MersenneTwisterRNG();
    /*Normal normal = new Normal(5, 3, random);
    for (int i = 0; i < 10000; i++) {
      writer.append(new IntWritable(i), new DoubleWritable((long)normal.nextDouble()));
    }*/
    int i = 0;
    writer.append(new IntWritable(i++), new DoubleWritable(7));
    writer.append(new IntWritable(i++), new DoubleWritable(9));
    writer.append(new IntWritable(i++), new DoubleWritable(9));
    writer.append(new IntWritable(i++), new DoubleWritable(10));
    writer.append(new IntWritable(i++), new DoubleWritable(10));
    writer.append(new IntWritable(i++), new DoubleWritable(10));
    writer.append(new IntWritable(i++), new DoubleWritable(10));
    writer.append(new IntWritable(i++), new DoubleWritable(11));
    writer.append(new IntWritable(i++), new DoubleWritable(11));
    writer.append(new IntWritable(i++), new DoubleWritable(13));
    writer.close();
}
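
For reference, a hedged sketch of reading that data back; it assumes the same fs, conf and input path as produceTestData above and uses the classic SequenceFile.Reader(fs, path, conf) constructor:

    SequenceFile.Reader reader = new SequenceFile.Reader(fs, input, conf);
    try {
        IntWritable key = new IntWritable();
        DoubleWritable value = new DoubleWritable();
        // next() fills the reusable key/value instances until the file is exhausted.
        while (reader.next(key, value)) {
            System.out.println(key.get() + " -> " + value.get());
        }
    } finally {
        reader.close();
    }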

From source file:org.apache.mahout.math.hadoop.stats.BasicStatsTest.java

License:Apache License

@Test
public void testStdDev2() throws Exception {
    Path input = getTestTempFilePath("stdDev/counts.file");
    Path output = getTestTempFilePath("stdDev/output.file");
    FileSystem fs = FileSystem.get(input.toUri(), conf);
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, input, IntWritable.class,
            DoubleWritable.class);
    Random random = RandomUtils.getRandom();
    Normal normal = new Normal(5, 3, random);
    for (int i = 0; i < 1000000; i++) {
        writer.append(new IntWritable(i), new DoubleWritable((long) normal.nextInt()));
    }
    writer.close();
    double v = BasicStats.stdDev(input, output, conf);
    assertEquals(3, v, 0.02);
}

From source file:org.apache.mahout.math.hadoop.stats.StandardDeviationCalculatorMapper.java

License:Apache License

@Override
protected void map(IntWritable key, Writable value, Context context) throws IOException, InterruptedException {
    if (key.get() == -1) {
        return;
    }
    //Kind of ugly, but such is life
    double df = Double.NaN;
    if (value instanceof LongWritable) {
        df = ((LongWritable) value).get();
    } else if (value instanceof DoubleWritable) {
        df = ((DoubleWritable) value).get();
    }
    if (!Double.isNaN(df)) {
        // For calculating the sum of squares
        context.write(SUM_OF_SQUARES, new DoubleWritable(df * df));
        context.write(SUM, new DoubleWritable(df));
        // For calculating the total number of entries
        context.write(TOTAL_COUNT, new DoubleWritable(1));
    }
}

From source file:org.apache.mahout.math.hadoop.stats.StandardDeviationCalculatorReducer.java

License:Apache License

@Override
protected void reduce(IntWritable key, Iterable<DoubleWritable> values, Context context)
        throws IOException, InterruptedException {
    double sum = 0.0;
    for (DoubleWritable value : values) {
        sum += value.get();
    }
    context.write(key, new DoubleWritable(sum));
}
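
The mapper/reducer pair above only accumulates SUM, SUM_OF_SQUARES and TOTAL_COUNT; turning them into a standard deviation is plain arithmetic. A minimal sketch, not necessarily Mahout's exact BasicStats code, assuming the three totals have already been read back as doubles:

    // Sample variance from running totals: (sumSq - n * mean^2) / (n - 1)
    static double stdDevFrom(double sum, double sumOfSquares, double totalCount) {
        double mean = sum / totalCount;
        double variance = (sumOfSquares - totalCount * mean * mean) / (totalCount - 1);
        return Math.sqrt(variance);
    }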

From source file:org.apache.mahout.utils.nlp.collocations.llr.LLRReducer.java

License:Apache License

/**
 * Perform LLR calculation, input is: k:ngram:ngramFreq v:(h_|t_)subgram:subgramfreq N = ngram total
 *
 * Each ngram will have 2 subgrams, a head and a tail, referred to as A and B respectively below.
 *
 * A+ B: number of times a+b appear together: ngramFreq
 * A+!B: number of times A appears without B: hSubgramFreq - ngramFreq
 * !A+ B: number of times B appears without A: tSubgramFreq - ngramFreq
 * !A+!B: number of times neither A nor B appears (in that order): N - (subgramFreqA + subgramFreqB - ngramFreq)
 */
@Override
protected void reduce(Gram ngram, Iterable<Gram> values, Context context)
        throws IOException, InterruptedException {

    int[] gramFreq = { -1, -1 };

    if (ngram.getType() == Gram.Type.UNIGRAM && emitUnigrams) {
        DoubleWritable dd = new DoubleWritable(ngram.getFrequency());
        Text t = new Text(ngram.getString());
        context.write(t, dd);
        return;
    }
    // FIXME: better way to handle errors? Wouldn't an exception thrown here
    // cause hadoop to re-try the job?
    String[] gram = new String[2];
    for (Gram value : values) {

        int pos = value.getType() == Gram.Type.HEAD ? 0 : 1;

        if (gramFreq[pos] != -1) {
            log.warn("Extra {} for {}, skipping", value.getType(), ngram);
            if (value.getType() == Gram.Type.HEAD) {
                context.getCounter(Skipped.EXTRA_HEAD).increment(1);
            } else {
                context.getCounter(Skipped.EXTRA_TAIL).increment(1);
            }
            return;
        }

        gram[pos] = value.getString();
        gramFreq[pos] = value.getFrequency();
    }

    if (gramFreq[0] == -1) {
        log.warn("Missing head for {}, skipping.", ngram);
        context.getCounter(Skipped.MISSING_HEAD).increment(1);
        return;
    } else if (gramFreq[1] == -1) {
        log.warn("Missing tail for {}, skipping", ngram);
        context.getCounter(Skipped.MISSING_TAIL).increment(1);
        return;
    }

    int k11 = ngram.getFrequency(); /* a&b */
    int k12 = gramFreq[0] - ngram.getFrequency(); /* a&!b */
    int k21 = gramFreq[1] - ngram.getFrequency(); /* !a&b */
    int k22 = (int) (ngramTotal - (gramFreq[0] + gramFreq[1] - ngram.getFrequency())); /* !a&!b */

    try {
        double llr = ll.logLikelihoodRatio(k11, k12, k21, k22);
        if (llr < minLLRValue) {
            context.getCounter(Skipped.LESS_THAN_MIN_LLR).increment(1);
            return;
        }
        DoubleWritable dd = new DoubleWritable(llr);
        Text t = new Text(ngram.getString());
        context.write(t, dd);
    } catch (IllegalArgumentException ex) {
        context.getCounter(Skipped.LLR_CALCULATION_ERROR).increment(1);
        log.error("Problem calculating LLR ratio: " + ex.getMessage());
        log.error("NGram: " + ngram);
        log.error("HEAD: " + gram[0] + ':' + gramFreq[0]);
        log.error("TAIL: " + gram[1] + ':' + gramFreq[1]);
        log.error("k11: " + k11 + " k12: " + k12 + " k21: " + k21 + " k22: " + k22);
    }
}

From source file:org.apache.mahout.vectorizer.collocations.llr.LLRReducer.java

License:Apache License

/**
 * Perform LLR calculation, input is: k:ngram:ngramFreq v:(h_|t_)subgram:subgramfreq N = ngram total
 *
 * Each ngram will have 2 subgrams, a head and a tail, referred to as A and B respectively below.
 *
 * A+ B: number of times a+b appear together: ngramFreq
 * A+!B: number of times A appears without B: hSubgramFreq - ngramFreq
 * !A+ B: number of times B appears without A: tSubgramFreq - ngramFreq
 * !A+!B: number of times neither A nor B appears (in that order): N - (subgramFreqA + subgramFreqB - ngramFreq)
 */
@Override
protected void reduce(Gram ngram, Iterable<Gram> values, Context context)
        throws IOException, InterruptedException {

    int[] gramFreq = { -1, -1 };

    if (ngram.getType() == Gram.Type.UNIGRAM && emitUnigrams) {
        DoubleWritable dd = new DoubleWritable(ngram.getFrequency());
        Text t = new Text(ngram.getString());
        context.write(t, dd);
        return;
    }
    // TODO better way to handle errors? Wouldn't an exception thrown here
    // cause hadoop to re-try the job?
    String[] gram = new String[2];
    for (Gram value : values) {

        int pos = value.getType() == Gram.Type.HEAD ? 0 : 1;

        if (gramFreq[pos] != -1) {
            log.warn("Extra {} for {}, skipping", value.getType(), ngram);
            if (value.getType() == Gram.Type.HEAD) {
                context.getCounter(Skipped.EXTRA_HEAD).increment(1);
            } else {
                context.getCounter(Skipped.EXTRA_TAIL).increment(1);
            }
            return;
        }

        gram[pos] = value.getString();
        gramFreq[pos] = value.getFrequency();
    }

    if (gramFreq[0] == -1) {
        log.warn("Missing head for {}, skipping.", ngram);
        context.getCounter(Skipped.MISSING_HEAD).increment(1);
        return;
    }
    if (gramFreq[1] == -1) {
        log.warn("Missing tail for {}, skipping", ngram);
        context.getCounter(Skipped.MISSING_TAIL).increment(1);
        return;
    }

    long k11 = ngram.getFrequency(); /* a&b */
    long k12 = gramFreq[0] - ngram.getFrequency(); /* a&!b */
    long k21 = gramFreq[1] - ngram.getFrequency(); /* !a&b */
    long k22 = ngramTotal - (gramFreq[0] + gramFreq[1] - ngram.getFrequency()); /* !a&!b */

    double llr;
    try {
        llr = ll.logLikelihoodRatio(k11, k12, k21, k22);
    } catch (IllegalArgumentException ex) {
        context.getCounter(Skipped.LLR_CALCULATION_ERROR).increment(1);
        log.warn(
                "Problem calculating LLR ratio for ngram {}, HEAD {}:{}, TAIL {}:{}, k11/k12/k21/k22: {}/{}/{}/{}",
                ngram, gram[0], gramFreq[0], gram[1], gramFreq[1], k11, k12, k21, k22, ex);
        return;
    }
    if (llr < minLLRValue) {
        context.getCounter(Skipped.LESS_THAN_MIN_LLR).increment(1);
    } else {
        context.write(new Text(ngram.getString()), new DoubleWritable(llr));
    }
}
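
The contingency-table arithmetic in both reducers can also be exercised on its own. A hedged sketch, assuming Mahout's org.apache.mahout.math.stats.LogLikelihood is on the classpath and using made-up counts:

import org.apache.hadoop.io.DoubleWritable;
import org.apache.mahout.math.stats.LogLikelihood;

public class LlrSketch {
    public static void main(String[] args) {
        // Hypothetical counts for an ngram and its head/tail subgrams.
        long ngramFreq = 80;      // times head and tail appear together
        long headFreq = 100;      // total occurrences of the head subgram
        long tailFreq = 120;      // total occurrences of the tail subgram
        long ngramTotal = 10000;  // N

        long k11 = ngramFreq;                                      // a&b
        long k12 = headFreq - ngramFreq;                           // a&!b
        long k21 = tailFreq - ngramFreq;                           // !a&b
        long k22 = ngramTotal - (headFreq + tailFreq - ngramFreq); // !a&!b

        double llr = LogLikelihood.logLikelihoodRatio(k11, k12, k21, k22);
        DoubleWritable out = new DoubleWritable(llr); // same value type the reducers emit
        System.out.println(out.get());
    }
}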

From source file:org.apache.nifi.processors.hive.TestConvertAvroToORC.java

License:Apache License

@Test
public void test_onTrigger_nested_complex_record() throws Exception {

    Map<String, List<Double>> mapData1 = new TreeMap<String, List<Double>>() {
        {
            put("key1", Arrays.asList(1.0, 2.0));
            put("key2", Arrays.asList(3.0, 4.0));
        }
    };

    Map<String, String> arrayMap11 = new TreeMap<String, String>() {
        {
            put("key1", "v1");
            put("key2", "v2");
        }
    };
    Map<String, String> arrayMap12 = new TreeMap<String, String>() {
        {
            put("key3", "v3");
            put("key4", "v4");
        }
    };

    GenericData.Record record = TestNiFiOrcUtils.buildNestedComplexAvroRecord(mapData1,
            Arrays.asList(arrayMap11, arrayMap12));

    DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
    DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    fileWriter.create(record.getSchema(), out);
    fileWriter.append(record);

    // Put another record in
    Map<String, List<Double>> mapData2 = new TreeMap<String, List<Double>>() {
        {
            put("key1", Arrays.asList(-1.0, -2.0));
            put("key2", Arrays.asList(-3.0, -4.0));
        }
    };

    Map<String, String> arrayMap21 = new TreeMap<String, String>() {
        {
            put("key1", "v-1");
            put("key2", "v-2");
        }
    };
    Map<String, String> arrayMap22 = new TreeMap<String, String>() {
        {
            put("key3", "v-3");
            put("key4", "v-4");
        }
    };

    record = TestNiFiOrcUtils.buildNestedComplexAvroRecord(mapData2, Arrays.asList(arrayMap21, arrayMap22));
    fileWriter.append(record);

    fileWriter.flush();
    fileWriter.close();
    out.close();

    Map<String, String> attributes = new HashMap<String, String>() {
        {
            put(CoreAttributes.FILENAME.key(), "test");
        }
    };
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);

    // Write the flow file out to disk, since the ORC Reader needs a path
    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
    assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS nested_complex_record "
            + "(myMapOfArray MAP<STRING, ARRAY<DOUBLE>>, myArrayOfMap ARRAY<MAP<STRING, STRING>>)"
            + " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
    assertEquals("2", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
    assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
    byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
    FileOutputStream fos = new FileOutputStream("target/test1.orc");
    fos.write(resultContents);
    fos.flush();
    fos.close();

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Reader reader = OrcFile.createReader(new Path("target/test1.orc"),
            OrcFile.readerOptions(conf).filesystem(fs));
    RecordReader rows = reader.rows();
    Object o = rows.next(null);
    assertNotNull(o);
    assertTrue(o instanceof OrcStruct);
    TypeInfo resultSchema = TestNiFiOrcUtils.buildNestedComplexOrcSchema();
    StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema);

    // check values
    Object myMapOfArray = inspector.getStructFieldData(o, inspector.getStructFieldRef("myMapOfArray"));
    assertTrue(myMapOfArray instanceof Map);
    Map map = (Map) myMapOfArray;
    Object mapValue = map.get(new Text("key1"));
    assertNotNull(mapValue);
    assertTrue(mapValue instanceof List);
    assertEquals(Arrays.asList(new DoubleWritable(1.0), new DoubleWritable(2.0)), mapValue);

    Object myArrayOfMap = inspector.getStructFieldData(o, inspector.getStructFieldRef("myArrayOfMap"));
    assertTrue(myArrayOfMap instanceof List);
    List list = (List) myArrayOfMap;
    Object el0 = list.get(0);
    assertNotNull(el0);
    assertTrue(el0 instanceof Map);
    assertEquals(new Text("v1"), ((Map) el0).get(new Text("key1")));
}

From source file:org.apache.orc.mapred.TestOrcFileEvolution.java

License:Apache License

private WritableComparable assembleRecord(TypeDescription type, Object row) {
    if (row == null) {
        return null;
    }
    switch (type.getCategory()) {
    case STRUCT:
        OrcStruct structResult = new OrcStruct(type);
        for (int i = 0; i < structResult.getNumFields(); i++) {
            List<TypeDescription> childTypes = type.getChildren();
            structResult.setFieldValue(i, assembleRecord(childTypes.get(i), ((List<Object>) row).get(i)));
        }
        return structResult;
    case LIST:
        OrcList<WritableComparable> listResult = new OrcList<>(type);
        TypeDescription elemType = type.getChildren().get(0);
        List<Object> elems = (List<Object>) row;
        for (int i = 0; i < elems.size(); i++) {
            listResult.add(assembleRecord(elemType, elems.get(i)));
        }
        return listResult;
    case MAP:
        OrcMap<WritableComparable, WritableComparable> mapResult = new OrcMap<>(type);
        TypeDescription keyType = type.getChildren().get(0);
        TypeDescription valueType = type.getChildren().get(1);
        for (Map.Entry<Object, Object> entry : ((Map<Object, Object>) row).entrySet()) {
            mapResult.put(assembleRecord(keyType, entry.getKey()), assembleRecord(valueType, entry.getValue()));
        }
        return mapResult;
    case INT:
        return new IntWritable((Integer) row);
    case DOUBLE:
        return new DoubleWritable((Double) row);
    case STRING:
        return new Text((String) row);
    default:
        throw new UnsupportedOperationException(
                String.format("Not expecting to have a field of type %s in unit tests", type.getCategory()));
    }
}

From source file:org.apache.orc.mapred.TestOrcOutputFormat.java

License:Apache License

@Test
public void testAllTypes() throws Exception {
    conf.set("mapreduce.task.attempt.id", "attempt_20160101_0001_m_000001_0");
    conf.setOutputCommitter(NullOutputCommitter.class);
    final String typeStr = "struct<b1:binary,b2:boolean,b3:tinyint,"
            + "c:char(10),d1:date,d2:decimal(20,5),d3:double,fff:float,int:int,"
            + "l:array<bigint>,map:map<smallint,string>,"
            + "str:struct<u:uniontype<timestamp,varchar(100)>>,ts:timestamp>";
    OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, typeStr);
    FileOutputFormat.setOutputPath(conf, workDir);
    TypeDescription type = TypeDescription.fromString(typeStr);

    // build a row object
    OrcStruct row = (OrcStruct) OrcStruct.createValue(type);
    ((BytesWritable) row.getFieldValue(0)).set(new byte[] { 1, 2, 3, 4 }, 0, 4);
    ((BooleanWritable) row.getFieldValue(1)).set(true);
    ((ByteWritable) row.getFieldValue(2)).set((byte) 23);
    ((Text) row.getFieldValue(3)).set("aaabbbcccddd");
    SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
    ((DateWritable) row.getFieldValue(4)).set(DateWritable.millisToDays(format.parse("2016-04-01").getTime()));
    ((HiveDecimalWritable) row.getFieldValue(5)).set(new HiveDecimalWritable("1.23"));
    ((DoubleWritable) row.getFieldValue(6)).set(1.5);
    ((FloatWritable) row.getFieldValue(7)).set(4.5f);
    ((IntWritable) row.getFieldValue(8)).set(31415);
    OrcList<LongWritable> longList = (OrcList<LongWritable>) row.getFieldValue(9);
    longList.add(new LongWritable(123));
    longList.add(new LongWritable(456));
    OrcMap<ShortWritable, Text> map = (OrcMap<ShortWritable, Text>) row.getFieldValue(10);
    map.put(new ShortWritable((short) 1000), new Text("aaaa"));
    map.put(new ShortWritable((short) 123), new Text("bbbb"));
    OrcStruct struct = (OrcStruct) row.getFieldValue(11);
    OrcUnion union = (OrcUnion) struct.getFieldValue(0);
    union.set((byte) 1, new Text("abcde"));
    ((OrcTimestamp) row.getFieldValue(12)).set("1996-12-11 15:00:00");
    NullWritable nada = NullWritable.get();
    RecordWriter<NullWritable, OrcStruct> writer = new OrcOutputFormat<OrcStruct>().getRecordWriter(fs, conf,
            "all.orc", Reporter.NULL);
    for (int r = 0; r < 10; ++r) {
        row.setFieldValue(8, new IntWritable(r * 10));
        writer.write(nada, row);
    }
    union.set((byte) 0, new OrcTimestamp("2011-12-25 12:34:56"));
    for (int r = 0; r < 10; ++r) {
        row.setFieldValue(8, new IntWritable(r * 10 + 100));
        writer.write(nada, row);
    }
    OrcStruct row2 = new OrcStruct(type);
    writer.write(nada, row2);
    row.setFieldValue(8, new IntWritable(210));
    writer.write(nada, row);
    writer.close(Reporter.NULL);

    FileSplit split = new FileSplit(new Path(workDir, "all.orc"), 0, 100000, new String[0]);
    RecordReader<NullWritable, OrcStruct> reader = new OrcInputFormat<OrcStruct>().getRecordReader(split, conf,
            Reporter.NULL);
    nada = reader.createKey();
    row = reader.createValue();
    for (int r = 0; r < 22; ++r) {
        assertEquals(true, reader.next(nada, row));
        if (r == 20) {
            for (int c = 0; c < 12; ++c) {
                assertEquals(null, row.getFieldValue(c));
            }
        } else {
            assertEquals(new BytesWritable(new byte[] { 1, 2, 3, 4 }), row.getFieldValue(0));
            assertEquals(new BooleanWritable(true), row.getFieldValue(1));
            assertEquals(new ByteWritable((byte) 23), row.getFieldValue(2));
            assertEquals(new Text("aaabbbcccd"), row.getFieldValue(3));
            assertEquals(new DateWritable(DateWritable.millisToDays(format.parse("2016-04-01").getTime())),
                    row.getFieldValue(4));
            assertEquals(new HiveDecimalWritable("1.23"), row.getFieldValue(5));
            assertEquals(new DoubleWritable(1.5), row.getFieldValue(6));
            assertEquals(new FloatWritable(4.5f), row.getFieldValue(7));
            assertEquals(new IntWritable(r * 10), row.getFieldValue(8));
            assertEquals(longList, row.getFieldValue(9));
            assertEquals(map, row.getFieldValue(10));
            if (r < 10) {
                union.set((byte) 1, new Text("abcde"));
            } else {
                union.set((byte) 0, new OrcTimestamp("2011-12-25 12:34:56"));
            }
            assertEquals("row " + r, struct, row.getFieldValue(11));
            assertEquals("row " + r, new OrcTimestamp("1996-12-11 15:00:00"), row.getFieldValue(12));
        }
    }
    assertEquals(false, reader.next(nada, row));
}