Example usage for org.apache.hadoop.io MapWritable MapWritable

List of usage examples for org.apache.hadoop.io MapWritable MapWritable

Introduction

On this page you can find example usage for org.apache.hadoop.io MapWritable MapWritable.

Prototype

public MapWritable() 

Document

Default constructor.
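
Before the usage examples below, here is a minimal, self-contained sketch (not taken from any of the source files listed under Usage) that starts from the default constructor, fills the map, and round-trips it through the Writable serialization API. The class name MapWritableRoundTrip and the sample keys are made up for illustration.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Map;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

public class MapWritableRoundTrip {
    public static void main(String[] args) throws IOException {
        // Start from the default constructor documented above.
        MapWritable map = new MapWritable();
        map.put(new Text("apples"), new IntWritable(3));
        map.put(new Text("oranges"), new IntWritable(5));

        // Serialize the map to an in-memory byte array.
        ByteArrayOutputStream bytesOut = new ByteArrayOutputStream();
        map.write(new DataOutputStream(bytesOut));

        // Deserialize into a second, freshly constructed map.
        MapWritable copy = new MapWritable();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytesOut.toByteArray())));

        for (Map.Entry<Writable, Writable> entry : copy.entrySet()) {
            System.out.println(entry.getKey() + " -> " + entry.getValue());
        }
    }
}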

Usage

From source file:org.wonderbee.elasticsearch.hive.ElasticSearchSerDe.java

License:Apache License

@Override
public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
    StructObjectInspector outputRowOI = (StructObjectInspector) objInspector;
    List<? extends StructField> outputFieldRefs = outputRowOI.getAllStructFieldRefs();
    MapWritable record = new MapWritable();

    String isJson = props.getProperty(ES_IS_JSON);
    if ("true".equalsIgnoreCase(isJson)) {
        throw new SerDeException("Json mode not yet supported");
    }
    // Handle delimited records (i.e., isJson == false)

    for (int c = 0; c < numColumns; c++) {
        try {
            Object field = outputRowOI.getStructFieldData(obj, outputFieldRefs.get(c));
            ObjectInspector fieldOI = outputFieldRefs.get(c).getFieldObjectInspector();

            PrimitiveObjectInspector fieldStringOI = (PrimitiveObjectInspector) fieldOI;
            String columnName = columnNames.get(c);
            record.put(new Text(columnName), (Writable) fieldStringOI.getPrimitiveWritableObject(field));
        } catch (NullPointerException e) {
            //LOG.info("Increment null field counter.");
        }

    }

    return record;
}

From source file:org.wonderbee.elasticsearch.hive.ElasticSearchSerDe.java

License:Apache License

/**
 * Recursively converts an arbitrary object into the appropriate writable. Please enlighten me if there is an existing
 * method for doing this.
 */
private Writable toWritable(Object thing) {
    if (thing instanceof String) {
        return new Text((String) thing);
    } else if (thing instanceof Long) {
        return new LongWritable((Long) thing);
    } else if (thing instanceof Integer) {
        return new IntWritable((Integer) thing);
    } else if (thing instanceof Double) {
        return new DoubleWritable((Double) thing);
    } else if (thing instanceof Float) {
        return new FloatWritable((Float) thing);
    } else if (thing instanceof Boolean) {
        return new BooleanWritable((Boolean) thing);
    } else if (thing instanceof Map) {
        MapWritable result = new MapWritable();
        for (Map.Entry<String, Object> entry : ((Map<String, Object>) thing).entrySet()) {
            result.put(new Text(entry.getKey().toString()), toWritable(entry.getValue()));
        }
        return result;
    } else if (thing instanceof List) {
        if (((List) thing).size() > 0) {
            Object first = ((List) thing).get(0);
            Writable[] listOfThings = new Writable[((List) thing).size()];
            for (int i = 0; i < listOfThings.length; i++) {
                listOfThings[i] = toWritable(((List) thing).get(i));
            }
            return new ArrayWritable(toWritable(first).getClass(), listOfThings);
        }
    }
    return NullWritable.get();
}
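
As a sketch of how the recursive toWritable helper above might be exercised from within the same class, a nested java.util.Map becomes a nested MapWritable and a List becomes an ArrayWritable. The field names and values below are invented for illustration, and the snippet assumes java.util.HashMap and java.util.Arrays are imported in the surrounding class.

// Hypothetical call site for the toWritable helper above (not part of the original source file).
Map<String, Object> doc = new HashMap<String, Object>();
doc.put("title", "hadoop");                     // String -> Text
doc.put("views", 42L);                          // Long   -> LongWritable

Map<String, Object> meta = new HashMap<String, Object>();
meta.put("lang", "en");
doc.put("meta", meta);                          // Map    -> nested MapWritable

doc.put("tags", Arrays.asList("big", "data")); // List   -> ArrayWritable of Text

MapWritable asWritable = (MapWritable) toWritable(doc);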

From source file:smile.wide.AttributeValueHistogram.java

License:Apache License

@Override
public int run(String[] arg) throws Exception {

    if (arg.length < 2) {
        s_logger.fatal("Usage: AttributeValueHistogram <infile> <outfile>");
        // TODO: return an error code?
    }

    s_logger.debug("Got " + arg.length + " arguments");

    inPath_ = arg[0];
    s_logger.info("Input path is " + inPath_);

    // parse the key-value arguments passed - by now these are the arguments
    // specific to AttributeValueHistogram
    for (int i = 1; i < arg.length; ++i) {
        String[] tokens = arg[i].split("=");
        if (tokens.length != 2) {
            s_logger.fatal("Can't parse argument" + arg[i]);
        }

        if (tokens[0].equals("xdata.bayesnets.datasetreader.class")) {
            readerClass_ = tokens[1].trim();
            s_logger.debug("Set reader class to " + readerClass_);
        } else if (tokens[0].equals("xdata.bayesnets.datasetreader.filter")) {
            readerFilter_ = tokens[1].trim();
            s_logger.debug("Set reader filter to " + readerFilter_);
        } else if (tokens[0].equals("xdata.bayesnets.datasetreader.instid")) {
            readerInstID_ = tokens[1].trim();
            s_logger.debug("Set reader's instance ID column to " + readerInstID_);
        } else if (tokens[0].equals("xdata.bayesnets.datasetreader.variablenames")) {
            variableNames_ = tokens[1].trim();
            s_logger.debug("Set reader's variable names to " + variableNames_);
        } else {
            s_logger.warn("Unknown argument " + arg[i]);
        }
    }

    conf_ = getConf();

    // pass the reader class to the mapper, in jobconf      
    // TODO: use setClass here - fails early if wrong, not in the mapper
    conf_.set("xdata.bayesnets.datasetreader.class", readerClass_);
    conf_.set("xdata.bayesnets.datasetreader.filter", readerFilter_);
    // conf_.set("xdata.bayesnets.datasetreader.instid", readerInstID_); // not used
    conf_.set("xdata.bayesnets.datasetreader.variablenames", variableNames_);

    conf_.setBoolean("mapred.compress.map.output", true); // compress intermediate data
    conf_.set("mapred.output.compression.type", CompressionType.BLOCK.toString()); // by block, to keep splittable
    conf_.setClass("mapred.map.output.compression.codec", GzipCodec.class, CompressionCodec.class);

    // for debugging               
    conf_.set("keep.failed.task.files", "true");
    conf_.set("keep.failed.task.pattern", "*");

    Job job = new Job(conf_);

    job.setJarByClass(AttributeValueHistogram.class); // use this jar
    job.setJobName("Collect value histograms by attribute");

    FileInputFormat.addInputPath(job, new Path(inPath_));

    int rnd = (new Random()).nextInt();
    lastWorkingDir_ = job.getWorkingDirectory().toUri();
    s_logger.info("Job working directory is " + lastWorkingDir_);
    String tempDirName = job.getWorkingDirectory() + "/tmp/attvalhist" + rnd + ".tmp";
    s_logger.info("Temp files in directory " + tempDirName);
    FileOutputFormat.setOutputPath(job, new Path(tempDirName));

    job.setMapperClass(AttributeValueHistogramMapper.class);
    job.setCombinerClass(AttributeValueHistogramReducer.class);
    job.setReducerClass(AttributeValueHistogramReducer.class);

    // set both the map and reduce in/out classes
    job.setOutputKeyClass(Text.class); // the name of the attribute
    job.setOutputValueClass(MapWritable.class); // Value -> count map
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    // run'em
    int result = job.waitForCompletion(true) ? 0 : 16;

    // retain the temp file, collect the output      
    attributeValues_ = new TreeMap<String, Map<String, Integer>>();

    FileSystem fs = FileSystem.get(conf_);
    SequenceFile.Reader reader = null;

    Path resPath = new Path(tempDirName);
    FileStatus[] stats = fs.listStatus(resPath);

    // read all output files
    for (FileStatus stat : stats) {
        if (stat.getPath().toUri().toString().contains("part-r-"))
            try {
                s_logger.info("Reading results from " + stat.getPath());
                reader = new SequenceFile.Reader(fs, stat.getPath(), conf_);
                // Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf_);
                // MapWritable value = (MapWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf_);
                Text key = new Text();
                MapWritable value = new MapWritable();

                while (reader.next(key, value)) {
                    TreeMap<String, Integer> valueCounts = new TreeMap<String, Integer>();
                    for (Writable attValue : value.keySet()) {
                        valueCounts.put(((Text) attValue).toString(),
                                ((IntWritable) (value.get(attValue))).get());
                    }
                    attributeValues_.put(key.toString(), valueCounts);
                }
            } finally {
                IOUtils.closeStream(reader);
            }
    }

    fs.deleteOnExit(resPath);

    return result;
}

From source file:smile.wide.AttributeValueHistogramMapper.java

License:Apache License

@SuppressWarnings("unchecked")
@Override
public void map(LongWritable offsetkey, Text value, Context context) {
    if (initializing_) {
        conf_ = context.getConfiguration();
        fileReaderClass_ = conf_.get("xdata.bayesnets.datasetreader.class");
        fileReaderFilter_ = conf_.get("xdata.bayesnets.datasetreader.filter");
        columnNames_ = conf_.get("xdata.bayesnets.datasetreader.variablenames").split(",");
        assertEquals(columnNames_.length, fileReaderFilter_.split(",").length);

        try {
            Object r = Class.forName(fileReaderClass_).newInstance();
            reader_ = (DataSetReader<Integer, String>) r;
        } catch (InstantiationException e) {
            s_logger.error("Instantiation exception for DataSetReader " + fileReaderClass_);
            e.printStackTrace();
            System.exit(1);
        } catch (IllegalAccessException e) {
            s_logger.error("IllegalAccess exception for DataSetReader " + fileReaderClass_);
            e.printStackTrace();
            System.exit(1);
        } catch (ClassNotFoundException e) {
            s_logger.error("ClassDefNotFoundException for DataSetReader " + fileReaderClass_);
            e.printStackTrace();
            System.exit(1);
        } catch (ClassCastException e) {
            s_logger.error("ClassCastException for DataSetReader " + fileReaderClass_);
            e.printStackTrace();
            System.exit(1);
        }
        reader_.setFilter(fileReaderFilter_);
        reader_.setInstanceIDColumn(1); // doesn't matter, won't use

        initializing_ = false;
    }

    // we're initialized

    Instance<Integer, String> inst = reader_.parseLine(value.toString());
    String[] vals = inst.getValue();

    try {
        for (int i = 0; i < vals.length; ++i) {
            MapWritable mw = new MapWritable();
            mw.put(new Text(vals[i]), new IntWritable(1));
            context.write(new Text(columnNames_[i]), mw);
        }
    } catch (IOException e) {
        s_logger.error("I/O exception writing  the map output");
        e.printStackTrace();
    } catch (InterruptedException e) {
        s_logger.error("Interrupted writing the map output");
        e.printStackTrace();
    } catch (NullPointerException e) {
        s_logger.error("Null pointer, probably unexpected data");
        s_logger.error("Instance ID = " + inst.getID());
        for (int i = 0; i < inst.getValue().length; ++i) {
            s_logger.error("Attribute_" + i + " = " + inst.getValue()[i]);
        }
    }

}

From source file:smile.wide.AttributeValueHistogramReducer.java

License:Apache License

@Override
public void reduce(Text key, Iterable<MapWritable> values, Context context)
        throws IOException, InterruptedException {
    // Let's have a map and internally collect them

    int maps = 0;
    int vals = 0;

    HashMap<Text, Integer> myMap = new HashMap<Text, Integer>();

    for (MapWritable m : values) {
        maps++;
        for (Writable valName : m.keySet()) {

            Text val = (Text) valName;
            Integer count = ((IntWritable) (m.get(valName))).get();
            if (myMap.containsKey(val)) {
                myMap.put(val, myMap.get(val) + count);
            } else {
                myMap.put(val, count);
                vals++;
            }
        }
    }

    s_logger.debug("Reducer/combiner got " + maps + " maps, with a total of " + vals
            + " distinct values for attribute `" + key + "`");

    // now output
    // key is key 
    // value is myMap as MapWritable<Text, IntWritable>

    MapWritable output = new MapWritable();
    for (Text t : myMap.keySet()) {
        s_logger.debug("Outputting count " + myMap.get(t) + " for attribute " + t);
        output.put(t, new IntWritable(myMap.get(t)));
    }

    context.write(key, output);

}

From source file:tl.lin.data.benchmark.basic.BenchmarkHashMapWritable.java

License:Apache License

/**
 * Runs this benchmark.
 */
public static void main(String[] args) throws Exception {
    long startTime = System.currentTimeMillis();
    int numTrials = 100000;

    Random rand = new Random();

    ByteArrayOutputStream[] storageHashMapWritable = new ByteArrayOutputStream[numTrials];
    for (int i = 0; i < numTrials; i++) {
        HashMapWritable<IntWritable, IntWritable> map = new HashMapWritable<IntWritable, IntWritable>();

        int size = rand.nextInt(50) + 50;

        for (int j = 0; j < size; j++) {
            map.put(new IntWritable(rand.nextInt(10000)), new IntWritable(rand.nextInt(10)));
        }

        ByteArrayOutputStream bytesOut = new ByteArrayOutputStream();
        DataOutputStream dataOut = new DataOutputStream(bytesOut);

        map.write(dataOut);
        storageHashMapWritable[i] = bytesOut;
    }

    System.out.println("Generating and serializing " + numTrials + " random HashMapWritables: "
            + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    startTime = System.currentTimeMillis();

    ByteArrayOutputStream[] storageMapWritable = new ByteArrayOutputStream[numTrials];
    for (int i = 0; i < numTrials; i++) {
        MapWritable map = new MapWritable();

        int size = rand.nextInt(50) + 50;

        for (int j = 0; j < size; j++) {
            map.put(new IntWritable(rand.nextInt(10000)), new IntWritable(rand.nextInt(10)));
        }

        ByteArrayOutputStream bytesOut = new ByteArrayOutputStream();
        DataOutputStream dataOut = new DataOutputStream(bytesOut);

        map.write(dataOut);
        storageMapWritable[i] = bytesOut;
    }

    System.out.println("Generating and serializing " + numTrials + " random MapWritables: "
            + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    float cntA = 0.0f;
    float cntB = 0.0f;
    for (int i = 0; i < numTrials; i++) {
        cntA += storageHashMapWritable[i].size();
        cntB += storageMapWritable[i].size();
    }

    System.out.println("Average size of each HashMapWritable: " + cntA / numTrials);
    System.out.println("Average size of each MapWritable: " + cntB / numTrials);

    startTime = System.currentTimeMillis();

    for (int i = 0; i < numTrials; i++) {
        HashMapWritable<IntWritable, IntWritable> map = new HashMapWritable<IntWritable, IntWritable>();

        map.readFields(new DataInputStream(new ByteArrayInputStream(storageHashMapWritable[i].toByteArray())));
    }

    System.out.println("Deserializing " + numTrials + " random HashMapWritables: "
            + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    startTime = System.currentTimeMillis();

    for (int i = 0; i < numTrials; i++) {
        MapWritable map = new MapWritable();

        map.readFields(new DataInputStream(new ByteArrayInputStream(storageMapWritable[i].toByteArray())));
    }

    System.out.println("Deserializing " + numTrials + " random MapWritables: "
            + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

}

From source file:uk.ac.cam.eng.extraction.hadoop.util.ExtractorDataLoader.java

License:Apache License

/**
 * Loads word aligned parallel text to HDFS.
 *
 * @param sourceTextFile The source text file, gzipped, with one sentence
 * per line, same number of lines as targetTextFile.
 * @param targetTextFile The target text file, gzipped, with one sentence
 * per line, same number of lines as sourceTextFile.
 * @param wordAlignmentFile The word alignment file, gzipped, one alignment
 * per line in Berkeley format ("0-0<SPACE>1-2, etc.", zero-based source
 * index on the left), same number of lines as sourceTextFile.
 * @param provenanceFile The provenance file, gzipped, one set of
 * provenances per line with format "prov1<SPACE>prov2, etc.", same number
 * of lines as sourceTextFile.
 * @param hdfsName
 * @throws IOException
 */
public void loadTrainingData2Hdfs(String sourceTextFile, String targetTextFile, String wordAlignmentFile,
        String provenanceFile, String hdfsName) throws FileNotFoundException, IOException {

    try (BufferedReader src = new BufferedReader(
            new InputStreamReader(new GZIPInputStream(new FileInputStream(sourceTextFile))));
            BufferedReader trg = new BufferedReader(
                    new InputStreamReader(new GZIPInputStream(new FileInputStream(targetTextFile))));
            BufferedReader align = new BufferedReader(
                    new InputStreamReader(new GZIPInputStream(new FileInputStream(wordAlignmentFile))));
            BufferedReader prov = new BufferedReader(
                    new InputStreamReader(new GZIPInputStream(new FileInputStream(provenanceFile))))) {

        String srcLine = null, trgLine = null, alignLine = null, provLine = null;
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path(hdfsName);
        try (SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, MapWritable.class,
                TextArrayWritable.class)) {
            Text sourceSentenceText = new Text();
            Text targetSentenceText = new Text();
            Text alignmentText = new Text();
            Text[] array = new Text[3];
            array[0] = sourceSentenceText;
            array[1] = targetSentenceText;
            array[2] = alignmentText;
            TextArrayWritable arrayWritable = new TextArrayWritable();
            // metadata: provenance, e.g. genre, collection, training
            // instance
            // id, doc id, etc.
            MapWritable metadata = new MapWritable();

            while ((srcLine = src.readLine()) != null && (trgLine = trg.readLine()) != null
                    && (alignLine = align.readLine()) != null && (provLine = prov.readLine()) != null) {
                metadata.clear();
                String[] provenances = provLine.split("\\s+");
                for (String provenance : provenances) {
                    metadata.put(new Text(provenance), NullWritable.get());
                }
                sourceSentenceText.set(srcLine);
                targetSentenceText.set(trgLine);
                // note, alignLine can be the empty string
                alignmentText.set(alignLine);
                arrayWritable.set(array);
                writer.append(metadata, arrayWritable);
            }
        }
    }
}
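
Given the parameter descriptions in the javadoc above, a hypothetical call might look like the following. Every literal here is invented for illustration, and the snippet assumes ExtractorDataLoader has an accessible no-argument constructor.

// Hypothetical invocation of loadTrainingData2Hdfs; file names and HDFS path are made up.
ExtractorDataLoader loader = new ExtractorDataLoader();
loader.loadTrainingData2Hdfs(
        "corpus.src.gz",                  // gzipped source text, one sentence per line
        "corpus.trg.gz",                  // gzipped target text, same number of lines
        "corpus.align.gz",                // gzipped Berkeley-format alignments, e.g. "0-0 1-2"
        "corpus.prov.gz",                 // gzipped provenance tokens, e.g. "news web"
        "/data/extraction/training.seq"); // destination SequenceFile on HDFS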