List of usage examples for the org.apache.hadoop.io.MapWritable no-argument constructor:
public MapWritable()
From source file:com.ikanow.aleph2.search_service.elasticsearch.hadoop.assets.TestAleph2EsInputFormat.java
License:Apache License
@Test public void test_Aleph2EsRecordReader_objectConversion() throws IOException, InterruptedException { @SuppressWarnings("rawtypes") final RecordReader mock_shard_record_reader = Mockito.mock(RecordReader.class); // mock returns Text key, MapWritable value Mockito.when(mock_shard_record_reader.getCurrentKey()).thenReturn(new Text("text_test")); final MapWritable test_out = new MapWritable(); test_out.put(new Text("val_key_text"), new Text("val_val_text")); Mockito.when(mock_shard_record_reader.getCurrentValue()).thenReturn(test_out); final Aleph2EsRecordReader reader_under_test = new Aleph2EsRecordReader(mock_shard_record_reader); final String key = reader_under_test.getCurrentKey(); assertEquals(String.class, key.getClass()); assertEquals("text_test", key); final Tuple2<Long, IBatchRecord> value = reader_under_test.getCurrentValue(); assertEquals(0L, value._1().longValue()); // (so something breaks in here when/if we put some logic in) assertEquals(Optional.empty(), value._2().getContent()); final JsonNode json_val = value._2().getJson(); assertTrue("Is object: " + json_val, json_val.isObject()); assertEquals("val_val_text", json_val.get("val_key_text").asText()); assertEquals("text_test", json_val.get("_id").asText()); }
From source file:com.ikanow.aleph2.search_service.elasticsearch.utils.TestJsonNodeWritableUtils.java
License:Apache License
/**
 * Exercises JsonNodeWritableUtils.from(): converting nested MapWritable/ArrayWritable
 * structures into Jackson JsonNodes, then mutating the resulting ObjectNode wrappers
 * (put / remove / putAll / removeAll) and checking containsKey/containsValue semantics.
 */
@SuppressWarnings("deprecation")
@Test
public void test_mapWritableWrapper() {
    final ObjectMapper mapper = BeanTemplateUtils.configureMapper(Optional.empty());
    // Build: m2 = { nested: m1, test2: "test2", array: [m1, m1] }
    //        m1 = { test1: true, array: [4, 5] }
    final MapWritable m1 = new MapWritable();
    m1.put(new Text("test1"), new BooleanWritable(true));
    final MapWritable m2 = new MapWritable();
    m2.put(new Text("nested"), m1);
    m2.put(new Text("test2"), new Text("test2"));
    final ArrayWritable a1 = new ArrayWritable(IntWritable.class);
    a1.set(new Writable[] { new IntWritable(4), new IntWritable(5) });
    final ArrayWritable a2 = new ArrayWritable(MapWritable.class);
    a2.set(new Writable[] { m1, m1 });
    m2.put(new Text("array"), a2);
    m1.put(new Text("array"), a1);
    final JsonNode j2 = JsonNodeWritableUtils.from(m2);
    assertEquals(3, j2.size());
    // Check j's contents
    assertEquals(Stream.of("nested", "test2", "array").sorted().collect(Collectors.toList()),
            Optionals.streamOf(j2.fieldNames(), false).sorted().collect(Collectors.toList()));
    assertEquals("test2", j2.get("test2").asText());
    final JsonNode j1 = j2.get("nested");
    assertEquals(2, j1.size());
    final JsonNode j1b = JsonNodeWritableUtils.from(m1);
    // Field order is not guaranteed, so accept either serialization
    assertTrue("{\"test1\":true,\"array\":[4,5]}".equals(j1b.toString())
            || "{\"array\":[4,5],\"test1\":true}".equals(j1b.toString())); //(tests entrySet)
    final ArrayNode an = mapper.createArrayNode();
    an.add(mapper.convertValue(4, JsonNode.class));
    an.add(mapper.convertValue(5, JsonNode.class));
    assertEquals(Arrays.asList(mapper.convertValue(true, JsonNode.class), an),
            Optionals.streamOf(((ObjectNode) j1).elements(), false).collect(Collectors.toList()));
    // OK, now test adding:
    assertEquals(2, j1.size());
    final ObjectNode o1 = (ObjectNode) j1;
    o1.put("added", "added_this");
    final ObjectNodeWrapper o1c = (ObjectNodeWrapper) o1;
    assertFalse(o1c.containsKey("not_present"));
    assertTrue(o1c.containsKey("added"));
    assertTrue(o1c.containsKey("test1"));
    assertEquals(Stream.of("test1", "array", "added").sorted().collect(Collectors.toList()),
            Optionals.streamOf(j1.fieldNames(), false).sorted().collect(Collectors.toList()));
    assertEquals(
            Arrays.asList(mapper.convertValue(true, JsonNode.class), an,
                    mapper.convertValue("added_this", JsonNode.class)),
            Optionals.streamOf(((ObjectNode) j1).elements(), false).collect(Collectors.toList()));
    assertTrue(j1.toString().contains("added_this"));
    assertTrue(j1.toString().contains("4,5"));
    assertEquals(mapper.convertValue("added_this", JsonNode.class), j1.get("added"));
    assertEquals(3, j1.size());
    // OK now test removing:
    assertEquals(null, o1.remove("not_present"));
    assertEquals(mapper.convertValue(true, JsonNode.class), o1.remove("test1"));
    assertEquals(2, o1.size());
    ObjectNode o1b = o1.remove(Arrays.asList("added", "array"));
    assertEquals(0, o1.size());
    assertEquals(0, o1b.size());
    o1.putAll(JsonNodeWritableUtils.from(m1)); // will be minus one object
    assertEquals(2, o1.size());
    assertTrue(o1c.containsValue(mapper.convertValue(true, JsonNode.class)));
    assertFalse(o1c.containsValue("banana"));
    // Re-convert m2 and exercise the wrapper's map-style queries plus removeAll
    final ObjectNodeWrapper o2 = (ObjectNodeWrapper) JsonNodeWritableUtils.from(m2);
    assertFalse(o2.isEmpty());
    assertTrue(o2.containsKey("array"));
    assertFalse(o2.containsValue("array"));
    assertTrue(o2.containsValue(mapper.convertValue("test2", JsonNode.class)));
    assertEquals(TextNode.class, o2.remove("test2").getClass());
    assertEquals(2, o2.size());
    o2.removeAll();
    assertEquals(0, o2.size());
}
From source file:com.jfolson.hive.serde.RTypedBytesWritableInput.java
License:Apache License
public MapWritable readMap(MapWritable mw) throws IOException { if (mw == null) { mw = new MapWritable(); }//from w w w . j a v a 2 s .c o m int length = in.readMapHeader(); for (int i = 0; i < length; i++) { Writable key = read(); Writable value = read(); mw.put(key, value); } return mw; }
From source file:com.ML_Hadoop.NaiveBayesClassifier_Continuous_Features.NaiveBayesClassifierMap_Continuous_Features.java
@Override // is used as innermap to aggregate data before shuffling
protected void cleanup(Context context) throws IOException, InterruptedException {
    // Per-feature accumulators, reset for each class id:
    //   sigma_x     - sum of feature values over the class members
    //   sigma_x2    - sum of squared feature values
    //   mu_x_local  - local (this-mapper) mean per feature
    //   num_x_local - local member count per feature
    Float[] sigma_x2 = new Float[number_of_features];
    Float[] sigma_x = new Float[number_of_features];
    Float[] mu_x_local = new Float[number_of_features];
    Float[] num_x_local = new Float[number_of_features];
    MapWritable[] map_output = new MapWritable[number_of_features];
    // It is a MUST to initilize all arrays before usage.
    for (int class_id = 0; class_id < number_of_classes; class_id++) {
        for (int i = 0; i < number_of_features; i++) {
            map_output[i] = new MapWritable(); // the way to initilize MapWritable[]
            sigma_x2[i] = 0.0f;
            sigma_x[i] = 0.0f;
            mu_x_local[i] = 0.0f;
            num_x_local[i] = 0.0f;
        }
        // Accumulate sum and sum-of-squares over every member of this class
        for (int member_id_in_a_class_id = 0; member_id_in_a_class_id < num_of_members_in_each_class[class_id]; member_id_in_a_class_id++) {
            for (int feature_id_in_a_member_id = 0; feature_id_in_a_member_id < number_of_features; feature_id_in_a_member_id++) {
                sigma_x[feature_id_in_a_member_id] += (features_probabilities.get(class_id)
                        .get(member_id_in_a_class_id))[feature_id_in_a_member_id];
                sigma_x2[feature_id_in_a_member_id] += (features_probabilities.get(class_id)
                        .get(member_id_in_a_class_id))[feature_id_in_a_member_id]
                        * ((features_probabilities.get(class_id)
                                .get(member_id_in_a_class_id))[feature_id_in_a_member_id]);
            }
        }
        // Local mean = sum / count, guarding against an empty class
        for (int feature_id_in_a_member_id = 0; feature_id_in_a_member_id < number_of_features; feature_id_in_a_member_id++) {
            num_x_local[feature_id_in_a_member_id] = (float) num_of_members_in_each_class[class_id];
            if (num_x_local[feature_id_in_a_member_id] == 0)
                mu_x_local[feature_id_in_a_member_id] = 0.0f;
            else
                mu_x_local[feature_id_in_a_member_id] = sigma_x[feature_id_in_a_member_id]
                        / num_x_local[feature_id_in_a_member_id];
        }
        for (int feature_id_in_a_member_id = 0; feature_id_in_a_member_id < number_of_features; feature_id_in_a_member_id++) {
            // key of MAP must be Writable (i.e., new Text("...")), but new string("...") is wrong.
            // value of MAP must be Writable or one subset !!! like FloatWritable
            map_output[feature_id_in_a_member_id].put(new Text("sigma_x"),
                    new FloatWritable(sigma_x[feature_id_in_a_member_id]));
            map_output[feature_id_in_a_member_id].put(new Text("sigma_x2"),
                    new FloatWritable(sigma_x2[feature_id_in_a_member_id]));
            map_output[feature_id_in_a_member_id].put(new Text("mu_x_local"),
                    new FloatWritable(mu_x_local[feature_id_in_a_member_id]));
            map_output[feature_id_in_a_member_id].put(new Text("num_x_local"),
                    new FloatWritable(num_x_local[feature_id_in_a_member_id]));
        }
        // Emit one record per class: (class id -> array of per-feature statistic maps)
        context.write(new LongWritable(class_id), new MapArrayWritable(map_output));
    }
}
From source file:com.ML_Hadoop.NaiveBayesClassifier_Continuous_Features.NaiveBayesClassifierReduce_Continuous_Features.java
@Override // necessary otherwise it runs default reduce() public void reduce(LongWritable key, Iterable<MapArrayWritable> values, Context context) throws IOException, InterruptedException { int key_index = (int) key.get(); Float[] sigma_x2 = new Float[number_of_features]; Float[] mu_x = new Float[number_of_features]; Float[] num_x = new Float[number_of_features]; Float[] partial_num_x = new Float[number_of_features]; Float[] total_num_x = new Float[number_of_features]; Float[] class_id_mu = new Float[number_of_features]; Float[] class_id_std = new Float[number_of_features]; MapWritable[] t = new MapWritable[number_of_features]; // It is a MUST to initilize all arrays before usage. for (int i = 0; i < number_of_features; i++) { t[i] = new MapWritable(); // each member of an array (including MapWritable[] ) MUST be initilized before use sigma_x2[i] = 0.0f;//from w w w. j a v a2 s.c o m mu_x[i] = 0.0f; num_x[i] = 0.0f; partial_num_x[i] = 0.0f; total_num_x[i] = 0.0f; class_id_mu[i] = 0.0f; class_id_std[i] = 0.0f; } for (MapArrayWritable val : values) { for (int i = 0; i < number_of_features; i++) { num_x[i] = ((FloatWritable) ((MapWritable) (val.get()[i])).get(new Text("num_x_local"))).get(); sigma_x2[i] += ((FloatWritable) ((MapWritable) (val.get()[i])).get(new Text("sigma_x2"))).get(); mu_x[i] = ((FloatWritable) ((MapWritable) (val.get()[i])).get(new Text("mu_x_local"))).get(); partial_num_x[i] += mu_x[i] * num_x[i]; // calculates mu(i)*N(i) total_num_x[i] += num_x[i]; // calculates total N=N1+N2+...+Nk } } for (int i = 0; i < number_of_features & total_num_x[0] != 0; i++) { class_id_mu[i] = partial_num_x[i] / total_num_x[i]; class_id_std[i] = sigma_x2[i] / total_num_x[i] - (class_id_mu[i] * class_id_mu[i]); } for (int i = 0; i < number_of_features & total_num_x[0] != 0; i++) { t[i].put(new Text("class_id_mu"), new FloatWritable(class_id_mu[i])); t[i].put(new Text("class_id_std"), new FloatWritable(class_id_std[i])); } probablity_info_output.set(key_index, t); }
From source file:com.shmsoft.dmass.main.FileProcessor.java
License:Apache License
/** * Create a map/*from ww w .j av a 2s . com*/ * * @param metadata Hadoop metadata to insert into map * @param fileName File currently in process * @return Created map * @throws IOException */ private MapWritable createMapWritable(Metadata metadata, String fileName) throws IOException { MapWritable mapWritable = new MapWritable(); String[] names = metadata.names(); for (String name : names) { mapWritable.put(new Text(name), new Text(metadata.get(name))); } byte[] bytes = new File(fileName).length() < ParameterProcessing.ONE_GIG ? Util.getFileContent(fileName) : "File too large".getBytes(); mapWritable.put(new Text(ParameterProcessing.NATIVE), new BytesWritable(bytes)); if (isPdf()) { String pdfFileName = fileName + ".pdf"; if (new File(pdfFileName).exists()) { byte[] pdfBytes = Util.getFileContent(pdfFileName); mapWritable.put(new Text(ParameterProcessing.NATIVE_AS_PDF), new BytesWritable(pdfBytes)); } } return mapWritable; }
From source file:com.shmsoft.dmass.main.ZipFileProcessor.java
License:Apache License
/** * Create a map/*from ww w . j a v a 2s . c o m*/ * * @param metadata Tika class of key/value pairs to place in map * @return MapWritable with key/value pairs added */ private MapWritable createMapWritable(Metadata metadata) { MapWritable mapWritable = new MapWritable(); String[] names = metadata.names(); for (String name : names) { String value = metadata.get(name); // TODO how could value be null? (but it did happen to me) if (value == null) { value = ""; } mapWritable.put(new Text(name), new Text(value)); } return mapWritable; }
From source file:crunch.MaxTemperature.java
License:Apache License
@Test public void mapWritable() throws IOException { // vv MapWritableTest MapWritable src = new MapWritable(); src.put(new IntWritable(1), new Text("cat")); src.put(new VIntWritable(2), new LongWritable(163)); MapWritable dest = new MapWritable(); WritableUtils.cloneInto(dest, src); assertThat((Text) dest.get(new IntWritable(1)), is(new Text("cat"))); assertThat((LongWritable) dest.get(new VIntWritable(2)), is(new LongWritable(163))); // ^^ MapWritableTest }/*from w w w .j a v a2 s . c o m*/
From source file:crunch.MaxTemperature.java
License:Apache License
/** Shows how a Writable set can be emulated with a MapWritable whose values are NullWritable. */
@Test
public void setWritableEmulation() throws IOException {
    final MapWritable source = new MapWritable();
    source.put(new IntWritable(1), NullWritable.get());
    source.put(new IntWritable(2), NullWritable.get());

    // Clone through serialization and check membership of one of the "set" elements
    final MapWritable copy = new MapWritable();
    WritableUtils.cloneInto(copy, source);
    assertThat(copy.containsKey(new IntWritable(1)), is(true));
}
From source file:de.averbis.eucases.outlinkmeta.nutch.parse.OutlinkMetaParseFilter.java
License:Open Source License
/** * Creates the metadata for an outlink according to the configured fields. Empty fields are not added. * /*from w w w . j av a 2 s . c om*/ * @param metadata * The metadata created by the previous parsers * @return The metadata for the outlink */ private MapWritable createOutlinkAnnotations(Metadata metadata) { MapWritable md = new MapWritable(); for (String field : this.getFields()) { NutchField nutchField = new NutchField(); for (String value : metadata.getValues(field)) { nutchField.add(value); } if (nutchField.getValues().size() > 0) { md.put(new Text(field), nutchField); } } return md; }