List of usage examples for org.apache.hadoop.io FloatWritable FloatWritable
public FloatWritable(float value)
From source file:com.uber.hoodie.hadoop.realtime.AbstractRealtimeRecordReader.java
License:Apache License
/** * Convert the projected read from delta record into an array writable *//*w w w. j a v a 2s . c o m*/ public static Writable avroToArrayWritable(Object value, Schema schema) { if (value == null) { return null; } switch (schema.getType()) { case STRING: return new Text(value.toString()); case BYTES: return new BytesWritable((byte[]) value); case INT: return new IntWritable((Integer) value); case LONG: return new LongWritable((Long) value); case FLOAT: return new FloatWritable((Float) value); case DOUBLE: return new DoubleWritable((Double) value); case BOOLEAN: return new BooleanWritable((Boolean) value); case NULL: return null; case RECORD: GenericRecord record = (GenericRecord) value; Writable[] recordValues = new Writable[schema.getFields().size()]; int recordValueIndex = 0; for (Schema.Field field : schema.getFields()) { recordValues[recordValueIndex++] = avroToArrayWritable(record.get(field.name()), field.schema()); } return new ArrayWritable(Writable.class, recordValues); case ENUM: return new Text(value.toString()); case ARRAY: GenericArray arrayValue = (GenericArray) value; Writable[] arrayValues = new Writable[arrayValue.size()]; int arrayValueIndex = 0; for (Object obj : arrayValue) { arrayValues[arrayValueIndex++] = avroToArrayWritable(obj, schema.getElementType()); } // Hive 1.x will fail here, it requires values2 to be wrapped into another ArrayWritable return new ArrayWritable(Writable.class, arrayValues); case MAP: Map mapValue = (Map) value; Writable[] mapValues = new Writable[mapValue.size()]; int mapValueIndex = 0; for (Object entry : mapValue.entrySet()) { Map.Entry mapEntry = (Map.Entry) entry; Writable[] nestedMapValues = new Writable[2]; nestedMapValues[0] = new Text(mapEntry.getKey().toString()); nestedMapValues[1] = avroToArrayWritable(mapEntry.getValue(), schema.getValueType()); mapValues[mapValueIndex++] = new ArrayWritable(Writable.class, nestedMapValues); } // Hive 1.x will fail here, it requires values3 to be wrapped into another 
ArrayWritable return new ArrayWritable(Writable.class, mapValues); case UNION: List<Schema> types = schema.getTypes(); if (types.size() != 2) { throw new IllegalArgumentException("Only support union with 2 fields"); } Schema s1 = types.get(0); Schema s2 = types.get(1); if (s1.getType() == Schema.Type.NULL) { return avroToArrayWritable(value, s2); } else if (s2.getType() == Schema.Type.NULL) { return avroToArrayWritable(value, s1); } else { throw new IllegalArgumentException("Only support union with null"); } case FIXED: return new BytesWritable(((GenericFixed) value).bytes()); default: return null; } }
From source file:com.yahoo.sketches.hive.theta.DataToSketchUDAFTest.java
License:Apache License
@Test public void partial1ModeStringValuesExplicitParameters() throws Exception { ObjectInspector[] inspectors = new ObjectInspector[] { stringInspector, intConstantInspector, floatConstantInspector, longConstantInspector }; GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false); GenericUDAFEvaluator eval = new DataToSketchUDAF().getEvaluator(info); ObjectInspector resultInspector = eval.init(Mode.PARTIAL1, inspectors); checkIntermediateResultInspector(resultInspector); final long seed = 1; UnionState state = (UnionState) eval.getNewAggregationBuffer(); eval.iterate(state, new Object[] { new Text("a"), new IntWritable(8), new FloatWritable(0.99f), new LongWritable(seed) }); eval.iterate(state, new Object[] { new Text("b"), new IntWritable(8), new FloatWritable(0.99f), new LongWritable(seed) }); Object result = eval.terminatePartial(state); Assert.assertNotNull(result);/* www.j a v a2s.co m*/ Assert.assertTrue(result instanceof List); List<?> r = (List<?>) result; Assert.assertEquals(r.size(), 3); Assert.assertEquals(((IntWritable) (r.get(0))).get(), 8); Assert.assertEquals(((LongWritable) (r.get(1))).get(), seed); Sketch resultSketch = Sketches.heapifySketch(new NativeMemory(((BytesWritable) (r.get(2))).getBytes()), seed); // because of sampling probability < 1 Assert.assertTrue(resultSketch.isEstimationMode()); Assert.assertEquals(resultSketch.getEstimate(), 2.0, 0.05); // check if seed is correct in the result Union union = SetOperation.builder().setSeed(seed).buildUnion(); // this must fail if the seed is incompatible union.update(resultSketch); eval.close(); }
From source file:com.yahoo.sketches.hive.theta.DataToSketchUDAFTest.java
License:Apache License
@Test public void completeModeDoubleValuesExplicitParameters() throws Exception { ObjectInspector[] inspectors = new ObjectInspector[] { doubleInspector, intConstantInspector, floatConstantInspector, longConstantInspector }; GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false); GenericUDAFEvaluator eval = new DataToSketchUDAF().getEvaluator(info); ObjectInspector resultInspector = eval.init(Mode.COMPLETE, inspectors); checkFinalResultInspector(resultInspector); final long seed = 2; UnionState state = (UnionState) eval.getNewAggregationBuffer(); eval.iterate(state, new Object[] { new DoubleWritable(1), new IntWritable(8), new FloatWritable(0.99f), new LongWritable(seed) }); eval.iterate(state, new Object[] { new DoubleWritable(2), new IntWritable(8), new FloatWritable(0.99f), new LongWritable(seed) }); Object result = eval.terminate(state); Assert.assertNotNull(result);/*from w w w .j ava 2s . com*/ Assert.assertTrue(result instanceof BytesWritable); Sketch resultSketch = Sketches.heapifySketch(new NativeMemory(((BytesWritable) result).getBytes()), seed); // because of sampling probability < 1 Assert.assertTrue(resultSketch.isEstimationMode()); Assert.assertEquals(resultSketch.getEstimate(), 2.0, 0.05); eval.close(); }
From source file:dinocode.SpeciesViewerMapperd.java
public void map(WritableComparable key, Writable value, OutputCollector output, Reporter reporter) throws IOException { // get the current page String data = ((Text) value).toString(); int index = data.indexOf(":"); if (index == -1) { return;/*from w w w .ja va 2s . c o m*/ } // split into title and PR (tab or variable number of blank spaces) String toParse = data.substring(0, index).trim(); String[] splits = toParse.split("\t"); if (splits.length == 0) { splits = toParse.split(" "); if (splits.length == 0) { return; } } String pagetitle = splits[0].trim(); String pagerank = splits[splits.length - 1].trim(); // parse score double currScore = 0.0; try { currScore = Double.parseDouble(pagerank); } catch (Exception e) { currScore = 0.0; } // collect //output.collect(new FloatWritable((float) - currScore), key); boolean allLetters = pagetitle.chars().allMatch(Character::isLetter); if (allLetters && pagetitle.length() > 2) { output.collect(new FloatWritable((float) -currScore), new Text(pagetitle)); } }
From source file:edu.brown.cs.mapreduce.generator.DataLoader.java
License:Open Source License
/** * @param args//from w w w .j a va 2 s . c o m */ public static void main(String[] args) { List<String> otherArgs = new ArrayList<String>(); for (int i = 0; i < args.length; i++) { if ("-compress".equals(args[i])) { DataLoader.compress = true; DataLoader.sequence = true; } else if ("-sequence".equals(args[i])) { DataLoader.sequence = true; } else if ("-tuple".equals(args[i])) { DataLoader.tuple = true; } else if ("-local".equals(args[i])) { DataLoader.local = true; } else if ("-limit".equals(args[i])) { DataLoader.limit = Integer.parseInt(args[++i]); } else if ("-xargs".equals(args[i])) { DataLoader.xargs = true; } else if ("-debug".equals(args[i])) { DataLoader.debug = true; } else { otherArgs.add(args[i]); } } // FOR if (otherArgs.size() < 3 && !DataLoader.xargs) { System.err.println("USAGE: DataLoader <input type> <input file> <output file>"); System.exit(1); } String input_type = otherArgs.get(0).toLowerCase(); String input_file = otherArgs.get(1); String output_file = null; if (DataLoader.xargs) { output_file = input_file + ".dl"; } else { output_file = otherArgs.get(2); } boolean valid = false; for (String type : DataLoader.VALID_TYPES) { if (type.equals(input_type)) { valid = true; break; } } if (!valid) { System.err.println("ERROR: Invalid input data type '" + input_type + "'"); System.exit(1); } if (debug) { System.out.println("Input Type: " + input_type); System.out.println("Input File: " + input_file); System.out.println("Output File: " + output_file); System.out.println("Limit: " + DataLoader.limit); System.out.println("Local: " + DataLoader.local); System.out.println("XArgs: " + DataLoader.xargs); } // // Get HDFS filesystem object that we can use for writing // FileSystem fs = null; Configuration conf = null; if (!DataLoader.local) { conf = AbstractHadoopClient.getConfiguration(); try { fs = FileSystem.get(conf); } catch (Exception ex) { ex.printStackTrace(); System.exit(-1); } if (debug) System.out.println("fs.default.name: " + 
conf.get("fs.default.name")); } // // Now open the file that we want to read and start writing the contents to our file system // For some things, like 'urls' we will want reverse the order so that the data makes sense // in our key->value paradigm // BufferedReader in = null; DataOutputStream out = null; SequenceFile.Writer writer = null; int lines = 0; try { if (input_file.equals("-")) { in = new BufferedReader(new InputStreamReader(System.in)); } else { in = new BufferedReader(new FileReader(input_file)); } } catch (FileNotFoundException ex) { System.err.println("ERROR: The input file '" + input_file + "' was not found : " + ex.getMessage()); System.exit(1); } try { if (!DataLoader.local) { // // FileSystem Writer // if (!DataLoader.sequence) { out = fs.create(new Path(output_file)); // // SequenceFile Writer // } else { if (input_type.equals("sortgrep")) DataLoader.tuple = false; if (DataLoader.debug) System.out.print("Creating " + (DataLoader.compress ? "compressed " : "") + "SequenceFile.Writer for '" + output_file + "': "); Class<? extends Writable> key_class = Text.class; Class<? extends Writable> value_class = null; if (DataLoader.tuple) { if (input_type.equals("uservisits")) value_class = UserVisitsTuple.class; if (input_type.equals("rankings")) value_class = RankingsTuple.class; } else { value_class = Text.class; } writer = SequenceFile.createWriter(fs, conf, new Path(output_file), key_class, value_class, (DataLoader.compress ? 
SequenceFile.CompressionType.BLOCK : SequenceFile.CompressionType.NONE)); if (DataLoader.debug) System.out.println("DONE!"); } // // Local Filesystem // } else { out = new DataOutputStream(new FileOutputStream(output_file, true)); } } catch (IOException ex) { System.err.println("ERROR: Failed to open output file '" + output_file + "' : " + ex.getMessage()); System.exit(1); } try { // // Now read in each line of the input file and append it to our output // while (in.ready()) { // // Ignore any misformated lines // String line = null; String key = ""; String value = ""; try { line = in.readLine(); String data[] = line.split("\\" + BenchmarkBase.VALUE_DELIMITER); // // Switch the two values in a rankings record // if (input_type.equals("rankings")) { key = data[1]; value = data[0]; for (int i = 2; i < data.length; i++) { value += BenchmarkBase.VALUE_DELIMITER + data[i]; } // FOR // // Change the comma to a tab // } else if (input_type.equals("convert") || input_type.equals("uservisits")) { key = data[0]; for (int i = 1; i < data.length; i++) { if (i != 1) value += BenchmarkBase.VALUE_DELIMITER; value += data[i]; } // FOR // // Don't do anything with the SortGrep data! // } else if (input_type.equals("sortgrep")) { key = line.substring(0, 10); value = line.substring(10); // // All others need to switch the first VALUE_DELIMITER to a KEYVALUE_DELIMITER // } else { line = line.replaceFirst(BenchmarkBase.VALUE_DELIMITER, BenchmarkBase.KEYVALUE_DELIMITER); } if (DataLoader.local || !DataLoader.sequence) { line = key + BenchmarkBase.KEYVALUE_DELIMITER + value + "\n"; out.write(line.getBytes()); } else { //if (DataLoader.debug) System.out.println("[" + lines + "] " + key + " => " + value); if (DataLoader.tuple) { try { data = value.split("\\" + BenchmarkBase.VALUE_DELIMITER); Writable tuple_values[] = new Writable[data.length]; Class<?> types[] = (input_type.equals("uservisits") ? 
BenchmarkBase.USERVISITS_TYPES : BenchmarkBase.RANKINGS_TYPES); for (int ctr = 0; ctr < data.length; ctr++) { // // Important! You have to subtract one from the types list // because the first one is really the key, but we're creating a tuple // on just the values!! // if (types[ctr + 1] == Text.class) { tuple_values[ctr] = new Text(data[ctr]); } else if (types[ctr + 1] == IntWritable.class) { tuple_values[ctr] = new IntWritable(Integer.valueOf(data[ctr])); } else if (types[ctr + 1] == DoubleWritable.class) { tuple_values[ctr] = new DoubleWritable(Double.valueOf(data[ctr])); } else if (types[ctr + 1] == LongWritable.class) { tuple_values[ctr] = new LongWritable(Long.valueOf(data[ctr])); } else if (types[ctr + 1] == FloatWritable.class) { tuple_values[ctr] = new FloatWritable(Float.valueOf(data[ctr])); } else { System.err.println("Unsupported Class: " + types[ctr + 1]); System.exit(1); } if (DataLoader.debug) System.out.println("tuple_values[" + ctr + "] = " + tuple_values[ctr]); } AbstractTuple tuple = (input_type.equals("uservisits") ? 
new UserVisitsTuple(tuple_values) : new RankingsTuple(tuple_values)); if (DataLoader.debug) System.out.println("STORING TUPLE: " + tuple + " (DATA " + data + " | VALUE " + value + ")"); writer.append(new Text(key), tuple); } catch (Exception ex) { ex.printStackTrace(); System.err.println("Error[" + output_file + "]"); System.err.println("## Line: " + lines); System.err.println("## Content: " + line); } } else { writer.append(new Text(key), new Text(value)); } } lines++; if (DataLoader.limit != null && lines >= DataLoader.limit) break; if (DataLoader.debug && lines % 1000000 == 0) System.out.println( "\tWrote " + lines + " '" + input_type + "' records to '" + output_file + "'"); } catch (Exception ex) { System.err.println("Error[" + output_file + "]"); System.err.println("## Line: " + lines); System.err.println("## Content: " + line); ex.printStackTrace(); System.exit(1); } } // WHILE } catch (Exception ex) { ex.printStackTrace(); System.exit(1); } finally { try { if (in != null) in.close(); if (out != null) out.close(); if (writer != null) writer.close(); } catch (Exception ex) { ex.printStackTrace(); System.exit(1); } } System.out.println("Wrote " + lines + " '" + input_type + "' records to '" + output_file + "'"); }
From source file:edu.ucsb.cs.hybrid.mappers.SingleS_Runner.java
License:Apache License
/** * Flush accumulator of the dynamically compared inverted index after one document is added to S * @param accumulator/*from ww w . j a v a 2s . c o m*/ * @param output: write the result of one-to-many comparisons into output stream * @throws IOException */ void flushAccumulator(float[] accumulator, ArrayList<Long> dynamicIdMap, OutputCollector<DocDocWritable, FloatWritable> output, float threshold) throws IOException { /* Exclude the document that is recently added */ int lastIndex = dynamicIdMap.size() - 1; long accID, thisId = dynamicIdMap.get(lastIndex); for (int i = 0; i < lastIndex; i++) { float sim = accumulator[i]; if ((sim > threshold) && ((accID = dynamicIdMap.get(i)) != thisId)) output.collect(new DocDocWritable(dynamicIdMap.get(i), thisId), new FloatWritable(sim)); } }
From source file:edu.ucsb.cs.lsh.statistics.LshStat.java
License:Apache License
public static void convertInput(String[] args) throws IOException { if (args.length != 3) printUsage(2);//from www.j ava2s . c o m String strLine, input = args[1], output_file = args[2]; Path outPath = new Path(output_file); Configuration conf = new Configuration(); FileSystem fs = outPath.getFileSystem(conf); SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, outPath, DocDocWritable.class, FloatWritable.class, SequenceFile.CompressionType.NONE); if ((new File(input)).isDirectory()) { for (File inputFile : (new File(input)).listFiles()) { BufferedReader br = new BufferedReader( new InputStreamReader(new DataInputStream(new FileInputStream(inputFile)))); while ((strLine = br.readLine()) != null) { writer.append(new DocDocWritable(0, 3), new FloatWritable(1)); } } } else { } writer.close(); }
From source file:edu.umd.cloud9.examples.AnalyzeBigramRelativeFrequency.java
License:Apache License
/** * Reads in the bigram relative frequency count file * /*from www . ja v a 2s .c o m*/ * @param path * @return * @throws IOException */ private static List<PairOfWritables<PairOfStrings, FloatWritable>> readDirectory(Path path) throws IOException { File dir = new File(path.toString()); ArrayList<PairOfWritables<PairOfStrings, FloatWritable>> relativeFrequencies = new ArrayList<PairOfWritables<PairOfStrings, FloatWritable>>(); for (File child : dir.listFiles()) { if (".".equals(child.getName()) || "..".equals(child.getName())) { continue; // Ignore the self and parent aliases. } FileInputStream bigramFile = null; bigramFile = new FileInputStream(child.toString()); // Read in the file DataInputStream resultsStream = new DataInputStream(bigramFile); BufferedReader results = new BufferedReader(new InputStreamReader(resultsStream)); StringTokenizer rToken; String rLine; String firstWord; String secondWord; String frequency; // iterate through every line in the file while ((rLine = results.readLine()) != null) { rToken = new StringTokenizer(rLine); // extract the meaningful information firstWord = rToken.nextToken(); //remove leading ( and trailing , firstWord = firstWord.substring(1, firstWord.length() - 1); secondWord = rToken.nextToken(); //remove trailing ) secondWord = secondWord.substring(0, secondWord.length() - 1); frequency = rToken.nextToken(); relativeFrequencies.add(new PairOfWritables<PairOfStrings, FloatWritable>( new PairOfStrings(firstWord, secondWord), new FloatWritable(Float.parseFloat(frequency)))); } if (bigramFile != null) bigramFile.close(); } return relativeFrequencies; }
From source file:edu.umd.cloud9.io.array.ArrayListWritableComparableTest.java
License:Apache License
/**
 * Round-trips a list of two {@code FloatWritable} values through
 * {@code write}/{@code readFields} and checks both values survive unchanged.
 */
@Test
public void testSerialize2() throws IOException {
    ArrayListWritableComparable<FloatWritable> source = new ArrayListWritableComparable<FloatWritable>();
    source.add(new FloatWritable(0.3f));
    source.add(new FloatWritable(3244.2f));

    // serialize into an in-memory buffer
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    source.write(new DataOutputStream(buffer));

    // deserialize into a fresh list
    byte[] serialized = buffer.toByteArray();
    ArrayListWritableComparable<FloatWritable> restored = new ArrayListWritableComparable<FloatWritable>();
    restored.readFields(new DataInputStream(new ByteArrayInputStream(serialized)));

    float first = restored.get(0).get();
    assertTrue(first == 0.3f);
    float second = restored.get(1).get();
    assertTrue(second == 3244.2f);
}
From source file:edu.umd.cloud9.io.array.ArrayListWritableTest.java
License:Apache License
/**
 * Round-trips a list of two {@code FloatWritable} values through
 * {@code write}/{@code readFields} and checks both values survive unchanged.
 */
@Test
public void testSerialize2() throws IOException {
    ArrayListWritable<FloatWritable> source = new ArrayListWritable<FloatWritable>();
    source.add(new FloatWritable(0.3f));
    source.add(new FloatWritable(3244.2f));

    // serialize into an in-memory buffer
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    source.write(new DataOutputStream(buffer));

    // deserialize into a fresh list
    byte[] serialized = buffer.toByteArray();
    ArrayListWritable<FloatWritable> restored = new ArrayListWritable<FloatWritable>();
    restored.readFields(new DataInputStream(new ByteArrayInputStream(serialized)));

    float first = restored.get(0).get();
    assertTrue(first == 0.3f);
    float second = restored.get(1).get();
    assertTrue(second == 3244.2f);
}