List of usage examples for org.apache.hadoop.io.LongWritable#get()
public long get()
From source file:org.apache.kylin.storage.hbase.steps.RangeKeyDistributionReducer.java
License:Apache License
@Override public void doReduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException { for (LongWritable v : values) { bytesRead += v.get(); }//w w w . ja v a 2 s . c o m if (bytesRead >= ONE_GIGA_BYTES) { gbPoints.add(new Text(key)); bytesRead = 0; // reset bytesRead } }
From source file:org.apache.mahout.cf.taste.hadoop.RecommenderMapper.java
License:Apache License
/**
 * Asks the recommender for up to {@code recommendationsPerUser} items for the
 * user ID carried in {@code value}, writes them out keyed by that same
 * {@code value}, and bumps the two progress counters.
 *
 * @param key     input key (not used by this method)
 * @param value   the user ID to produce recommendations for
 * @param context receives the output pair and the counter increments
 * @throws RuntimeException wrapping any {@code TasteException} raised by the recommender
 */
@Override
protected void map(LongWritable key, LongWritable value, Context context)
        throws IOException, InterruptedException {
    long user = value.get();

    List<RecommendedItem> items;
    try {
        items = recommender.recommend(user, recommendationsPerUser);
    } catch (TasteException e) {
        throw new RuntimeException(e);
    }

    context.write(value, new RecommendedItemsWritable(items));

    context.getCounter(ReducerMetrics.USERS_PROCESSED).increment(1L);
    context.getCounter(ReducerMetrics.RECOMMENDATIONS_MADE).increment(items.size());
}
From source file:org.apache.mahout.clustering.spectral.AffinityMatrixInputMapper.java
License:Apache License
@Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String[] elements = COMMA_PATTERN.split(value.toString()); log.debug("(DEBUG - MAP) Key[{}], Value[{}]", key.get(), value); // enforce well-formed textual representation of the graph if (elements.length != 3) { throw new IOException( "Expected input of length 3, received " + elements.length + ". Please make sure you adhere to " + "the structure of (i,j,value) for representing a graph in text. " + "Input line was: '" + value + "'."); }/*from w w w . jav a2s . c o m*/ if (elements[0].isEmpty() || elements[1].isEmpty() || elements[2].isEmpty()) { throw new IOException("Found an element of 0 length. Please be sure you adhere to the structure of " + "(i,j,value) for representing a graph in text."); } // parse the line of text into a DistributedRowMatrix entry, // making the row (elements[0]) the key to the Reducer, and // setting the column (elements[1]) in the entry itself DistributedRowMatrix.MatrixEntryWritable toAdd = new DistributedRowMatrix.MatrixEntryWritable(); IntWritable row = new IntWritable(Integer.valueOf(elements[0])); toAdd.setRow(-1); // already set as the Reducer's key toAdd.setCol(Integer.valueOf(elements[1])); toAdd.setVal(Double.valueOf(elements[2])); context.write(row, toAdd); }
From source file:org.apache.mahout.df.mapred.partial.Step0JobTest.java
License:Apache License
public void testStep0Mapper() throws Exception { Random rng = RandomUtils.getRandom(); // create a dataset large enough to be split up String descriptor = Utils.randomDescriptor(rng, numAttributes); double[][] source = Utils.randomDoubles(rng, descriptor, numInstances); String[] sData = Utils.double2String(source); // write the data to a file Path dataPath = Utils.writeDataToTestFile(sData); JobConf job = new JobConf(); job.setNumMapTasks(numMaps);//from w ww . j av a 2 s .co m FileInputFormat.setInputPaths(job, dataPath); // retrieve the splits TextInputFormat input = (TextInputFormat) job.getInputFormat(); InputSplit[] splits = input.getSplits(job, numMaps); InputSplit[] sorted = Arrays.copyOf(splits, splits.length); Builder.sortSplits(sorted); Step0OutputCollector collector = new Step0OutputCollector(numMaps); Reporter reporter = Reporter.NULL; for (int p = 0; p < numMaps; p++) { InputSplit split = sorted[p]; RecordReader<LongWritable, Text> reader = input.getRecordReader(split, job, reporter); LongWritable key = reader.createKey(); Text value = reader.createValue(); Step0Mapper mapper = new Step0Mapper(); mapper.configure(p); Long firstKey = null; int size = 0; while (reader.next(key, value)) { if (firstKey == null) { firstKey = key.get(); } mapper.map(key, value, collector, reporter); size++; } mapper.close(); // validate the mapper's output assertEquals(p, collector.keys[p]); assertEquals(firstKey.longValue(), collector.values[p].getFirstId()); assertEquals(size, collector.values[p].getSize()); } }
From source file:org.apache.mahout.df.mapred.partial.Step0JobTest.java
License:Apache License
public void testProcessOutput() throws Exception { Random rng = RandomUtils.getRandom(); // create a dataset large enough to be split up String descriptor = Utils.randomDescriptor(rng, numAttributes); double[][] source = Utils.randomDoubles(rng, descriptor, numInstances); // each instance label is its index in the dataset int labelId = Utils.findLabel(descriptor); for (int index = 0; index < numInstances; index++) { source[index][labelId] = index;//w ww . j a v a 2 s . c o m } String[] sData = Utils.double2String(source); // write the data to a file Path dataPath = Utils.writeDataToTestFile(sData); // prepare a data converter Dataset dataset = DataLoader.generateDataset(descriptor, sData); DataConverter converter = new DataConverter(dataset); JobConf job = new JobConf(); job.setNumMapTasks(numMaps); FileInputFormat.setInputPaths(job, dataPath); // retrieve the splits TextInputFormat input = (TextInputFormat) job.getInputFormat(); InputSplit[] splits = input.getSplits(job, numMaps); InputSplit[] sorted = Arrays.copyOf(splits, splits.length); Builder.sortSplits(sorted); Reporter reporter = Reporter.NULL; int[] keys = new int[numMaps]; Step0Output[] values = new Step0Output[numMaps]; int[] expectedIds = new int[numMaps]; for (int p = 0; p < numMaps; p++) { InputSplit split = sorted[p]; RecordReader<LongWritable, Text> reader = input.getRecordReader(split, job, reporter); LongWritable key = reader.createKey(); Text value = reader.createValue(); Long firstKey = null; int size = 0; while (reader.next(key, value)) { if (firstKey == null) { firstKey = key.get(); expectedIds[p] = converter.convert(0, value.toString()).label; } size++; } keys[p] = p; values[p] = new Step0Output(firstKey, size); } Step0Output[] partitions = Step0Job.processOutput(keys, values); int[] actualIds = Step0Output.extractFirstIds(partitions); assertTrue("Expected: " + Arrays.toString(expectedIds) + " But was: " + Arrays.toString(actualIds), Arrays.equals(expectedIds, actualIds)); }
From source file:org.apache.mahout.df.mapred.partial.Step1Mapper.java
License:Apache License
/**
 * Converts one input line into an instance and appends it to {@code instances}.
 * The first collector passed in is remembered in {@code this.output}; later
 * calls leave that field untouched.
 *
 * @param key      the record key, narrowed to {@code int} and used as the instance id
 * @param value    the line of text to convert
 * @param output   the collector, stored on first call only
 * @param reporter not used by this method
 */
@Override
public void map(LongWritable key, Text value, OutputCollector<TreeID, MapredOutput> output,
        Reporter reporter) throws IOException {
    if (null == this.output) {
        this.output = output;
    }

    // NOTE(review): the long key is narrowed to int; keys above
    // Integer.MAX_VALUE would wrap. Confirm inputs stay in range.
    final int id = (int) key.get();
    final String line = value.toString();
    instances.add(converter.convert(id, line));
}
From source file:org.apache.mahout.df.mapreduce.partial.Step0JobTest.java
License:Apache License
public void testStep0Mapper() throws Exception { Random rng = RandomUtils.getRandom(); // create a dataset large enough to be split up String descriptor = Utils.randomDescriptor(rng, NUM_ATTRIBUTES); double[][] source = Utils.randomDoubles(rng, descriptor, NUM_INSTANCES); String[] sData = Utils.double2String(source); // write the data to a file Path dataPath = Utils.writeDataToTestFile(sData); Job job = new Job(); job.setInputFormatClass(TextInputFormat.class); FileInputFormat.setInputPaths(job, dataPath); setMaxSplitSize(job.getConfiguration(), dataPath, NUM_MAPS); // retrieve the splits TextInputFormat input = new TextInputFormat(); List<InputSplit> splits = input.getSplits(job); assertEquals(NUM_MAPS, splits.size()); InputSplit[] sorted = new InputSplit[NUM_MAPS]; splits.toArray(sorted);/*from ww w. j a v a 2 s. c o m*/ Builder.sortSplits(sorted); Step0Context context = new Step0Context(new Step0Mapper(), job.getConfiguration(), new TaskAttemptID(), NUM_MAPS); for (int p = 0; p < NUM_MAPS; p++) { InputSplit split = sorted[p]; RecordReader<LongWritable, Text> reader = input.createRecordReader(split, context); reader.initialize(split, context); Step0Mapper mapper = new Step0Mapper(); mapper.configure(p); Long firstKey = null; int size = 0; while (reader.nextKeyValue()) { LongWritable key = reader.getCurrentKey(); if (firstKey == null) { firstKey = key.get(); } mapper.map(key, reader.getCurrentValue(), context); size++; } mapper.cleanup(context); // validate the mapper's output assertEquals(p, context.keys[p]); assertEquals(firstKey.longValue(), context.values[p].getFirstId()); assertEquals(size, context.values[p].getSize()); } }
From source file:org.apache.mahout.df.mapreduce.partial.Step0JobTest.java
License:Apache License
/**
 * Builds a random dataset in which each instance's label equals its index,
 * reads every sorted split to capture its first key, record count and the
 * label of its first record, then checks that the first ids extracted from
 * {@code Step0Job.processOutput} match those first-record labels.
 */
public void testProcessOutput() throws Exception {
    Random rng = RandomUtils.getRandom();

    // create a dataset large enough to be split up
    String descriptor = Utils.randomDescriptor(rng, NUM_ATTRIBUTES);
    double[][] source = Utils.randomDoubles(rng, descriptor, NUM_INSTANCES);

    // each instance label is its index in the dataset
    int labelId = Utils.findLabel(descriptor);
    for (int index = 0; index < NUM_INSTANCES; index++) {
        source[index][labelId] = index;
    }

    String[] sData = Utils.double2String(source);

    // write the data to a file
    Path dataPath = Utils.writeDataToTestFile(sData);

    // prepare a data converter
    Dataset dataset = DataLoader.generateDataset(descriptor, sData);
    DataConverter converter = new DataConverter(dataset);

    Job job = new Job();
    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, dataPath);

    setMaxSplitSize(job.getConfiguration(), dataPath, NUM_MAPS);

    // retrieve the splits
    TextInputFormat input = new TextInputFormat();
    List<InputSplit> splits = input.getSplits(job);
    assertEquals(NUM_MAPS, splits.size());

    InputSplit[] sorted = new InputSplit[NUM_MAPS];
    splits.toArray(sorted);
    Builder.sortSplits(sorted);

    List<Integer> keys = new ArrayList<Integer>();
    List<Step0Output> values = new ArrayList<Step0Output>();

    int[] expectedIds = new int[NUM_MAPS];

    TaskAttemptContext context = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID());

    for (int p = 0; p < NUM_MAPS; p++) {
        InputSplit split = sorted[p];
        RecordReader<LongWritable, Text> reader = input.createRecordReader(split, context);

        Long firstKey = null;
        int size = 0;

        // The reader holds the underlying input open; always release it.
        try {
            reader.initialize(split, context);

            while (reader.nextKeyValue()) {
                LongWritable key = reader.getCurrentKey();
                Text value = reader.getCurrentValue();

                if (firstKey == null) {
                    firstKey = key.get();
                    expectedIds[p] = converter.convert(0, value.toString()).getLabel();
                }

                size++;
            }
        } finally {
            reader.close();
        }

        keys.add(p);
        values.add(new Step0Output(firstKey, size));
    }

    Step0Output[] partitions = Step0Job.processOutput(keys, values);

    int[] actualIds = Step0Output.extractFirstIds(partitions);

    assertTrue("Expected: " + Arrays.toString(expectedIds) + " But was: " + Arrays.toString(actualIds),
            Arrays.equals(expectedIds, actualIds));
}
From source file:org.apache.mahout.df.mapreduce.partial.Step1Mapper.java
License:Apache License
/**
 * Converts one input line into an instance and appends it to {@code instances}.
 *
 * @param key     the record key, narrowed to {@code int} and used as the instance id
 * @param value   the line of text to convert
 * @param context not used by this method
 */
@Override
protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
    // NOTE(review): the long key is narrowed to int; keys above
    // Integer.MAX_VALUE would wrap. Confirm inputs stay in range.
    final int id = (int) key.get();
    final String line = value.toString();
    instances.add(converter.convert(id, line));
}
From source file:org.apache.mahout.feature.common.csv.CsvToVectorMapper.java
License:Apache License
public void map(LongWritable key, Text line, Context context) throws IOException, InterruptedException { // TODO: not always this problem // first line (column name), don't care if (key.get() == 0) { return;// w w w .j av a 2 s . c o m } Vector input = new RandomAccessSparseVector(columnNumber); List<String> values = Lists.newArrayList(line.toString().split(",")); int k = 0; double v = 0.0; for (String value : values) { try { v = Double.parseDouble(value); } catch (NumberFormatException e) { throw new IOException("CSV file contains non-numeric data"); } input.setQuick(k, v); k++; } // Text type as key is required since "rowid" job takes as argument // SequenceFile<Text,VectorWritable> context.write(new Text("" + key.get()), new VectorWritable(input)); }