List of usage examples for the org.apache.hadoop.io.Text constructor
public Text(byte[] utf8)
From source file:at.illecker.hama.hybrid.examples.onlinecf.OnlineCFTrainHybridBSP.java
License:Apache License
@Override public void bsp(BSPPeer<LongWritable, PipesVectorWritable, Text, PipesVectorWritable, ItemMessage> peer) throws IOException, SyncException, InterruptedException { long startTime = System.currentTimeMillis(); // Fetch inputs collectInput(peer);/*from www .j a va 2s. c om*/ // Sync tasks after input has been collected peer.sync(); // DEBUG if (m_isDebuggingEnabled) { m_logger.writeChars("collected: " + this.m_usersMatrix.size() + " users, " + this.m_itemsMatrix.size() + " items, " + this.m_preferences.size() + " preferences\n"); m_logger.writeChars("preferences: length: " + this.m_preferences.size() + "\n"); for (Preference<Long, Long> p : this.m_preferences) { m_logger.writeChars("userId: '" + p.getUserId() + "' itemId: '" + p.getItemId() + "' value: '" + p.getValue().get() + "'\n"); } m_logger.writeChars("indexes: length: " + this.m_indexes.size() + " indexes: " + Arrays.toString(this.m_indexes.toArray()) + "\n"); m_logger.writeChars("usersMatrix: length: " + this.m_usersMatrix.size() + "\n"); for (Map.Entry<Long, PipesVectorWritable> e : this.m_usersMatrix.entrySet()) { m_logger.writeChars("key: '" + e.getKey() + "' value: '" + e.getValue().toString() + "'\n"); } m_logger.writeChars("itemsMatrix: length: " + this.m_itemsMatrix.size() + "\n"); for (Map.Entry<Long, PipesVectorWritable> e : this.m_itemsMatrix.entrySet()) { m_logger.writeChars("key: '" + e.getKey() + "' value: '" + e.getValue().toString() + "'\n"); } } // calculation steps for (int i = 0; i < m_maxIterations; i++) { computeAllValues(); if ((i + 1) % m_skipCount == 0) { normalizeWithBroadcastingValues(peer); } } // save users if (m_isDebuggingEnabled) { m_logger.writeChars("saving " + m_usersMatrix.size() + " users\n"); } for (Map.Entry<Long, PipesVectorWritable> user : m_usersMatrix.entrySet()) { if (m_isDebuggingEnabled) { m_logger.writeChars("user: " + user.getKey() + " vector: " + user.getValue().getVector() + "\n"); } peer.write(new Text("u" + user.getKey()), user.getValue()); } // save 
items // TODO duplicated item saves, but one item may belong to one task only if (m_isDebuggingEnabled) { m_logger.writeChars("saving " + m_itemsMatrix.size() + " items\n"); } for (Map.Entry<Long, PipesVectorWritable> item : m_itemsMatrix.entrySet()) { if (m_isDebuggingEnabled) { m_logger.writeChars("item: " + item.getKey() + " vector: " + item.getValue().getVector() + "\n"); } peer.write(new Text("i" + item.getKey()), item.getValue()); } this.m_bspTimeCpu = System.currentTimeMillis() - startTime; // Logging if (m_isDebuggingEnabled) { m_logger.writeChars("OnlineCFTrainHybridBSP,setupTimeCpu=" + this.m_setupTimeCpu + " ms\n"); m_logger.writeChars( "OnlineCFTrainHybridBSP,setupTimeCpu=" + (this.m_setupTimeCpu / 1000.0) + " seconds\n"); m_logger.writeChars("OnlineCFTrainHybridBSP,bspTimeCpu=" + this.m_bspTimeCpu + " ms\n"); m_logger.writeChars("OnlineCFTrainHybridBSP,bspTimeCpu=" + (this.m_bspTimeCpu / 1000.0) + " seconds\n"); m_logger.close(); } LOG.info("OnlineCFTrainHybridBSP,setupTimeCpu=" + this.m_setupTimeCpu + " ms"); LOG.info("OnlineCFTrainHybridBSP,setupTimeCpu=" + (this.m_setupTimeCpu / 1000.0) + " seconds"); LOG.info("OnlineCFTrainHybridBSP,bspTimeCpu=" + this.m_bspTimeCpu + " ms"); LOG.info("OnlineCFTrainHybridBSP,bspTimeCpu=" + (this.m_bspTimeCpu / 1000.0) + " seconds"); }
From source file:at.illecker.hama.hybrid.examples.onlinecf.OnlineCFTrainHybridBSP.java
License:Apache License
/**
 * GPU variant of the training superstep.
 *
 * <p>The method proceeds in four phases:
 * <ol>
 *   <li>Read every input record from {@code peer}, creating a random factor
 *       vector of length {@code m_matrixRank} for each user and item seen for
 *       the first time, and counting ratings per user and per item.</li>
 *   <li>Flatten the collected data into plain arrays and
 *       {@code GpuIntegerMap}s that are handed to the kernel.</li>
 *   <li>Sync with the other peers and run {@code OnlineCFTrainHybridKernel}
 *       through {@code rootbeer.run(...)}.</li>
 *   <li>Write the kernel's user rows under key {@code "u" + userId} and item
 *       rows under key {@code "i" + itemId}, store the elapsed time in
 *       {@code m_bspTimeGpu} and log timings and Rootbeer statistics.</li>
 * </ol>
 *
 * @param peer the BSP peer used for input, synchronization and output
 * @param rootbeer the Rootbeer instance used to create the context and launch the kernel
 * @throws IOException if reading, writing or logging fails
 * @throws SyncException if the barrier synchronization fails
 * @throws InterruptedException if the peer is interrupted while syncing
 */
@Override
public void bspGpu(BSPPeer<LongWritable, PipesVectorWritable, Text, PipesVectorWritable, ItemMessage> peer,
        Rootbeer rootbeer) throws IOException, SyncException, InterruptedException {

    long startTime = System.currentTimeMillis();

    // **********************************************************************
    // Collect inputs
    // **********************************************************************
    // userId -> (itemId -> score)
    Map<Long, HashMap<Long, Double>> preferencesMap = new HashMap<Long, HashMap<Long, Double>>();
    // Number of ratings seen per user / per item
    Map<Long, Long> userRatingCount = new HashMap<Long, Long>();
    Map<Long, Long> itemRatingCount = new HashMap<Long, Long>();

    LongWritable key = new LongWritable();
    PipesVectorWritable value = new PipesVectorWritable();
    int counter = 0;

    while (peer.readNext(key, value)) {
        // parse as <k:userId, v:(itemId, score)>
        long userId = key.get();
        // The item id arrives as a vector component and is cast to long
        long itemId = (long) value.getVector().get(0);
        double score = value.getVector().get(1);

        // Add User vector: first sighting gets a random vector, later sightings only bump the count
        if (m_usersMatrix.containsKey(userId) == false) {
            DenseDoubleVector vals = new DenseDoubleVector(m_matrixRank);
            for (int i = 0; i < m_matrixRank; i++) {
                vals.set(i, m_rand.nextDouble());
            }
            m_usersMatrix.put(userId, new PipesVectorWritable(vals));
            userRatingCount.put(userId, 1l);
        } else {
            userRatingCount.put(userId, userRatingCount.get(userId) + 1);
        }

        // Add Item vector: same scheme as for users
        if (m_itemsMatrix.containsKey(itemId) == false) {
            DenseDoubleVector vals = new DenseDoubleVector(m_matrixRank);
            for (int i = 0; i < m_matrixRank; i++) {
                vals.set(i, m_rand.nextDouble());
            }
            m_itemsMatrix.put(itemId, new PipesVectorWritable(vals));
            itemRatingCount.put(itemId, 1l);
        } else {
            itemRatingCount.put(itemId, itemRatingCount.get(itemId) + 1);
        }

        // Add preference, both to the member list and to the local lookup map
        m_preferences.add(new Preference<Long, Long>(userId, itemId, score));

        if (preferencesMap.containsKey(userId) == false) {
            HashMap<Long, Double> map = new HashMap<Long, Double>();
            map.put(itemId, score);
            preferencesMap.put(userId, map);
        } else {
            preferencesMap.get(userId).put(itemId, score);
        }

        // Add counter: record the running index of this input record
        m_indexes.add(counter);
        counter++;
    }

    // DEBUG
    if (m_isDebuggingEnabled) {
        m_logger.writeChars("collected: " + m_usersMatrix.size() + " users, " + m_itemsMatrix.size()
                + " items, " + m_preferences.size() + " preferences\n");
    }

    // **********************************************************************
    // Prepare input for GPU
    // **********************************************************************
    // NOTE(review): the iteration order of these maps drives all row/column
    // assignments below; presumably sortByValues orders by rating count
    // (descending?) - confirm against its implementation.
    Map<Long, Long> sortedUserRatingCount = sortByValues(userRatingCount);
    Map<Long, Long> sortedItemRatingCount = sortByValues(itemRatingCount);

    // Convert preferences to userItemMatrix double[][]
    // sortedUserRatingCount.size() x sortedItemRatingCount.size()
    double[][] userItemMatrix = new double[m_usersMatrix.size()][m_itemsMatrix.size()];

    // Mappers
    // userId -> row index of userItemMatrix
    Map<Long, Integer> userItemMatrixUserRowMap = new HashMap<Long, Integer>();
    // itemId -> column index, and the reverse column index -> itemId
    GpuIntegerMap userItemMatrixItemColMap = new GpuIntegerMap(m_itemsMatrix.size() + 1); // +1 because we are starting with 0
    GpuIntegerMap userItemMatrixColItemMap = new GpuIntegerMap(m_itemsMatrix.size() + 1); // +1 because we are starting with 0

    // Create userHelper to int[][]
    // userHelper[rowId][0] = rating count of the user in that row
    // userHelper[rowId][1..] = colIds of userItemMatrix the user has rated
    int[][] userHelper = null;

    // Create itemHelper to int[][]
    // itemHelper[colId][0] = rating count of the item in that column
    // itemHelper[colId][1..] = rowIds of userItemMatrix that rated the item
    int[][] itemHelper = null;
    // itemId -> next free slot in that item's itemHelper row
    Map<Long, Integer> itemHelperId = new HashMap<Long, Integer>();

    // Debug
    if (m_isDebuggingEnabled) {
        m_logger.writeChars(
                "userItemMatrix: (m x n): " + m_usersMatrix.size() + " x " + m_itemsMatrix.size() + "\n");
    }

    int rowId = 0;
    for (Long userId : sortedUserRatingCount.keySet()) {

        // Map userId to rowId in userItemMatrixUserRowMap
        userItemMatrixUserRowMap.put(userId, rowId);

        // Setup userHelper
        // NOTE(review): the row width is taken from the FIRST user iterated,
        // so a later user with more ratings would overflow its row - this
        // relies on the first key having the largest count; confirm.
        if (userHelper == null) {
            // TODO sortedUserRatingCount.size()
            userHelper = new int[m_usersMatrix.size()][sortedUserRatingCount.get(userId).intValue() + 1];
        }
        userHelper[rowId][0] = sortedUserRatingCount.get(userId).intValue();

        int colId = 0;
        int userHelperId = 1;
        for (Long itemId : sortedItemRatingCount.keySet()) {

            // Map itemId to colId in userItemMatrixItemColMap
            // (only needed once, so it is done while processing the first row)
            // NOTE(review): intValue() narrows the long item id to int.
            if (rowId == 0) {
                userItemMatrixItemColMap.put(itemId.intValue(), colId);
                userItemMatrixColItemMap.put(colId, itemId.intValue());
            }

            // Setup itemHelper
            // NOTE(review): same first-key sizing assumption as userHelper.
            if (itemHelper == null) {
                // TODO sortedItemRatingCount.size()
                itemHelper = new int[m_itemsMatrix.size()][sortedItemRatingCount.get(itemId).intValue() + 1];
            }
            itemHelper[colId][0] = sortedItemRatingCount.get(itemId).intValue();

            if (preferencesMap.get(userId).containsKey(itemId)) {
                // Add userItemMatrix
                userItemMatrix[rowId][colId] = preferencesMap.get(userId).get(itemId);

                // Add userHelper
                userHelper[rowId][userHelperId] = colId;
                userHelperId++;

                // Add itemHelper: append rowId at the item's next free slot
                if (itemHelperId.containsKey(itemId)) {
                    int idx = itemHelperId.get(itemId);
                    itemHelper[colId][idx] = rowId;
                    itemHelperId.put(itemId, idx + 1);
                } else {
                    // First rating for this item goes to slot 1; slot 0 holds the count
                    itemHelper[colId][1] = rowId;
                    itemHelperId.put(itemId, 2);
                }
            }

            colId++;
        }

        // Debug userItemMatrix
        if (m_isDebuggingEnabled) {
            m_logger.writeChars("userItemMatrix userId: " + userId + " row[" + rowId + "]: "
                    + Arrays.toString(userItemMatrix[rowId]) + " userRatings: "
                    + sortedUserRatingCount.get(userId) + "\n");
        }
        rowId++;
    }

    // Debug userHelper and itemHelper
    if (m_isDebuggingEnabled) {
        // TODO sortedUserRatingCount.size()
        for (int i = 0; i < m_usersMatrix.size(); i++) {
            m_logger.writeChars("userHelper row " + i + ": " + Arrays.toString(userHelper[i]) + "\n");
        }
        // TODO sortedItemRatingCount.size()
        for (int i = 0; i < m_itemsMatrix.size(); i++) {
            m_logger.writeChars("itemHelper row " + i + ": " + Arrays.toString(itemHelper[i]) + "\n");
        }
    }

    // Convert usersMatrix to double[][]
    // Rows follow the same iteration order used for userItemMatrix above
    double[][] userMatrix = new double[m_usersMatrix.size()][m_matrixRank];
    rowId = 0;
    if (m_isDebuggingEnabled) {
        m_logger.writeChars("userMatrix: length: " + m_usersMatrix.size() + "\n");
    }
    for (Long userId : sortedUserRatingCount.keySet()) {
        DoubleVector vector = m_usersMatrix.get(userId).getVector();
        for (int i = 0; i < m_matrixRank; i++) {
            userMatrix[rowId][i] = vector.get(i);
        }
        if (m_isDebuggingEnabled) {
            m_logger.writeChars("userId: " + userId + " " + Arrays.toString(vector.toArray()) + "\n");
        }
        rowId++;
    }

    // Convert itemsMatrix to double[][]
    // Rows follow the same iteration order used for the columns of userItemMatrix
    double[][] itemMatrix = new double[m_itemsMatrix.size()][m_matrixRank];
    rowId = 0;
    // Every item id is registered with an initial value of 0
    GpuIntegerMap counterMap = new GpuIntegerMap(m_itemsMatrix.size());
    if (m_isDebuggingEnabled) {
        m_logger.writeChars("itemMatrix: length: " + m_itemsMatrix.size() + "\n");
    }
    for (Long itemId : sortedItemRatingCount.keySet()) {
        counterMap.put(itemId.intValue(), 0);

        DoubleVector vector = m_itemsMatrix.get(itemId).getVector();
        for (int i = 0; i < m_matrixRank; i++) {
            itemMatrix[rowId][i] = vector.get(i);
        }
        if (m_isDebuggingEnabled) {
            m_logger.writeChars("itemId: " + itemId + " " + Arrays.toString(vector.toArray()) + "\n");
        }
        rowId++;
    }

    // Sync tasks after input has been collected
    peer.sync();

    // **********************************************************************
    // Run GPU Kernels
    // **********************************************************************
    OnlineCFTrainHybridKernel kernel = new OnlineCFTrainHybridKernel(userItemMatrix, userHelper, itemHelper,
            userItemMatrixItemColMap, userItemMatrixColItemMap, userMatrix, itemMatrix, m_usersMatrix.size(),
            m_itemsMatrix.size(), ALPHA, m_matrixRank, m_maxIterations, counterMap, m_skipCount,
            peer.getNumPeers(), peer.getPeerIndex(), peer.getAllPeerNames());

    Context context = rootbeer.createDefaultContext();
    // The stopwatch brackets only the rootbeer.run call
    Stopwatch watch = new Stopwatch();
    watch.start();
    rootbeer.run(kernel, new ThreadConfig(m_blockSize, m_gridSize, m_blockSize * m_gridSize), context);
    watch.stop();

    // **********************************************************************
    // Save Model
    // **********************************************************************
    // save users: look up each user's row in the kernel's result matrix
    for (Entry<Long, Integer> userMap : userItemMatrixUserRowMap.entrySet()) {
        if (m_isDebuggingEnabled) {
            m_logger.writeChars("user: " + userMap.getKey() + " vector: "
                    + Arrays.toString(kernel.m_usersMatrix[userMap.getValue()]) + "\n");
        }
        peer.write(new Text("u" + userMap.getKey()),
                new PipesVectorWritable(new DenseDoubleVector(kernel.m_usersMatrix[userMap.getValue()])));
    }

    // TODO duplicated item saves, but one item may belong to one task only
    // save items: the list may contain null entries, which are skipped
    for (GpuIntIntPair itemMap : userItemMatrixItemColMap.getList()) {
        if (itemMap != null) {
            if (m_isDebuggingEnabled) {
                m_logger.writeChars("item: " + itemMap.getKey() + " vector: "
                        + Arrays.toString(kernel.m_itemsMatrix[itemMap.getValue()]) + "\n");
            }
            peer.write(new Text("i" + itemMap.getKey()),
                    new PipesVectorWritable(new DenseDoubleVector(kernel.m_itemsMatrix[itemMap.getValue()])));
        }
    }

    this.m_bspTimeGpu = System.currentTimeMillis() - startTime;

    // **********************************************************************
    // Logging
    // **********************************************************************
    if (m_isDebuggingEnabled) {
        m_logger.writeChars("OnlineCFTrainHybridBSP.bspGpu executed on GPU!\n");
        m_logger.writeChars(
                "OnlineCFTrainHybridBSP.bspGpu blockSize: " + m_blockSize + " gridSize: " + m_gridSize + "\n");
        m_logger.writeChars("OnlineCFTrainHybridBSP,setupTimeGpu=" + this.m_setupTimeGpu + " ms\n");
        m_logger.writeChars(
                "OnlineCFTrainHybridBSP,setupTimeGpu=" + (this.m_setupTimeGpu / 1000.0) + " seconds\n");
        m_logger.writeChars("OnlineCFTrainHybridBSP,bspTimeGpu=" + this.m_bspTimeGpu + " ms\n");
        m_logger.writeChars("OnlineCFTrainHybridBSP,bspTimeGpu=" + (this.m_bspTimeGpu / 1000.0) + " seconds\n");

        List<StatsRow> stats = context.getStats();
        for (StatsRow row : stats) {
            m_logger.writeChars(" StatsRow:\n");
            m_logger.writeChars(" serial time: " + row.getSerializationTime() + "\n");
            m_logger.writeChars(" exec time: " + row.getExecutionTime() + "\n");
            m_logger.writeChars(" deserial time: " + row.getDeserializationTime() + "\n");
            m_logger.writeChars(" num blocks: " + row.getNumBlocks() + "\n");
            m_logger.writeChars(" num threads: " + row.getNumThreads() + "\n");
            // The stopwatch total is repeated once per stats row
            m_logger.writeChars("GPUTime: " + watch.elapsedTimeMillis() + " ms" + "\n");
        }

        m_logger.close();
    }

    // Logging: the same report again, this time through LOG
    List<StatsRow> stats = context.getStats();
    for (StatsRow row : stats) {
        LOG.info(" StatsRow:");
        LOG.info(" serial time: " + row.getSerializationTime());
        LOG.info(" exec time: " + row.getExecutionTime());
        LOG.info(" deserial time: " + row.getDeserializationTime());
        LOG.info(" num blocks: " + row.getNumBlocks());
        LOG.info(" num threads: " + row.getNumThreads());
        LOG.info("GPUTime: " + watch.elapsedTimeMillis() + " ms");
    }

    LOG.info("OnlineCFTrainHybridBSP.bspGpu executed on GPU!");
    LOG.info("OnlineCFTrainHybridBSP.bspGpu blockSize: " + m_blockSize + " gridSize: " + m_gridSize);
    LOG.info("OnlineCFTrainHybridBSP,setupTimeGpu=" + this.m_setupTimeGpu + " ms");
    LOG.info("OnlineCFTrainHybridBSP,setupTimeGpu=" + (this.m_setupTimeGpu / 1000.0) + " seconds");
    LOG.info("OnlineCFTrainHybridBSP,bspTimeGpu=" + this.m_bspTimeGpu + " ms");
    LOG.info("OnlineCFTrainHybridBSP,bspTimeGpu=" + (this.m_bspTimeGpu / 1000.0) + " seconds");
}
From source file:at.illecker.hama.hybrid.examples.piestimator.PiEstimatorHybridBSP.java
License:Apache License
@Override public void cleanup(BSPPeer<NullWritable, NullWritable, Text, DoubleWritable, LongWritable> peer) throws IOException { long startTime = 0; if (m_timeMeasurement) { startTime = System.currentTimeMillis(); }/*from w ww . j av a2 s . c o m*/ // MasterTask writes out results if (peer.getPeerName().equals(m_masterTask)) { long totalHits = 0; LongWritable received; while ((received = peer.getCurrentMessage()) != null) { totalHits += received.get(); } double pi = 4.0 * totalHits / m_iterations; // DEBUG if (m_isDebuggingEnabled) { m_logger.writeChars("PiEstimatorHybrid,Iterations=" + m_iterations + "\n"); m_logger.writeChars("PiEstimatorHybrid,numMessages: " + peer.getNumCurrentMessages() + "\n"); m_logger.writeChars("PiEstimatorHybrid,totalHits: " + totalHits + "\n"); } peer.write(new Text("Estimated value of PI(3,14159265) using " + m_iterations + " iterations is"), new DoubleWritable(pi)); } long stopTime = 0; if (m_timeMeasurement) { stopTime = System.currentTimeMillis(); LOG.info("# cleanupTime: " + ((stopTime - startTime) / 1000.0) + " sec"); } if (m_isDebuggingEnabled) { m_logger.writeChars("PiEstimatorHybrid,cleanupTime: " + ((stopTime - startTime) / 1000.0) + " sec\n"); m_logger.close(); } }
From source file:at.illecker.hama.rootbeer.examples.hellorootbeer.HelloRootbeerGpuBSP.java
License:Apache License
/**
 * On the master task, sums every received message, checks the sum against
 * {@code numPeers * m_kernelCount * m_iterations} and writes it out.
 * All other peers do nothing.
 *
 * @param peer the BSP peer providing the received messages and the output
 * @throws IOException if reading messages or writing the result fails
 */
@Override
public void cleanup(BSPPeer<NullWritable, NullWritable, Text, DoubleWritable, DoubleWritable> peer)
        throws IOException {

    boolean isMaster = peer.getPeerName().equals(m_masterTask);
    if (!isMaster) {
        return;
    }

    // Drain the queue, accumulating the partial results.
    double total = 0.0;
    for (DoubleWritable message = peer.getCurrentMessage(); message != null; message = peer
            .getCurrentMessage()) {
        total += message.get();
    }

    double expectedResult = peer.getNumPeers() * m_kernelCount * m_iterations;
    // NOTE(review): two-argument floating-point assertEquals has no tolerance;
    // which Assert class this resolves to is not visible here - confirm it
    // behaves as intended for doubles.
    Assert.assertEquals(expectedResult, total);

    Text label = new Text(
            "Result of " + (peer.getNumPeers() * m_kernelCount * m_iterations) + " calculations is");
    peer.write(label, new DoubleWritable(total));
}
From source file:at.illecker.hama.rootbeer.examples.piestimator.cpu.PiEstimatorCpuBSP.java
License:Apache License
@Override public void cleanup(BSPPeer<NullWritable, NullWritable, Text, DoubleWritable, LongWritable> peer) throws IOException { // MasterTask writes out results if (peer.getPeerName().equals(m_masterTask)) { int numMessages = peer.getNumCurrentMessages(); long totalHits = 0; LongWritable received;/*ww w. j a v a 2 s .c o m*/ while ((received = peer.getCurrentMessage()) != null) { totalHits += received.get(); } double pi = 4.0 * totalHits / (m_calculationsPerBspTask * numMessages); // DEBUG if (m_isDebuggingEnabled) { // Write log to dfs BSPJob job = new BSPJob((HamaConfiguration) peer.getConfiguration()); FileSystem fs = FileSystem.get(peer.getConfiguration()); FSDataOutputStream outStream = fs .create(new Path(FileOutputFormat.getOutputPath(job), peer.getTaskId() + ".log")); outStream.writeChars("BSP=PiEstimatorCpuBSP,Iterations=" + m_iterations + "\n"); outStream.writeChars("totalHits: " + totalHits + "\n"); outStream.writeChars("numMessages: " + numMessages + "\n"); outStream.writeChars("calculationsPerBspTask: " + m_calculationsPerBspTask + "\n"); outStream.writeChars("calculationsTotal: " + (m_calculationsPerBspTask * numMessages) + "\n"); outStream.close(); } peer.write( new Text("Estimated value of PI(3,14159265) using " + (m_calculationsPerBspTask * numMessages) // + (peer.getNumPeers() * m_threadCount * m_iterations) + " points is"), new DoubleWritable(pi)); } }
From source file:at.illecker.hama.rootbeer.examples.piestimator.gpu.PiEstimatorGpuBSP.java
License:Apache License
/**
 * On the master task, sums the hit counts received from all tasks and writes
 * the estimate of PI, using
 * {@code m_calculationsPerThread * m_blockSize * m_gridSize} as the number of
 * sampled points. All other peers do nothing.
 *
 * @param peer the BSP peer providing the received messages and the output
 * @throws IOException if reading messages or writing the result fails
 */
@Override
public void cleanup(BSPPeer<NullWritable, NullWritable, Text, DoubleWritable, LongWritable> peer)
        throws IOException {

    boolean isMaster = peer.getPeerName().equals(m_masterTask);
    if (!isMaster) {
        return;
    }

    // Drain the queue, accumulating the hits.
    long hits = 0;
    for (LongWritable message = peer.getCurrentMessage(); message != null; message = peer
            .getCurrentMessage()) {
        hits += message.get();
    }

    double estimate = 4.0 * hits / (m_calculationsPerThread * m_blockSize * m_gridSize);

    Text label = new Text("Estimated value of PI(3,14159265) using "
            + (m_calculationsPerThread * m_blockSize * m_gridSize) + " points is");
    peer.write(label, new DoubleWritable(estimate));
}
From source file:authordetect.input.SingleBookReader.java
private void processBookContent() throws IOException { currentPos += lineReader.readLine(currentLine); String currentLineStr = currentLine.toString().toLowerCase(); //Processing book content line by line. And update the word map while (!isFinish) { String[] words = currentLineStr.split(" "); //write all words into the word map for (String word : words) { word = word.trim().replaceAll("[^a-zA-Z0-9]", "").toLowerCase(); if (!word.equals("")) { wordCountMap.put(word, 1); }//from ww w . j ava 2 s.c om } //detect book end if (currentLineStr.contains("end") && currentLineStr.contains("gutenberg")) { isFinish = true; //update counter which stores the book count Counter counter = context.getCounter(BookCounter.BOOK_COUNT); counter.increment(1); } currentPos += lineReader.readLine(currentLine); currentLineStr = currentLine.toString().toLowerCase(); } //convert word map to text array int arrayLen = wordCountMap.entrySet().size(); Iterator<Map.Entry<String, Integer>> iterator = wordCountMap.entrySet().iterator(); int maxCount = 0, count; String word, wordCount; Text[] wordArray = new Text[arrayLen]; for (int i = 0; i < arrayLen; i++) { Map.Entry<String, Integer> entry = iterator.next(); word = entry.getKey(); count = entry.getValue(); wordCount = word + "/" + count; wordArray[i] = new Text(wordCount); if (count > maxCount) {//get the maximum word count as well maxCount = count; } } key = new Text(title + "/" + maxCount); value = new TextArrayWritable(wordArray); }
From source file:average.AverageMapper.java
/**
 * Splits a comma-separated record and emits its first field as the key and
 * its second field as the value.
 *
 * <p>Records with fewer than two fields (for example blank lines) are
 * skipped and counted under the {@code AverageMapper/MALFORMED_RECORDS}
 * counter instead of failing the task with an
 * {@code ArrayIndexOutOfBoundsException}.
 *
 * @param _key the input key (unused)
 * @param value the input line
 * @param output collector receiving the (first field, second field) pair
 * @param reporter used to count skipped records; may be {@code null}
 * @throws IOException if the collector fails
 */
@Override
public void map(LongWritable _key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {

    String[] fields = value.toString().split(",");

    if (fields.length < 2) {
        if (reporter != null) {
            reporter.incrCounter("AverageMapper", "MALFORMED_RECORDS", 1);
        }
        return;
    }

    output.collect(new Text(fields[0]), new Text(fields[1]));
}
From source file:average.AverageReducer.java
@Override public void reduce(Text _key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter) throws IOException { Text key = _key;//from w w w. ja v a 2 s . c o m if (key.toString().equals("0Student_Id") != true) { int frequencyForYear = 0; int f = 0; double av = 0, c = 0.0; String ans = "", t = ""; while (values.hasNext()) { Text value = (Text) values.next(); t = value.toString(); frequencyForYear += Integer.parseInt(t); c++; // process value } av = frequencyForYear / c; ans = Double.toString(av); output.collect(key, new Text(ans)); } else { output.collect(key, new Text("Average")); } }
From source file:averagetemperature.AverageTemperatureMapper.java
@Override public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException { String[] line = value.toString().split(","); String datePart = line[1];//from w w w . jav a2 s . c o m String temp = line[10]; if (StringUtils.isNumeric(temp)) try { output.collect(new Text(datePart), new IntWritable(Integer.parseInt(temp))); } catch (NumberFormatException e) { } ; }