List of usage examples for the org.apache.hadoop.io.LongWritable no-argument constructor LongWritable()
public LongWritable()
From source file:HistogramBucket.java
License:Apache License
@Override public void readFields(DataInput di) throws IOException { attribute.readFields(di);// ww w . j a va 2 s . co m LongWritable arraySize = new LongWritable(); arraySize.readFields(di); splits = new ArrayList<DoubleWritable>(); for (int i = 0; i < Integer.parseInt(arraySize.toString()); i++) { DoubleWritable d = new DoubleWritable(); d.readFields(di); splits.add(d); } }
From source file:DeprecatedBAMBaseRecordReader.java
License:Open Source License
/**
 * Creates a fresh, default-constructed key object.
 *
 * @return a new {@link LongWritable} instance
 */
@Override
public LongWritable createKey() {
    final LongWritable freshKey = new LongWritable();
    return freshKey;
}
From source file:Txt2SeqConverter.java
License:Apache License
public static void main(String[] args) { if (args.length != 2) { //System.out.println("Usage: env HADOOP_CLASSPATH=.:$HADOOP_CLASSPATH hadoop Txt2SeqConverter input output"); System.out.println("Usage: hadoop Txt2SeqConverter input output"); System.exit(1);/*from w w w . j a va2 s . c om*/ } FileSystem fs = null; String seqFileName = args[1]; Configuration conf = new Configuration(); try { fs = FileSystem.get(URI.create(seqFileName), conf); } catch (IOException e) { System.out.println("ERROR: " + e.getMessage()); } Path path = new Path(seqFileName); LongWritable key = new LongWritable(); Text value = new Text(); SequenceFile.Writer writer = null; try { //writer = SequenceFile.createWriter(fs, conf, path, LongWritable.class, Text.class, SequenceFile.CompressionType.BLOCK); writer = SequenceFile.createWriter(fs, conf, path, LongWritable.class, Text.class, SequenceFile.CompressionType.BLOCK, new com.hadoop.compression.lzo.LzoCodec()); BufferedReader br = new BufferedReader(new FileReader(args[0])); int transactionID = 0; String transaction = null; while ((transaction = br.readLine()) != null) { key.set(transactionID); value.set(transaction); writer.append(key, value); transactionID++; } } catch (IOException e) { System.out.println("ERROR: " + e.getMessage()); } finally { IOUtils.closeStream(writer); } }
From source file:TestHashMap.java
License:Apache License
@Test public void testHashSetString() throws Exception { final Set<String> hashSet = new HashSet<>(); final Random random = new Random(0xDEADBEEF); int matched = 0; LongWritable num = new LongWritable(); long startTime = System.nanoTime(); for (int i = 0; i < SET_SIZE; i++) { // input data is String String input = Long.toString(random.nextLong()); // disable optimizer if (input.length() > 5) { hashSet.add(input);// ww w . j av a2s . co m } } random.setSeed(0xDEADBEEF); for (int i = 0; i < DATA_SIZE; i++) { // query data is LongWritable num.set(random.nextLong()); if (hashSet.contains(num.toString())) { matched++; } } long endTime = System.nanoTime(); System.out.println(" HashSet<String>"); System.out.println(" Elapsed time: " + (endTime - startTime) / 1000000 + " ms"); System.out.println(" Matched " + matched + " times"); }
From source file:TestHashMap.java
License:Apache License
@Test public void testHashSetLong() throws Exception { final Set<Long> hashSet = new HashSet<>(); final Random random = new Random(0xDEADBEEF); int matched = 0; LongWritable num = new LongWritable(); long startTime = System.nanoTime(); for (int i = 0; i < SET_SIZE; i++) { // input data is String String input = Long.toString(random.nextLong()); // disable optimizer if (input.length() > 5) { hashSet.add(Long.parseLong(input)); }//from www . j a va2s. c o m } random.setSeed(0xDEADBEEF); for (int i = 0; i < DATA_SIZE; i++) { // query data is LongWritable num.set(random.nextLong()); if (hashSet.contains(num.get())) { matched++; } } long endTime = System.nanoTime(); System.out.println(" HashSet<Long>"); System.out.println(" Elapsed time: " + (endTime - startTime) / 1000000f + " ms"); System.out.println(" Matched " + matched + " times"); }
From source file:at.illecker.hama.hybrid.examples.onlinecf.OnlineCFTrainHybridBSP.java
License:Apache License
private void collectInput( BSPPeer<LongWritable, PipesVectorWritable, Text, PipesVectorWritable, ItemMessage> peer) throws IOException { LongWritable key = new LongWritable(); PipesVectorWritable value = new PipesVectorWritable(); int counter = 0; while (peer.readNext(key, value)) { long actualId = key.get(); // parse as <k:userId, v:(itemId, score)> long itemId = (long) value.getVector().get(0); double score = value.getVector().get(1); if (m_usersMatrix.containsKey(actualId) == false) { DenseDoubleVector vals = new DenseDoubleVector(m_matrixRank); for (int i = 0; i < m_matrixRank; i++) { vals.set(i, m_rand.nextDouble()); }/*from w ww . j a v a 2 s . co m*/ m_usersMatrix.put(actualId, new PipesVectorWritable(vals)); } if (m_itemsMatrix.containsKey(itemId) == false) { DenseDoubleVector vals = new DenseDoubleVector(m_matrixRank); for (int i = 0; i < m_matrixRank; i++) { vals.set(i, m_rand.nextDouble()); } m_itemsMatrix.put(itemId, new PipesVectorWritable(vals)); } m_preferences.add(new Preference<Long, Long>(actualId, itemId, score)); m_indexes.add(counter); counter++; } }
From source file:at.illecker.hama.hybrid.examples.onlinecf.OnlineCFTrainHybridBSP.java
License:Apache License
/**
 * GPU variant of the BSP step: collects the peer's input, converts the preferences and the
 * user/item vectors into plain arrays, runs an {@code OnlineCFTrainHybridKernel} through
 * Rootbeer, writes the resulting user and item vectors back through the peer and logs timings.
 *
 * @param peer     the BSP peer used to read input records, synchronize and write the model
 * @param rootbeer the Rootbeer instance used to create the context and run the kernel
 * @throws IOException          if reading, writing or debug logging fails
 * @throws SyncException        if {@code peer.sync()} fails
 * @throws InterruptedException if the thread is interrupted while synchronizing
 */
@Override
public void bspGpu(BSPPeer<LongWritable, PipesVectorWritable, Text, PipesVectorWritable, ItemMessage> peer,
        Rootbeer rootbeer) throws IOException, SyncException, InterruptedException {

    // Start of the interval that is stored in m_bspTimeGpu once the model has been written.
    long startTime = System.currentTimeMillis();

    // **********************************************************************
    // Collect inputs
    // **********************************************************************
    // userId -> (itemId -> score); a later score for the same pair overwrites the earlier one.
    Map<Long, HashMap<Long, Double>> preferencesMap = new HashMap<Long, HashMap<Long, Double>>();
    // Number of input records seen per user / per item.
    Map<Long, Long> userRatingCount = new HashMap<Long, Long>();
    Map<Long, Long> itemRatingCount = new HashMap<Long, Long>();

    LongWritable key = new LongWritable();
    PipesVectorWritable value = new PipesVectorWritable();
    int counter = 0;

    while (peer.readNext(key, value)) {
        // parse as <k:userId, v:(itemId, score)>
        long userId = key.get();
        long itemId = (long) value.getVector().get(0);
        double score = value.getVector().get(1);

        // Add User vector: a new user gets m_matrixRank random values and a count of 1,
        // a known user only has its rating count incremented.
        if (m_usersMatrix.containsKey(userId) == false) {
            DenseDoubleVector vals = new DenseDoubleVector(m_matrixRank);
            for (int i = 0; i < m_matrixRank; i++) {
                vals.set(i, m_rand.nextDouble());
            }
            m_usersMatrix.put(userId, new PipesVectorWritable(vals));
            userRatingCount.put(userId, 1l);
        } else {
            userRatingCount.put(userId, userRatingCount.get(userId) + 1);
        }

        // Add Item vector: same scheme as for users.
        if (m_itemsMatrix.containsKey(itemId) == false) {
            DenseDoubleVector vals = new DenseDoubleVector(m_matrixRank);
            for (int i = 0; i < m_matrixRank; i++) {
                vals.set(i, m_rand.nextDouble());
            }
            m_itemsMatrix.put(itemId, new PipesVectorWritable(vals));
            itemRatingCount.put(itemId, 1l);
        } else {
            itemRatingCount.put(itemId, itemRatingCount.get(itemId) + 1);
        }

        // Add preference to the member list and to the local per-user lookup map.
        m_preferences.add(new Preference<Long, Long>(userId, itemId, score));

        if (preferencesMap.containsKey(userId) == false) {
            HashMap<Long, Double> map = new HashMap<Long, Double>();
            map.put(itemId, score);
            preferencesMap.put(userId, map);
        } else {
            preferencesMap.get(userId).put(itemId, score);
        }

        // Add counter: the running record position is appended to m_indexes.
        m_indexes.add(counter);
        counter++;
    }

    // DEBUG
    if (m_isDebuggingEnabled) {
        m_logger.writeChars("collected: " + m_usersMatrix.size() + " users, " + m_itemsMatrix.size()
                + " items, " + m_preferences.size() + " preferences\n");
    }

    // **********************************************************************
    // Prepare input for GPU
    // **********************************************************************
    // The key order of these two maps defines the row (user) and column (item) order of
    // every array built below.
    // NOTE(review): the helper-array sizing further down presumably relies on sortByValues
    // putting the largest count first - confirm against sortByValues.
    Map<Long, Long> sortedUserRatingCount = sortByValues(userRatingCount);
    Map<Long, Long> sortedItemRatingCount = sortByValues(itemRatingCount);

    // Convert preferences to userItemMatrix double[][]
    // sortedUserRatingCount.size() x sortedItemRatingCount.size()
    // (the allocation uses the sizes of the member maps m_usersMatrix / m_itemsMatrix)
    double[][] userItemMatrix = new double[m_usersMatrix.size()][m_itemsMatrix.size()];

    // Mappers between ids and array positions
    Map<Long, Integer> userItemMatrixUserRowMap = new HashMap<Long, Integer>();
    GpuIntegerMap userItemMatrixItemColMap = new GpuIntegerMap(m_itemsMatrix.size() + 1); // +1 because we are starting with 0
    GpuIntegerMap userItemMatrixColItemMap = new GpuIntegerMap(m_itemsMatrix.size() + 1); // +1 because we are starting with 0

    // Create userHelper to int[][]
    // userHelper[rowId][0] = rating count of the user in that row
    // userHelper[rowId][1..] = colIds of userItemMatrix for the items that user rated
    int[][] userHelper = null;

    // Create itemHelper to int[][]
    // itemHelper[colId][0] = rating count of the item in that column
    // itemHelper[colId][1..] = rowIds of userItemMatrix for the users that rated the item
    int[][] itemHelper = null;
    // Per item: the next free slot in that item's itemHelper row (slot 0 holds the count).
    Map<Long, Integer> itemHelperId = new HashMap<Long, Integer>();

    // Debug
    if (m_isDebuggingEnabled) {
        m_logger.writeChars(
                "userItemMatrix: (m x n): " + m_usersMatrix.size() + " x " + m_itemsMatrix.size() + "\n");
    }

    int rowId = 0;
    for (Long userId : sortedUserRatingCount.keySet()) {

        // Map userId to rowId in userItemMatrixUserRowMap
        userItemMatrixUserRowMap.put(userId, rowId);

        // Setup userHelper: allocated lazily on the first user; the row width is taken from
        // that first user's rating count (+1 for the count slot).
        if (userHelper == null) {
            // TODO sortedUserRatingCount.size()
            userHelper = new int[m_usersMatrix.size()][sortedUserRatingCount.get(userId).intValue() + 1];
        }
        userHelper[rowId][0] = sortedUserRatingCount.get(userId).intValue();

        int colId = 0;
        // Next free slot in this user's userHelper row (slot 0 holds the count).
        int userHelperId = 1;
        for (Long itemId : sortedItemRatingCount.keySet()) {

            // Map itemId to colId in userItemMatrixItemColMap. The item iteration is the same
            // for every user row, so the mapping is recorded during the first row only.
            // NOTE(review): item ids are narrowed from Long to int here.
            if (rowId == 0) {
                userItemMatrixItemColMap.put(itemId.intValue(), colId);
                userItemMatrixColItemMap.put(colId, itemId.intValue());
            }

            // Setup itemHelper: allocated lazily on the first item; the row width is taken
            // from that first item's rating count (+1 for the count slot).
            if (itemHelper == null) {
                // TODO sortedItemRatingCount.size()
                itemHelper = new int[m_itemsMatrix.size()][sortedItemRatingCount.get(itemId).intValue() + 1];
            }
            itemHelper[colId][0] = sortedItemRatingCount.get(itemId).intValue();

            if (preferencesMap.get(userId).containsKey(itemId)) {
                // Add userItemMatrix
                userItemMatrix[rowId][colId] = preferencesMap.get(userId).get(itemId);

                // Add userHelper
                userHelper[rowId][userHelperId] = colId;
                userHelperId++;

                // Add itemHelper: append rowId at the item's next free slot and advance it;
                // the first rating of an item goes to slot 1.
                if (itemHelperId.containsKey(itemId)) {
                    int idx = itemHelperId.get(itemId);
                    itemHelper[colId][idx] = rowId;
                    itemHelperId.put(itemId, idx + 1);
                } else {
                    itemHelper[colId][1] = rowId;
                    itemHelperId.put(itemId, 2);
                }
            }

            colId++;
        }

        // Debug userItemMatrix
        if (m_isDebuggingEnabled) {
            m_logger.writeChars("userItemMatrix userId: " + userId + " row[" + rowId + "]: "
                    + Arrays.toString(userItemMatrix[rowId]) + " userRatings: "
                    + sortedUserRatingCount.get(userId) + "\n");
        }
        rowId++;
    }

    // Debug userHelper and itemHelper
    if (m_isDebuggingEnabled) {
        // TODO sortedUserRatingCount.size()
        for (int i = 0; i < m_usersMatrix.size(); i++) {
            m_logger.writeChars("userHelper row " + i + ": " + Arrays.toString(userHelper[i]) + "\n");
        }
        // TODO sortedItemRatingCount.size()
        for (int i = 0; i < m_itemsMatrix.size(); i++) {
            m_logger.writeChars("itemHelper row " + i + ": " + Arrays.toString(itemHelper[i]) + "\n");
        }
    }

    // Convert usersMatrix to double[][], rows in sortedUserRatingCount key order
    double[][] userMatrix = new double[m_usersMatrix.size()][m_matrixRank];
    rowId = 0;
    if (m_isDebuggingEnabled) {
        m_logger.writeChars("userMatrix: length: " + m_usersMatrix.size() + "\n");
    }
    for (Long userId : sortedUserRatingCount.keySet()) {
        DoubleVector vector = m_usersMatrix.get(userId).getVector();
        for (int i = 0; i < m_matrixRank; i++) {
            userMatrix[rowId][i] = vector.get(i);
        }
        if (m_isDebuggingEnabled) {
            m_logger.writeChars("userId: " + userId + " " + Arrays.toString(vector.toArray()) + "\n");
        }
        rowId++;
    }

    // Convert itemsMatrix to double[][], rows in sortedItemRatingCount key order
    double[][] itemMatrix = new double[m_itemsMatrix.size()][m_matrixRank];
    rowId = 0;
    // Every item id starts with the value 0 in counterMap, which is handed to the kernel.
    GpuIntegerMap counterMap = new GpuIntegerMap(m_itemsMatrix.size());
    if (m_isDebuggingEnabled) {
        m_logger.writeChars("itemMatrix: length: " + m_itemsMatrix.size() + "\n");
    }
    for (Long itemId : sortedItemRatingCount.keySet()) {
        counterMap.put(itemId.intValue(), 0);

        DoubleVector vector = m_itemsMatrix.get(itemId).getVector();
        for (int i = 0; i < m_matrixRank; i++) {
            itemMatrix[rowId][i] = vector.get(i);
        }
        if (m_isDebuggingEnabled) {
            m_logger.writeChars("itemId: " + itemId + " " + Arrays.toString(vector.toArray()) + "\n");
        }
        rowId++;
    }

    // Sync tasks after input has been collected
    peer.sync();

    // **********************************************************************
    // Run GPU Kernels
    // **********************************************************************
    OnlineCFTrainHybridKernel kernel = new OnlineCFTrainHybridKernel(userItemMatrix, userHelper, itemHelper,
            userItemMatrixItemColMap, userItemMatrixColItemMap, userMatrix, itemMatrix, m_usersMatrix.size(),
            m_itemsMatrix.size(), ALPHA, m_matrixRank, m_maxIterations, counterMap, m_skipCount,
            peer.getNumPeers(), peer.getPeerIndex(), peer.getAllPeerNames());

    Context context = rootbeer.createDefaultContext();
    // The stopwatch brackets only the rootbeer.run call.
    Stopwatch watch = new Stopwatch();
    watch.start();
    rootbeer.run(kernel, new ThreadConfig(m_blockSize, m_gridSize, m_blockSize * m_gridSize), context);
    watch.stop();

    // **********************************************************************
    // Save Model
    // **********************************************************************
    // save users: one record per user, key "u" + userId, value = that user's row of
    // kernel.m_usersMatrix
    for (Entry<Long, Integer> userMap : userItemMatrixUserRowMap.entrySet()) {
        if (m_isDebuggingEnabled) {
            m_logger.writeChars("user: " + userMap.getKey() + " vector: "
                    + Arrays.toString(kernel.m_usersMatrix[userMap.getValue()]) + "\n");
        }
        peer.write(new Text("u" + userMap.getKey()),
                new PipesVectorWritable(new DenseDoubleVector(kernel.m_usersMatrix[userMap.getValue()])));
    }

    // TODO duplicated item saves, but one item may belong to one task only
    // save items: key "i" + itemId, value = that item's row of kernel.m_itemsMatrix;
    // null entries returned by getList() are skipped
    for (GpuIntIntPair itemMap : userItemMatrixItemColMap.getList()) {
        if (itemMap != null) {
            if (m_isDebuggingEnabled) {
                m_logger.writeChars("item: " + itemMap.getKey() + " vector: "
                        + Arrays.toString(kernel.m_itemsMatrix[itemMap.getValue()]) + "\n");
            }
            peer.write(new Text("i" + itemMap.getKey()),
                    new PipesVectorWritable(new DenseDoubleVector(kernel.m_itemsMatrix[itemMap.getValue()])));
        }
    }

    // Wall-clock time from method entry until the model has been written.
    this.m_bspTimeGpu = System.currentTimeMillis() - startTime;

    // **********************************************************************
    // Logging
    // **********************************************************************
    // Debug log: same information as the LOG.info output below, written to m_logger.
    if (m_isDebuggingEnabled) {
        m_logger.writeChars("OnlineCFTrainHybridBSP.bspGpu executed on GPU!\n");
        m_logger.writeChars(
                "OnlineCFTrainHybridBSP.bspGpu blockSize: " + m_blockSize + " gridSize: " + m_gridSize + "\n");
        m_logger.writeChars("OnlineCFTrainHybridBSP,setupTimeGpu=" + this.m_setupTimeGpu + " ms\n");
        m_logger.writeChars(
                "OnlineCFTrainHybridBSP,setupTimeGpu=" + (this.m_setupTimeGpu / 1000.0) + " seconds\n");
        m_logger.writeChars("OnlineCFTrainHybridBSP,bspTimeGpu=" + this.m_bspTimeGpu + " ms\n");
        m_logger.writeChars("OnlineCFTrainHybridBSP,bspTimeGpu=" + (this.m_bspTimeGpu / 1000.0) + " seconds\n");

        List<StatsRow> stats = context.getStats();
        for (StatsRow row : stats) {
            m_logger.writeChars(" StatsRow:\n");
            m_logger.writeChars(" serial time: " + row.getSerializationTime() + "\n");
            m_logger.writeChars(" exec time: " + row.getExecutionTime() + "\n");
            m_logger.writeChars(" deserial time: " + row.getDeserializationTime() + "\n");
            m_logger.writeChars(" num blocks: " + row.getNumBlocks() + "\n");
            m_logger.writeChars(" num threads: " + row.getNumThreads() + "\n");
            m_logger.writeChars("GPUTime: " + watch.elapsedTimeMillis() + " ms" + "\n");
        }

        // The debug logger is closed here; nothing in this method writes to it afterwards.
        m_logger.close();
    }

    // Logging through LOG, independent of the debugging flag
    List<StatsRow> stats = context.getStats();
    for (StatsRow row : stats) {
        LOG.info(" StatsRow:");
        LOG.info(" serial time: " + row.getSerializationTime());
        LOG.info(" exec time: " + row.getExecutionTime());
        LOG.info(" deserial time: " + row.getDeserializationTime());
        LOG.info(" num blocks: " + row.getNumBlocks());
        LOG.info(" num threads: " + row.getNumThreads());
        LOG.info("GPUTime: " + watch.elapsedTimeMillis() + " ms");
    }

    LOG.info("OnlineCFTrainHybridBSP.bspGpu executed on GPU!");
    LOG.info("OnlineCFTrainHybridBSP.bspGpu blockSize: " + m_blockSize + " gridSize: " + m_gridSize);
    LOG.info("OnlineCFTrainHybridBSP,setupTimeGpu=" + this.m_setupTimeGpu + " ms");
    LOG.info("OnlineCFTrainHybridBSP,setupTimeGpu=" + (this.m_setupTimeGpu / 1000.0) + " seconds");
    LOG.info("OnlineCFTrainHybridBSP,bspTimeGpu=" + this.m_bspTimeGpu + " ms");
    LOG.info("OnlineCFTrainHybridBSP,bspTimeGpu=" + (this.m_bspTimeGpu / 1000.0) + " seconds");
}
From source file:be.ugent.intec.halvade.hadoop.mapreduce.BamMergeReducer.java
/**
 * Prepares the reducer: reads the output directory, the input-is-BAM flag and the sequence
 * dictionary from the job configuration, obtains the SAM header (read from the configured
 * header file for BAM input, otherwise built from the dictionary plus a single read group),
 * opens the record writer for {@code mergedBam.bam} and initialises the output key to 0.
 *
 * @param context the task context supplying the configuration and the record writer target
 * @throws IOException          if reading the header or opening the record writer fails
 * @throws InterruptedException declared by the overridden method
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    outpFormat = new KeyIgnoringBAMOutputFormat();

    final String outDir = HalvadeConf.getOutDir(context.getConfiguration());
    inputIsBam = HalvadeConf.inputIsBam(context.getConfiguration());
    dict = HalvadeConf.getSequenceDictionary(context.getConfiguration());

    if (!inputIsBam) {
        // No existing header to reuse: assemble one from the dictionary and the read group fields.
        getReadGroupData(context.getConfiguration());

        header = new SAMFileHeader();
        header.setSequenceDictionary(dict);

        bamrg = new SAMReadGroupRecord(RGID);
        bamrg.setLibrary(RGLB);
        bamrg.setPlatform(RGPL);
        bamrg.setPlatformUnit(RGPU);
        bamrg.setSample(RGSM);
        header.addReadGroup(bamrg);
    } else {
        // BAM input: take the header from the configured header file.
        final Path headerFile = new Path(HalvadeConf.getHeaderFile(context.getConfiguration()));
        header = SAMHeaderReader.readSAMHeaderFrom(headerFile, context.getConfiguration());
    }

    outpFormat.setSAMHeader(header);

    final Path mergedBam = new Path(outDir + "mergedBam.bam");
    recordWriter = outpFormat.getRecordWriter(context, mergedBam);

    outKey = new LongWritable();
    outKey.set(0);
}
From source file:br.com.lassal.mrunit.example.SMSCDRMapperReducerTest.java
@Test public void testMapper() throws IOException { mapDriver.withInput(new LongWritable(), new Text("655209;1;796764372490213;804422938115889;6")); mapDriver.withOutput(new Text("6"), new IntWritable(1)); mapDriver.withOutput(new Text("Z"), new IntWritable(1)); mapDriver.runTest();//from w w w . j a va2s .c o m }
From source file:br.com.lassal.nqueens.grid.mapreduce.NQueenPartialShotMapperReducerTest.java
@Test public void testMapper() throws IOException { mapDriver.getConfiguration().set(NQueenPartialShotMapper.NQueenRowSize_PROP, "4"); mapDriver.withInput(new LongWritable(), new Text("4#")); mapDriver.withOutput(new Text("4:1,3,0,2"), NullWritable.get()); mapDriver.withOutput(new Text("4:2,0,3,1"), NullWritable.get()); mapDriver.runTest();//w w w . j a v a 2s . co m }