List of usage examples for org.apache.hadoop.io SequenceFile createWriter
@Deprecated public static Writer createWriter(Configuration conf, FSDataOutputStream out, Class keyClass, Class valClass, CompressionType compressionType, CompressionCodec codec) throws IOException
From source file:at.illecker.hama.hybrid.examples.onlinecf.OnlineCFHybridBenchmark.java
License:Apache License
public static List<double[]> convertInputData(Configuration conf, FileSystem fs, Path in, Path preferencesIn, String inputFile, String separator, int maxTestPrefs) throws IOException { List<double[]> testItems = new ArrayList<double[]>(); // Delete input files if already exist if (fs.exists(in)) { fs.delete(in, true);//from w w w . j a v a 2s . com } if (fs.exists(preferencesIn)) { fs.delete(preferencesIn, true); } final SequenceFile.Writer prefWriter = SequenceFile.createWriter(fs, conf, preferencesIn, LongWritable.class, PipesVectorWritable.class, CompressionType.NONE); BufferedReader br = new BufferedReader(new FileReader(inputFile)); String line; while ((line = br.readLine()) != null) { String[] values = line.split(separator); long userId = Long.parseLong(values[0]); long itemId = Long.parseLong(values[1]); double rating = Double.parseDouble(values[2]); // System.out.println("userId: " + userId + " itemId: " + itemId // + " rating: " + rating); double vector[] = new double[2]; vector[0] = itemId; vector[1] = rating; prefWriter.append(new LongWritable(userId), new PipesVectorWritable(new DenseDoubleVector(vector))); // Add test preferences maxTestPrefs--; if (maxTestPrefs > 0) { testItems.add(new double[] { userId, itemId, rating }); } } br.close(); prefWriter.close(); return testItems; }
From source file:at.illecker.hama.hybrid.examples.onlinecf.OnlineCFTrainHybridBSP.java
License:Apache License
public static List<Preference<Long, Long>> prepareTestInputData(Configuration conf, FileSystem fs, Path in, Path preferencesIn) throws IOException { Preference[] train_prefs = { new Preference<Integer, Integer>(1, 0, 4), new Preference<Integer, Integer>(1, 1, 2.5), new Preference<Integer, Integer>(1, 2, 3.5), new Preference<Integer, Integer>(2, 0, 4), new Preference<Integer, Integer>(2, 1, 2.5), new Preference<Integer, Integer>(2, 2, 3.5), new Preference<Integer, Integer>(2, 3, 1), new Preference<Integer, Integer>(2, 4, 3.5), new Preference<Integer, Integer>(3, 0, 4), new Preference<Integer, Integer>(3, 1, 2.5), new Preference<Integer, Integer>(3, 2, 3.5), new Preference<Integer, Integer>(3, 3, 1), new Preference<Integer, Integer>(3, 4, 3.5) }; List<Preference<Long, Long>> test_prefs = new ArrayList<Preference<Long, Long>>(); test_prefs.add(new Preference<Long, Long>(1l, 0l, 4)); test_prefs.add(new Preference<Long, Long>(1l, 1l, 2.5)); test_prefs.add(new Preference<Long, Long>(1l, 2l, 3.5)); test_prefs.add(new Preference<Long, Long>(1l, 3l, 1)); test_prefs.add(new Preference<Long, Long>(1l, 4l, 3.5)); // Delete input files if already exist if (fs.exists(in)) { fs.delete(in, true);/* w w w. j a v a2 s . co m*/ } if (fs.exists(preferencesIn)) { fs.delete(preferencesIn, true); } final SequenceFile.Writer prefWriter = SequenceFile.createWriter(fs, conf, preferencesIn, LongWritable.class, PipesVectorWritable.class, CompressionType.NONE); for (Preference<Integer, Integer> taste : train_prefs) { double values[] = new double[2]; values[0] = taste.getItemId(); values[1] = taste.getValue().get(); prefWriter.append(new LongWritable(taste.getUserId()), new PipesVectorWritable(new DenseDoubleVector(values))); } prefWriter.close(); return test_prefs; }
From source file:at.illecker.hama.hybrid.examples.onlinecf.OnlineCFTrainHybridBSP.java
License:Apache License
public static List<Preference<Long, Long>> generateRandomInputData(Configuration conf, FileSystem fs, Path in, int numBspTask, int numGPUBspTask, int userCount, int itemCount, int percentNonZeroValues, int GPUPercentage, int maxTestPrefs) throws IOException { // Delete input directory if already exist if (fs.exists(in)) { fs.delete(in, true);/*from w w w . j a va 2s . c o m*/ } Random rand = new Random(32L); Set<Map.Entry<Long, Long>> userItemPairs = new HashSet<Map.Entry<Long, Long>>(); List<Preference<Long, Long>> testItems = new ArrayList<Preference<Long, Long>>(); int possibleUserItemRatings = userCount * itemCount; int userItemRatings = possibleUserItemRatings * percentNonZeroValues / 100; System.out.println("generateRandomInputData possibleRatings: " + possibleUserItemRatings + " ratings: " + userItemRatings); // Compute work distributions int cpuTaskNum = numBspTask - numGPUBspTask; long ratingsPerGPUTask = 0; long ratingsPerCPU = 0; long ratingsPerCPUTask = 0; if ((numGPUBspTask > 0) && (GPUPercentage > 0) && (GPUPercentage <= 100)) { ratingsPerGPUTask = (userItemRatings * GPUPercentage) / 100; ratingsPerCPU = userItemRatings - ratingsPerGPUTask; } else { ratingsPerCPU = userItemRatings; } if (cpuTaskNum > 0) { ratingsPerCPUTask = ratingsPerCPU / cpuTaskNum; } System.out.println("generateRandomInputData ratingsPerGPUTask: " + ratingsPerGPUTask + " ratingsPerCPU: " + ratingsPerCPU + " ratingsPerCPUTask: " + ratingsPerCPUTask); for (int part = 0; part < numBspTask; part++) { Path partIn = new Path(in, "part" + part + ".seq"); final SequenceFile.Writer dataWriter = SequenceFile.createWriter(fs, conf, partIn, LongWritable.class, PipesVectorWritable.class, CompressionType.NONE); long interval = 0; if (part > cpuTaskNum) { interval = ratingsPerGPUTask; } else { interval = ratingsPerCPUTask; } long start = interval * part; long end = start + interval - 1; if ((numBspTask - 1) == part) { end = userItemRatings; } LOG.info("Partition " + part + ": from " + start + " 
to " + end); for (long i = start; i <= end; i++) { // Find new user item rating which was not used before Map.Entry<Long, Long> userItemPair; do { long userId = rand.nextInt(userCount); long itemId = rand.nextInt(itemCount); userItemPair = new AbstractMap.SimpleImmutableEntry<Long, Long>(userId, itemId); } while (userItemPairs.contains(userItemPair)); // Add user item rating userItemPairs.add(userItemPair); // Generate rating int rating = rand.nextInt(5) + 1; // values between 1 and 5 // Add user item rating to test data if (i < maxTestPrefs) { testItems.add( new Preference<Long, Long>(userItemPair.getKey(), userItemPair.getValue(), rating)); } // Write out user item rating dataWriter.append(new LongWritable(userItemPair.getKey()), new PipesVectorWritable( new DenseDoubleVector(new double[] { userItemPair.getValue(), rating }))); } dataWriter.close(); } return testItems; }
From source file:at.illecker.hama.hybrid.examples.onlinecf.OnlineCFTrainHybridBSP.java
License:Apache License
public static List<Preference<Long, Long>> convertInputData(Configuration conf, FileSystem fs, Path in, Path preferencesIn, String inputFile, String separator, int maxTestPrefs) throws IOException { List<Preference<Long, Long>> test_prefs = new ArrayList<Preference<Long, Long>>(); // Delete input files if already exist if (fs.exists(in)) { fs.delete(in, true);/*from w ww . ja v a2 s . c om*/ } if (fs.exists(preferencesIn)) { fs.delete(preferencesIn, true); } final SequenceFile.Writer prefWriter = SequenceFile.createWriter(fs, conf, preferencesIn, LongWritable.class, PipesVectorWritable.class, CompressionType.NONE); BufferedReader br = new BufferedReader(new FileReader(inputFile)); String line; while ((line = br.readLine()) != null) { String[] values = line.split(separator); long userId = Long.parseLong(values[0]); long itemId = Long.parseLong(values[1]); double rating = Double.parseDouble(values[2]); // System.out.println("userId: " + userId + " itemId: " + itemId // + " rating: " + rating); double vector[] = new double[2]; vector[0] = itemId; vector[1] = rating; prefWriter.append(new LongWritable(userId), new PipesVectorWritable(new DenseDoubleVector(vector))); // Add test preferences maxTestPrefs--; if (maxTestPrefs > 0) { test_prefs.add(new Preference<Long, Long>(userId, itemId, rating)); } } br.close(); prefWriter.close(); return test_prefs; }
From source file:at.illecker.hama.hybrid.examples.testrootbeer.TestRootbeerHybridBSP.java
License:Apache License
private static void prepareInput(Configuration conf, Path inputPath, int n, int maxVal) throws IOException { FileSystem fs = inputPath.getFileSystem(conf); // Create input file writers depending on bspTaskNum int bspTaskNum = conf.getInt("bsp.peers.num", 1); SequenceFile.Writer[] inputWriters = new SequenceFile.Writer[bspTaskNum]; for (int i = 0; i < bspTaskNum; i++) { Path inputFile = new Path(inputPath, "input" + i + ".seq"); LOG.info("inputFile: " + inputFile.toString()); inputWriters[i] = SequenceFile.createWriter(fs, conf, inputFile, IntWritable.class, IntWritable.class, CompressionType.NONE);// ww w . java 2 s . co m } // Write random values to input files IntWritable key = new IntWritable(); IntWritable value = new IntWritable(); Random r = new Random(); for (int i = 0; i < n; i++) { key.set(i); value.set(r.nextInt(maxVal)); for (int j = 0; j < inputWriters.length; j++) { inputWriters[j].append(key, value); } } // Close file writers for (int j = 0; j < inputWriters.length; j++) { inputWriters[j].close(); } }
From source file:bme.iclef.hadoop.file2seq.TarToSeqFile.java
License:Apache License
/**
 * Opens a block-compressed sequence file writer ({@code Text} keys, {@code BytesWritable}
 * values) at the absolute path of {@code outputFile}, using the local file system and
 * configuration supplied by {@code setup}.
 *
 * @return the newly created writer
 * @throws Exception if the writer cannot be created
 */
private SequenceFile.Writer openOutputFile() throws Exception {
    final String absoluteLocation = outputFile.getAbsolutePath();
    final Path target = new Path(absoluteLocation);
    return SequenceFile.createWriter(
            setup.getLocalFileSystem(),
            setup.getConf(),
            target,
            Text.class,
            BytesWritable.class,
            SequenceFile.CompressionType.BLOCK);
}
From source file:co.cask.tephra.persist.CommitMarkerCodecTest.java
License:Apache License
@Test public void testRandomCommitMarkers() throws Exception { List<Integer> randomInts = new ArrayList<>(); Path newLog = new Path(TMP_FOLDER.newFolder().getAbsolutePath(), LOG_FILE); // Write a bunch of random commit markers try (SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, newLog, LongWritable.class, LongWritable.class, SequenceFile.CompressionType.NONE)) { for (int i = 0; i < 1000; i++) { int randomNum = RANDOM.nextInt(Integer.MAX_VALUE); CommitMarkerCodec.writeMarker(writer, randomNum); randomInts.add(randomNum);/*from w w w . j av a 2 s . c o m*/ } writer.hflush(); writer.hsync(); } // Read the commit markers back to verify the marker try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, newLog, conf); CommitMarkerCodec markerCodec = new CommitMarkerCodec()) { for (int num : randomInts) { Assert.assertEquals(num, markerCodec.readMarker(reader)); } } }
From source file:co.cask.tephra.persist.CommitMarkerCodecTest.java
License:Apache License
@Test public void testIncompleteCommitMarker() throws Exception { Path newLog = new Path(TMP_FOLDER.newFolder().getAbsolutePath(), LOG_FILE); try (SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, newLog, LongWritable.class, LongWritable.class, SequenceFile.CompressionType.NONE)) { String key = TxConstants.TransactionLog.NUM_ENTRIES_APPENDED; SequenceFile.ValueBytes valueBytes = new IncompleteValueBytes(); writer.appendRaw(key.getBytes(), 0, key.length(), valueBytes); writer.hflush();/* w w w .j av a 2s . c o m*/ writer.hsync(); } // Read the incomplete commit marker try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, newLog, conf); CommitMarkerCodec markerCodec = new CommitMarkerCodec()) { try { markerCodec.readMarker(reader); Assert.fail("Expected EOF Exception to be thrown"); } catch (EOFException e) { // expected since we didn't write the value bytes } } }
From source file:co.cask.tephra.persist.CommitMarkerCodecTest.java
License:Apache License
@Test public void testIncorrectCommitMarker() throws Exception { Path newLog = new Path(TMP_FOLDER.newFolder().getAbsolutePath(), LOG_FILE); // Write an incorrect marker try (SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, newLog, LongWritable.class, LongWritable.class, SequenceFile.CompressionType.NONE)) { String invalidKey = "IncorrectKey"; SequenceFile.ValueBytes valueBytes = new CommitMarkerCodec.CommitEntriesCount(100); writer.appendRaw(invalidKey.getBytes(), 0, invalidKey.length(), valueBytes); writer.hflush();/*from ww w. ja v a 2 s. com*/ writer.hsync(); } // Read the commit markers back to verify the marker try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, newLog, conf); CommitMarkerCodec markerCodec = new CommitMarkerCodec()) { try { markerCodec.readMarker(reader); Assert.fail("Expected an IOException to be thrown"); } catch (IOException e) { // expected } } }
From source file:com.asakusafw.runtime.io.sequencefile.SequenceFileUtil.java
License:Apache License
/** * Creates a new writer.//from w ww. ja v a 2 s . c o m * @param out the drain * @param conf current configuration * @param keyClass the key type * @param valueClass the value type * @param codec the compression codec to block compression, or {@code null} to uncompressed * @return the created sequence file writer * @throws IOException if failed to create a sequence file * @throws IllegalArgumentException if some parameters were {@code null} */ public static SequenceFile.Writer openWriter(OutputStream out, Configuration conf, Class<?> keyClass, Class<?> valueClass, CompressionCodec codec) throws IOException { if (out == null) { throw new IllegalArgumentException("out must not be null"); //$NON-NLS-1$ } if (conf == null) { throw new IllegalArgumentException("conf must not be null"); //$NON-NLS-1$ } if (keyClass == null) { throw new IllegalArgumentException("keyClass must not be null"); //$NON-NLS-1$ } if (valueClass == null) { throw new IllegalArgumentException("valueClass must not be null"); //$NON-NLS-1$ } if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("Creating sequence file writer for output (key={0}, value={0})", //$NON-NLS-1$ keyClass.getName(), valueClass.getName())); } FSDataOutputStream output = new FSDataOutputStream(out, null); if (codec != null) { return SequenceFile.createWriter(conf, output, keyClass, valueClass, CompressionType.BLOCK, codec); } else { return SequenceFile.createWriter(conf, output, keyClass, valueClass, CompressionType.NONE, null); } }