Example usage for org.apache.hadoop.io SequenceFile createWriter

List of usage examples for org.apache.hadoop.io.SequenceFile.createWriter

Introduction

On this page you can find example usages of org.apache.hadoop.io.SequenceFile.createWriter.

Prototype

@Deprecated
public static Writer createWriter(Configuration conf, FSDataOutputStream out, Class keyClass, Class valClass,
        CompressionType compressionType, CompressionCodec codec) throws IOException 

Source Link

Document

Construct the preferred type of 'raw' SequenceFile Writer.

Usage

From source file:at.illecker.hama.hybrid.examples.onlinecf.OnlineCFHybridBenchmark.java

License:Apache License

/**
 * Converts a delimited text file of ratings into a SequenceFile of user preferences.
 *
 * <p>Any existing {@code in} and {@code preferencesIn} paths are deleted recursively first. Each
 * input line is split on {@code separator}; its first three fields are parsed as user id, item id
 * and rating. One record is appended per line: the key is the user id and the value is the vector
 * {@code [itemId, rating]}.
 *
 * @param conf configuration used to create the SequenceFile writer
 * @param fs file system on which {@code in} and {@code preferencesIn} live
 * @param in path that is deleted if it exists
 * @param preferencesIn path of the SequenceFile to (re)create
 * @param inputFile name of the text file to read
 * @param separator regular expression handed to {@link String#split(String)}
 * @param maxTestPrefs bound on the number of collected test entries (see the note in the loop)
 * @return the collected test entries, each as {@code {userId, itemId, rating}}
 * @throws IOException if deleting, reading or writing fails
 */
public static List<double[]> convertInputData(Configuration conf, FileSystem fs, Path in, Path preferencesIn,
        String inputFile, String separator, int maxTestPrefs) throws IOException {

    List<double[]> testItems = new ArrayList<double[]>();

    // Delete input files if already exist
    if (fs.exists(in)) {
        fs.delete(in, true);
    }
    if (fs.exists(preferencesIn)) {
        fs.delete(preferencesIn, true);
    }

    // try-with-resources guarantees that the writer and the reader are released even when the
    // input file cannot be opened, a line fails to parse, or an append fails.
    try (SequenceFile.Writer prefWriter = SequenceFile.createWriter(fs, conf, preferencesIn,
            LongWritable.class, PipesVectorWritable.class, CompressionType.NONE);
            BufferedReader br = new BufferedReader(new FileReader(inputFile))) {

        String line;
        while ((line = br.readLine()) != null) {
            String[] values = line.split(separator);
            long userId = Long.parseLong(values[0]);
            long itemId = Long.parseLong(values[1]);
            double rating = Double.parseDouble(values[2]);

            double[] vector = new double[] { itemId, rating };
            prefWriter.append(new LongWritable(userId), new PipesVectorWritable(new DenseDoubleVector(vector)));

            // Add test preferences.
            // The counter is decremented before it is tested, so at most maxTestPrefs - 1 lines
            // are collected. NOTE(review): confirm whether exactly maxTestPrefs was intended.
            maxTestPrefs--;
            if (maxTestPrefs > 0) {
                testItems.add(new double[] { userId, itemId, rating });
            }
        }
    }

    return testItems;
}

From source file:at.illecker.hama.hybrid.examples.onlinecf.OnlineCFTrainHybridBSP.java

License:Apache License

/**
 * Writes a small, hard-coded training set to a SequenceFile and returns a hard-coded test set.
 *
 * <p>Any existing {@code in} and {@code preferencesIn} paths are deleted recursively first. For
 * every training preference one record is appended: the key is the user id and the value is the
 * vector {@code [itemId, value]}.
 *
 * @param conf configuration used to create the SequenceFile writer
 * @param fs file system on which {@code in} and {@code preferencesIn} live
 * @param in path that is deleted if it exists
 * @param preferencesIn path of the SequenceFile to (re)create
 * @return five test preferences for user 1 (items 0 to 4)
 * @throws IOException if deleting or writing fails
 */
public static List<Preference<Long, Long>> prepareTestInputData(Configuration conf, FileSystem fs, Path in,
        Path preferencesIn) throws IOException {

    // A raw array is used because Java does not allow creating arrays of a parameterized type.
    Preference[] train_prefs = { new Preference<Integer, Integer>(1, 0, 4),
            new Preference<Integer, Integer>(1, 1, 2.5), new Preference<Integer, Integer>(1, 2, 3.5),

            new Preference<Integer, Integer>(2, 0, 4), new Preference<Integer, Integer>(2, 1, 2.5),
            new Preference<Integer, Integer>(2, 2, 3.5), new Preference<Integer, Integer>(2, 3, 1),
            new Preference<Integer, Integer>(2, 4, 3.5),

            new Preference<Integer, Integer>(3, 0, 4), new Preference<Integer, Integer>(3, 1, 2.5),
            new Preference<Integer, Integer>(3, 2, 3.5), new Preference<Integer, Integer>(3, 3, 1),
            new Preference<Integer, Integer>(3, 4, 3.5) };

    List<Preference<Long, Long>> test_prefs = new ArrayList<Preference<Long, Long>>();
    test_prefs.add(new Preference<Long, Long>(1l, 0l, 4));
    test_prefs.add(new Preference<Long, Long>(1l, 1l, 2.5));
    test_prefs.add(new Preference<Long, Long>(1l, 2l, 3.5));
    test_prefs.add(new Preference<Long, Long>(1l, 3l, 1));
    test_prefs.add(new Preference<Long, Long>(1l, 4l, 3.5));

    // Delete input files if already exist
    if (fs.exists(in)) {
        fs.delete(in, true);
    }
    if (fs.exists(preferencesIn)) {
        fs.delete(preferencesIn, true);
    }

    // try-with-resources closes the writer even when an append fails.
    try (SequenceFile.Writer prefWriter = SequenceFile.createWriter(fs, conf, preferencesIn,
            LongWritable.class, PipesVectorWritable.class, CompressionType.NONE)) {
        for (Preference<Integer, Integer> taste : train_prefs) {
            double[] values = new double[2];
            values[0] = taste.getItemId();
            values[1] = taste.getValue().get();
            prefWriter.append(new LongWritable(taste.getUserId()),
                    new PipesVectorWritable(new DenseDoubleVector(values)));
        }
    }

    return test_prefs;
}

From source file:at.illecker.hama.hybrid.examples.onlinecf.OnlineCFTrainHybridBSP.java

License:Apache License

/**
 * Generates random, unique (user, item) ratings and writes them to one SequenceFile per task.
 *
 * <p>The directory {@code in} is deleted recursively if it exists. The number of ratings is
 * {@code userCount * itemCount * percentNonZeroValues / 100}. For each of the {@code numBspTask}
 * partitions a file {@code part<N>.seq} is created under {@code in}; every record has the user id
 * as key and the vector {@code [itemId, rating]} as value, with ratings drawn from 1 to 5. The
 * random generator uses a fixed seed, so the same arguments produce the same sequence.
 *
 * @param conf configuration used to create the SequenceFile writers
 * @param fs file system on which {@code in} lives
 * @param in output directory, deleted first if it exists
 * @param numBspTask total number of partitions to write
 * @param numGPUBspTask number of those partitions meant for GPU tasks
 * @param userCount exclusive upper bound for generated user ids
 * @param itemCount exclusive upper bound for generated item ids
 * @param percentNonZeroValues percentage of all (user, item) pairs that receive a rating
 * @param GPUPercentage percentage of the ratings assigned to the GPU share (used only when
 *        {@code numGPUBspTask > 0} and the value is in 1..100)
 * @param maxTestPrefs ratings whose running index is below this value are also returned
 * @return the ratings selected as test data
 * @throws IOException if deleting or writing fails
 */
public static List<Preference<Long, Long>> generateRandomInputData(Configuration conf, FileSystem fs, Path in,
        int numBspTask, int numGPUBspTask, int userCount, int itemCount, int percentNonZeroValues,
        int GPUPercentage, int maxTestPrefs) throws IOException {

    // Delete input directory if already exist
    if (fs.exists(in)) {
        fs.delete(in, true);
    }

    Random rand = new Random(32L);
    Set<Map.Entry<Long, Long>> userItemPairs = new HashSet<Map.Entry<Long, Long>>();
    List<Preference<Long, Long>> testItems = new ArrayList<Preference<Long, Long>>();

    int possibleUserItemRatings = userCount * itemCount;
    int userItemRatings = possibleUserItemRatings * percentNonZeroValues / 100;
    System.out.println("generateRandomInputData possibleRatings: " + possibleUserItemRatings + " ratings: "
            + userItemRatings);

    // Compute work distributions
    int cpuTaskNum = numBspTask - numGPUBspTask;
    long ratingsPerGPUTask = 0;
    long ratingsPerCPU = 0;
    long ratingsPerCPUTask = 0;
    if ((numGPUBspTask > 0) && (GPUPercentage > 0) && (GPUPercentage <= 100)) {
        ratingsPerGPUTask = (userItemRatings * GPUPercentage) / 100;
        ratingsPerCPU = userItemRatings - ratingsPerGPUTask;
    } else {
        ratingsPerCPU = userItemRatings;
    }
    if (cpuTaskNum > 0) {
        ratingsPerCPUTask = ratingsPerCPU / cpuTaskNum;
    }

    System.out.println("generateRandomInputData ratingsPerGPUTask: " + ratingsPerGPUTask + " ratingsPerCPU: "
            + ratingsPerCPU + " ratingsPerCPUTask: " + ratingsPerCPUTask);

    for (int part = 0; part < numBspTask; part++) {
        Path partIn = new Path(in, "part" + part + ".seq");

        // try-with-resources closes this partition's writer even when an append fails.
        try (SequenceFile.Writer dataWriter = SequenceFile.createWriter(fs, conf, partIn, LongWritable.class,
                PipesVectorWritable.class, CompressionType.NONE)) {

            // NOTE(review): partitions are numbered from 0, so the partition with index
            // cpuTaskNum still takes the CPU interval here; confirm whether ">=" was intended.
            long interval = (part > cpuTaskNum) ? ratingsPerGPUTask : ratingsPerCPUTask;

            long start = interval * part;
            long end = start + interval - 1;
            if ((numBspTask - 1) == part) {
                // The last partition absorbs the remainder. The range is inclusive, so the
                // final index is userItemRatings - 1; using userItemRatings would request one
                // rating too many and, at 100% density, leave no unused pair to find.
                end = userItemRatings - 1;
            }
            LOG.info("Partition " + part + ": from " + start + " to " + end);

            for (long i = start; i <= end; i++) {

                // Find new user item rating which was not used before
                Map.Entry<Long, Long> userItemPair;
                do {
                    long userId = rand.nextInt(userCount);
                    long itemId = rand.nextInt(itemCount);
                    userItemPair = new AbstractMap.SimpleImmutableEntry<Long, Long>(userId, itemId);
                } while (userItemPairs.contains(userItemPair));

                // Add user item rating
                userItemPairs.add(userItemPair);

                // Generate rating
                int rating = rand.nextInt(5) + 1; // values between 1 and 5

                // Add user item rating to test data
                if (i < maxTestPrefs) {
                    testItems.add(
                            new Preference<Long, Long>(userItemPair.getKey(), userItemPair.getValue(), rating));
                }

                // Write out user item rating
                dataWriter.append(new LongWritable(userItemPair.getKey()), new PipesVectorWritable(
                        new DenseDoubleVector(new double[] { userItemPair.getValue(), rating })));
            }
        }
    }

    return testItems;
}

From source file:at.illecker.hama.hybrid.examples.onlinecf.OnlineCFTrainHybridBSP.java

License:Apache License

/**
 * Converts a delimited text file of ratings into a SequenceFile of user preferences.
 *
 * <p>Any existing {@code in} and {@code preferencesIn} paths are deleted recursively first. Each
 * input line is split on {@code separator}; its first three fields are parsed as user id, item id
 * and rating. One record is appended per line: the key is the user id and the value is the vector
 * {@code [itemId, rating]}.
 *
 * @param conf configuration used to create the SequenceFile writer
 * @param fs file system on which {@code in} and {@code preferencesIn} live
 * @param in path that is deleted if it exists
 * @param preferencesIn path of the SequenceFile to (re)create
 * @param inputFile name of the text file to read
 * @param separator regular expression handed to {@link String#split(String)}
 * @param maxTestPrefs bound on the number of collected test preferences (see the note in the loop)
 * @return the collected test preferences
 * @throws IOException if deleting, reading or writing fails
 */
public static List<Preference<Long, Long>> convertInputData(Configuration conf, FileSystem fs, Path in,
        Path preferencesIn, String inputFile, String separator, int maxTestPrefs) throws IOException {

    List<Preference<Long, Long>> test_prefs = new ArrayList<Preference<Long, Long>>();

    // Delete input files if already exist
    if (fs.exists(in)) {
        fs.delete(in, true);
    }
    if (fs.exists(preferencesIn)) {
        fs.delete(preferencesIn, true);
    }

    // try-with-resources guarantees that the writer and the reader are released even when the
    // input file cannot be opened, a line fails to parse, or an append fails.
    try (SequenceFile.Writer prefWriter = SequenceFile.createWriter(fs, conf, preferencesIn,
            LongWritable.class, PipesVectorWritable.class, CompressionType.NONE);
            BufferedReader br = new BufferedReader(new FileReader(inputFile))) {

        String line;
        while ((line = br.readLine()) != null) {
            String[] values = line.split(separator);
            long userId = Long.parseLong(values[0]);
            long itemId = Long.parseLong(values[1]);
            double rating = Double.parseDouble(values[2]);

            double[] vector = new double[] { itemId, rating };
            prefWriter.append(new LongWritable(userId), new PipesVectorWritable(new DenseDoubleVector(vector)));

            // Add test preferences.
            // The counter is decremented before it is tested, so at most maxTestPrefs - 1 lines
            // are collected. NOTE(review): confirm whether exactly maxTestPrefs was intended.
            maxTestPrefs--;
            if (maxTestPrefs > 0) {
                test_prefs.add(new Preference<Long, Long>(userId, itemId, rating));
            }
        }
    }

    return test_prefs;
}

From source file:at.illecker.hama.hybrid.examples.testrootbeer.TestRootbeerHybridBSP.java

License:Apache License

/**
 * Creates one SequenceFile per BSP task under {@code inputPath} and fills each with the same
 * {@code n} records.
 *
 * <p>The number of files is read from the {@code bsp.peers.num} setting (default 1). Record
 * {@code i} has key {@code i} and a random value in {@code [0, maxVal)}; the identical key/value
 * pair is appended to every file.
 *
 * @param conf configuration providing the task count and used to create the writers
 * @param inputPath directory in which the {@code input<N>.seq} files are created
 * @param n number of records written to each file
 * @param maxVal exclusive upper bound for the random values
 * @throws IOException if a file cannot be created, written or closed
 */
private static void prepareInput(Configuration conf, Path inputPath, int n, int maxVal) throws IOException {
    FileSystem fs = inputPath.getFileSystem(conf);

    // Create input file writers depending on bspTaskNum
    int bspTaskNum = conf.getInt("bsp.peers.num", 1);
    SequenceFile.Writer[] inputWriters = new SequenceFile.Writer[bspTaskNum];
    try {
        for (int i = 0; i < bspTaskNum; i++) {
            Path inputFile = new Path(inputPath, "input" + i + ".seq");
            LOG.info("inputFile: " + inputFile.toString());
            inputWriters[i] = SequenceFile.createWriter(fs, conf, inputFile, IntWritable.class, IntWritable.class,
                    CompressionType.NONE);
        }

        // Write random values to input files
        IntWritable key = new IntWritable();
        IntWritable value = new IntWritable();
        Random r = new Random();
        for (int i = 0; i < n; i++) {
            key.set(i);
            value.set(r.nextInt(maxVal));
            for (int j = 0; j < inputWriters.length; j++) {
                inputWriters[j].append(key, value);
            }
        }
    } finally {
        // Runs on success and on failure so that writers opened before an error are not leaked.
        closeWriters(inputWriters);
    }
}

/**
 * Closes every non-null writer; attempts all of them and rethrows the first close failure.
 */
private static void closeWriters(SequenceFile.Writer[] writers) throws IOException {
    IOException firstFailure = null;
    for (SequenceFile.Writer writer : writers) {
        if (writer == null) {
            continue; // slot was never opened because creation failed earlier
        }
        try {
            writer.close();
        } catch (IOException e) {
            if (firstFailure == null) {
                firstFailure = e;
            }
        }
    }
    if (firstFailure != null) {
        throw firstFailure;
    }
}

From source file:bme.iclef.hadoop.file2seq.TarToSeqFile.java

License:Apache License

/**
 * Opens a SequenceFile writer on the absolute path of {@code outputFile}.
 *
 * <p>The writer is created on the local file system and configuration supplied by {@code setup},
 * with {@code Text} keys, {@code BytesWritable} values and BLOCK compression.
 *
 * @return the newly created writer
 * @throws Exception if the writer cannot be created
 */
private SequenceFile.Writer openOutputFile() throws Exception {
    final String absoluteLocation = outputFile.getAbsolutePath();
    final Path target = new Path(absoluteLocation);
    return SequenceFile.createWriter(
            setup.getLocalFileSystem(),
            setup.getConf(),
            target,
            Text.class,
            BytesWritable.class,
            SequenceFile.CompressionType.BLOCK);
}

From source file:co.cask.tephra.persist.CommitMarkerCodecTest.java

License:Apache License

/**
 * Writes 1000 random commit markers to a fresh log file and checks that they are read back in
 * the same order with the same values.
 */
@Test
public void testRandomCommitMarkers() throws Exception {
    final int markerCount = 1000;
    List<Integer> written = new ArrayList<>();
    Path logPath = new Path(TMP_FOLDER.newFolder().getAbsolutePath(), LOG_FILE);

    // Phase 1: emit the markers, remembering each value that was written.
    try (SequenceFile.Writer out = SequenceFile.createWriter(fs, conf, logPath, LongWritable.class,
            LongWritable.class, SequenceFile.CompressionType.NONE)) {
        int emitted = 0;
        while (emitted < markerCount) {
            int marker = RANDOM.nextInt(Integer.MAX_VALUE);
            CommitMarkerCodec.writeMarker(out, marker);
            written.add(marker);
            emitted++;
        }
        out.hflush();
        out.hsync();
    }

    // Phase 2: decode the log and compare against the remembered values, in order.
    try (SequenceFile.Reader in = new SequenceFile.Reader(fs, logPath, conf);
            CommitMarkerCodec codec = new CommitMarkerCodec()) {
        for (int idx = 0; idx < written.size(); idx++) {
            int expected = written.get(idx);
            Assert.assertEquals(expected, codec.readMarker(in));
        }
    }
}

From source file:co.cask.tephra.persist.CommitMarkerCodecTest.java

License:Apache License

/**
 * Appends a record that carries the commit-marker key with an {@code IncompleteValueBytes} value
 * and checks that reading the marker fails with an {@link EOFException}.
 */
@Test
public void testIncompleteCommitMarker() throws Exception {
    Path logPath = new Path(TMP_FOLDER.newFolder().getAbsolutePath(), LOG_FILE);

    // Write the marker key together with the incomplete value.
    try (SequenceFile.Writer out = SequenceFile.createWriter(fs, conf, logPath, LongWritable.class,
            LongWritable.class, SequenceFile.CompressionType.NONE)) {
        String markerKey = TxConstants.TransactionLog.NUM_ENTRIES_APPENDED;
        SequenceFile.ValueBytes truncatedValue = new IncompleteValueBytes();
        byte[] rawKey = markerKey.getBytes();
        out.appendRaw(rawKey, 0, markerKey.length(), truncatedValue);
        out.hflush();
        out.hsync();
    }

    // Reading it back must end in an EOFException; anything else fails the test.
    try (SequenceFile.Reader in = new SequenceFile.Reader(fs, logPath, conf);
            CommitMarkerCodec codec = new CommitMarkerCodec()) {
        try {
            codec.readMarker(in);
            Assert.fail("Expected EOF Exception to be thrown");
        } catch (EOFException expected) {
            // expected since we didn't write the value bytes
        }
    }
}

From source file:co.cask.tephra.persist.CommitMarkerCodecTest.java

License:Apache License

/**
 * Appends a commit-entries count under a key other than the commit-marker key and checks that
 * reading the marker fails with an {@link IOException}.
 */
@Test
public void testIncorrectCommitMarker() throws Exception {
    Path logPath = new Path(TMP_FOLDER.newFolder().getAbsolutePath(), LOG_FILE);

    // Store a valid count value, but under a key the codec is not expected to accept.
    try (SequenceFile.Writer out = SequenceFile.createWriter(fs, conf, logPath, LongWritable.class,
            LongWritable.class, SequenceFile.CompressionType.NONE)) {
        String wrongKey = "IncorrectKey";
        SequenceFile.ValueBytes countValue = new CommitMarkerCodec.CommitEntriesCount(100);
        byte[] rawKey = wrongKey.getBytes();
        out.appendRaw(rawKey, 0, wrongKey.length(), countValue);
        out.hflush();
        out.hsync();
    }

    // Decoding must be rejected with an IOException; anything else fails the test.
    try (SequenceFile.Reader in = new SequenceFile.Reader(fs, logPath, conf);
            CommitMarkerCodec codec = new CommitMarkerCodec()) {
        try {
            codec.readMarker(in);
            Assert.fail("Expected an IOException to be thrown");
        } catch (IOException expected) {
            // expected
        }
    }
}

From source file:com.asakusafw.runtime.io.sequencefile.SequenceFileUtil.java

License:Apache License

/**
 * Creates a new sequence file writer on top of an arbitrary output stream.
 *
 * <p>The stream is wrapped in an {@code FSDataOutputStream}. When {@code codec} is given the
 * writer uses BLOCK compression with that codec; otherwise it writes uncompressed.
 *
 * @param out the drain
 * @param conf current configuration
 * @param keyClass the key type
 * @param valueClass the value type
 * @param codec the compression codec for block compression, or {@code null} for no compression
 * @return the created sequence file writer
 * @throws IOException if failed to create a sequence file
 * @throws IllegalArgumentException if {@code out}, {@code conf}, {@code keyClass} or
 *         {@code valueClass} is {@code null}
 */
public static SequenceFile.Writer openWriter(OutputStream out, Configuration conf, Class<?> keyClass,
        Class<?> valueClass, CompressionCodec codec) throws IOException {
    if (out == null) {
        throw new IllegalArgumentException("out must not be null"); //$NON-NLS-1$
    }
    if (conf == null) {
        throw new IllegalArgumentException("conf must not be null"); //$NON-NLS-1$
    }
    if (keyClass == null) {
        throw new IllegalArgumentException("keyClass must not be null"); //$NON-NLS-1$
    }
    if (valueClass == null) {
        throw new IllegalArgumentException("valueClass must not be null"); //$NON-NLS-1$
    }
    if (LOG.isDebugEnabled()) {
        // {1} refers to the value class; using {0} twice would print the key class for both.
        LOG.debug(MessageFormat.format("Creating sequence file writer for output (key={0}, value={1})", //$NON-NLS-1$
                keyClass.getName(), valueClass.getName()));
    }
    FSDataOutputStream output = new FSDataOutputStream(out, null);
    if (codec != null) {
        return SequenceFile.createWriter(conf, output, keyClass, valueClass, CompressionType.BLOCK, codec);
    } else {
        return SequenceFile.createWriter(conf, output, keyClass, valueClass, CompressionType.NONE, null);
    }
}