Example usage for org.apache.hadoop.io SequenceFile createWriter

Introduction

This page collects example usages of org.apache.hadoop.io.SequenceFile.createWriter.

Prototype

@Deprecated
public static Writer createWriter(Configuration conf, FSDataOutputStream out, Class keyClass, Class valClass,
        CompressionType compressionType, CompressionCodec codec, Metadata metadata) throws IOException 

Document

Construct the preferred type of 'raw' SequenceFile Writer.
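
Below is a minimal sketch (not taken from the projects listed under Usage) showing how this deprecated overload can be called. The output path /tmp/example.seq, the LongWritable/BytesWritable key and value types, and the DefaultCodec are assumptions for illustration; the distinguishing feature of the 'raw' overloads is that the caller opens the FSDataOutputStream itself.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.compress.DefaultCodec;

public class RawSequenceFileWriterSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Hypothetical destination; with this overload the stream is opened by the caller.
        Path path = new Path("/tmp/example.seq");
        FSDataOutputStream out = fs.create(path);
        try {
            SequenceFile.Writer writer = SequenceFile.createWriter(conf, out, LongWritable.class,
                    BytesWritable.class, SequenceFile.CompressionType.BLOCK, new DefaultCodec(),
                    new SequenceFile.Metadata());
            writer.append(new LongWritable(1L), new BytesWritable("payload".getBytes()));
            writer.close();
        } finally {
            // Close the stream explicitly since it was opened outside the writer.
            out.close();
        }
    }
}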

Usage

From source file: com.pinterest.secor.common.FileRegistry.java

License: Apache License

/**
 * Retrieve a writer for a given path or create a new one if it does not exist.
 * @param path The path to retrieve writer for.
 * @param codec Optional compression codec.
 * @return Writer for a given path.
 * @throws IOException
 */
public SequenceFile.Writer getOrCreateWriter(LogFilePath path, CompressionCodec codec) throws IOException {
    SequenceFile.Writer writer = mWriters.get(path);
    if (writer == null) {
        // Just in case.
        FileUtil.delete(path.getLogFilePath());
        FileUtil.delete(path.getLogFileCrcPath());
        TopicPartition topicPartition = new TopicPartition(path.getTopic(), path.getKafkaPartition());
        HashSet<LogFilePath> files = mFiles.get(topicPartition);
        if (files == null) {
            files = new HashSet<LogFilePath>();
            mFiles.put(topicPartition, files);
        }
        if (!files.contains(path)) {
            files.add(path);
        }
        Configuration config = new Configuration();
        FileSystem fs = FileSystem.get(config);
        if (codec != null) {
            Path fsPath = new Path(path.getLogFilePath());
            writer = SequenceFile.createWriter(fs, config, fsPath, LongWritable.class, BytesWritable.class,
                    SequenceFile.CompressionType.BLOCK, codec);
        } else {
            Path fsPath = new Path(path.getLogFilePath());
            writer = SequenceFile.createWriter(fs, config, fsPath, LongWritable.class, BytesWritable.class);
        }
        mWriters.put(path, writer);
        mCreationTimes.put(path, System.currentTimeMillis() / 1000L);
        LOG.debug("created writer for path " + path.getLogFilePath());
    }
    return writer;
}

From source file: com.pinterest.secor.common.FileRegistryTest.java

License: Apache License

private void createCompressedWriter() throws IOException {
    PowerMockito.mockStatic(FileUtil.class);

    PowerMockito.mockStatic(FileSystem.class);
    FileSystem fs = Mockito.mock(FileSystem.class);
    Mockito.when(FileSystem.get(Mockito.any(Configuration.class))).thenReturn(fs);

    PowerMockito.mockStatic(SequenceFile.class);
    Path fsPath = new Path(PATH_GZ);
    SequenceFile.Writer writer = Mockito.mock(SequenceFile.Writer.class);
    Mockito.when(SequenceFile.createWriter(Mockito.eq(fs), Mockito.any(Configuration.class), Mockito.eq(fsPath),
            Mockito.eq(LongWritable.class), Mockito.eq(BytesWritable.class),
            Mockito.eq(SequenceFile.CompressionType.BLOCK), Mockito.any(GzipCodec.class))).thenReturn(writer);

    Mockito.when(writer.getLength()).thenReturn(123L);

    SequenceFile.Writer createdWriter = mRegistry.getOrCreateWriter(mLogFilePathGz, new GzipCodec());
    assertTrue(createdWriter == writer);
}

From source file: com.pinterest.secor.common.FileRegistryTest.java

License: Apache License

public void testGetOrCreateWriterCompressed() throws Exception {
    createCompressedWriter();

    mRegistry.getOrCreateWriter(mLogFilePathGz, new GzipCodec());

    // Verify that the method has been called exactly once (the default).
    PowerMockito.verifyStatic();
    FileSystem.get(Mockito.any(Configuration.class));

    PowerMockito.verifyStatic();
    FileUtil.delete(PATH_GZ);
    PowerMockito.verifyStatic();
    FileUtil.delete(CRC_PATH);

    Path fsPath = new Path(PATH_GZ);
    PowerMockito.verifyStatic();
    SequenceFile.createWriter(Mockito.any(FileSystem.class), Mockito.any(Configuration.class),
            Mockito.eq(fsPath), Mockito.eq(LongWritable.class), Mockito.eq(BytesWritable.class),
            Mockito.eq(SequenceFile.CompressionType.BLOCK), Mockito.any(GzipCodec.class));

    TopicPartition topicPartition = new TopicPartition("some_topic", 0);
    Collection<TopicPartition> topicPartitions = mRegistry.getTopicPartitions();
    assertEquals(1, topicPartitions.size());
    assertTrue(topicPartitions.contains(topicPartition));

    Collection<LogFilePath> logFilePaths = mRegistry.getPaths(topicPartition);
    assertEquals(1, logFilePaths.size());
    assertTrue(logFilePaths.contains(mLogFilePath));
}

From source file: com.pinterest.secor.io.impl.SequenceFileReaderWriter.java

License: Apache License

public SequenceFileReaderWriter(LogFilePath path, CompressionCodec codec, FileReaderWriter.Type type)
        throws Exception {
    Configuration config = new Configuration();
    Path fsPath = new Path(path.getLogFilePath());
    FileSystem fs = FileUtil.getFileSystem(path.getLogFilePath());

    if (type == FileReaderWriter.Type.Reader) {
        this.mReader = new SequenceFile.Reader(fs, fsPath, config);
        this.mKey = (LongWritable) mReader.getKeyClass().newInstance();
        this.mValue = (BytesWritable) mReader.getValueClass().newInstance();
        this.mWriter = null;
    } else if (type == FileReaderWriter.Type.Writer) {
        if (codec != null) {
            this.mWriter = SequenceFile.createWriter(fs, config, fsPath, LongWritable.class,
                    BytesWritable.class, SequenceFile.CompressionType.BLOCK, codec);
        } else {
            this.mWriter = SequenceFile.createWriter(fs, config, fsPath, LongWritable.class,
                    BytesWritable.class);
        }
        this.mReader = null;
        this.mKey = null;
        this.mValue = null;
    } else {
        throw new IllegalArgumentException("Undefined File Type: " + type);
    }

}

From source file: com.pinterest.secor.storage.seqfile.HadoopSequenceFileStorageFactory.java

License: Apache License

@Override
public Writer createWriter(LogFilePath path) throws IOException {

    Configuration config = new Configuration();
    FileSystem fs = FileSystem.get(config);

    Path fsPath = new Path(path.getLogFilePath());

    SequenceFile.Writer writer = null;
    if (mCodec != null) {
        writer = SequenceFile.createWriter(fs, config, fsPath, LongWritable.class, BytesWritable.class,
                SequenceFile.CompressionType.BLOCK, mCodec);
    } else {
        writer = SequenceFile.createWriter(fs, config, fsPath, LongWritable.class, BytesWritable.class);
    }

    return new HadoopSequenceFileWriter(writer);
}

From source file: com.streamsets.pipeline.stage.destination.hdfs.writer.RecordWriterManager.java

License: Apache License

RecordWriter createWriter(FileSystem fs, Path path, long timeToLiveMillis) throws StageException, IOException {
    switch (fileType) {
    case TEXT:
        OutputStream os = fs.create(path, false);
        if (compressionCodec != null) {
            try {
                os = compressionCodec.createOutputStream(os);
            } catch (UnsatisfiedLinkError unsatisfiedLinkError) {
                throw new StageException(Errors.HADOOPFS_46, compressionType.name(), unsatisfiedLinkError,
                        unsatisfiedLinkError);
            }
        }
        return new RecordWriter(path, timeToLiveMillis, os, generatorFactory);
    case SEQUENCE_FILE:
        Utils.checkNotNull(compressionType, "compressionType");
        Utils.checkNotNull(keyEL, "keyEL");
        Utils.checkArgument(compressionCodec == null || compressionType != SequenceFile.CompressionType.NONE,
                "if using a compressionCodec, compressionType cannot be NULL");
        try {
            SequenceFile.Writer writer = SequenceFile.createWriter(fs, hdfsConf, path, Text.class, Text.class,
                    compressionType, compressionCodec);
            return new RecordWriter(path, timeToLiveMillis, writer, keyEL, generatorFactory, context);
        } catch (UnsatisfiedLinkError unsatisfiedLinkError) {
            throw new StageException(Errors.HADOOPFS_46, compressionType.name(), unsatisfiedLinkError,
                    unsatisfiedLinkError);
        }
    default:
        throw new UnsupportedOperationException(Utils.format("Unsupported file Type '{}'", fileType));
    }
}

From source file: com.streamsets.pipeline.stage.destination.hdfs.writer.TestRecordWriter.java

License: Apache License

private void testSequenceFile(boolean useUUIDAsKey) throws Exception {
    String keyEL = (useUUIDAsKey) ? "${uuid()}" : "${record:value('/')}";
    FileSystem fs = getRawLocalFileSystem();
    try {
        Path file = new Path(getTestDir(), "file.txt");

        SequenceFile.Writer seqFile = SequenceFile.createWriter(fs, new HdfsConfiguration(), file, Text.class,
                Text.class, SequenceFile.CompressionType.NONE, (CompressionCodec) null);
        long timeToLive = 10000;
        long expires = System.currentTimeMillis() + timeToLive;
        RecordWriter writer = new RecordWriter(file, timeToLive, seqFile, keyEL,
                new DummyDataGeneratorFactory(null), ContextInfoCreator.createTargetContext(HdfsDTarget.class,
                        "testWritersLifecycle", false, OnRecordError.TO_ERROR, null));
        Assert.assertFalse(writer.isTextFile());
        Assert.assertTrue(writer.isSeqFile());
        Assert.assertEquals(file, writer.getPath());
        Assert.assertTrue(expires <= writer.getExpiresOn());
        Assert.assertTrue(writer.toString().contains(file.toString()));
        Record record = RecordCreator.create();
        record.set(Field.create("a"));
        writer.write(record);
        record.set(Field.create("z"));
        writer.write(record);
        Assert.assertFalse(writer.isClosed());
        writer.flush();
        Assert.assertTrue(writer.getLength() > 4);
        Assert.assertEquals(2, writer.getRecords());
        writer.close();
        Assert.assertTrue(writer.isClosed());
        try {
            writer.write(record);
            Assert.fail();
        } catch (IOException ex) {
            //NOP
        }
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, new HdfsConfiguration());
        Text key = new Text();
        Text value = new Text();
        Assert.assertTrue(reader.next(key, value));
        if (useUUIDAsKey) {
            Assert.assertNotNull(UUID.fromString(key.toString()));
        } else {
            Assert.assertEquals("a", key.toString());
        }
        Assert.assertEquals("a", value.toString().trim());
        Assert.assertTrue(reader.next(key, value));
        if (useUUIDAsKey) {
            Assert.assertNotNull(UUID.fromString(key.toString()));

        } else {
            Assert.assertEquals("z", key.toString());
        }
        Assert.assertEquals("z", value.toString().trim());
        Assert.assertFalse(reader.next(key, value));
        reader.close();
    } finally {
        fs.close();
    }
}

From source file: crush.CrushUtil.java

License: Apache License

protected void sequenceCrush(FileSystem fs, FileStatus[] status) throws IOException, CrushException {
    l4j.info("Sequence file crushing activated");
    Class keyClass = null;
    Class valueClass = null;
    SequenceFile.Writer writer = null;
    for (FileStatus stat : status) {
        if (reporter != null) {
            reporter.setStatus("Crushing on " + stat.getPath());
            l4j.info("Current file " + stat.getPath());
            l4j.info("length " + stat.getLen());
            reporter.incrCounter(CrushMapper.CrushCounters.FILES_CRUSHED, 1);
        }
        Path p1 = stat.getPath();
        SequenceFile.Reader read = new SequenceFile.Reader(fs, p1, jobConf);
        if (keyClass == null) {
            keyClass = read.getKeyClass();
            valueClass = read.getValueClass();
            writer = SequenceFile.createWriter(fs, jobConf, outPath, keyClass, valueClass, this.compressionType,
                    this.codec);
        } else {
            if (!(keyClass.equals(read.getKeyClass()) && valueClass.equals(read.getValueClass()))) {
                read.close();
                writer.close();
                throw new CrushException(
                        "File  " + stat.getPath() + " keyClass " + read.getKeyClass() + " valueClass "
                                + read.getValueClassName() + " does not match" + " other files in folder");
            }
        }

        Writable k = (Writable) ReflectionUtils.newInstance(keyClass, jobConf);
        Writable v = (Writable) ReflectionUtils.newInstance(valueClass, jobConf);

        int rowCount = 0;
        while (read.next(k, v)) {

            writer.append(k, v);
            rowCount++;
            if (rowCount % 100000 == 0) {
                if (reporter != null) {
                    reporter.setStatus(stat + " at row " + rowCount);
                    l4j.debug(stat + " at row " + rowCount);
                }
            }
        }
        read.close();
        if (reporter != null) {
            reporter.incrCounter(CrushMapper.CrushCounters.ROWS_WRITTEN, rowCount);
        }
    } // end for
    writer.close();

    l4j.info("crushed file written to " + outPath);
}

From source file: crush.CrushUtilTest.java

License: Apache License

/**
 * Test of crush method, of class CrushUtil.
 */
@Test
public void testCrush() throws Exception {

    Path aFile = new Path(this.tmpRootPath, "filea");
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, aFile, Text.class, Text.class,
            SequenceFile.CompressionType.BLOCK, new org.apache.hadoop.io.compress.DefaultCodec());
    writer.append(new Text("1"), new Text("1"));
    writer.append(new Text("2"), new Text("2"));
    writer.close();

    Path bFile = new Path(this.tmpRootPath, "fileb");
    SequenceFile.Writer writerb = SequenceFile.createWriter(fs, jobConf, bFile, Text.class, Text.class,
            SequenceFile.CompressionType.BLOCK, new org.apache.hadoop.io.compress.DefaultCodec());
    writerb.append(new Text("3"), new Text("4"));
    writerb.append(new Text("5"), new Text("6"));
    writerb.close();

    CrushUtil instance = new CrushUtil();
    instance.setSourcePath(tmpRootPath);
    instance.setOutPath(new Path(tmpRootPath, "crushed_file"));
    instance.setType(CrushUtil.FileType.SEQUENCEFILE);
    instance.crush();

    SequenceFile.Reader read1 = new SequenceFile.Reader(fs, new Path(tmpRootPath, "crushed_file"), jobConf);
    assert (fs.exists(new Path(tmpRootPath, "crushed_file")));
    Text akey = new Text();
    Text avalue = new Text();
    HashMap<String, String> results1 = new HashMap<String, String>();
    int rowCount = 0;
    while (read1.next(akey, avalue)) {
        results1.put(akey.toString(), avalue.toString());
        rowCount++;
    }

    assertEquals(4, rowCount);
    assert (results1.containsKey("1"));
    assertEquals(results1.get("1"), "1");
    assertEquals(results1.get("2"), "2");
    assertEquals(results1.get("5"), "6");

}

From source file: gobblin.metastore.FsStateStore.java

License: Apache License

/**
 * See {@link StateStore#put(String, String, T)}.
 *
 * <p>
 *   This implementation does not support putting the state object into an existing store,
 *   as append is not yet supported by the Hadoop SequenceFile (HADOOP-7139).
 * </p>
 */
@Override
public void put(String storeName, String tableName, T state) throws IOException {
    String tmpTableName = this.useTmpFileForPut ? TMP_FILE_PREFIX + tableName : tableName;
    Path tmpTablePath = new Path(new Path(this.storeRootDir, storeName), tmpTableName);

    if (!this.fs.exists(tmpTablePath) && !create(storeName, tmpTableName)) {
        throw new IOException("Failed to create a state file for table " + tmpTableName);
    }

    Closer closer = Closer.create();
    try {
        @SuppressWarnings("deprecation")
        SequenceFile.Writer writer = closer.register(SequenceFile.createWriter(this.fs, this.conf, tmpTablePath,
                Text.class, this.stateClass, SequenceFile.CompressionType.BLOCK, new DefaultCodec()));
        writer.append(new Text(Strings.nullToEmpty(state.getId())), state);
    } catch (Throwable t) {
        throw closer.rethrow(t);
    } finally {
        closer.close();
    }

    if (this.useTmpFileForPut) {
        Path tablePath = new Path(new Path(this.storeRootDir, storeName), tableName);
        HadoopUtils.renamePath(this.fs, tmpTablePath, tablePath);
    }
}