Usage examples for org.apache.hadoop.io.SequenceFile.createWriter
@Deprecated public static Writer createWriter(Configuration conf, FSDataOutputStream out, Class keyClass, Class valClass, CompressionType compressionType, CompressionCodec codec, Metadata metadata) throws IOException
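This overload, like the FileSystem-based variants used in the examples below, is deprecated; since Hadoop 2.x the recommended entry point is SequenceFile.createWriter(Configuration, Writer.Option...). A minimal sketch of that replacement API (the path and key/value classes are placeholders, not taken from any example below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.compress.DefaultCodec;

public class SequenceFileWriteExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Writer.Option arguments replace the long deprecated parameter lists.
        SequenceFile.Writer writer = SequenceFile.createWriter(conf,
                SequenceFile.Writer.file(new Path("/tmp/example.seq")), // placeholder path
                SequenceFile.Writer.keyClass(LongWritable.class),
                SequenceFile.Writer.valueClass(BytesWritable.class),
                SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new DefaultCodec()));
        try {
            writer.append(new LongWritable(0L), new BytesWritable(new byte[] { 1, 2, 3 }));
        } finally {
            writer.close();
        }
    }
}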
From source file:com.pinterest.secor.common.FileRegistry.java
License:Apache License
/**
 * Retrieve a writer for a given path or create a new one if it does not exist.
 * @param path The path to retrieve the writer for.
 * @param codec Optional compression codec.
 * @return Writer for the given path.
 * @throws IOException
 */
public SequenceFile.Writer getOrCreateWriter(LogFilePath path, CompressionCodec codec) throws IOException {
    SequenceFile.Writer writer = mWriters.get(path);
    if (writer == null) {
        // Just in case.
        FileUtil.delete(path.getLogFilePath());
        FileUtil.delete(path.getLogFileCrcPath());

        TopicPartition topicPartition = new TopicPartition(path.getTopic(), path.getKafkaPartition());
        HashSet<LogFilePath> files = mFiles.get(topicPartition);
        if (files == null) {
            files = new HashSet<LogFilePath>();
            mFiles.put(topicPartition, files);
        }
        if (!files.contains(path)) {
            files.add(path);
        }

        Configuration config = new Configuration();
        FileSystem fs = FileSystem.get(config);
        Path fsPath = new Path(path.getLogFilePath());
        if (codec != null) {
            writer = SequenceFile.createWriter(fs, config, fsPath, LongWritable.class, BytesWritable.class,
                    SequenceFile.CompressionType.BLOCK, codec);
        } else {
            writer = SequenceFile.createWriter(fs, config, fsPath, LongWritable.class, BytesWritable.class);
        }
        mWriters.put(path, writer);
        mCreationTimes.put(path, System.currentTimeMillis() / 1000L);
        LOG.debug("created writer for path " + path.getLogFilePath());
    }
    return writer;
}
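For context, a hypothetical caller of this registry might look like the fragment below. The names registry, logFilePath, kafkaOffset, and messagePayload are illustrative assumptions, not from the Secor source:

// Hypothetical usage of getOrCreateWriter; the writer's key/value types
// follow the LongWritable/BytesWritable pair created above.
SequenceFile.Writer writer = registry.getOrCreateWriter(logFilePath, new GzipCodec());
writer.append(new LongWritable(kafkaOffset), new BytesWritable(messagePayload));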
From source file:com.pinterest.secor.common.FileRegistryTest.java
License:Apache License
private void createCompressedWriter() throws IOException {
    PowerMockito.mockStatic(FileUtil.class);
    PowerMockito.mockStatic(FileSystem.class);
    FileSystem fs = Mockito.mock(FileSystem.class);
    Mockito.when(FileSystem.get(Mockito.any(Configuration.class))).thenReturn(fs);

    PowerMockito.mockStatic(SequenceFile.class);
    Path fsPath = new Path(PATH_GZ);
    SequenceFile.Writer writer = Mockito.mock(SequenceFile.Writer.class);
    Mockito.when(SequenceFile.createWriter(Mockito.eq(fs), Mockito.any(Configuration.class), Mockito.eq(fsPath),
            Mockito.eq(LongWritable.class), Mockito.eq(BytesWritable.class),
            Mockito.eq(SequenceFile.CompressionType.BLOCK), Mockito.any(GzipCodec.class))).thenReturn(writer);
    Mockito.when(writer.getLength()).thenReturn(123L);

    SequenceFile.Writer createdWriter = mRegistry.getOrCreateWriter(mLogFilePathGz, new GzipCodec());
    assertTrue(createdWriter == writer);
}
From source file:com.pinterest.secor.common.FileRegistryTest.java
License:Apache License
public void testGetOrCreateWriterCompressed() throws Exception {
    createCompressedWriter();
    mRegistry.getOrCreateWriter(mLogFilePathGz, new GzipCodec());

    // Verify that the method has been called exactly once (the default).
    PowerMockito.verifyStatic();
    FileSystem.get(Mockito.any(Configuration.class));

    PowerMockito.verifyStatic();
    FileUtil.delete(PATH_GZ);
    PowerMockito.verifyStatic();
    FileUtil.delete(CRC_PATH);

    Path fsPath = new Path(PATH_GZ);
    PowerMockito.verifyStatic();
    SequenceFile.createWriter(Mockito.any(FileSystem.class), Mockito.any(Configuration.class),
            Mockito.eq(fsPath), Mockito.eq(LongWritable.class), Mockito.eq(BytesWritable.class),
            Mockito.eq(SequenceFile.CompressionType.BLOCK), Mockito.any(GzipCodec.class));

    TopicPartition topicPartition = new TopicPartition("some_topic", 0);
    Collection<TopicPartition> topicPartitions = mRegistry.getTopicPartitions();
    assertEquals(1, topicPartitions.size());
    assertTrue(topicPartitions.contains(topicPartition));

    Collection<LogFilePath> logFilePaths = mRegistry.getPaths(topicPartition);
    assertEquals(1, logFilePaths.size());
    assertTrue(logFilePaths.contains(mLogFilePath));
}
From source file:com.pinterest.secor.io.impl.SequenceFileReaderWriter.java
License:Apache License
public SequenceFileReaderWriter(LogFilePath path, CompressionCodec codec, FileReaderWriter.Type type)
        throws Exception {
    Configuration config = new Configuration();
    Path fsPath = new Path(path.getLogFilePath());
    FileSystem fs = FileUtil.getFileSystem(path.getLogFilePath());

    if (type == FileReaderWriter.Type.Reader) {
        this.mReader = new SequenceFile.Reader(fs, fsPath, config);
        this.mKey = (LongWritable) mReader.getKeyClass().newInstance();
        this.mValue = (BytesWritable) mReader.getValueClass().newInstance();
        this.mWriter = null;
    } else if (type == FileReaderWriter.Type.Writer) {
        if (codec != null) {
            this.mWriter = SequenceFile.createWriter(fs, config, fsPath, LongWritable.class,
                    BytesWritable.class, SequenceFile.CompressionType.BLOCK, codec);
        } else {
            this.mWriter = SequenceFile.createWriter(fs, config, fsPath, LongWritable.class,
                    BytesWritable.class);
        }
        this.mReader = null;
        this.mKey = null;
        this.mValue = null;
    } else {
        throw new IllegalArgumentException("Undefined File Type: " + type);
    }
}
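The reader half of this class pairs naturally with SequenceFile.Reader.next. A minimal read-loop sketch over the same key/value types (a fragment assuming the mReader field above; the offset/payload interpretation is an assumption from the Secor context):

LongWritable key = new LongWritable();
BytesWritable value = new BytesWritable();
while (mReader.next(key, value)) {
    // key.get() would hold the stored long (e.g., a Kafka offset);
    // value.copyBytes() yields the raw message bytes.
}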
From source file:com.pinterest.secor.storage.seqfile.HadoopSequenceFileStorageFactory.java
License:Apache License
@Override
public Writer createWriter(LogFilePath path) throws IOException {
    Configuration config = new Configuration();
    FileSystem fs = FileSystem.get(config);
    Path fsPath = new Path(path.getLogFilePath());

    SequenceFile.Writer writer = null;
    if (mCodec != null) {
        writer = SequenceFile.createWriter(fs, config, fsPath, LongWritable.class, BytesWritable.class,
                SequenceFile.CompressionType.BLOCK, mCodec);
    } else {
        writer = SequenceFile.createWriter(fs, config, fsPath, LongWritable.class, BytesWritable.class);
    }
    return new HadoopSequenceFileWriter(writer);
}
From source file:com.streamsets.pipeline.stage.destination.hdfs.writer.RecordWriterManager.java
License:Apache License
RecordWriter createWriter(FileSystem fs, Path path, long timeToLiveMillis) throws StageException, IOException {
    switch (fileType) {
    case TEXT:
        OutputStream os = fs.create(path, false);
        if (compressionCodec != null) {
            try {
                os = compressionCodec.createOutputStream(os);
            } catch (UnsatisfiedLinkError unsatisfiedLinkError) {
                throw new StageException(Errors.HADOOPFS_46, compressionType.name(), unsatisfiedLinkError,
                        unsatisfiedLinkError);
            }
        }
        return new RecordWriter(path, timeToLiveMillis, os, generatorFactory);
    case SEQUENCE_FILE:
        Utils.checkNotNull(compressionType, "compressionType");
        Utils.checkNotNull(keyEL, "keyEL");
        Utils.checkArgument(compressionCodec == null || compressionType != SequenceFile.CompressionType.NONE,
                "if using a compressionCodec, compressionType cannot be NULL");
        try {
            SequenceFile.Writer writer = SequenceFile.createWriter(fs, hdfsConf, path, Text.class, Text.class,
                    compressionType, compressionCodec);
            return new RecordWriter(path, timeToLiveMillis, writer, keyEL, generatorFactory, context);
        } catch (UnsatisfiedLinkError unsatisfiedLinkError) {
            throw new StageException(Errors.HADOOPFS_46, compressionType.name(), unsatisfiedLinkError,
                    unsatisfiedLinkError);
        }
    default:
        throw new UnsupportedOperationException(Utils.format("Unsupported file Type '{}'", fileType));
    }
}
From source file:com.streamsets.pipeline.stage.destination.hdfs.writer.TestRecordWriter.java
License:Apache License
private void testSequenceFile(boolean useUUIDAsKey) throws Exception {
    String keyEL = (useUUIDAsKey) ? "${uuid()}" : "${record:value('/')}";
    FileSystem fs = getRawLocalFileSystem();
    try {
        Path file = new Path(getTestDir(), "file.txt");
        // The (CompressionCodec) null cast disambiguates the codec-taking overload;
        // with CompressionType.NONE no codec is used.
        SequenceFile.Writer seqFile = SequenceFile.createWriter(fs, new HdfsConfiguration(), file, Text.class,
                Text.class, SequenceFile.CompressionType.NONE, (CompressionCodec) null);
        long timeToLive = 10000;
        long expires = System.currentTimeMillis() + timeToLive;
        RecordWriter writer = new RecordWriter(file, timeToLive, seqFile, keyEL,
                new DummyDataGeneratorFactory(null), ContextInfoCreator.createTargetContext(HdfsDTarget.class,
                        "testWritersLifecycle", false, OnRecordError.TO_ERROR, null));
        Assert.assertFalse(writer.isTextFile());
        Assert.assertTrue(writer.isSeqFile());
        Assert.assertEquals(file, writer.getPath());
        Assert.assertTrue(expires <= writer.getExpiresOn());
        Assert.assertTrue(writer.toString().contains(file.toString()));

        Record record = RecordCreator.create();
        record.set(Field.create("a"));
        writer.write(record);
        record.set(Field.create("z"));
        writer.write(record);
        Assert.assertFalse(writer.isClosed());
        writer.flush();
        Assert.assertTrue(writer.getLength() > 4);
        Assert.assertEquals(2, writer.getRecords());
        writer.close();
        Assert.assertTrue(writer.isClosed());
        try {
            writer.write(record);
            Assert.fail();
        } catch (IOException ex) {
            // NOP
        }

        SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, new HdfsConfiguration());
        Text key = new Text();
        Text value = new Text();
        Assert.assertTrue(reader.next(key, value));
        if (useUUIDAsKey) {
            Assert.assertNotNull(UUID.fromString(key.toString()));
        } else {
            Assert.assertEquals("a", key.toString());
        }
        Assert.assertEquals("a", value.toString().trim());
        Assert.assertTrue(reader.next(key, value));
        if (useUUIDAsKey) {
            Assert.assertNotNull(UUID.fromString(key.toString()));
        } else {
            Assert.assertEquals("z", key.toString());
        }
        Assert.assertEquals("z", value.toString().trim());
        Assert.assertFalse(reader.next(key, value));
        reader.close();
    } finally {
        fs.close();
    }
}
From source file:crush.CrushUtil.java
License:Apache License
protected void sequenceCrush(FileSystem fs, FileStatus[] status) throws IOException, CrushException {
    l4j.info("Sequence file crushing activated");
    Class keyClass = null;
    Class valueClass = null;
    // The writer is created lazily from the first file's key/value classes;
    // this assumes status is non-empty, otherwise writer stays null.
    SequenceFile.Writer writer = null;
    for (FileStatus stat : status) {
        if (reporter != null) {
            reporter.setStatus("Crushing on " + stat.getPath());
            l4j.info("Current file " + stat.getPath());
            l4j.info("length " + stat.getLen());
            reporter.incrCounter(CrushMapper.CrushCounters.FILES_CRUSHED, 1);
        }
        Path p1 = stat.getPath();
        SequenceFile.Reader read = new SequenceFile.Reader(fs, p1, jobConf);
        if (keyClass == null) {
            keyClass = read.getKeyClass();
            valueClass = read.getValueClass();
            writer = SequenceFile.createWriter(fs, jobConf, outPath, keyClass, valueClass,
                    this.compressionType, this.codec);
        } else {
            if (!(keyClass.equals(read.getKeyClass()) && valueClass.equals(read.getValueClass()))) {
                read.close();
                writer.close();
                throw new CrushException("File " + stat.getPath() + " keyClass " + read.getKeyClass()
                        + " valueClass " + read.getValueClassName() + " does not match"
                        + " other files in folder");
            }
        }
        Writable k = (Writable) ReflectionUtils.newInstance(keyClass, jobConf);
        Writable v = (Writable) ReflectionUtils.newInstance(valueClass, jobConf);
        int rowCount = 0;
        while (read.next(k, v)) {
            writer.append(k, v);
            rowCount++;
            if (rowCount % 100000 == 0) {
                if (reporter != null) {
                    reporter.setStatus(stat + " at row " + rowCount);
                    l4j.debug(stat + " at row " + rowCount);
                }
            }
        }
        read.close();
        if (reporter != null) {
            reporter.incrCounter(CrushMapper.CrushCounters.ROWS_WRITTEN, rowCount);
        }
    } // end for
    writer.close();
    l4j.info("crushed file written to " + outPath);
}
From source file:crush.CrushUtilTest.java
License:Apache License
/**
 * Test of crush method, of class CrushUtil.
 */
@Test
public void testCrush() throws Exception {
    Path aFile = new Path(this.tmpRootPath, "filea");
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, aFile, Text.class, Text.class,
            SequenceFile.CompressionType.BLOCK, new org.apache.hadoop.io.compress.DefaultCodec());
    writer.append(new Text("1"), new Text("1"));
    writer.append(new Text("2"), new Text("2"));
    writer.close();

    Path bFile = new Path(this.tmpRootPath, "fileb");
    SequenceFile.Writer writerb = SequenceFile.createWriter(fs, jobConf, bFile, Text.class, Text.class,
            SequenceFile.CompressionType.BLOCK, new org.apache.hadoop.io.compress.DefaultCodec());
    writerb.append(new Text("3"), new Text("4"));
    writerb.append(new Text("5"), new Text("6"));
    writerb.close();

    CrushUtil instance = new CrushUtil();
    instance.setSourcePath(tmpRootPath);
    instance.setOutPath(new Path(tmpRootPath, "crushed_file"));
    instance.setType(CrushUtil.FileType.SEQUENCEFILE);
    instance.crush();

    SequenceFile.Reader read1 = new SequenceFile.Reader(fs, new Path(tmpRootPath, "crushed_file"), jobConf);
    assert (fs.exists(new Path(tmpRootPath, "crushed_file")));
    Text akey = new Text();
    Text avalue = new Text();
    HashMap<String, String> results1 = new HashMap<String, String>();
    int rowCount = 0;
    while (read1.next(akey, avalue)) {
        results1.put(akey.toString(), avalue.toString());
        rowCount++;
    }
    assertEquals(4, rowCount);
    assert (results1.containsKey("1"));
    assertEquals(results1.get("1"), "1");
    assertEquals(results1.get("2"), "2");
    assertEquals(results1.get("5"), "6");
}
From source file:gobblin.metastore.FsStateStore.java
License:Apache License
/**
 * See {@link StateStore#put(String, String, T)}.
 *
 * <p>
 * This implementation does not support putting the state object into an existing store,
 * as append is yet to be supported by the Hadoop SequenceFile (HADOOP-7139).
 * </p>
 */
@Override
public void put(String storeName, String tableName, T state) throws IOException {
    String tmpTableName = this.useTmpFileForPut ? TMP_FILE_PREFIX + tableName : tableName;
    Path tmpTablePath = new Path(new Path(this.storeRootDir, storeName), tmpTableName);
    if (!this.fs.exists(tmpTablePath) && !create(storeName, tmpTableName)) {
        throw new IOException("Failed to create a state file for table " + tmpTableName);
    }

    Closer closer = Closer.create();
    try {
        @SuppressWarnings("deprecation")
        SequenceFile.Writer writer = closer.register(SequenceFile.createWriter(this.fs, this.conf,
                tmpTablePath, Text.class, this.stateClass, SequenceFile.CompressionType.BLOCK,
                new DefaultCodec()));
        writer.append(new Text(Strings.nullToEmpty(state.getId())), state);
    } catch (Throwable t) {
        throw closer.rethrow(t);
    } finally {
        closer.close();
    }

    if (this.useTmpFileForPut) {
        Path tablePath = new Path(new Path(this.storeRootDir, storeName), tableName);
        HadoopUtils.renamePath(this.fs, tmpTablePath, tablePath);
    }
}
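For symmetry, a hedged sketch of reading such a table back with SequenceFile.Reader. This is not FsStateStore code; fs, conf, and tablePath are assumed to be set up as in the put method above, and the value class is instantiated reflectively from the file header:

SequenceFile.Reader reader = new SequenceFile.Reader(fs, tablePath, conf);
try {
    Text id = new Text();
    // The key and value classes are recorded in the SequenceFile header.
    Writable state = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
    while (reader.next(id, state)) {
        // process (id, state)
    }
} finally {
    reader.close();
}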