List of usage examples for org.apache.hadoop.io SequenceFile createWriter
@Deprecated public static Writer createWriter(Configuration conf, FSDataOutputStream out, Class keyClass, Class valClass, CompressionType compressionType, CompressionCodec codec, Metadata metadata) throws IOException
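None of the harvested examples below call this stream-based overload directly, so here is a minimal, self-contained sketch of it. The class name, output path, and metadata entry are hypothetical, and it assumes a default Configuration pointing at a filesystem the process can write to. This overload is deprecated in newer Hadoop releases in favor of the Writer.Option-based createWriter; note also that a writer built on a caller-supplied stream does not close that stream, so the caller remains responsible for it.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.SequenceFile.Metadata;
import org.apache.hadoop.io.SequenceFile.Writer;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.DefaultCodec;

public class CreateWriterStreamExample { // hypothetical class name
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/tmp/example.seq"); // hypothetical output location
        FSDataOutputStream out = fs.create(path);
        try {
            Metadata metadata = new Metadata();
            metadata.set(new Text("creator"), new Text("example")); // arbitrary metadata entry
            Writer writer = SequenceFile.createWriter(conf, out, IntWritable.class, Text.class,
                    CompressionType.RECORD, new DefaultCodec(), metadata);
            writer.append(new IntWritable(1), new Text("hello"));
            writer.close(); // the writer does not own the caller-supplied stream
        } finally {
            out.close(); // so the caller closes it here
        }
    }
}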
From source file: org.apache.sqoop.connector.hdfs.TestHdfsBase.java
License: Apache License
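A test helper that writes a set of UUID-named .seq files of Text keys and NullWritable values, choosing the block-compressed overload when a codec class is supplied and the uncompressed overload otherwise.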
protected void createSequenceInput(String indir, Class<? extends CompressionCodec> clz, int numberOfFiles,
        int numberOfRows) throws IOException, InstantiationException, IllegalAccessException {
    Configuration conf = new Configuration();

    // Instantiate and configure the requested codec, if any.
    CompressionCodec codec = null;
    if (clz != null) {
        codec = clz.newInstance();
        if (codec instanceof Configurable) {
            ((Configurable) codec).setConf(conf);
        }
    }

    int index = 1;
    for (int fi = 0; fi < numberOfFiles; fi++) {
        Path filepath = new Path(indir, UUID.randomUUID() + ".seq");
        SequenceFile.Writer filewriter;
        if (codec != null) {
            // Block-compressed writer when a codec was supplied.
            filewriter = SequenceFile.createWriter(filepath.getFileSystem(conf), conf, filepath, Text.class,
                    NullWritable.class, SequenceFile.CompressionType.BLOCK, codec);
        } else {
            filewriter = SequenceFile.createWriter(filepath.getFileSystem(conf), conf, filepath, Text.class,
                    NullWritable.class, SequenceFile.CompressionType.NONE);
        }

        // Each row becomes a Text key; the NullWritable value slot is unused.
        Text text = new Text();
        for (int ri = 0; ri < numberOfRows; ri++) {
            String row = index + "," + (double) index + ",'" + index + "'";
            text.set(row);
            filewriter.append(text, NullWritable.get());
            index++;
        }
        filewriter.close();
    }
}
From source file: org.apache.tajo.storage.sequencefile.SequenceFileAppender.java
License: Apache License
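Appender initialization that resolves the serializer/deserializer from table metadata, picks key and value classes to match it (BytesWritable pairs for the binary serde, LongWritable/Text otherwise), and creates the writer with BLOCK, RECORD, or NONE compression as configured.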
@Override
public void init() throws IOException {
    os = new NonSyncByteArrayOutputStream(BUFFER_SIZE);
    this.fs = path.getFileSystem(conf);

    // Determine the intermediate file type.
    String store = conf.get(TajoConf.ConfVars.SHUFFLE_FILE_FORMAT.varname,
            TajoConf.ConfVars.SHUFFLE_FILE_FORMAT.defaultVal);
    if (enabledStats
            && CatalogProtos.StoreType.SEQUENCEFILE == CatalogProtos.StoreType.valueOf(store.toUpperCase())) {
        isShuffle = true;
    } else {
        isShuffle = false;
    }

    this.delimiter = StringEscapeUtils.unescapeJava(
            this.meta.getOption(StorageConstants.SEQUENCEFILE_DELIMITER, StorageConstants.DEFAULT_FIELD_DELIMITER))
            .charAt(0);
    this.columnNum = schema.size();

    String nullCharacters = StringEscapeUtils
            .unescapeJava(this.meta.getOption(StorageConstants.SEQUENCEFILE_NULL, NullDatum.DEFAULT_TEXT));
    if (StringUtils.isEmpty(nullCharacters)) {
        nullChars = NullDatum.get().asTextBytes();
    } else {
        nullChars = nullCharacters.getBytes();
    }

    if (!fs.exists(path.getParent())) {
        throw new FileNotFoundException(path.toString());
    }

    if (this.meta.containsOption(StorageConstants.COMPRESSION_CODEC)) {
        String codecName = this.meta.getOption(StorageConstants.COMPRESSION_CODEC);
        codecFactory = new CompressionCodecFactory(conf);
        codec = codecFactory.getCodecByClassName(codecName);
    } else {
        if (fs.exists(path)) {
            throw new AlreadyExistsStorageException(path);
        }
    }

    try {
        String serdeClass = this.meta.getOption(StorageConstants.SEQUENCEFILE_SERDE,
                TextSerializerDeserializer.class.getName());
        serde = (SerializerDeserializer) Class.forName(serdeClass).newInstance();
    } catch (Exception e) {
        LOG.error(e.getMessage(), e);
        throw new IOException(e);
    }

    // The binary serde uses BytesWritable for both key and value;
    // the text serde uses LongWritable keys and Text values.
    Class<? extends Writable> keyClass, valueClass;
    if (serde instanceof BinarySerializerDeserializer) {
        keyClass = BytesWritable.class;
        EMPTY_KEY = new BytesWritable();
        valueClass = BytesWritable.class;
    } else {
        keyClass = LongWritable.class;
        EMPTY_KEY = new LongWritable();
        valueClass = Text.class;
    }

    String type = this.meta.getOption(StorageConstants.COMPRESSION_TYPE, CompressionType.NONE.name());
    if (type.equals(CompressionType.BLOCK.name())) {
        writer = SequenceFile.createWriter(fs, conf, path, keyClass, valueClass, CompressionType.BLOCK, codec);
    } else if (type.equals(CompressionType.RECORD.name())) {
        writer = SequenceFile.createWriter(fs, conf, path, keyClass, valueClass, CompressionType.RECORD, codec);
    } else {
        writer = SequenceFile.createWriter(fs, conf, path, keyClass, valueClass, CompressionType.NONE, codec);
    }

    if (enabledStats) {
        this.stats = new TableStatistics(this.schema);
    }

    super.init();
}
From source file: org.commoncrawl.util.CrawlLogSplitter.java
License: Open Source License
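A utility that splits oversized crawl logs: it streams raw key/value bytes from a SequenceFile.Reader and rolls over to a fresh Snappy block-compressed writer each time the output file passes IDEAL_SIZE.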
public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    // Collect crawl logs that exceed the split threshold.
    FileStatus[] arcFiles = fs.globStatus(new Path("crawl/checkpoint_data/CrawlLog_*"));
    for (FileStatus candidate : arcFiles) {
        if (candidate.getLen() > SPLIT_SIZE) {
            candidateList.add(candidate.getPath());
        }
    }
    LOG.info("Found:" + candidateList.size() + " oversized candidates");

    Path tempOutputDir = new Path(conf.get("mapred.temp.dir", "."));
    while (candidateList.size() != 0) {
        Path candidateName = candidateList.first();
        candidateList.remove(candidateName);
        LOG.info("Processing Candidate:" + candidateName);
        long fileSize = fs.getFileStatus(candidateName).getLen();

        // get crawl log filename components
        ArrayList<Path> splitItems = new ArrayList<Path>();
        int index = 0;
        Path outputPart = buildIncrementalPathGivenPathAndIndex(tempOutputDir, candidateName.getName(), index);
        LOG.info("Initial Output Path is:" + outputPart);
        fs.delete(outputPart, false);

        // create reader
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, candidateName, conf);
        ValueBytes sourceVB = reader.createValueBytes();
        DataOutputBuffer sourceKeyData = new DataOutputBuffer();
        try {
            // ok create temp file
            SequenceFile.Writer activeWriter = SequenceFile.createWriter(fs, conf, outputPart, Text.class,
                    CrawlURL.class, CompressionType.BLOCK, new SnappyCodec());
            // add to split items array
            splitItems.add(outputPart);
            try {
                long recordsWritten = 0;
                while (reader.nextRawKey(sourceKeyData) != -1) {
                    reader.nextRawValue(sourceVB);
                    long lengthPreWrite = activeWriter.getLength();
                    activeWriter.appendRaw(sourceKeyData.getData(), 0, sourceKeyData.getLength(), sourceVB);
                    if (++recordsWritten % 10000 == 0) {
                        LOG.info("Wrote 10000 records");
                    }
                    long lengthPostWrite = activeWriter.getLength();
                    if (lengthPostWrite != lengthPreWrite) {
                        // Roll over to a new split once the current file reaches the target size.
                        if (lengthPostWrite >= IDEAL_SIZE) {
                            LOG.info("Hit Split Point. Flushing File:" + outputPart);
                            activeWriter.close();
                            outputPart = buildIncrementalPathGivenPathAndIndex(tempOutputDir,
                                    candidateName.getName(), ++index);
                            LOG.info("Creating New File:" + outputPart);
                            activeWriter = SequenceFile.createWriter(fs, conf, outputPart, Text.class,
                                    CrawlURL.class, CompressionType.BLOCK, new SnappyCodec());
                            splitItems.add(outputPart);
                        }
                    }
                    sourceKeyData.reset();
                }
            } finally {
                activeWriter.close();
            }
        } finally {
            reader.close();
        }

        LOG.info("Rewrote Source:" + candidateName + " into:" + splitItems.size() + " split files");
        for (Path splitItem : splitItems) {
            Path destPath = new Path("crawl/checkpoint_data", splitItem.getName());
            LOG.info("Moving:" + splitItem + " to:" + destPath);
            fs.rename(splitItem, destPath);
        }
        Path sourceMoveLocation = new Path("crawl/checkpoint_data_split", candidateName.getName());
        LOG.info("Moving SOURCE:" + candidateName + " to:" + sourceMoveLocation);
        fs.rename(candidateName, sourceMoveLocation);
    }
}
From source file: org.springframework.data.hadoop.store.output.AbstractSequenceFileWriter.java
License: Apache License
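A writer factory that returns an uncompressed Text/Text writer when no codec is configured, and otherwise instantiates the configured codec reflectively and creates a record-compressed writer.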
/**
 * Gets the output.
 *
 * @return the output
 * @throws IOException Signals that an I/O exception has occurred.
 */
@SuppressWarnings("deprecation")
protected SequenceFileWriterHolder<Writer> getOutput() throws IOException {
    FileSystem fs = FileSystem.get(getConfiguration());
    SequenceFileWriterHolder<Writer> holder;
    Writer writer;
    CodecInfo codecInfo = getCodec();
    Path p = getResolvedPath();
    if (codecInfo == null) {
        writer = SequenceFile.createWriter(fs, getConfiguration(), getResolvedPath(), Text.class, Text.class,
                CompressionType.NONE, (CompressionCodec) null);
        holder = new SequenceFileWriterHolder<SequenceFile.Writer>(writer, p);
    } else {
        Class<?> clazz = ClassUtils.resolveClassName(codecInfo.getCodecClass(), getClass().getClassLoader());
        CompressionCodec compressionCodec = (CompressionCodec) ReflectionUtils.newInstance(clazz,
                getConfiguration());
        writer = SequenceFile.createWriter(fs, getConfiguration(), getResolvedPath(), Text.class, Text.class,
                CompressionType.RECORD, compressionCodec);
        holder = new SequenceFileWriterHolder<SequenceFile.Writer>(writer, p);
    }
    return holder;
}