List of usage examples for org.apache.hadoop.io IOUtils closeStream
public static void closeStream(java.io.Closeable stream)
From source file:edu.bigdata.training.fileformats.compress.SequenceFileWriter.java
public static void main(String[] args) throws IOException { String uri = "output"; Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); Path path = new Path(uri); IntWritable key = new IntWritable(); Text value = new Text(); File infile = new File("src/main/resources/input.txt"); SequenceFile.Writer writer = null; try {//from www .ja v a2 s. com writer = SequenceFile.createWriter(conf, Writer.file(path), Writer.keyClass(key.getClass()), Writer.valueClass(value.getClass()), Writer.bufferSize(fs.getConf().getInt("io.file.buffer.size", 4096)), Writer.replication(fs.getDefaultReplication()), Writer.blockSize(1073741824), Writer.compression(SequenceFile.CompressionType.BLOCK, new DefaultCodec()), Writer.progressable(null), Writer.metadata(new Metadata())); int ctr = 100; List<String> lines = FileUtils.readLines(infile); for (String line : lines) { key.set(ctr++); value.set(line); if (ctr < 150) { System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value); } writer.append(key, value); } } finally { IOUtils.closeStream(writer); } }
From source file:edu.cuhk.hccl.SequenceFileWriter.java
License:Apache License
private static void createSeqFile(File[] files, String seqName) { Configuration conf = new Configuration(); LongWritable key = new LongWritable(); Text value = new Text(); SequenceFile.Writer writer = null; try {/*from w w w . j a v a2 s.c om*/ FileSystem fs = FileSystem.get(URI.create(seqName), conf); writer = SequenceFile.createWriter(fs, conf, new Path(seqName), key.getClass(), value.getClass()); for (File file : files) { //System.out.printf("Processing file: %s \n", file.getPath()); key.set(Integer.parseInt(file.getName().split("_")[1])); value.set(FileUtils.readFileToString(file)); writer.append(key, value); } System.out.printf("[INFO] The sequence file %s has been created for %d files! \n", seqName, files.length); } catch (IOException e) { System.out.println(e.getMessage()); } finally { IOUtils.closeStream(writer); } }
From source file:edu.indiana.d2i.htrc.corpus.analysis.LDAAnalysisDriver.java
License:Apache License
/** * /*from w w w. j a v a 2 s .c om*/ * @param ldaStateFilePath * : HDFS path pointing to lda state file * @return * @throws IOException */ private static boolean isAnalysisConverged(String ldaStateFilePath) throws IOException { Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); SequenceFile.Reader seqFileReader = null; try { seqFileReader = new SequenceFile.Reader(fs, new Path(ldaStateFilePath), conf); Text key = (Text) ReflectionUtils.newInstance(seqFileReader.getKeyClass(), conf); LDAState ldaState = (LDAState) ReflectionUtils.newInstance(seqFileReader.getValueClass(), conf); // the sequence file should only have one record seqFileReader.next(key, ldaState); return LDAAnalyzer.isConverged(ldaState); } finally { IOUtils.closeStream(seqFileReader); } }
From source file:edu.indiana.d2i.htrc.corpus.analysis.LDAAnalysisMapper.java
License:Apache License
@Override protected void setup(Context context) throws IOException, InterruptedException { Configuration conf = context.getConfiguration(); String mappingTableFileName = conf.get("user.args.mapping.table.filename"); String topicsFileName = conf.get("user.args.topics.filename"); BufferedReader reader = null; String line = null;//from w w w . j a v a2 s . c o m // load mapping table try { reader = new BufferedReader(new InputStreamReader(new FileInputStream(mappingTableFileName))); /* * each line is a mapping: <word> <index of the word in full word * set> */ while ((line = reader.readLine()) != null) { String trimmedLine = line.trim(); int idx = trimmedLine.lastIndexOf(' '); mappingTable.add(new MappingTableEntry(trimmedLine.substring(0, idx), Integer.parseInt(trimmedLine.substring(idx + 1)))); } } finally { if (reader != null) reader.close(); } mappingIndices = CorpusProcessingUtils.extractIdxFromMappingTable(mappingTable); reader = null; line = null; // load topics try { reader = new BufferedReader(new InputStreamReader(new FileInputStream(topicsFileName))); /* Each line is a topic */ while ((line = reader.readLine()) != null) { topics.add(line.trim()); } } finally { if (reader != null) reader.close(); } // load LDA state, stateFilePath is the path in HDFS String stateFilePath = conf.get("user.args.lda.state.filepath"); int stepSize = conf.getInt("user.args.topdoctable.capacity.stepsize", Integer.parseInt(Constants.LDA_ANALYSIS_DEFAULT_STEP_SIZE)); if (stateFilePath == null) { // No previous state for initialization (first iteration) ldaAnalyzer = new LDAAnalyzer(mappingTable, topics, stepSize); } else { // second and following iterations FileSystem fs = FileSystem.get(conf); SequenceFile.Reader seqFileReader = null; try { seqFileReader = new SequenceFile.Reader(fs, new Path(stateFilePath), conf); Text key = (Text) ReflectionUtils.newInstance(seqFileReader.getKeyClass(), conf); LDAState ldaState = (LDAState) 
ReflectionUtils.newInstance(seqFileReader.getValueClass(), conf); // the sequence file should only have one record seqFileReader.next(key, ldaState); ldaAnalyzer = new LDAAnalyzer(ldaState.getWordsTopicsTable(), ldaState.getTopicsDocumentsTable(), mappingTable, topics); } finally { IOUtils.closeStream(seqFileReader); } } }
From source file:edu.usc.pgroup.louvain.hadoop.GraphPartitionRecordReader.java
License:Apache License
/**
 * Produces the single record of this reader: the file name of the split as
 * key and the complete content of the split's file as value.
 *
 * @return {@code true} the first time, after the record has been loaded;
 *         {@code false} on every later call
 * @throws IOException if the file cannot be opened or fully read, or if the
 *         split is too large to be held in one byte array
 * @throws InterruptedException declared by the overridden method
 */
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    if (isFinished) {
        return false;
    }

    // A blind (int) cast would wrap around for very large splits and either
    // fail with a negative array size or silently truncate the data.
    long splitLength = split.getLength();
    if (splitLength > Integer.MAX_VALUE) {
        throw new IOException("Split " + split.getPath() + " is too large to buffer in memory: "
                + splitLength + " bytes");
    }
    int fileLength = (int) splitLength;
    byte[] result = new byte[fileLength];

    Path path = split.getPath();
    // Resolve the file system from the path itself: FileSystem.get(conf) yields
    // the default file system, which is wrong when the split lives elsewhere.
    FileSystem fs = path.getFileSystem(conf);
    currentKey = new Text(path.getName());

    FSDataInputStream in = null;
    try {
        in = fs.open(path);
        IOUtils.readFully(in, result, 0, fileLength);
        currentValue.set(result, 0, fileLength);
    } finally {
        IOUtils.closeStream(in);
    }

    this.isFinished = true;
    return true;
}
From source file:eu.edisonproject.utility.commons.WholeFileRecordReader.java
License:Apache License
@Override public boolean nextKeyValue() throws IOException { if (!processed) { byte[] contents = new byte[(int) fileSplit.getLength()]; Path file = fileSplit.getPath(); FileSystem fs = file.getFileSystem(conf); FSDataInputStream in = null;/*from w ww . jav a 2 s.c om*/ try { in = fs.open(file); IOUtils.readFully(in, contents, 0, contents.length); // value.set(contents, 0, contents.length); value.set(new String(contents)); } finally { IOUtils.closeStream(in); } processed = true; return true; } return false; }
From source file:eu.scape_project.tb.lsdr.seqfileutility.SequenceFileWriter.java
License:Apache License
/**
 * Opens a sequence file writer at {@code uri} and then traverses
 * {@code rootDir}.
 *
 * <p>Keys are always {@code Text}; values are {@code Text} in text-line
 * mode and {@code BytesWritable} otherwise. Any exception is logged rather
 * than propagated, and the writer is closed in every case.
 */
@Override
public void run() {
    try {
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        path = new Path(uri);

        Class keyClass = Text.class;
        Class valueClass = pc.isTextlinemode() ? Text.class : BytesWritable.class;

        writer = SequenceFile.createWriter(fs, conf, path, keyClass, valueClass,
                CompressionType.get(pc.getCompressionType()));
        traverseDir(rootDir);
    } catch (Exception e) {
        logger.error(this.getId() + ": " + "IOException occurred", e);
    } finally {
        IOUtils.closeStream(writer);
    }
}
From source file:eu.stratosphere.addons.parquet.ParquetFileSourceTest.java
License:Apache License
/** * In this method we populate a Parquet file using Groups. *///from www . j a v a 2 s . c om private void populateParquetFile(MessageType schema, File f, String[] content) throws IOException { Path path = new Path(f.toURI()); final MessageType finalSchema = schema; // init needs to be overridden as configuration cannot be passed in an // other way. ParquetWriter<Group> parquetWriter = new ParquetWriter<Group>(path, new GroupWriteSupport() { @Override public WriteSupport.WriteContext init(Configuration configuration) { if (configuration.get(GroupWriteSupport.PARQUET_EXAMPLE_SCHEMA) == null) { configuration.set(GroupWriteSupport.PARQUET_EXAMPLE_SCHEMA, finalSchema.toString()); } return super.init(configuration); } }); Group group = new SimpleGroup(schema); try { for (String line : content) { group.append("text_in_line", Binary.fromString(line)); } parquetWriter.write(group); } finally { IOUtils.closeStream(parquetWriter); } }
From source file:eu.stratosphere.addons.parquet.SequenceFileSourceTest.java
License:Apache License
/**
 * Fills a sequence file with the given strings, keyed by their position in
 * the array.
 *
 * @param f       the file to write the sequence file to
 * @param content the values to store; element {@code i} gets key {@code i}
 * @throws IOException if the sequence file cannot be created or written
 */
private void populateSeqFile(File f, String[] content) throws IOException {
    URI location = f.toURI();
    Configuration conf = new JobConf();
    FileSystem fs = FileSystem.get(location, conf);
    Path seqPath = new Path(location);

    IntWritable position = new IntWritable();
    Text text = new Text();
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(fs, conf, seqPath, position.getClass(), text.getClass());

        int next = 0;
        for (String item : content) {
            position.set(next);
            next++;
            text.set(item);
            writer.append(position, text);
        }
    } finally {
        IOUtils.closeStream(writer);
    }
}
From source file:explain.ExplainTask.java
License:Apache License
public int explain(ArrayList<Task<? extends Serializable>> rootTasks, OutputStream outS) { PrintStream out = null;//from w ww.j ava 2s. co m try { out = new PrintStream(outS); // Go over all the tasks and dump out the plans outputStagePlans(out, rootTasks, 0); return (0); } catch (Exception e) { return (1); } finally { IOUtils.closeStream(out); } }