Example usage for org.apache.hadoop.io IOUtils closeStream

Introduction

On this page you can find example usage for org.apache.hadoop.io IOUtils closeStream.

Prototype

public static void closeStream(java.io.Closeable stream) 

Document

Closes the stream, ignoring any Throwable thrown by close().
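
All of the examples below share one pattern: the stream is closed in a finally block, and closeStream is used so that a null stream or a failure inside close() cannot mask the primary exception. Here is a minimal sketch of that pattern; the input path is hypothetical and used only for illustration:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class CloseStreamExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        FSDataInputStream in = null;
        try {
            // "input.txt" is a hypothetical path used only for illustration.
            in = fs.open(new Path("input.txt"));
            IOUtils.copyBytes(in, System.out, 4096, false);
        } finally {
            // Null-safe: closeStream ignores a null argument and swallows
            // any Throwable thrown by close().
            IOUtils.closeStream(in);
        }
    }
}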

Usage

From source file: edu.bigdata.training.fileformats.compress.SequenceFileWriter.java

public static void main(String[] args) throws IOException {
    String uri = "output";
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path(uri);
    IntWritable key = new IntWritable();
    Text value = new Text();
    File infile = new File("src/main/resources/input.txt");
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(conf, Writer.file(path), Writer.keyClass(key.getClass()),
                Writer.valueClass(value.getClass()),
                Writer.bufferSize(fs.getConf().getInt("io.file.buffer.size", 4096)),
                Writer.replication(fs.getDefaultReplication()), Writer.blockSize(1073741824),
                Writer.compression(SequenceFile.CompressionType.BLOCK, new DefaultCodec()),
                Writer.progressable(null), Writer.metadata(new Metadata()));
        int ctr = 100;
        List<String> lines = FileUtils.readLines(infile);
        for (String line : lines) {
            key.set(ctr++);
            value.set(line);
            if (ctr < 150) {
                System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value);
            }
            writer.append(key, value);
        }
    } finally {
        IOUtils.closeStream(writer);
    }
}

From source file: edu.cuhk.hccl.SequenceFileWriter.java

License: Apache License

private static void createSeqFile(File[] files, String seqName) {
    Configuration conf = new Configuration();
    LongWritable key = new LongWritable();
    Text value = new Text();

    SequenceFile.Writer writer = null;

    try {
        FileSystem fs = FileSystem.get(URI.create(seqName), conf);
        writer = SequenceFile.createWriter(fs, conf, new Path(seqName), key.getClass(), value.getClass());

        for (File file : files) {
            //System.out.printf("Processing file: %s \n", file.getPath());
            key.set(Integer.parseInt(file.getName().split("_")[1]));
            value.set(FileUtils.readFileToString(file));
            writer.append(key, value);
        }
        System.out.printf("[INFO] The sequence file %s has been created for %d files! \n", seqName,
                files.length);

    } catch (IOException e) {
        System.out.println(e.getMessage());
    } finally {
        IOUtils.closeStream(writer);
    }
}

From source file: edu.indiana.d2i.htrc.corpus.analysis.LDAAnalysisDriver.java

License: Apache License

/**
 * Checks whether the LDA analysis recorded in the state file has converged.
 *
 * @param ldaStateFilePath
 *            HDFS path pointing to the LDA state file
 * @return true if the analysis has converged
 * @throws IOException
 */
private static boolean isAnalysisConverged(String ldaStateFilePath) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    SequenceFile.Reader seqFileReader = null;

    try {
        seqFileReader = new SequenceFile.Reader(fs, new Path(ldaStateFilePath), conf);

        Text key = (Text) ReflectionUtils.newInstance(seqFileReader.getKeyClass(), conf);
        LDAState ldaState = (LDAState) ReflectionUtils.newInstance(seqFileReader.getValueClass(), conf);

        // the sequence file should only have one record
        seqFileReader.next(key, ldaState);

        return LDAAnalyzer.isConverged(ldaState);

    } finally {
        IOUtils.closeStream(seqFileReader);
    }
}

From source file: edu.indiana.d2i.htrc.corpus.analysis.LDAAnalysisMapper.java

License: Apache License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    String mappingTableFileName = conf.get("user.args.mapping.table.filename");
    String topicsFileName = conf.get("user.args.topics.filename");

    BufferedReader reader = null;
    String line = null;

    // load mapping table
    try {
        reader = new BufferedReader(new InputStreamReader(new FileInputStream(mappingTableFileName)));

        /*
         * each line is a mapping: <word> <index of the word in full word
         * set>
         */
        while ((line = reader.readLine()) != null) {
            String trimmedLine = line.trim();
            int idx = trimmedLine.lastIndexOf(' ');
            mappingTable.add(new MappingTableEntry(trimmedLine.substring(0, idx),
                    Integer.parseInt(trimmedLine.substring(idx + 1))));

        }

    } finally {
        if (reader != null)
            reader.close();
    }

    mappingIndices = CorpusProcessingUtils.extractIdxFromMappingTable(mappingTable);

    reader = null;
    line = null;

    // load topics
    try {
        reader = new BufferedReader(new InputStreamReader(new FileInputStream(topicsFileName)));

        /* Each line is a topic */
        while ((line = reader.readLine()) != null) {
            topics.add(line.trim());
        }

    } finally {
        if (reader != null)
            reader.close();
    }

    // load LDA state, stateFilePath is the path in HDFS
    String stateFilePath = conf.get("user.args.lda.state.filepath");
    int stepSize = conf.getInt("user.args.topdoctable.capacity.stepsize",
            Integer.parseInt(Constants.LDA_ANALYSIS_DEFAULT_STEP_SIZE));

    if (stateFilePath == null) {
        // No previous state for initialization (first iteration)
        ldaAnalyzer = new LDAAnalyzer(mappingTable, topics, stepSize);
    } else {
        // second and following iterations
        FileSystem fs = FileSystem.get(conf);
        SequenceFile.Reader seqFileReader = null;

        try {
            seqFileReader = new SequenceFile.Reader(fs, new Path(stateFilePath), conf);

            Text key = (Text) ReflectionUtils.newInstance(seqFileReader.getKeyClass(), conf);
            LDAState ldaState = (LDAState) ReflectionUtils.newInstance(seqFileReader.getValueClass(), conf);

            // the sequence file should only have one record
            seqFileReader.next(key, ldaState);

            ldaAnalyzer = new LDAAnalyzer(ldaState.getWordsTopicsTable(), ldaState.getTopicsDocumentsTable(),
                    mappingTable, topics);
        } finally {
            IOUtils.closeStream(seqFileReader);
        }

    }

}

From source file: edu.usc.pgroup.louvain.hadoop.GraphPartitionRecordReader.java

License: Apache License

@Override
public boolean nextKeyValue() throws IOException, InterruptedException {

    if (isFinished) {
        return false;
    }

    int fileLength = (int) split.getLength();
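    // The whole partition is buffered in memory; the int cast assumes
    // the split is smaller than 2 GB.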
    byte[] result = new byte[fileLength];

    FileSystem fs = FileSystem.get(conf);
    Path path = split.getPath();
    currentKey = new Text(path.getName());
    FSDataInputStream in = null;
    try {
        in = fs.open(split.getPath());
        IOUtils.readFully(in, result, 0, fileLength);
        currentValue.set(result, 0, fileLength);

    } finally {
        IOUtils.closeStream(in);
    }

    this.isFinished = true;
    return true;
}

From source file: eu.edisonproject.utility.commons.WholeFileRecordReader.java

License: Apache License

@Override
public boolean nextKeyValue() throws IOException {
    if (!processed) {
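        // Read the entire file as a single record; assumes it fits in memory.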
        byte[] contents = new byte[(int) fileSplit.getLength()];

        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(conf);

        FSDataInputStream in = null;
        try {
            in = fs.open(file);
            IOUtils.readFully(in, contents, 0, contents.length);
            //        value.set(contents, 0, contents.length);
            value.set(new String(contents));
        } finally {
            IOUtils.closeStream(in);
        }
        processed = true;
        return true;
    }
    return false;
}

From source file: eu.scape_project.tb.lsdr.seqfileutility.SequenceFileWriter.java

License: Apache License

@Override
public void run() {
    try {
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        path = new Path(uri);
        Class keyClass = Text.class;
        Class valueClass = BytesWritable.class;
        if (pc.isTextlinemode()) {
            keyClass = Text.class;
            valueClass = Text.class;
        }
        writer = SequenceFile.createWriter(fs, conf, path, keyClass, valueClass,
                CompressionType.get(pc.getCompressionType()));
        traverseDir(rootDir);
    } catch (Exception e) {
        logger.error(this.getId() + ": " + "IOException occurred", e);
    } finally {
        IOUtils.closeStream(writer);
    }
}

From source file: eu.stratosphere.addons.parquet.ParquetFileSourceTest.java

License: Apache License

/**
 * In this method we populate a Parquet file using Groups.
 */
private void populateParquetFile(MessageType schema, File f, String[] content) throws IOException {
    Path path = new Path(f.toURI());
    final MessageType finalSchema = schema;

    // init needs to be overridden as configuration cannot be passed in an
    // other way.
    ParquetWriter<Group> parquetWriter = new ParquetWriter<Group>(path, new GroupWriteSupport() {
        @Override
        public WriteSupport.WriteContext init(Configuration configuration) {
            if (configuration.get(GroupWriteSupport.PARQUET_EXAMPLE_SCHEMA) == null) {
                configuration.set(GroupWriteSupport.PARQUET_EXAMPLE_SCHEMA, finalSchema.toString());
            }
            return super.init(configuration);
        }
    });

    Group group = new SimpleGroup(schema);
    try {
        for (String line : content) {
            group.append("text_in_line", Binary.fromString(line));
        }
        parquetWriter.write(group);
    } finally {
        IOUtils.closeStream(parquetWriter);
    }
}

From source file: eu.stratosphere.addons.parquet.SequenceFileSourceTest.java

License: Apache License

private void populateSeqFile(File f, String[] content) throws IOException {
    URI uri = f.toURI();
    Configuration conf = new JobConf();
    FileSystem fs = FileSystem.get(uri, conf);
    Path path = new Path(uri);
    IntWritable key = new IntWritable();
    Text value = new Text();
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass());
        for (int i = 0; i < content.length; i++) {
            key.set(i);
            value.set(content[i]);
            writer.append(key, value);
        }
    } finally {
        IOUtils.closeStream(writer);
    }
}

From source file: explain.ExplainTask.java

License: Apache License

public int explain(ArrayList<Task<? extends Serializable>> rootTasks, OutputStream outS) {

    PrintStream out = null;
    try {
        out = new PrintStream(outS);
        // Go over all the tasks and dump out the plans
        outputStagePlans(out, rootTasks, 0);

        return (0);
    } catch (Exception e) {
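        // Any failure while dumping the plan is reported only through the
        // non-zero return code; the exception itself is swallowed.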

        return (1);
    } finally {
        IOUtils.closeStream(out);
    }
}