List of usage examples for org.apache.hadoop.io IOUtils closeStream
public static void closeStream(java.io.Closeable stream)
From source file:pl.edu.icm.coansys.disambiguation.author.PigScriptExtractor.java
License:Open Source License
public static LinkedList<String> extract(String filename) throws FileNotFoundException, IOException { LinkedList<String> script = new LinkedList<String>(); FileReader fr = null;//from ww w .ja va2s . c om BufferedReader in = null; try { fr = new FileReader(filename); in = new BufferedReader(fr); String line; linesLoop: while ((line = in.readLine()) != null) { if (line.length() == 0) continue; for (String prefix : skipPrefixes) { if (line.startsWith(prefix)) { continue linesLoop; } } script.add(line); } } finally { IOUtils.closeStream(in); IOUtils.closeStream(fr); } return script; }
From source file:smile.wide.AttributeValueHistogram.java
License:Apache License
@Override public int run(String[] arg) throws Exception { if (arg.length < 2) { s_logger.fatal("Usage: AttributeValueHistogram <infile> <outfile>"); // TODO: return an error code? }/*from w w w . j a v a2 s . com*/ s_logger.debug("Got " + arg.length + " arguments"); inPath_ = arg[0]; s_logger.info("Input path is " + inPath_); // parse the key-value arguments passed - by now these are the arguments // specific to AttributeValueHistogram for (int i = 1; i < arg.length; ++i) { String[] tokens = arg[i].split("="); if (tokens.length != 2) { s_logger.fatal("Can't parse argument" + arg[i]); } if (tokens[0].equals("xdata.bayesnets.datasetreader.class")) { readerClass_ = tokens[1].trim(); s_logger.debug("Set reader class to " + readerClass_); } else if (tokens[0].equals("xdata.bayesnets.datasetreader.filter")) { readerFilter_ = tokens[1].trim(); s_logger.debug("Set reader filter to " + readerFilter_); } else if (tokens[0].equals("xdata.bayesnets.datasetreader.instid")) { readerInstID_ = tokens[1].trim(); s_logger.debug("Set reader's instance ID column to " + readerInstID_); } else if (tokens[0].equals("xdata.bayesnets.datasetreader.variablenames")) { variableNames_ = tokens[1].trim(); s_logger.debug("Set reader's variable names to " + variableNames_); } else { s_logger.warn("Unknown argument " + arg[i]); } } conf_ = getConf(); // pass the reader class to the mapper, in jobconf // TODO: use setClass here - fails early if wrong, not in the mapper conf_.set("xdata.bayesnets.datasetreader.class", readerClass_); conf_.set("xdata.bayesnets.datasetreader.filter", readerFilter_); // conf_.set("xdata.bayesnets.datasetreader.instid", readerInstID_); // not used conf_.set("xdata.bayesnets.datasetreader.variablenames", variableNames_); conf_.setBoolean("mapred.compress.map.output", true); // compress intermediate data conf_.set("mapred.output.compression.type", CompressionType.BLOCK.toString()); // by block, to keep splittable conf_.setClass("mapred.map.output.compression.codec", GzipCodec.class, CompressionCodec.class); // for debugging conf_.set("keep.failed.task.files", "true"); conf_.set("keep.failed.task.pattern", "*"); Job job = new Job(conf_); job.setJarByClass(AttributeValueHistogram.class); // use this jar job.setJobName("Collect value histograms by attribute"); FileInputFormat.addInputPath(job, new Path(inPath_)); int rnd = (new Random()).nextInt(); lastWorkingDir_ = job.getWorkingDirectory().toUri(); s_logger.info("Job working directory is " + lastWorkingDir_); String tempDirName = job.getWorkingDirectory() + "/tmp/attvalhist" + rnd + ".tmp"; s_logger.info("Temp files in directory " + tempDirName); FileOutputFormat.setOutputPath(job, new Path(tempDirName)); job.setMapperClass(AttributeValueHistogramMapper.class); job.setCombinerClass(AttributeValueHistogramReducer.class); job.setReducerClass(AttributeValueHistogramReducer.class); // set both the map and reduce in/out classes job.setOutputKeyClass(Text.class); // the name of the attribute job.setOutputValueClass(MapWritable.class); // Value -> count map job.setOutputFormatClass(SequenceFileOutputFormat.class); // run'em int result = job.waitForCompletion(true) ? 0 : 16; // retain the temp file, collect the output attributeValues_ = new TreeMap<String, Map<String, Integer>>(); FileSystem fs = FileSystem.get(conf_); SequenceFile.Reader reader = null; Path resPath = new Path(tempDirName); FileStatus[] stats = fs.listStatus(resPath); // read all output files for (FileStatus stat : stats) { if (stat.getPath().toUri().toString().contains("part-r-")) try { s_logger.info("Reading results from " + stat.getPath()); reader = new SequenceFile.Reader(fs, stat.getPath(), conf_); // Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf_); // MapWritable value = (MapWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf_); Text key = new Text(); MapWritable value = new MapWritable(); while (reader.next(key, value)) { TreeMap<String, Integer> valueCounts = new TreeMap<String, Integer>(); for (Writable attValue : value.keySet()) { valueCounts.put(((Text) attValue).toString(), ((IntWritable) (value.get(attValue))).get()); } attributeValues_.put(key.toString(), valueCounts); } } finally { IOUtils.closeStream(reader); } } fs.deleteOnExit(resPath); return result; }
From source file:smile.wide.Network.java
License:Apache License
/** * Retrieve the result from file and be ready to return it as an array * from inferenceResult()//from w ww .j ava 2s . c o m */ public void retrieveResult() { FileSystem fs; try { fs = FileSystem.get(conf_); FileStatus[] stats = fs.listStatus(tempDir_); SequenceFile.Reader reader = null; ArrayList<Integer> instids = new ArrayList<Integer>(); ArrayList<float[]> posts = new ArrayList<float[]>(); // read all output files for (FileStatus stat : stats) { if (stat.getPath().toUri().toString().contains("part-r-")) try { logger_.info("Reading results from " + stat.getPath()); reader = new SequenceFile.Reader(fs, stat.getPath(), conf_); IntWritable key = new IntWritable(); DoubleArrayWritable value = new DoubleArrayWritable(); while (reader.next(key, value)) { instids.add(key.get()); DoubleWritable[] unpack = (DoubleWritable[]) value.get(); float[] post = new float[unpack.length]; for (int i = 0; i < unpack.length; ++i) { post[i] = (float) unpack[i].get(); } posts.add(post); } } finally { IOUtils.closeStream(reader); } } // copy over the result assert (instids.size() == posts.size()); instanceIDs_ = new int[instids.size()]; posteriors_ = new float[posts.size()][]; for (int i = 0; i < instids.size(); ++i) { instanceIDs_[i] = instids.get(i); posteriors_[i] = posts.get(i); } } catch (IOException e) { logger_.error("Could not read in the inference results"); e.printStackTrace(); } }
From source file:tv.icntv.log.crawl.store.HdfsDefaultStore.java
License:Apache License
@Override public void createFile(String name) { if (isNull(name)) { return;//from w w w .jav a 2s .c o m } FSDataOutputStream out = null; FileSystem fileSystem = null; try { Path path = new Path(name); fileSystem = FileSystem.get(configuration); if (fileSystem.exists(path)) { return; } out = fileSystem.create(path); out.flush(); return; } catch (IOException e) { e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. return; } finally { IOUtils.closeStream(out); if (null != fileSystem) { try { fileSystem.close(); } catch (IOException e) { e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. } } } }
From source file:tv.icntv.log.crawl.store.HdfsDefaultStore.java
License:Apache License
@Override public boolean rename(String srcName, String name) { FSDataOutputStream out = null;//from w w w. jav a 2s. c o m FileSystem fileSystem = null; try { fileSystem = FileSystem.get(configuration); if (fileSystem.exists(new Path(srcName))) { return fileSystem.rename(new Path(srcName), new Path(name)); } logger.info("try rename ,but name={} not exist,create file{} ", srcName, name); out = fileSystem.create(new Path(name)); out.flush(); return false; } catch (IOException e) { e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. logger.error("rename error:", e); return false; } finally { if (null != out) { IOUtils.closeStream(out); } if (null != fileSystem) { try { fileSystem.close(); } catch (IOException e) { e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. } } } }
From source file:tv.icntv.log.stb.commons.HadoopUtils.java
License:Apache License
public static boolean createFile(Path path) { FSDataOutputStream out = null;/*from www . j ava2 s . c om*/ FileSystem fileSystem = null; try { fileSystem = FileSystem.get(configuration); if (fileSystem.exists(path)) { logger.info("file {} existed", path.toString()); return false; } out = fileSystem.create(path); out.flush(); return true; } catch (IOException e) { e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. logger.error("create File error!"); return false; } finally { IOUtils.closeStream(out); if (null != fileSystem) { try { fileSystem.close(); } catch (IOException e) { e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. } } } }
From source file:tv.icntv.log.tools.FileApi.java
License:Apache License
@Override public synchronized boolean writeDat(Path[] inputs, final String regular, Path output) { FileSystem fileSystem = null; BufferedReader reader = null; FSDataOutputStream outputStream = null; try {//from w w w. jav a 2s. c om fileSystem = FileSystem.get(conf); // FileStatus[] fileStatuses = fileSystem.listStatus(inputs, new PathFilter() { @Override public boolean accept(Path path) { return path.getName().matches(regular); //To change body of implemented methods use File | Settings | File Templates. } }); if (null == fileStatuses || fileStatuses.length == 0) { System.out.println("null..."); return false; } System.out.println(fileStatuses.length); outputStream = fileSystem.create(output, true, 40960); for (FileStatus status : fileStatuses) { if (regular.endsWith("lzo")) { reader = new BufferedReader(new InputStreamReader( lzopInputStream.createInputStream(fileSystem.open(status.getPath())), "utf-8")); } else { reader = new BufferedReader(new InputStreamReader(fileSystem.open(status.getPath()))); } String line = null; while (null != (line = reader.readLine())) { byte[] lineByte = (line + "\r\n").getBytes("utf-8"); outputStream.write(lineByte, 0, lineByte.length); } } } catch (IOException e) { System.out.println(e); e.printStackTrace(); return false; } finally { IOUtils.closeStream(reader); IOUtils.closeStream(outputStream); IOUtils.closeStream(fileSystem); } return true; }
From source file:utils.ReaderSeqFile.java
public static void main(String[] args) throws IOException, URISyntaxException { //URI uri = new URI("/home/asabater/data_to_cluster"); URI uri = new URI("/home/asabater/Desktop/part-r-00000"); Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(uri, conf); Path path = new Path(uri); FileOutputStream out = new FileOutputStream("/home/asabater/nuevo/cluster"); SequenceFile.Reader reader = null; try {/*from w w w .ja v a 2s . co m*/ reader = new SequenceFile.Reader(fs, path, conf); Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf); ClusterWritable value = (ClusterWritable) //final WeightedPropertyVectorWritable value = new WeightedPropertyVectorWritable(); ReflectionUtils.newInstance(reader.getValueClass(), conf); long position = reader.getPosition(); System.out.println("TRYING:" + reader.getPosition()); while (reader.next(key, value)) { System.out.println(value.getClass()); System.out.printf("%s\t%s\n", key, value.getValue().toString()); position = reader.getPosition(); // beginning of next record } out.close(); } finally { IOUtils.closeStream(reader); } }