List of usage examples for org.apache.hadoop.fs FileSystem open
public FSDataInputStream open(Path f) throws IOException
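All of the examples below follow the same basic pattern: obtain a FileSystem, open a Path to get an FSDataInputStream, read from it, and close it. A minimal sketch of that pattern (the path name is a placeholder):

    // Minimal usage sketch; "/tmp/example.txt" is a placeholder path.
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    try (FSDataInputStream in = fs.open(new Path("/tmp/example.txt"))) {
        byte[] buf = new byte[4096];
        int n = in.read(buf); // returns the number of bytes read, or -1 at EOF
        System.out.println("read " + n + " bytes");
    }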
From source file:com.ibm.crail.hdfs.tools.HdfsIOBenchmark.java
License:Apache License
public void readRandomHeap() throws Exception {
    System.out.println("reading random file in heap mode " + path);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    FileStatus status = fs.getFileStatus(path);
    FSDataInputStream instream = fs.open(path);
    byte[] buf = new byte[size];
    double sumbytes = 0;
    double ops = 0;
    // restrict random offsets so a full buffer can always be read
    long _range = status.getLen() - ((long) buf.length);
    double range = (double) _range;
    Random random = new Random();
    System.out.println("file capacity " + status.getLen());
    System.out.println("read size " + size);
    System.out.println("operations " + loop);
    long start = System.currentTimeMillis();
    while (ops < loop) {
        double _offset = range * random.nextDouble();
        long offset = (long) _offset;
        instream.seek(offset);
        double ret = (double) this.read(instream, buf);
        if (ret > 0) {
            sumbytes = sumbytes + ret;
            ops = ops + 1.0;
        } else {
            break;
        }
    }
    long end = System.currentTimeMillis();
    double executionTime = ((double) (end - start)) / 1000.0;
    double throughput = 0.0;
    double latency = 0.0;
    double sumbits = sumbytes * 8.0;
    if (executionTime > 0) {
        throughput = sumbits / executionTime / 1024.0 / 1024.0; // Mbit/s
        latency = 1000000.0 * executionTime / ops;              // microseconds per op
    }
    System.out.println("execution time " + executionTime);
    System.out.println("ops " + ops);
    System.out.println("sumbytes " + sumbytes);
    System.out.println("throughput " + throughput);
    System.out.println("latency " + latency);
    System.out.println("closing stream");
    instream.close();
    fs.close();
}
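The benchmark delegates to a private this.read(instream, buf) helper that this listing does not show. A minimal sketch of what such a helper might look like, assuming it fills the buffer completely or stops at EOF (an assumption, not the original CRaiL code):

    // Hypothetical helper: fill buf or stop at EOF; returns bytes read (0 at immediate EOF).
    private int read(FSDataInputStream stream, byte[] buf) throws IOException {
        int off = 0;
        while (off < buf.length) {
            int n = stream.read(buf, off, buf.length - off);
            if (n < 0) {
                break; // EOF
            }
            off += n;
        }
        return off;
    }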
From source file:com.ibm.crail.hdfs.tools.HdfsIOBenchmark.java
License:Apache License
void keyGet() throws Exception {
    System.out.println("key get, path " + path + ", size " + size + ", loop " + loop);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path[] paths = new Path[loop];
    for (int i = 0; i < loop; i++) {
        String child = "" + i;
        paths[i] = new Path(path, child);
        System.out.println("path " + paths[i]);
    }
    // write one file of 'size' bytes per key
    byte[] outBuf = new byte[size];
    for (Path p : paths) {
        FSDataOutputStream outputStream = fs.create(p);
        outputStream.write(outBuf);
        outputStream.close();
    }
    // read each file back into a direct ByteBuffer and measure per-key latency
    long start = System.currentTimeMillis();
    ByteBuffer inBuf = ByteBuffer.allocateDirect(size);
    for (int i = 0; i < loop; i++) {
        Path p = paths[i];
        FSDataInputStream inputStream = fs.open(p);
        inBuf.clear();
        while (inBuf.remaining() > 0) {
            inputStream.read(inBuf);
        }
        inputStream.close();
    }
    long end = System.currentTimeMillis();
    double executionTime = ((double) (end - start));
    double latency = executionTime * 1000.0 / ((double) loop);
    System.out.println("execution time [ms] " + executionTime);
    System.out.println("latency [us] " + latency);
    fs.close();
}
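Note that FSDataInputStream.read(ByteBuffer) only works when the wrapped stream implements ByteBufferReadable; otherwise it throws UnsupportedOperationException. A defensive variant of the read loop (a sketch, not part of the original benchmark; it assumes the file has at least buf.remaining() bytes left) could fall back to a heap buffer:

    // Sketch: read into a ByteBuffer, falling back to a byte[] copy when the
    // underlying stream does not support ByteBufferReadable.
    static void readInto(FSDataInputStream in, ByteBuffer buf) throws IOException {
        try {
            while (buf.remaining() > 0 && in.read(buf) >= 0) {
                // read(ByteBuffer) advances the buffer position itself
            }
        } catch (UnsupportedOperationException e) {
            byte[] tmp = new byte[buf.remaining()];
            in.readFully(tmp); // throws EOFException if the stream ends early
            buf.put(tmp);
        }
    }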
From source file:com.ibm.stocator.fs.swift2d.SwiftTestUtils.java
License:Open Source License
/**
 * Read an object from Swift.
 *
 * @param fs filesystem
 * @param path object path
 * @param len how much to read
 * @return byte array
 * @throws IOException if the object can't be read
 */
public static byte[] readDataset(FileSystem fs, Path path, int len) throws IOException {
    FSDataInputStream in = fs.open(path);
    byte[] dest = new byte[len];
    try {
        in.readFully(0, dest);
    } finally {
        in.close();
    }
    return dest;
}
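The readFully(0, dest) call is a positional read: it fills dest starting at file offset 0 without moving the stream's current position, and throws EOFException if the file is shorter than len. A typical call site might look like this (the path and length are placeholders):

    // Usage sketch: read the first 1024 bytes of an object.
    byte[] header = SwiftTestUtils.readDataset(fs, new Path("/data/object.bin"), 1024);
    System.out.println("first byte: " + header[0]);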
From source file:com.ibm.stocator.fs.swift2d.systemtests.SwiftTestUtils.java
License:Open Source License
/**
 * Read in "length" bytes and convert them to an ASCII string.
 *
 * @param fs filesystem
 * @param path path to read
 * @param length number of bytes to read
 * @return the bytes read, converted to a string
 * @throws IOException on any IO problem
 */
public static String readBytesToString(FileSystem fs, Path path, int length) throws IOException {
    FSDataInputStream in = fs.open(path);
    try {
        byte[] buf = new byte[length];
        in.readFully(0, buf);
        return toChar(buf);
    } finally {
        in.close();
    }
}
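The toChar(buf) helper is defined elsewhere in SwiftTestUtils and is not shown on this page. A plausible sketch of its behavior (an assumption, not the original code) maps each byte to a char:

    // Hypothetical byte-to-string conversion, assuming one char per byte.
    public static String toChar(byte[] buf) {
        StringBuilder builder = new StringBuilder(buf.length);
        for (byte b : buf) {
            builder.append((char) (b & 0xff));
        }
        return builder.toString();
    }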
From source file:com.ikanow.infinit.e.processing.custom.utils.HadoopUtils.java
License:Open Source License
public static BasicDBList getBsonFromTextFiles(CustomMapReduceJobPojo cmr, int nLimit, String fields)
        throws IOException, SAXException, ParserConfigurationException {
    BasicDBList dbl = new BasicDBList();
    PropertiesManager props = new PropertiesManager();
    Configuration conf = getConfiguration(props);
    Path pathDir = HadoopUtils.getPathForJob(cmr, conf, false);
    FileSystem fs = FileSystem.get(conf);
    FileStatus[] files = fs.globStatus(new Path(pathDir.toString() + "/part-*"));
    for (FileStatus file : files) {
        if (file.getLen() > 0) {
            FSDataInputStream in = fs.open(file.getPath());
            BufferedReader bin = new BufferedReader(new InputStreamReader(in));
            for (;;) {
                String s = bin.readLine();
                if (null == s)
                    break;
                // each line is a tab-separated key/value pair emitted by the MR job
                String[] keyValue = s.split("\t", 2);
                BasicDBObject dbo = new BasicDBObject();
                if (keyValue.length > 1) {
                    dbo.put("key", keyValue[0]);
                    dbo.put("value", keyValue[1]);
                } else {
                    dbo.put("value", keyValue[0]);
                }
                dbl.add(dbo);
            }
            in.close();
        }
    }
    return dbl;
}
From source file:com.indeed.imhotep.builder.tsv.EasyIndexBuilderFromTSV.java
License:Apache License
private BufferedReader getInputFileReader(Path inputFile) {
    try {
        final FileSystem hdfs = getHDFS(inputFile);
        final Path qualifiedInputFile = inputFile.makeQualified(hdfs);
        if (!hdfs.exists(inputFile)) {
            throw new RuntimeException("The provided input file doesn't exist " + qualifiedInputFile
                    + "\nFor hdfs files use 'hdfs:' prefix like hdfs:/tmp/file.tsv");
        }
        log.info("Reading TSV data from " + qualifiedInputFile);
        InputStream inputStream = hdfs.open(inputFile);
        // transparently decompress gzipped input
        if (inputFile.getName().endsWith(".gz")) {
            inputStream = new GZIPInputStream(inputStream);
        }
        return new BufferedReader(new InputStreamReader(inputStream, Charsets.UTF_8));
    } catch (IOException e) {
        throw Throwables.propagate(e);
    }
}
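getHDFS(inputFile) is a helper from the surrounding class that this listing does not show. A plausible sketch (an assumption, not Indeed's original code) resolves the filesystem from the path's scheme:

    // Hypothetical helper: resolve the FileSystem for a path's scheme
    // (hdfs:, file:, ...), assuming the default Hadoop configuration.
    private static FileSystem getHDFS(Path path) throws IOException {
        return path.getFileSystem(new org.apache.hadoop.conf.Configuration());
    }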
From source file:com.indeed.imhotep.builder.tsv.KerberosUtils.java
License:Apache License
/**
 * Use for testing keytab logins.
 */
public static void main(String[] args) throws Exception {
    KerberosUtils.loginFromKeytab(new BaseConfiguration());
    final FileSystem fileSystem = FileSystem.get(new org.apache.hadoop.conf.Configuration());
    final Path path = new Path("/CLUSTERNAME");
    if (fileSystem.exists(path)) {
        System.out.println(CharStreams.toString(new InputStreamReader(fileSystem.open(path), Charsets.UTF_8)));
    }
}
From source file:com.inmobi.conduit.AbstractService.java
License:Apache License
protected Table<String, Long, Long> parseCountersFile(FileSystem fs) {
    List<Path> partFiles = listPartFiles(tmpCounterOutputPath, fs);
    if (partFiles == null || partFiles.size() == 0) {
        LOG.warn("No counters files generated by mapred job");
        return null;
    }
    Table<String, Long, Long> result = HashBasedTable.create();
    for (Path filePath : partFiles) {
        FSDataInputStream fin = null;
        Scanner scanner = null;
        try {
            fin = fs.open(filePath);
            scanner = new Scanner(fin);
            while (scanner.hasNext()) {
                String counterNameValue = null;
                try {
                    counterNameValue = scanner.next();
                    // expected token layout: stream, file name, publish time window, message count
                    String tmp[] = counterNameValue.split(ConduitConstants.AUDIT_COUNTER_NAME_DELIMITER);
                    if (tmp.length < 4) {
                        LOG.error("Malformed counter name, skipping " + counterNameValue);
                        continue;
                    }
                    String streamFileNameCombo = tmp[0] + ConduitConstants.AUDIT_COUNTER_NAME_DELIMITER + tmp[1];
                    Long publishTimeWindow = Long.parseLong(tmp[2]);
                    Long numOfMsgs = Long.parseLong(tmp[3]);
                    result.put(streamFileNameCombo, publishTimeWindow, numOfMsgs);
                } catch (Exception e) {
                    LOG.error("Counters file has malformed line with counter name = " + counterNameValue
                            + " ..skipping the line", e);
                }
            }
        } catch (IOException e1) {
            LOG.error("Error while opening file " + filePath + " Skipping");
            continue;
        } finally {
            try {
                if (fin != null) {
                    fin.close();
                }
                if (scanner != null) {
                    scanner.close();
                }
            } catch (Exception e) {
                LOG.warn("Error while closing file " + filePath + " or scanner");
            }
        }
    }
    return result;
}
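For illustration, a well-formed counter token can be built with the same delimiter constant the parser splits on (the stream name, file name, and values here are placeholders):

    // Sketch: construct a token the parser above would accept.
    String token = "stream1" + ConduitConstants.AUDIT_COUNTER_NAME_DELIMITER
            + "file1" + ConduitConstants.AUDIT_COUNTER_NAME_DELIMITER
            + "1388534400000" + ConduitConstants.AUDIT_COUNTER_NAME_DELIMITER
            + "42";
    // splits into 4 fields: stream, file, publish time window, message count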
From source file:com.inmobi.conduit.CompressedFileReaderTest.java
License:Apache License
private void uncompress(String fileName) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs;
    fs = FileSystem.getLocal(conf);
    CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
    CompressionCodec codec = codecFactory.getCodec(new Path(fileName));
    if (codec == null) {
        System.out.println("can't find codec");
        System.exit(1);
    }
    LOG.info("Using compression codec [" + codec.toString() + "]");
    CompressionInputStream is = codec.createInputStream(fs.open(new Path(fileName)));
    OutputStream out = null;
    try {
        // strip the codec's extension (e.g. ".gz") and write an uncompressed copy
        String outputURI = CompressionCodecFactory.removeSuffix(fileName, codec.getDefaultExtension());
        out = fs.create(new Path(outputURI + "-uncompressed"));
        org.apache.hadoop.io.IOUtils.copyBytes(is, out, conf);
    } finally {
        org.apache.hadoop.io.IOUtils.closeStream(out);
        IOUtils.closeStream(is);
    }
}
From source file:com.inmobi.conduit.distcp.tools.FileBasedCopyListing.java
License:Apache License
protected static List<Path> fetchFileList(Path sourceListing, Configuration conf) throws IOException {
    List<Path> result = new ArrayList<Path>();
    FileSystem fs = sourceListing.getFileSystem(conf);
    BufferedReader input = null;
    try {
        input = new BufferedReader(new InputStreamReader(fs.open(sourceListing)));
        // the listing file contains one source path per line
        String line = input.readLine();
        while (line != null) {
            result.add(new Path(line));
            line = input.readLine();
        }
    } finally {
        IOUtils.closeStream(input);
    }
    return result;
}