List of usage examples for org.apache.hadoop.fs FileSystem open
public FSDataInputStream open(PathHandle fd) throws IOException
From source file:cascading.tap.hadoop.ZipInputFormat.java
License:Open Source License
/**
 * Decides whether the given file may be split by probing it as a ZIP archive.
 * <p>
 * Splitting is refused outright when {@code isAllowSplits(fs)} is false. Otherwise the
 * file is opened as a ZIP stream and its first entry is requested; a missing first entry
 * or any {@link IOException} while opening/reading is logged and makes the file
 * non-splitable.
 *
 * @param fs   the file system that the file is on
 * @param file the path that represents this file
 * @return {@code true} if splits are allowed and a first ZIP entry could be read,
 *         {@code false} otherwise
 */
protected boolean isSplitable(FileSystem fs, Path file) {
    if (!isAllowSplits(fs)) {
        return false;
    }

    if (LOG.isDebugEnabled()) {
        LOG.debug("verifying ZIP format for file: " + file.toString());
    }

    ZipInputStream probe = null;
    try {
        probe = new ZipInputStream(fs.open(file));

        // An archive without a single entry is treated like an unreadable one.
        if (probe.getNextEntry() == null) {
            throw new IOException("no entries found, empty zip file");
        }

        if (LOG.isDebugEnabled()) {
            LOG.debug("ZIP format verification successful");
        }
        return true;
    } catch (IOException failure) {
        LOG.error("exception encountered while trying to open and read ZIP input stream", failure);
        return false;
    } finally {
        // Runs on every path, including when the stream was never opened (probe == null).
        safeClose(probe);
    }
}
From source file:cascading.tap.hadoop.ZipInputFormat.java
License:Open Source License
private void makeSplits(JobConf job, ArrayList<ZipSplit> splits, FileSystem fs, Path file) throws IOException { ZipInputStream zipInputStream = new ZipInputStream(fs.open(file)); try {//from w w w. j av a 2s. co m ZipEntry zipEntry; while ((zipEntry = zipInputStream.getNextEntry()) != null) { ZipSplit zipSplit = new ZipSplit(file, zipEntry.getName(), zipEntry.getSize()); if (LOG.isDebugEnabled()) LOG.debug(String.format( "creating split for zip entry: %s size: %d method: %s compressed size: %d", zipEntry.getName(), zipEntry.getSize(), ZipEntry.DEFLATED == zipEntry.getMethod() ? "DEFLATED" : "STORED", zipEntry.getCompressedSize())); splits.add(zipSplit); } } finally { safeClose(zipInputStream); } }
From source file:cascading.tap.hadoop.ZipInputFormat.java
License:Open Source License
public RecordReader<LongWritable, Text> getRecordReader(InputSplit genericSplit, JobConf job, Reporter reporter) throws IOException { reporter.setStatus(genericSplit.toString()); ZipSplit split = (ZipSplit) genericSplit; Path file = split.getPath();//from w w w . j av a 2 s . co m long length = split.getLength(); // Set it max value if length is unknown. // Setting length to Max value does not have // a side effect as Record reader would not be // able to read past the actual size of // current entry. length = length == -1 ? Long.MAX_VALUE - 1 : length; FileSystem fs = file.getFileSystem(job); FSDataInputStream inputStream = fs.open(file); if (isAllowSplits(fs)) return getReaderForEntry(inputStream, split, length); else return getReaderForAll(inputStream); }
From source file:cc.solr.lucene.store.hdfs.HdfsFileReader.java
License:Apache License
public static long getLength(FileSystem fileSystem, Path path) throws IOException { FSDataInputStream inputStream = null; try {// w w w . ja v a2s . c om FileStatus fileStatus = fileSystem.getFileStatus(path); inputStream = fileSystem.open(path); long hdfsLength = fileStatus.getLen(); inputStream.seek(hdfsLength - 12); long length = inputStream.readLong(); int version = inputStream.readInt(); if (version != VERSION) { throw new RuntimeException( "Version of file [" + version + "] does not match reader [" + VERSION + "]"); } return length; } finally { if (inputStream != null) { inputStream.close(); } } }
From source file:chaohBIM.ZipFileRecordReader.java
License:Apache License
/** * Initialise and open the ZIP file from the FileSystem */// w w w.j a v a 2s .c o m @Override public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException { FileSplit split = (FileSplit) inputSplit; Configuration conf = taskAttemptContext.getConfiguration(); Path path = split.getPath(); FileSystem fs = path.getFileSystem(conf); // Open the stream fsin = fs.open(path); zip = new ZipInputStream(fsin); zipfilename = path.getName().replaceAll(".zip", ""); //System.out.println(zipfilename); }
From source file:cmd.download.java
License:Apache License
private void mergeToLocalFile(FileSystem fs, Path src, String outPath, Configuration configuration) throws FileNotFoundException, IOException { FileStatus[] status = fs.listStatus(src); Map<String, Path> paths = new TreeMap<String, Path>(); for (FileStatus fileStatus : status) { Path path = fileStatus.getPath(); String pathName = path.getName(); if (pathName.startsWith(Constants.NAME_SECOND)) { paths.put(pathName, path);//from www .j a v a 2s .c o m } } File outFile = new File(outPath, Names.indexId2Node + ".dat"); OutputStream out = new FileOutputStream(outFile); for (String pathName : paths.keySet()) { Path path = new Path(src, paths.get(pathName)); log.debug("Concatenating {} into {}...", path.toUri(), outFile.getAbsoluteFile()); InputStream in = fs.open(new Path(path, Names.indexId2Node + ".dat")); IOUtils.copyBytes(in, out, configuration, false); in.close(); } out.close(); }
From source file:cmd.tdbloader4.java
License:Apache License
private void createOffsetsFile(FileSystem fs, String input, String output) throws IOException { log.debug("Creating offsets file..."); Map<Long, Long> offsets = new TreeMap<Long, Long>(); FileStatus[] status = fs.listStatus(new Path(input)); for (FileStatus fileStatus : status) { Path file = fileStatus.getPath(); if (file.getName().startsWith("part-r-")) { log.debug("Processing: {}", file.getName()); BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(file))); String line = in.readLine(); String[] tokens = line.split("\\s"); long partition = Long.valueOf(tokens[0]); long offset = Long.valueOf(tokens[1]); log.debug("Partition {} has offset {}", partition, offset); offsets.put(partition, offset); }//from w ww .j a v a2 s .c o m } Path outputPath = new Path(output, Constants.OFFSETS_FILENAME); PrintWriter out = new PrintWriter(new OutputStreamWriter(fs.create(outputPath))); for (Long partition : offsets.keySet()) { out.println(partition + "\t" + offsets.get(partition)); } out.close(); log.debug("Offset file created."); }
From source file:cn.jpush.hdfs.mr.example.WordMedian.java
License:Apache License
/** * This is a standard program to read and find a median value based on a * file of word counts such as: 1 456, 2 132, 3 56... Where the first values * are the word lengths and the following values are the number of times * that words of that length appear.//from w w w . ja v a2 s. co m * * @param path * The path to read the HDFS file from * (part-r-00000...00001...etc). * @param medianIndex1 * The first length value to look for. * @param medianIndex2 * The second length value to look for (will be the same as the * first if there are an even number of words total). * @throws IOException * If file cannot be found, we throw an exception. * */ private double readAndFindMedian(String path, int medianIndex1, int medianIndex2, Configuration conf) throws IOException { // FileSystem fs = FileSystem.get(conf);// ? FileSystem fs = new Path(path, "part-r-00000").getFileSystem(conf);// ? Path file = new Path(path, "part-r-00000"); if (!fs.exists(file)) throw new IOException("Output not found!"); BufferedReader br = null; try { br = new BufferedReader(new InputStreamReader(fs.open(file), Charsets.UTF_8)); int num = 0; String line; while ((line = br.readLine()) != null) { StringTokenizer st = new StringTokenizer(line); // grab length String currLen = st.nextToken(); // grab count String lengthFreq = st.nextToken(); int prevNum = num; num += Integer.parseInt(lengthFreq); if (medianIndex2 >= prevNum && medianIndex1 <= num) { System.out.println("The median is: " + currLen); br.close(); return Double.parseDouble(currLen); } else if (medianIndex2 >= prevNum && medianIndex1 < num) { String nextCurrLen = st.nextToken(); double theMedian = (Integer.parseInt(currLen) + Integer.parseInt(nextCurrLen)) / 2.0; System.out.println("The median is: " + theMedian); br.close(); return theMedian; } } } finally { if (br != null) { br.close(); } } // error, no median found return -1; }
From source file:cn.lhfei.hadoop.ch03.FileSystemCat.java
License:Apache License
/**
 * Copies the file named by the first command-line argument to standard output.
 * <p>
 * The argument is used both as the URI that selects the file system and as the path of
 * the file to open. An {@link IOException} is logged rather than propagated, and the
 * input stream is closed on every path.
 *
 * @param args command-line arguments; {@code args[0]} is the URI of the file to print
 */
public static void main(String[] args) {
    Logger log = LoggerFactory.getLogger(FileSystemCat.class);
    String location = args[0];
    Configuration configuration = new Configuration();
    final int bufferSize = 4096;

    InputStream source = null;
    try {
        source = FileSystem.get(URI.create(location), configuration).open(new Path(location));
        // 'false': copyBytes must not close the streams; System.out stays usable.
        IOUtils.copyBytes(source, System.out, bufferSize, false);
    } catch (IOException failure) {
        log.error(failure.getMessage(), failure);
    } finally {
        IOUtils.closeStream(source);
    }
}
From source file:cn.lhfei.hadoop.ch03.FileSystemDoubleCat.java
License:Apache License
public static void main(String[] args) { String uri = args[0];/* w w w.j a v a 2 s .c o m*/ FSDataInputStream in = null; FileSystem fs = null; Configuration conf = new Configuration(); try { fs = FileSystem.get(URI.create(uri), conf); in = fs.open(new Path(uri)); IOUtils.copyBytes(in, System.out, 4096, false); in.seek(0l); // go back to the start of the file IOUtils.copyBytes(in, System.out, 4096, false); } catch (IOException e) { e.printStackTrace(); } }