Example usage for org.apache.hadoop.fs FileSystem open

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem#open.

Prototype

public FSDataInputStream open(Path f) throws IOException

Document

Opens an FSDataInputStream at the indicated Path.
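
For orientation, here is a minimal, self-contained sketch of the usual open-read-close pattern; the class name, URI, and path are placeholders rather than values taken from the examples below:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class FileSystemOpenSketch {
    public static void main(String[] args) throws IOException {
        // placeholder path; any HDFS (or local) URI would do
        Path path = new Path("hdfs://localhost:9000/tmp/example.txt");
        Configuration conf = new Configuration();
        FileSystem fs = path.getFileSystem(conf);

        FSDataInputStream in = null;
        try {
            in = fs.open(path); // the returned stream is seekable
            IOUtils.copyBytes(in, System.out, 4096, false);
        } finally {
            IOUtils.closeStream(in);
        }
    }
}

The examples below show the same call embedded in real projects.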

Usage

From source file: cascading.tap.hadoop.ZipInputFormat.java

License: Open Source License

/**
 * Return true only if the file is in ZIP format.
 *
 * @param fs   the file system that the file is on
 * @param file the path that represents this file
 * @return is this file splitable?
 */
protected boolean isSplitable(FileSystem fs, Path file) {
    if (!isAllowSplits(fs))
        return false;

    if (LOG.isDebugEnabled())
        LOG.debug("verifying ZIP format for file: " + file.toString());

    boolean splitable = true;
    ZipInputStream zipInputStream = null;

    try {
        zipInputStream = new ZipInputStream(fs.open(file));
        ZipEntry zipEntry = zipInputStream.getNextEntry();

        if (zipEntry == null)
            throw new IOException("no entries found, empty zip file");

        if (LOG.isDebugEnabled())
            LOG.debug("ZIP format verification successful");
    } catch (IOException exception) {
        LOG.error("exception encountered while trying to open and read ZIP input stream", exception);
        splitable = false;
    } finally {
        safeClose(zipInputStream);
    }

    return splitable;
}

From source file: cascading.tap.hadoop.ZipInputFormat.java

License: Open Source License

private void makeSplits(JobConf job, ArrayList<ZipSplit> splits, FileSystem fs, Path file) throws IOException {
    ZipInputStream zipInputStream = new ZipInputStream(fs.open(file));

    try {
        ZipEntry zipEntry;

        while ((zipEntry = zipInputStream.getNextEntry()) != null) {
            ZipSplit zipSplit = new ZipSplit(file, zipEntry.getName(), zipEntry.getSize());

            if (LOG.isDebugEnabled())
                LOG.debug(String.format(
                        "creating split for zip entry: %s size: %d method: %s compressed size: %d",
                        zipEntry.getName(), zipEntry.getSize(),
                        ZipEntry.DEFLATED == zipEntry.getMethod() ? "DEFLATED" : "STORED",
                        zipEntry.getCompressedSize()));

            splits.add(zipSplit);
        }
    } finally {
        safeClose(zipInputStream);
    }
}

From source file: cascading.tap.hadoop.ZipInputFormat.java

License: Open Source License

public RecordReader<LongWritable, Text> getRecordReader(InputSplit genericSplit, JobConf job, Reporter reporter)
        throws IOException {
    reporter.setStatus(genericSplit.toString());

    ZipSplit split = (ZipSplit) genericSplit;
    Path file = split.getPath();
    long length = split.getLength();

    // Set length to the max value if it is unknown. This has no
    // side effect, as the record reader cannot read past the
    // actual size of the current entry.
    length = length == -1 ? Long.MAX_VALUE - 1 : length;

    FileSystem fs = file.getFileSystem(job);

    FSDataInputStream inputStream = fs.open(file);

    if (isAllowSplits(fs))
        return getReaderForEntry(inputStream, split, length);
    else
        return getReaderForAll(inputStream);
}

From source file: cc.solr.lucene.store.hdfs.HdfsFileReader.java

License: Apache License

public static long getLength(FileSystem fileSystem, Path path) throws IOException {
    FSDataInputStream inputStream = null;
    try {
        FileStatus fileStatus = fileSystem.getFileStatus(path);
        inputStream = fileSystem.open(path);
        long hdfsLength = fileStatus.getLen();
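        // the last 12 bytes of the file are an 8-byte length followed by a 4-byte version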
        inputStream.seek(hdfsLength - 12);
        long length = inputStream.readLong();
        int version = inputStream.readInt();
        if (version != VERSION) {
            throw new RuntimeException(
                    "Version of file [" + version + "] does not match reader [" + VERSION + "]");
        }
        return length;
    } finally {
        if (inputStream != null) {
            inputStream.close();
        }
    }
}

From source file: chaohBIM.ZipFileRecordReader.java

License: Apache License

/**
 * Initialise and open the ZIP file from the FileSystem
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) inputSplit;
    Configuration conf = taskAttemptContext.getConfiguration();
    Path path = split.getPath();
    FileSystem fs = path.getFileSystem(conf);

    // Open the stream
    fsin = fs.open(path);
    zip = new ZipInputStream(fsin);

    // strip the ".zip" extension; the argument is a regex, so escape the dot
    zipfilename = path.getName().replaceAll("\\.zip$", "");
}

From source file: cmd.download.java

License: Apache License

private void mergeToLocalFile(FileSystem fs, Path src, String outPath, Configuration configuration)
        throws FileNotFoundException, IOException {
    FileStatus[] status = fs.listStatus(src);
    Map<String, Path> paths = new TreeMap<String, Path>();
    for (FileStatus fileStatus : status) {
        Path path = fileStatus.getPath();
        String pathName = path.getName();
        if (pathName.startsWith(Constants.NAME_SECOND)) {
            paths.put(pathName, path);
        }
    }

    File outFile = new File(outPath, Names.indexId2Node + ".dat");
    OutputStream out = new FileOutputStream(outFile);
    for (String pathName : paths.keySet()) {
        Path path = new Path(src, paths.get(pathName));
        log.debug("Concatenating {} into {}...", path.toUri(), outFile.getAbsoluteFile());
        InputStream in = fs.open(new Path(path, Names.indexId2Node + ".dat"));
        IOUtils.copyBytes(in, out, configuration, false);
        in.close();
    }
    out.close();
}

From source file: cmd.tdbloader4.java

License: Apache License

private void createOffsetsFile(FileSystem fs, String input, String output) throws IOException {
    log.debug("Creating offsets file...");
    Map<Long, Long> offsets = new TreeMap<Long, Long>();
    FileStatus[] status = fs.listStatus(new Path(input));
    for (FileStatus fileStatus : status) {
        Path file = fileStatus.getPath();
        if (file.getName().startsWith("part-r-")) {
            log.debug("Processing: {}", file.getName());
            BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(file)));
            String line = in.readLine();
            String[] tokens = line.split("\\s");
            long partition = Long.valueOf(tokens[0]);
            long offset = Long.valueOf(tokens[1]);
            log.debug("Partition {} has offset {}", partition, offset);
            offsets.put(partition, offset);
        }
    }

    Path outputPath = new Path(output, Constants.OFFSETS_FILENAME);
    PrintWriter out = new PrintWriter(new OutputStreamWriter(fs.create(outputPath)));
    for (Long partition : offsets.keySet()) {
        out.println(partition + "\t" + offsets.get(partition));
    }
    out.close();
    log.debug("Offset file created.");
}

From source file: cn.jpush.hdfs.mr.example.WordMedian.java

License: Apache License

/**
 * This is a standard program to read and find a median value based on a
 * file of word counts such as: 1 456, 2 132, 3 56..., where the first values
 * are the word lengths and the following values are the number of times
 * that words of that length appear.
 * 
 * @param path
 *            The path to read the HDFS file from
 *            (part-r-00000...00001...etc).
 * @param medianIndex1
 *            The first length value to look for.
 * @param medianIndex2
 *            The second length value to look for (will be the same as the
 *            first if there are an even number of words total).
 * @throws IOException
 *             If the file cannot be found, we throw an exception.
 * */
private double readAndFindMedian(String path, int medianIndex1, int medianIndex2, Configuration conf)
        throws IOException {
    FileSystem fs = new Path(path, "part-r-00000").getFileSystem(conf);
    Path file = new Path(path, "part-r-00000");

    if (!fs.exists(file))
        throw new IOException("Output not found!");

    BufferedReader br = null;

    try {
        br = new BufferedReader(new InputStreamReader(fs.open(file), Charsets.UTF_8));
        int num = 0;

        String line;
        while ((line = br.readLine()) != null) {
            StringTokenizer st = new StringTokenizer(line);

            // grab length
            String currLen = st.nextToken();

            // grab count
            String lengthFreq = st.nextToken();

            int prevNum = num;
            num += Integer.parseInt(lengthFreq);

            if (medianIndex2 >= prevNum && medianIndex1 <= num) {
                System.out.println("The median is: " + currLen);
                br.close();
                return Double.parseDouble(currLen);
            } else if (medianIndex2 >= prevNum && medianIndex1 < num) {
                String nextCurrLen = st.nextToken();
                double theMedian = (Integer.parseInt(currLen) + Integer.parseInt(nextCurrLen)) / 2.0;
                System.out.println("The median is: " + theMedian);
                br.close();
                return theMedian;
            }
        }
    } finally {
        if (br != null) {
            br.close();
        }
    }
    // error, no median found
    return -1;
}

From source file: cn.lhfei.hadoop.ch03.FileSystemCat.java

License: Apache License

public static void main(String[] args) {

    Logger log = LoggerFactory.getLogger(FileSystemCat.class);

    String uri = args[0];
    Configuration conf = new Configuration();

    FileSystem fs = null;
    InputStream in = null;

    try {
        fs = FileSystem.get(URI.create(uri), conf);
        in = fs.open(new Path(uri));
        IOUtils.copyBytes(in, System.out, 4096, false);

    } catch (IOException e) {
        log.error(e.getMessage(), e);
    } finally {
        IOUtils.closeStream(in);
    }
}

From source file: cn.lhfei.hadoop.ch03.FileSystemDoubleCat.java

License: Apache License

public static void main(String[] args) {

    String uri = args[0];
    FSDataInputStream in = null;
    FileSystem fs = null;
    Configuration conf = new Configuration();

    try {
        fs = FileSystem.get(URI.create(uri), conf);
        in = fs.open(new Path(uri));

        IOUtils.copyBytes(in, System.out, 4096, false);
        in.seek(0); // go back to the start of the file

        IOUtils.copyBytes(in, System.out, 4096, false);

    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        IOUtils.closeStream(in);
    }
}