Example usage for org.apache.hadoop.fs FileSystem open

List of usage examples for org.apache.hadoop.fs FileSystem open

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem open.

Prototype

public FSDataInputStream open(Path f) throws IOException 

Document

Opens an FSDataInputStream at the indicated Path. (FileSystem also provides an open(PathHandle fd) overload that opens a stream matching a PathHandle instance; every example below uses the Path overload.)

Usage
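
Before the project examples, here is a minimal, self-contained sketch of the call. The path and buffer size are illustrative assumptions, not taken from the examples below; the Configuration picks up whatever core-site.xml is on the classpath.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemOpenExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // try-with-resources closes the stream even if a read fails
        try (FSDataInputStream in = fs.open(new Path("/tmp/example.txt"))) { // illustrative path
            byte[] buffer = new byte[4096];
            int read;
            while ((read = in.read(buffer)) != -1) {
                System.out.write(buffer, 0, read);
            }
            System.out.flush();
        }
    }
}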

From source file:boostingPL.MR.AdaBoostPLTestMapper.java

License:Open Source License

protected void setup(Context context) throws IOException, InterruptedException {
    // classifier file
    Path path = new Path(context.getConfiguration().get("BoostingPL.modelPath") + "/part-r-00000");
    String boostingName = context.getConfiguration().get("BoostingPL.boostingName");
    boostingPL = BoostingPLFactory.createBoostingPL(boostingName, context.getConfiguration(), path);

    // testing dataset metadata
    String pathSrc = context.getConfiguration().get("BoostingPL.metadata");
    FileSystem hdfs = FileSystem.get(context.getConfiguration());
    FSDataInputStream dis = new FSDataInputStream(hdfs.open(new Path(pathSrc)));
    LineReader in = new LineReader(dis);
    insts = InstancesHelper.createInstancesFromMetadata(in);
    in.close();
    dis.close();

    try {
        eval = new Evaluation(insts);
    } catch (Exception e) {
        LOG.error("[BoostingPL-Test]: Evaluation init error!");
        e.printStackTrace();
    }
    instanceCounter = context.getCounter("BoostingPL", "Number of instances");
}
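
A note on the example above (and the reducer that follows): hdfs.open(...) already returns an FSDataInputStream, so wrapping the result in new FSDataInputStream(...) is redundant. A minimal hedged sketch of the tightened metadata-reading step, with the project-specific InstancesHelper call replaced by a generic read loop:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;

public class MetadataReadSketch {
    static void readMetadata(Configuration conf, String pathSrc) throws IOException {
        FileSystem hdfs = FileSystem.get(conf);
        // open() already returns an FSDataInputStream; no extra wrapper is needed
        try (FSDataInputStream dis = hdfs.open(new Path(pathSrc))) {
            LineReader in = new LineReader(dis);
            Text line = new Text();
            while (in.readLine(line) > 0) {
                // parse each metadata line here
            }
            in.close();
        }
    }
}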

From source file:boostingPL.MR.AdaBoostPLTestReducer.java

License:Open Source License

protected void setup(Context context) throws IOException, InterruptedException {
    // classifier file
    Path path = new Path(context.getConfiguration().get("BoostingPL.modelPath") + "/part-r-00000");
    String boostingName = context.getConfiguration().get("BoostingPL.boostingName");
    boostingPL = BoostingPLFactory.createBoostingPL(boostingName, context.getConfiguration(), path);

    // testing dataset metadata
    String pathSrc = context.getConfiguration().get("BoostingPL.metadata");
    FileSystem hdfs = FileSystem.get(context.getConfiguration());
    FSDataInputStream dis = new FSDataInputStream(hdfs.open(new Path(pathSrc)));
    LineReader in = new LineReader(dis);
    insts = InstancesHelper.createInstancesFromMetadata(in);
    in.close();
    dis.close();

    try {
        eval = new Evaluation(insts);
    } catch (Exception e) {
        LOG.error("[BoostingPL-Test]: Evaluation init error!");
        e.printStackTrace();
    }
}

From source file:br.ufrj.nce.recureco.distributedindex.search.controller.DocumentViewerServlet.java

License:Open Source License

protected void doGet(javax.servlet.http.HttpServletRequest request,
        javax.servlet.http.HttpServletResponse response) throws javax.servlet.ServletException, IOException {

    String doc = request.getParameter("doc");

    if (doc != null && doc.trim().length() > 0) {

        try {

            String filePath = DIR_DOWNLOAD + doc;

            Configuration conf = new Configuration();

            conf.addResource(new Path(DIR_HADOOP_CONF + "core-site.xml"));
            conf.addResource(new Path(DIR_HADOOP_CONF + "hdfs-site.xml"));
            conf.addResource(new Path(DIR_HADOOP_CONF + "mapred-site.xml"));

            FileSystem fileSystem = FileSystem.get(conf);

            Path path = new Path(filePath);
            if (!fileSystem.exists(path)) {
                response.getWriter().print("File not found.");
                return;
            }

            FSDataInputStream in = fileSystem.open(path);

            response.setContentType("text/plain");

            int read = 0;
            byte[] bytes = new byte[BYTES_DOWNLOAD];
            OutputStream os = response.getOutputStream();

            while ((read = in.read(bytes)) != -1) {
                os.write(bytes, 0, read);
            }
            os.flush();
            os.close();
        } catch (FileNotFoundException e) {
            response.getWriter().print("File not found.");
        }

    } else {
        //print invalid document
        response.getWriter().print("File not informed.");
    }

}
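
The manual copy loop above can also be delegated to Hadoop's own helper. A minimal hedged sketch using org.apache.hadoop.io.IOUtils.copyBytes, where the final false argument leaves the streams open so the caller stays responsible for closing them:

import java.io.IOException;
import java.io.OutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class HdfsToStream {
    static void copyToStream(Configuration conf, String src, OutputStream os) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        try (FSDataInputStream in = fs.open(new Path(src))) {
            // copyBytes drains 'in' into 'os'; 'false' keeps both streams open
            IOUtils.copyBytes(in, os, conf, false);
        }
        os.flush();
    }
}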

From source file:brush.FastqRecordReader.java

License:Apache License

/**
 * Builds a new record reader given a config file and an input split.
 *
 * @param conf The Hadoop configuration object. Used for gaining access
 *   to the underlying file system.
 * @param split The file split to read.
 */
protected FastqRecordReader(final Configuration conf, final FileSplit split) throws IOException {
    file = split.getPath();
    start = split.getStart();
    end = start + split.getLength();

    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream fileIn = fs.open(file);

    CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
    CompressionCodec codec = codecFactory.getCodec(file);

    if (codec == null) { // no codec.  Uncompressed file.
        positionAtFirstRecord(fileIn);
        inputStream = fileIn;
    } else {
        // compressed file
        if (start != 0) {
            throw new RuntimeException("Start position for compressed file is not 0! (found " + start + ")");
        }

        inputStream = codec.createInputStream(fileIn);
        end = Long.MAX_VALUE; // read until the end of the file
    }

    lineReader = new LineReader(inputStream);
}

From source file:bucket_sort.NLineInputFormat.java

License:Apache License

public static List<FileSplit> getSplitsForFile(FileStatus status, Configuration conf, int numLinesPerSplit)
        throws IOException {
    List<FileSplit> splits = new ArrayList<FileSplit>();
    Path fileName = status.getPath();
    if (status.isDir()) {
        throw new IOException("Not a file: " + fileName);
    }
    FileSystem fs = fileName.getFileSystem(conf);
    LineReader lr = null;
    try {
        FSDataInputStream in = fs.open(fileName);
        lr = new LineReader(in, conf);
        Text line = new Text();
        int numLines = 0;
        long begin = 0;
        long length = 0;
        int num = -1;
        while ((num = lr.readLine(line)) > 0) {
            numLines++;
            length += num;
            if (numLines == numLinesPerSplit) {
                // NLineInputFormat uses LineRecordReader, which always reads
                // (and consumes) at least one character out of its upper split
                // boundary. So to make sure that each mapper gets N lines, we
                // move back the upper split limits of each split 
                // by one character here.
                if (begin == 0) {
                    splits.add(new FileSplit(fileName, begin, length - 1, new String[] {}));
                } else {
                    splits.add(new FileSplit(fileName, begin - 1, length, new String[] {}));
                }
                begin += length;
                length = 0;
                numLines = 0;
            }
        }
        if (numLines != 0) {
            splits.add(new FileSplit(fileName, begin, length, new String[] {}));
        }
    } finally {
        if (lr != null) {
            lr.close();
        }
    }
    return splits;
}
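
To see the one-character boundary adjustment concretely (an illustrative walk-through, not from the source): take a file with two 4-byte lines, "aaa\n" and "bbb\n", and numLinesPerSplit = 1. The loop records the first split as (start 0, length 3) and the second as (start 3, length 4). The reader of the second split starts inside the first line's trailing newline, discards everything up to and including that newline, and then reads exactly "bbb"; the reader of the first split starts at 0 and reads through the end of "aaa" even past its split boundary. Each mapper therefore sees exactly one line.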

From source file:ca.sparkera.adapters.mainframe.CobolSerdeUtils.java

License:Apache License

protected static String getLayoutFromFS(String layoutFSUrl, Configuration conf)
        throws IOException, URISyntaxException {
    FSDataInputStream in = null;
    FileSystem fs = null;
    try {
        fs = FileSystem.get(new URI(layoutFSUrl), conf);
    } catch (IOException ioe) {
        // return null only if the file system in layout is not recognized
        String msg = "Failed to open file system for uri " + layoutFSUrl
                + " assuming it is not a FileSystem url";
        LOG.debug(msg, ioe);
        return null;
    }
    try {
        in = fs.open(new Path(layoutFSUrl));
        String s = CobolSerdeUtils.getLayoutFor(in);
        return s;
    } finally {
        if (in != null)
            in.close();
    }
}

From source file:ca.sparkera.adapters.mapred.MainframeVBInputFormat.java

License:Apache License

/**
 * Splits files returned by {@link #listStatus(JobConf)} when they're too
 * big.
 */
@Override
@SuppressWarnings("deprecation")
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {

    FileStatus[] files = listStatus(job);
    for (FileStatus file : files) { // check we have valid files
        if (file.isDir()) {
            throw new IOException("Not a file: " + file.getPath());
        }
        totalSize += file.getLen();
    }

    long goalSize = totalSize / (numSplits == 0 ? 1 : numSplits);
    long minSize = Math.max(job.getLong("mapred.min.split.size", 1), minSplitSize);
    // generate splits
    ArrayList<FileSplit> splits = new ArrayList<FileSplit>(numSplits);
    for (FileStatus file : files) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job);
        FSDataInputStream fileIn;
        InputStream inputStream;
        fileIn = fs.open(path);
        inputStream = fileIn;
        filePosition = fileIn;
        long offset = 0;
        long length = file.getLen();
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if ((length != 0) && isSplitable(fs, path)) {
            long blockSize = file.getBlockSize();

            long bytesRemaining = length;
            long splitSize = 0;
            while (offset < length) {
                splitSize = computeSplitSize(goalSize, minSize, blockSize, inputStream);

                int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                splits.add(new FileSplit(path, length - bytesRemaining, splitSize,
                        blkLocations[blkIndex].getHosts()));

                bytesRemaining -= splitSize;
                offset = length - bytesRemaining;
            }

            if (bytesRemaining != 0) {
                throw new IOException(
                        "Partial record(length = " + bytesRemaining + ") found at the end of file " + path);
            }
        } else if (length != 0) {
            splits.add(new FileSplit(path, 0, length, blkLocations[0].getHosts()));
        } else {
            // Create empty hosts array for zero length files
            splits.add(new FileSplit(path, 0, length, new String[0]));
        }
        if (inputStream != null) {
            inputStream.close();
            inputStream = null;
        }
    }
    java.util.Date date = new java.util.Date();
    System.out.println((new Timestamp(date.getTime())) + ",\t Split = 100%  Total Splits - " + (++splitCount)
            + "\t Total Records in VB file - " + totalRecords);

    LOG.debug("Total # of splits: " + splits.size());
    return splits.toArray(new FileSplit[splits.size()]);
}

From source file:ca.sparkera.adapters.mapreduce.MainframeVBRecordReader.java

License:Apache License

public void initialize(Configuration job, long splitStart, long splitLength, Path file) throws IOException {

    start = splitStart;
    end = start + splitLength;
    LOG.info("Start of the split:" + start + "-End of split:" + end);
    LOG.debug("VLR initialize started: start pos:" + start + "endpos:" + end);

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);

    CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
    if (null != codec) {
        isCompressedInput = true;
        decompressor = CodecPool.getDecompressor(codec);
        CompressionInputStream cIn = codec.createInputStream(fileIn, decompressor);
        filePosition = cIn;
        inputStream = cIn;
        LOG.info("Compressed input; cannot compute number of records in the split");
    } else {
        fileIn.seek(start);
        filePosition = fileIn;
        inputStream = fileIn;
        numBytesRemainingInSplit = splitLength;
        LOG.info("Variable length input; cannot compute number of records in the split");

    }
    this.pos = start;
}

From source file:cascading.avro.AvroScheme.java

License:Apache License

/**
 * This method peeks at the source data to get a schema when none has been provided.
 *
 * @param flowProcess The cascading FlowProcess object for this flow.
 * @param tap         The cascading Tap object.
 * @return Schema The schema of the peeked at data, or Schema.NULL if none exists.
 */
private Schema getSourceSchema(FlowProcess<JobConf> flowProcess, Tap tap) throws IOException {

    if (tap instanceof CompositeTap) {
        tap = (Tap) ((CompositeTap) tap).getChildTaps().next();
    }
    final String path = tap.getIdentifier();
    Path p = new Path(path);
    final FileSystem fs = p.getFileSystem(flowProcess.getConfigCopy());
    // Get all the input dirs
    List<FileStatus> statuses = new LinkedList<FileStatus>(Arrays.asList(fs.globStatus(p, filter)));
    // Now get all the things that are one level down
    for (FileStatus status : new LinkedList<FileStatus>(statuses)) {
        if (status.isDir())
            for (FileStatus child : Arrays.asList(fs.listStatus(status.getPath(), filter))) {
                if (child.isDir()) {
                    statuses.addAll(Arrays.asList(fs.listStatus(child.getPath(), filter)));
                } else if (fs.isFile(child.getPath())) {
                    statuses.add(child);
                }
            }
    }
    for (FileStatus status : statuses) {
        Path statusPath = status.getPath();
        if (fs.isFile(statusPath)) {
            // no need to open them all
            InputStream stream = null;
            DataFileStream reader = null;
            try {
                stream = new BufferedInputStream(fs.open(statusPath));
                reader = new DataFileStream(stream, new GenericDatumReader());
                return reader.getSchema();
            } finally {
                if (reader == null) {
                    if (stream != null) {
                        stream.close();
                    }
                } else {
                    reader.close();
                }
            }

        }
    }
    // couldn't find any Avro files, return null schema
    return Schema.create(Schema.Type.NULL);
}

From source file:cascading.scheme.DeprecatedAvroScheme.java

License:Apache License

/**
 * This method peeks at the source data to get a schema when none has been provided.
 *
 * @param flowProcess The cascading FlowProcess object for this flow.
 * @param tap         The cascading Tap object.
 * @return Schema The schema of the peeked at data, or Schema.NULL if none exists.
 */
private Schema getSourceSchema(FlowProcess<? extends Configuration> flowProcess, Tap tap) throws IOException {

    if (tap instanceof CompositeTap) {
        tap = (Tap) ((CompositeTap) tap).getChildTaps().next();
    }
    final String path = tap.getIdentifier();
    Path p = new Path(path);
    final FileSystem fs = p.getFileSystem(flowProcess.getConfigCopy());
    // Get all the input dirs
    List<FileStatus> statuses = new LinkedList<FileStatus>(Arrays.asList(fs.globStatus(p, filter)));
    // Now get all the things that are one level down
    for (FileStatus status : new LinkedList<FileStatus>(statuses)) {
        if (status.isDir())
            for (FileStatus child : Arrays.asList(fs.listStatus(status.getPath(), filter))) {
                if (child.isDir()) {
                    statuses.addAll(Arrays.asList(fs.listStatus(child.getPath(), filter)));
                } else if (fs.isFile(child.getPath())) {
                    statuses.add(child);
                }
            }
    }
    for (FileStatus status : statuses) {
        Path statusPath = status.getPath();
        if (fs.isFile(statusPath)) {
            // no need to open them all
            InputStream stream = null;
            DataFileStream reader = null;
            try {
                stream = new BufferedInputStream(fs.open(statusPath));
                reader = new DataFileStream(stream, new GenericDatumReader());
                return reader.getSchema();
            } finally {
                if (reader == null) {
                    if (stream != null) {
                        stream.close();
                    }
                } else {
                    reader.close();
                }
            }

        }
    }
    // couldn't find any Avro files, return null schema
    return Schema.create(Schema.Type.NULL);
}