Example usage for org.apache.hadoop.fs FSDataInputStream read

List of usage examples for org.apache.hadoop.fs FSDataInputStream read

Introduction

On this page you can find example usages of org.apache.hadoop.fs FSDataInputStream read.

Prototype

@Override
public int read(ByteBuffer buf) throws IOException
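This overload reads bytes from the stream into the supplied ByteBuffer and returns the number of bytes read, or -1 at end of stream; note that not every underlying stream supports the ByteBuffer overload. A minimal sketch of draining a file with it is shown below; the path and buffer size are illustrative placeholders, not taken from the examples that follow.

// Minimal sketch: drain an HDFS file with read(ByteBuffer).
// The path and buffer size are illustrative placeholders.
FileSystem fs = FileSystem.get(new Configuration());
FSDataInputStream in = fs.open(new Path("/tmp/example.dat"));
ByteBuffer buf = ByteBuffer.allocate(4096);
try {
    while (in.read(buf) != -1) {
        buf.flip();
        // ... consume buf.remaining() bytes here ...
        buf.clear(); // reset before the next read
    }
} finally {
    in.close();
}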

Usage

From source file:SingleFileReader.java

License:Apache License

private void seqRead() throws Exception {
    //bufferSize = 4; /*Tachyon reads one int a time*/
    FSDataInputStream is = fs.open(hdfsFilePath);
    // byte[] bbuf = new byte[bufferSize];
    ByteBuffer buf = ByteBuffer.allocate(bufferSize);
    t.start(2);
    int bytesRead = is.read(buf);
    while (bytesRead != -1) {
        buf.clear(); // reset the buffer before reusing it for the next read
        bytesRead = is.read(buf);
    }
    t.end(2);
    is.close();
}

From source file:SingleFileReader.java

License:Apache License

private void randRead() throws Exception {
    //bufferSize = 4; /*Tachyon reads one int a time*/
    FSDataInputStream is = fs.open(hdfsFilePath);
    // byte[] bbuf = new byte[bufferSize];
    ByteBuffer buf = ByteBuffer.allocate(bufferSize);
    double offsetMax = fileSize - bufferSize - 1;
    long offset = (long) (Math.random() * offsetMax);
    long numIters = (long) (fileSize / bufferSize);
    t.start(2);
    while (numIters != 0) {
        is.seek(offset);
        buf.clear(); // reset the buffer before each read
        is.read(buf);
        offset = (long) (Math.random() * offsetMax);
        numIters = numIters - 1;
    }
    t.end(2);
    is.close();
}
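FSDataInputStream also implements PositionedReadable, so each random read can be expressed as a positioned read instead of seek() followed by read(). A minimal sketch along those lines, assuming the same fs, hdfsFilePath, bufferSize, and fileSize fields used by the example above:

// Sketch: random reads via the positioned read(long, byte[], int, int) overload.
// fs, hdfsFilePath, bufferSize, and fileSize are the fields assumed by the example above.
FSDataInputStream is = fs.open(hdfsFilePath);
byte[] b = new byte[bufferSize];
long numIters = (long) (fileSize / bufferSize);
while (numIters-- > 0) {
    long offset = (long) (Math.random() * (fileSize - bufferSize - 1));
    is.read(offset, b, 0, b.length); // positioned read; does not move the stream's file pointer
}
is.close();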

From source file:Assignment3_P2_MergeStockAverageCount.StockPriceMergeDriver.java

/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();

    // local file system handle
    FileSystem local = FileSystem.getLocal(conf);

    // hdfs file system handle
    FileSystem hdfs = FileSystem.get(conf);

    // local input directory
    Path inputDir = new Path(args[0]);

    // HDFS path where the merged input file will be created
    Path inputDir1 = new Path(args[1]);

    // local input files in local dir
    FileStatus[] inputFiles = local.listStatus(inputDir);

    // output stream for the merged HDFS file
    FSDataOutputStream out = hdfs.create(inputDir1);

    // open each file and extract contents of file
    for (int i = 0; i < inputFiles.length; i++) {
        System.out.println("File name ----------------------------------------------------------------> "
                + inputFiles[i].getPath().getName());
        FSDataInputStream in = local.open(inputFiles[i].getPath());
        byte buffer[] = new byte[256];
        int bytesRead = 0;

        // extract all contents of file
        while ((bytesRead = in.read(buffer)) > 0) {
            out.write(buffer, 0, bytesRead);
        }

        // close input stream
        in.close();
    }

    // close the merged output file before the job reads it as input
    out.close();

    Job job = Job.getInstance(conf, "Average Stock Price");
    job.setJarByClass(StockPriceMergeDriver.class);
    job.setMapperClass(StockPriceMerge_Mapper.class);
    job.setCombinerClass(StockPriceMerge_Reducer.class);
    job.setReducerClass(StockPriceMerge_Reducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(FloatWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[1])); // the merged file created above is the job's input
    FileOutputFormat.setOutputPath(job, new Path(args[2]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:audr.text.utils.FileUtils.java

License:Open Source License

public static byte[] HDFSFile2ByteArray(FSDataInputStream image) throws IOException {
    image.seek(0);
    ByteArrayOutputStream out = new ByteArrayOutputStream(1024);

    // copy the stream into memory in 1 KB chunks
    byte[] temp = new byte[1024];
    int size = 0;
    while ((size = image.read(temp)) > 0) {
        out.write(temp, 0, size);
    }

    return out.toByteArray();
}

From source file:audr.text.utils.FileUtils.java

License:Open Source License

public static String readTextFromHDFS(Path filePath) throws IOException {

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    // open the file and read up to MAX_LENGTH bytes
    // (note: a single read() call may return fewer bytes than requested)
    FSDataInputStream in = fs.open(filePath);
    byte[] line = new byte[MAX_LENGTH];
    in.read(line);
    in.close();

    return new String(line).trim();
}
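Because a single read(byte[]) call may return fewer than MAX_LENGTH bytes, and FSDataInputStream extends DataInputStream, a variant can size the buffer from the file status and use readFully. A minimal sketch, with an illustrative method name that is not part of the original class:

// Sketch: read a whole (small) HDFS text file with readFully.
// The method name and file-length-based sizing are illustrative, not from the original class.
public static String readWholeTextFromHDFS(Path filePath) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    byte[] data = new byte[(int) fs.getFileStatus(filePath).getLen()];
    FSDataInputStream in = fs.open(filePath);
    try {
        in.readFully(data); // blocks until the whole array is filled or throws EOFException
    } finally {
        in.close();
    }
    return new String(data).trim();
}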

From source file:bigsatgps.BigDataHandler.java

License:Open Source License

/**
 *
 * @param infile
 * @return
 * @throws Exception
 */
public String ImageToSequence(String infile) throws Exception {
    String log4jConfPath = "lib/log4j.properties";
    PropertyConfigurator.configure(log4jConfPath);
    confHadoop = new Configuration();
    confHadoop.addResource(new Path("/hadoop/projects/hadoop-1.0.4/conf/core-site.xml"));
    confHadoop.addResource(new Path("/hadoop/projects/hadoop-1.0.4/conf/hdfs-site.xml"));
    FileSystem fs = FileSystem.get(confHadoop);
    Path inPath = new Path(infile);
    String outfile = infile.substring(0, infile.indexOf(".")) + ".seq";
    Path outPath = new Path(outfile);
    FSDataInputStream in = null;
    Text key = new Text();
    BytesWritable value = new BytesWritable();
    SequenceFile.Writer writer = null;
    try {
        in = fs.open(inPath);
        byte buffer[] = new byte[in.available()];
        in.read(buffer);
        writer = SequenceFile.createWriter(fs, confHadoop, outPath, key.getClass(), value.getClass());
        writer.append(new Text(inPath.getName()), new BytesWritable(buffer));
        System.out.println();
        System.out.println("Successfully created the sequencefile " + outfile);
        return outfile;
    } catch (IOException e) {
        System.err.println("Exception MESSAGES = " + e.getMessage());
        return null;
    } finally {
        IOUtils.closeStream(writer);
        IOUtils.closeStream(in);
    }
}

From source file:boa.functions.BoaIntrinsics.java

License:Apache License

/**
 * Given the model URL, deserialize the model and return the Model type.
 *
 * @param URL the URL of the serialized model
 * @return the model after deserializing
 */
// TODO Take complete URL and then deserialize the model
// FIXME Returning Object as a type, this needs to be changed once we defined Model Type
@FunctionSpec(name = "load", returnType = "Model", formalParameters = { "string" })
public static Object load(final String URL) throws Exception {
    Object unserializedObject = null;
    FSDataInputStream in = null;
    try {
        final Configuration conf = new Configuration();
        final FileSystem fileSystem = FileSystem.get(conf);
        final Path path = new Path("hdfs://boa-njt" + URL);

        in = fileSystem.open(path);
        final byte[] b = new byte[(int) fileSystem.getFileStatus(path).getLen() + 1];
        in.read(b);

        final ByteArrayInputStream bin = new ByteArrayInputStream(b);
        final ObjectInputStream dataIn = new ObjectInputStream(bin);
        unserializedObject = dataIn.readObject();
        dataIn.close();
    } catch (final Exception ex) {
        ex.printStackTrace();
    } finally {
        if (in != null)
            try {
                in.close();
            } catch (final Exception e) {
                e.printStackTrace();
            }
    }
    return unserializedObject;
}
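Here too, a single read(byte[]) call is not guaranteed to fill the array. A hedged sketch of the read step using the positioned readFully overload (reusing the fileSystem, path, and in variables from the example above) avoids partial reads:

// Sketch: read the serialized model in full before deserializing.
// fileSystem, path, and in are the objects declared in the example above.
final byte[] b = new byte[(int) fileSystem.getFileStatus(path).getLen()];
in = fileSystem.open(path);
in.readFully(0, b); // positioned readFully: reads exactly b.length bytes or throws EOFException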

From source file:boa.io.BoaOutputCommitter.java

License:Apache License

private void storeOutput(final JobContext context, final int jobId) {
    if (jobId == 0)
        return;

    Connection con = null;
    FileSystem fileSystem = null;
    FSDataInputStream in = null;
    FSDataOutputStream out = null;

    try {
        fileSystem = outputPath.getFileSystem(context.getConfiguration());

        con = DriverManager.getConnection(url, user, password);

        PreparedStatement ps = null;
        try {
            ps = con.prepareStatement("INSERT INTO boa_output (id, length) VALUES (" + jobId + ", 0)");
            ps.executeUpdate();
        } catch (final Exception e) {
            // ignored: the boa_output row may already exist for this job id
        } finally {
            try {
                if (ps != null)
                    ps.close();
            } catch (final Exception e) {
                e.printStackTrace();
            }
        }

        fileSystem.mkdirs(new Path("/boa", new Path("" + jobId)));
        out = fileSystem.create(new Path("/boa", new Path("" + jobId, new Path("output.txt"))));

        int partNum = 0;

        final byte[] b = new byte[64 * 1024 * 1024];
        long length = 0;
        boolean hasWebResult = false;

        while (true) {
            final Path path = new Path(outputPath, "part-r-" + String.format("%05d", partNum++));
            if (!fileSystem.exists(path))
                break;

            if (in != null)
                try {
                    in.close();
                } catch (final Exception e) {
                    e.printStackTrace();
                }
            in = fileSystem.open(path);

            int numBytes = 0;

            while ((numBytes = in.read(b)) > 0) {
                if (!hasWebResult) {
                    hasWebResult = true;

                    try {
                        ps = con.prepareStatement("UPDATE boa_output SET web_result=? WHERE id=" + jobId);
                        int webSize = 64 * 1024 - 1;
                        ps.setString(1, new String(b, 0, numBytes < webSize ? numBytes : webSize));
                        ps.executeUpdate();
                    } finally {
                        try {
                            if (ps != null)
                                ps.close();
                        } catch (final Exception e) {
                            e.printStackTrace();
                        }
                    }
                }
                out.write(b, 0, numBytes);
                length += numBytes;

                this.context.progress();
            }
        }

        try {
            ps = con.prepareStatement("UPDATE boa_output SET length=? WHERE id=" + jobId);
            ps.setLong(1, length);
            ps.executeUpdate();
        } finally {
            try {
                if (ps != null)
                    ps.close();
            } catch (final Exception e) {
                e.printStackTrace();
            }
        }
    } catch (final Exception e) {
        e.printStackTrace();
    } finally {
        try {
            if (con != null)
                con.close();
        } catch (final Exception e) {
            e.printStackTrace();
        }
        try {
            if (in != null)
                in.close();
        } catch (final Exception e) {
            e.printStackTrace();
        }
        try {
            if (out != null)
                out.close();
        } catch (final Exception e) {
            e.printStackTrace();
        }
        try {
            if (fileSystem != null)
                fileSystem.close();
        } catch (final Exception e) {
            e.printStackTrace();
        }
    }
}

From source file:br.ufrj.nce.recureco.distributedindex.search.controller.DocumentViewerServlet.java

License:Open Source License

protected void doGet(javax.servlet.http.HttpServletRequest request,
        javax.servlet.http.HttpServletResponse response) throws javax.servlet.ServletException, IOException {

    String doc = request.getParameter("doc");

    if (doc != null && doc.trim().length() > 0) {

        try {

            String filePath = DIR_DOWNLOAD + doc;

            Configuration conf = new Configuration();

            conf.addResource(new Path(DIR_HADOOP_CONF + "core-site.xml"));
            conf.addResource(new Path(DIR_HADOOP_CONF + "hdfs-site.xml"));
            conf.addResource(new Path(DIR_HADOOP_CONF + "mapred-site.xml"));

            FileSystem fileSystem = FileSystem.get(conf);

            Path path = new Path(filePath);
            if (!fileSystem.exists(path)) {
                response.getWriter().print("File not found.");
                return;
            }

            FSDataInputStream in = fileSystem.open(path);

            response.setContentType("text/plain");

            int read = 0;
            byte[] bytes = new byte[BYTES_DOWNLOAD];
            OutputStream os = response.getOutputStream();

            while ((read = in.read(bytes)) != -1) {
                os.write(bytes, 0, read);
            }
            in.close();
            os.flush();
            os.close();
        } catch (FileNotFoundException e) {
            response.getWriter().print("File not found.");
        }

    } else {
        //print invalid document
        response.getWriter().print("File not informed.");
    }

}
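The manual copy loop above can also be written with Hadoop's IOUtils.copyBytes helper. A minimal sketch, assuming the same fileSystem, path, and response objects set up in the example above:

// Sketch: stream the HDFS file to the servlet response with Hadoop's IOUtils.
// fileSystem, path, and response are the objects from the example above.
FSDataInputStream in = fileSystem.open(path);
OutputStream os = response.getOutputStream();
try {
    org.apache.hadoop.io.IOUtils.copyBytes(in, os, 4096, false); // false: do not close the streams
} finally {
    in.close();
    os.close();
}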

From source file:com.alibaba.jstorm.hdfs.HdfsCache.java

License:Apache License

public byte[] read(String dstPath) throws IOException {
    Path path = new Path(dstPath);
    if (!fs.exists(path)) {
        throw new IOException(dstPath + " does not exist!");
    }

    FSDataInputStream in = null;
    try {
        in = fs.open(path);
        LOG.debug("Try to read data from file-{}, dataLen={}", dstPath, in.available());
        ByteBuffer buf = ByteBuffer.allocate(in.available());
        in.read(buf); // note: a single read may not fill the buffer for large files
        return buf.array();
    } finally {
        if (in != null)
            in.close();
    }
}
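Because read(ByteBuffer) may return before the buffer is full, large files are better read in a loop. A minimal sketch of the same read step, assuming the same fs and path as the example above and sizing the buffer from getFileStatus instead of available():

// Sketch: fill the ByteBuffer completely, looping until it is full or the stream ends.
// fs and path are the objects used in the example above.
FSDataInputStream in = fs.open(path);
try {
    ByteBuffer buf = ByteBuffer.allocate((int) fs.getFileStatus(path).getLen());
    while (buf.hasRemaining() && in.read(buf) != -1) {
        // keep reading until the buffer is full or EOF is reached
    }
    return buf.array();
} finally {
    in.close();
}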