List of usage examples for org.apache.hadoop.fs.FileSystem.open
public FSDataInputStream open(Path f) throws IOException
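Before the examples, here is a minimal, self-contained sketch of the pattern they all share: resolve a FileSystem from a Path, call open, and read through the returned FSDataInputStream. The path /tmp/example.txt and the default Configuration are assumptions for illustration, not taken from the examples below.

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemOpenExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration(); // default config; assumes a local or configured filesystem
        Path path = new Path("/tmp/example.txt"); // hypothetical input file
        FileSystem fs = path.getFileSystem(conf);
        // FSDataInputStream is Closeable, so try-with-resources guarantees cleanup,
        // in contrast to the explicit close() calls in the examples below.
        try (FSDataInputStream in = fs.open(path);
                BufferedReader reader = new BufferedReader(
                        new InputStreamReader(in, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        }
    }
}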
From source file:com.cloudera.circus.test.TestXTest.java
License:Open Source License
@Test
@TestHadoop
public void testHadoopMapReduce() throws Exception {
    JobConf conf = getHadoopConf();
    FileSystem fs = FileSystem.get(conf);
    JobClient jobClient = new JobClient(conf);
    try {
        Path inputDir = new Path(getHadoopTestDir(), "input");
        Path outputDir = new Path(getHadoopTestDir(), "output");
        fs.mkdirs(inputDir);
        Writer writer = new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
        writer.write("a\n");
        writer.write("b\n");
        writer.write("c\n");
        writer.close();
        JobConf jobConf = getHadoopConf();
        jobConf.setInt("mapred.map.tasks", 1);
        jobConf.setInt("mapred.map.max.attempts", 1);
        jobConf.setInt("mapred.reduce.max.attempts", 1);
        jobConf.set("mapred.input.dir", inputDir.toString());
        jobConf.set("mapred.output.dir", outputDir.toString());
        final RunningJob runningJob = jobClient.submitJob(jobConf);
        waitFor(60 * 1000, true, new Predicate() {
            @Override
            public boolean evaluate() throws Exception {
                return runningJob.isComplete();
            }
        });
        Assert.assertTrue(runningJob.isSuccessful());
        Assert.assertTrue(fs.exists(new Path(outputDir, "part-00000")));
        BufferedReader reader = new BufferedReader(
                new InputStreamReader(fs.open(new Path(outputDir, "part-00000"))));
        Assert.assertTrue(reader.readLine().trim().endsWith("a"));
        Assert.assertTrue(reader.readLine().trim().endsWith("b"));
        Assert.assertTrue(reader.readLine().trim().endsWith("c"));
        Assert.assertNull(reader.readLine());
        reader.close();
    } finally {
        fs.close();
        jobClient.close();
    }
}
From source file:com.cloudera.crunch.io.text.TextFileReaderFactory.java
License:Open Source License
@Override
public Iterator<T> read(FileSystem fs, Path path) {
    MapFn mapFn = null;
    if (String.class.equals(ptype.getTypeClass())) {
        mapFn = IdentityFn.getInstance();
    } else {
        // Check for a composite MapFn for the PType.
        // Note that this won't work for Avro -- need to solve that.
        MapFn input = ptype.getInputMapFn();
        if (input instanceof CompositeMapFn) {
            mapFn = ((CompositeMapFn) input).getSecond();
        }
    }
    mapFn.initialize();
    FSDataInputStream is = null;
    try {
        is = fs.open(path);
    } catch (IOException e) {
        LOG.info("Could not read path: " + path, e);
        return Iterators.emptyIterator();
    }
    final BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    final MapFn<String, T> iterMapFn = mapFn;
    return new UnmodifiableIterator<T>() {
        private String nextLine;

        @Override
        public boolean hasNext() {
            try {
                return (nextLine = reader.readLine()) != null;
            } catch (IOException e) {
                LOG.info("Exception reading text file stream", e);
                return false;
            }
        }

        @Override
        public T next() {
            return iterMapFn.map(nextLine);
        }
    };
}
From source file:com.cloudera.flume.handlers.hdfs.TestDFSWrite.java
License:Apache License
@Test
public void testDirectWrite() throws IOException {
    FlumeConfiguration conf = FlumeConfiguration.get();
    Path path = new Path("file:///tmp/testfile");
    FileSystem hdfs = path.getFileSystem(conf);
    hdfs.deleteOnExit(path);
    String STRING = "Hello World";

    // writing
    FSDataOutputStream dos = hdfs.create(path);
    dos.writeUTF(STRING);
    dos.close();

    // reading
    FSDataInputStream dis = hdfs.open(path);
    String s = dis.readUTF();
    System.out.println(s);
    assertEquals(STRING, s);
    dis.close();

    hdfs.close();
}
From source file:com.cloudera.flume.handlers.hive.MarkerStore.java
License:Apache License
private boolean runElasticSearchMarkerQueries() {
    boolean success = true;
    FileSystem hdfs;
    FSDataInputStream in;
    dstPath = new Path(elasticsearchMarkerFolder);
    LOG.info("DSTPATH: " + dstPath);
    try {
        hdfs = dstPath.getFileSystem(conf);
        if (hdfs.exists(dstPath)) {
            FileStatus[] fileListing = hdfs.listStatus(dstPath);
            for (FileStatus fs : fileListing) {
                if (!fs.isDir()) {
                    LOG.info("File marker path: " + fs.getPath());
                    in = hdfs.open(fs.getPath());
                    byte[] fileData = new byte[(int) fs.getLen()];
                    in.readFully(fileData);
                    in.close();
                    LOG.info("cleaning markerfile @: " + fs.getPath().toString());
                    cleanMarkerFile(fs.getPath().toString());
                    sendESQuery(elasticsearchUrl, new String(fileData));
                }
            }
        }
    } catch (Exception e) {
        // Log the failure rather than swallowing it silently.
        LOG.error("Error running ElasticSearch marker queries", e);
        success = false;
    }
    return success;
}
From source file:com.cloudera.flume.handlers.hive.MarkerStore.java
License:Apache License
private boolean runHiveMarkerQueries() {
    boolean queryStatus = true;
    FileSystem hdfs;
    FSDataInputStream in;
    dstPath = new Path(hiveMarkerFolder);
    LOG.info("DSTPATH: " + dstPath);
    try {
        hdfs = dstPath.getFileSystem(conf);
        if (hdfs.exists(dstPath)) {
            FileStatus[] fileListing = hdfs.listStatus(dstPath);
            for (FileStatus fs : fileListing) {
                if (!fs.isDir()) {
                    LOG.info("File marker path: " + fs.getPath());
                    in = hdfs.open(fs.getPath());
                    byte[] fileData = new byte[(int) fs.getLen()];
                    in.readFully(fileData);
                    String[] splitTab = new String(fileData).split("\t");
                    if (splitTab.length == 2) {
                        dstPath = new Path(splitTab[0]);
                        FileSystem hiveFile = dstPath.getFileSystem(conf);
                        if (hiveFile.exists(dstPath)) {
                            LOG.info("marker file data: " + splitTab[1]);
                            if (runHiveQuery(splitTab[1])) {
                                LOG.info("Marker query is successful");
                                in.close();
                                cleanMarkerFile(fs.getPath().toString());
                            } else {
                                LOG.info("Error running marker query, marker point not deleted");
                                queryStatus = false;
                            }
                        } else {
                            LOG.info("marker points to invalid hive file location, deleting the marker");
                            in.close();
                            cleanMarkerFile(fs.getPath().toString());
                        }
                    }
                }
            }
        }
        hdfs.close();
    } catch (IOException e) {
        LOG.error("ERROR running runMarkerQueries:" + e.getMessage());
        // An IOException means not all queries ran; report failure.
        queryStatus = false;
    }
    return queryStatus;
}
From source file:com.cloudera.flume.handlers.hive.MarkerStore.java
License:Apache License
public boolean mergeFiles(String folder, Path file, String hiveOutputLocation) {
    FileSystem hdfs;
    FSDataInputStream in;
    FSDataOutputStream out;
    List<Path> fileCollection = new ArrayList<Path>();
    dstPath = new Path(folder);
    LOG.info("mergeFiles DSTPATH: " + dstPath);
    try {
        hdfs = dstPath.getFileSystem(conf);
        if (hdfs.exists(dstPath)) {
            FileStatus[] fileListing = hdfs.listStatus(dstPath);
            LOG.info("Creating file @: " + hiveOutputLocation);
            out = hdfs.create(new Path(hiveOutputLocation));
            in = hdfs.open(file);
            byte[] fileData = new byte[(int) hdfs.getFileStatus(file).getLen()];
            in.readFully(fileData);
            out.write(fileData);
            for (FileStatus fs : fileListing) {
                if (!fs.isDir()) {
                    LOG.info("mergeFiles File marker path: " + fs.getPath());
                    fileCollection.add(fs.getPath());
                    in = hdfs.open(fs.getPath());
                    fileData = new byte[(int) fs.getLen()];
                    in.readFully(fileData);
                    out.write(fileData);
                }
            }
            out.close();
        }
        hdfs.close();
        LOG.info("Written file: " + hiveOutputLocation);
        // Purge process: delete all files except the merged file.
        hdfs = dstPath.getFileSystem(conf);
        for (Path p : fileCollection) {
            if (hdfs.delete(p, false)) {
                LOG.info("Successfully deleted: " + p);
            } else {
                LOG.error("Error deleting file: " + p);
            }
        }
    } catch (IOException e) {
        LOG.error("ERROR running mergeFiles:" + e.getMessage());
    }
    LOG.info("mergeFiles Done merging files");
    // NOTE: this method always returns false, even when the merge succeeds.
    return false;
}
From source file:com.cloudera.fts.spark.format.RawFileRecordReader.java
License:Apache License
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    FileSplit split = (FileSplit) inputSplit;
    Path path = split.getPath();
    FileSystem fs = path.getFileSystem(conf);
    fileIn = fs.open(path);
    key = new Text(path.toString());
    finished = false;
}
From source file:com.cloudera.hadoop.hdfs.nfs.nfs4.NFS4Handler.java
License:Apache License
/**
 * Open the file if it is not already open, or obtain the input stream
 * opened by the StateID.
 *
 * @param stateID
 * @param fs
 * @param fileHandle
 * @return FSDataInputStream for reading
 * @throws NFS4Exception if the file is already open for write, the open is
 *         not confirmed, or the file handle is stale.
 * @throws IOException if the file open throws an IOException
 */
public synchronized FSDataInputStream forRead(StateID stateID, FileSystem fs, FileHandle fileHandle)
        throws NFS4Exception, IOException {
    FileHolder fileHolder = mFileHandleMap.get(fileHandle);
    if (fileHolder != null) {
        if (fileHolder.isOpenForWrite()) {
            throw new NFS4Exception(NFS4ERR_FILE_OPEN); // TODO lock unavailable should be _LOCK?
        }
        Path path = new Path(fileHolder.getPath());
        OpenFile<FSDataInputStream> file = fileHolder.getFSDataInputStream(stateID);
        if (file != null) {
            if (!file.isConfirmed()) {
                throw new NFS4Exception(NFS4ERR_DENIED);
            }
            return file.get();
        }
        FileStatus status = fs.getFileStatus(path);
        if (status.isDir()) {
            throw new NFS4Exception(NFS4ERR_ISDIR);
        }
        FSDataInputStream in = fs.open(path);
        this.incrementMetric("FILES_OPENED_READ", 1);
        fileHolder.putFSDataInputStream(stateID, in);
        return in;
    }
    throw new NFS4Exception(NFS4ERR_STALE);
}
From source file:com.cloudera.hadoop.hdfs.nfs.nfs4.state.HDFSState.java
License:Apache License
/**
 * Open a file for read.
 *
 * @param stateID
 * @param fileHandle
 * @return HDFSInputStream resource allocated
 * @throws NFS4Exception
 * @throws IOException
 */
public synchronized HDFSInputStream openForRead(FileSystem fs, StateID stateID, FileHandle fileHandle)
        throws NFS4Exception, IOException {
    HDFSFile hdfsFile = mOpenFilesMap.get(fileHandle);
    if ((hdfsFile != null) && hdfsFile.isOpenForWrite()) {
        throw new NFS4Exception(NFS4ERR_FILE_OPEN); // TODO lock unavailable should be _LOCK?
    }
    INode inode = mFileHandleINodeMap.getINodeByFileHandle(fileHandle);
    if (inode == null) {
        throw new NFS4Exception(NFS4ERR_STALE);
    }
    Path path = new Path(inode.getPath());
    FileStatus status = fs.getFileStatus(path);
    if (status.isDir()) {
        throw new NFS4Exception(NFS4ERR_ISDIR);
    }
    HDFSInputStream in = new HDFSInputStream(fs.open(path));
    mMetrics.incrementMetric(FILES_OPENED_READ, 1);
    if (hdfsFile == null) {
        hdfsFile = new HDFSFile(fileHandle, inode.getPath(), inode.getNumber());
        mOpenFilesMap.put(fileHandle, hdfsFile);
    }
    hdfsFile.putInputStream(stateID, in);
    return in;
}
From source file:com.cloudera.HdfsClientHeapBench.java
License:Apache License
public static void main(String[] args) throws Exception {
    System.out.println("running HdfsClientHeapBench: benchmarks "
            + "input stream size in Hadoop...\n");
    final int NUM_OPENS = 50000;
    if (args.length < 1) {
        System.err.println("You must specify a single argument: the URI "
                + "of a directory to test.\n"
                + "Examples: file:///tmp, hdfs:///\n");
        System.exit(1);
    }
    final String uri = args[0];
    Configuration conf = new Configuration();
    conf.setBoolean("dfs.client.read.shortcircuit", false);
    FSDataInputStream[] streams = new FSDataInputStream[NUM_OPENS];
    try {
        FileSystem dfs = FileSystem.get(new URI(uri), conf);
        final Path TEST_PATH = new Path("/testFile");
        createFile(dfs, TEST_PATH, 131072);
        for (int i = 0; i < NUM_OPENS; i++) {
            streams[i] = dfs.open(TEST_PATH);
            System.out.println("opening file " + i + "...");
            if (0 != streams[i].read()) {
                throw new IOException("failed to read a byte from stream " + i + ": unexpected EOF.");
            }
            streams[i].unbuffer();
        }
        // Sleep for a long time so we can run jmap to get a heap dump
        Thread.sleep(9000000L);
    } finally {
        for (FSDataInputStream stream : streams) {
            try {
                if (stream != null) {
                    stream.close();
                }
            } catch (IOException e) {
                System.out.println("error closing stream: " + e.getMessage());
            }
        }
    }
}