List of usage examples for org.apache.hadoop.fs.FileSystem.open
public FSDataInputStream open(Path f) throws IOException
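Before the examples, here is a minimal, self-contained sketch of the pattern they all share: resolve a FileSystem from a Path, call open, and read through the returned FSDataInputStream. The path /tmp/example.txt and the default Configuration are assumptions for illustration, not taken from the examples below.

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemOpenExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration(); // default config; assumes a local or configured filesystem
        Path path = new Path("/tmp/example.txt"); // hypothetical input file
        FileSystem fs = path.getFileSystem(conf);
        // FSDataInputStream is Closeable, so try-with-resources guarantees cleanup,
        // in contrast to the explicit close() calls in the examples below.
        try (FSDataInputStream in = fs.open(path);
                BufferedReader reader = new BufferedReader(
                        new InputStreamReader(in, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        }
    }
}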
From source file:com.cloudera.circus.test.TestXTest.java
License:Open Source License
@Test
@TestHadoop
public void testHadoopMapReduce() throws Exception {
    JobConf conf = getHadoopConf();
    FileSystem fs = FileSystem.get(conf);
    JobClient jobClient = new JobClient(conf);
    try {
        Path inputDir = new Path(getHadoopTestDir(), "input");
        Path outputDir = new Path(getHadoopTestDir(), "output");
        fs.mkdirs(inputDir);
        Writer writer = new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
        writer.write("a\n");
        writer.write("b\n");
        writer.write("c\n");
        writer.close();
        JobConf jobConf = getHadoopConf();
        jobConf.setInt("mapred.map.tasks", 1);
        jobConf.setInt("mapred.map.max.attempts", 1);
        jobConf.setInt("mapred.reduce.max.attempts", 1);
        jobConf.set("mapred.input.dir", inputDir.toString());
        jobConf.set("mapred.output.dir", outputDir.toString());
        final RunningJob runningJob = jobClient.submitJob(jobConf);
        waitFor(60 * 1000, true, new Predicate() {
            @Override
            public boolean evaluate() throws Exception {
                return runningJob.isComplete();
            }
        });
        Assert.assertTrue(runningJob.isSuccessful());
        Assert.assertTrue(fs.exists(new Path(outputDir, "part-00000")));
        BufferedReader reader = new BufferedReader(
                new InputStreamReader(fs.open(new Path(outputDir, "part-00000"))));
        Assert.assertTrue(reader.readLine().trim().endsWith("a"));
        Assert.assertTrue(reader.readLine().trim().endsWith("b"));
        Assert.assertTrue(reader.readLine().trim().endsWith("c"));
        Assert.assertNull(reader.readLine());
        reader.close();
    } finally {
        fs.close();
        jobClient.close();
    }
}
From source file:com.cloudera.crunch.io.text.TextFileReaderFactory.java
License:Open Source License
@Override
public Iterator<T> read(FileSystem fs, Path path) {
    MapFn mapFn = null;
    if (String.class.equals(ptype.getTypeClass())) {
        mapFn = IdentityFn.getInstance();
    } else {
        // Check for a composite MapFn for the PType.
        // Note that this won't work for Avro -- need to solve that.
        MapFn input = ptype.getInputMapFn();
        if (input instanceof CompositeMapFn) {
            mapFn = ((CompositeMapFn) input).getSecond();
        }
    }
    mapFn.initialize();
    FSDataInputStream is = null;
    try {
        is = fs.open(path);
    } catch (IOException e) {
        LOG.info("Could not read path: " + path, e);
        return Iterators.emptyIterator();
    }
    final BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    final MapFn<String, T> iterMapFn = mapFn;
    return new UnmodifiableIterator<T>() {
        private String nextLine;

        @Override
        public boolean hasNext() {
            try {
                return (nextLine = reader.readLine()) != null;
            } catch (IOException e) {
                LOG.info("Exception reading text file stream", e);
                return false;
            }
        }

        @Override
        public T next() {
            return iterMapFn.map(nextLine);
        }
    };
}
From source file:com.cloudera.flume.handlers.hdfs.TestDFSWrite.java
License:Apache License
@Test
public void testDirectWrite() throws IOException {
    FlumeConfiguration conf = FlumeConfiguration.get();
    Path path = new Path("file:///tmp/testfile");
    FileSystem hdfs = path.getFileSystem(conf);
    hdfs.deleteOnExit(path);
    String STRING = "Hello World";

    // writing
    FSDataOutputStream dos = hdfs.create(path);
    dos.writeUTF(STRING);
    dos.close();

    // reading
    FSDataInputStream dis = hdfs.open(path);
    String s = dis.readUTF();
    System.out.println(s);
    assertEquals(STRING, s);
    dis.close();

    hdfs.close();
}
From source file:com.cloudera.flume.handlers.hive.MarkerStore.java
License:Apache License
private boolean runElasticSearchMarkerQueries() {
    boolean success = true;
    FileSystem hdfs;
    FSDataInputStream in;
    dstPath = new Path(elasticsearchMarkerFolder);
    LOG.info("DSTPATH: " + dstPath);
    try {
        hdfs = dstPath.getFileSystem(conf);
        if (hdfs.exists(dstPath)) {
            FileStatus[] fileListing = hdfs.listStatus(dstPath);
            for (FileStatus fs : fileListing) {
                if (!fs.isDir()) {
                    LOG.info("File marker path: " + fs.getPath());
                    in = hdfs.open(fs.getPath());
                    byte[] fileData = new byte[(int) fs.getLen()];
                    in.readFully(fileData);
                    in.close();
                    LOG.info("cleaning markerfile @: " + fs.getPath().toString());
                    cleanMarkerFile(fs.getPath().toString());
                    sendESQuery(elasticsearchUrl, new String(fileData));
                }
            }
        }
    } catch (Exception e) {
        // Log the failure rather than swallowing it silently.
        LOG.error("Error running ElasticSearch marker queries", e);
        success = false;
    }
    return success;
}
From source file:com.cloudera.flume.handlers.hive.MarkerStore.java
License:Apache License
private boolean runHiveMarkerQueries() {
    boolean queryStatus = true;
    FileSystem hdfs;
    FSDataInputStream in;
    dstPath = new Path(hiveMarkerFolder);
    LOG.info("DSTPATH: " + dstPath);
    try {
        hdfs = dstPath.getFileSystem(conf);
        if (hdfs.exists(dstPath)) {
            FileStatus[] fileListing = hdfs.listStatus(dstPath);
            for (FileStatus fs : fileListing) {
                if (!fs.isDir()) {
                    LOG.info("File marker path: " + fs.getPath());
                    in = hdfs.open(fs.getPath());
                    byte[] fileData = new byte[(int) fs.getLen()];
                    in.readFully(fileData);
                    String[] splitTab = new String(fileData).split("\t");
                    if (splitTab.length == 2) {
                        dstPath = new Path(splitTab[0]);
                        FileSystem hiveFile = dstPath.getFileSystem(conf);
                        if (hiveFile.exists(dstPath)) {
                            LOG.info("marker file data: " + splitTab[1]);
                            if (runHiveQuery(splitTab[1])) {
                                LOG.info("Marker query is successful");
                                in.close();
                                cleanMarkerFile(fs.getPath().toString());
                            } else {
                                LOG.info("Error running marker query, marker point not deleted");
                                queryStatus = false;
                            }
                        } else {
                            LOG.info("marker points to invalid hive file location, deleting the marker");
                            in.close();
                            cleanMarkerFile(fs.getPath().toString());
                        }
                    }
                }
            }
        }
        hdfs.close();
    } catch (IOException e) {
        LOG.error("ERROR running runMarkerQueries:" + e.getMessage());
        // An IOException means not all queries ran; report failure.
        queryStatus = false;
    }
    return queryStatus;
}
From source file:com.cloudera.flume.handlers.hive.MarkerStore.java
License:Apache License
public boolean mergeFiles(String folder, Path file, String hiveOutputLocation) {
    FileSystem hdfs;
    FSDataInputStream in;
    FSDataOutputStream out;
    List<Path> fileCollection = new ArrayList<Path>();
    dstPath = new Path(folder);
    LOG.info("mergeFiles DSTPATH: " + dstPath);
    try {
        hdfs = dstPath.getFileSystem(conf);
        if (hdfs.exists(dstPath)) {
            FileStatus[] fileListing = hdfs.listStatus(dstPath);
            LOG.info("Creating file @: " + hiveOutputLocation);
            out = hdfs.create(new Path(hiveOutputLocation));
            in = hdfs.open(file);
            byte[] fileData = new byte[(int) hdfs.getFileStatus(file).getLen()];
            in.readFully(fileData);
            out.write(fileData);
            for (FileStatus fs : fileListing) {
                if (!fs.isDir()) {
                    LOG.info("mergeFiles File marker path: " + fs.getPath());
                    fileCollection.add(fs.getPath());
                    in = hdfs.open(fs.getPath());
                    fileData = new byte[(int) fs.getLen()];
                    in.readFully(fileData);
                    out.write(fileData);
                }
            }
            out.close();
        }
        hdfs.close();
        LOG.info("Written file: " + hiveOutputLocation);
        // Purge process: delete all files except the merged file.
        hdfs = dstPath.getFileSystem(conf);
        for (Path p : fileCollection) {
            if (hdfs.delete(p, false)) {
                LOG.info("Successfully deleted: " + p);
            } else {
                LOG.error("Error deleting file: " + p);
            }
        }
    } catch (IOException e) {
        LOG.error("ERROR running mergeFiles:" + e.getMessage());
    }
    LOG.info("mergeFiles Done merging files");
    // NOTE: this method always returns false, even when the merge succeeds.
    return false;
}
From source file:com.cloudera.fts.spark.format.RawFileRecordReader.java
License:Apache License
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    FileSplit split = (FileSplit) inputSplit;
    Path path = split.getPath();
    FileSystem fs = path.getFileSystem(conf);
    fileIn = fs.open(path);
    key = new Text(path.toString());
    finished = false;
}
From source file:com.cloudera.hadoop.hdfs.nfs.nfs4.NFS4Handler.java
License:Apache License
/**
 * Open the file if it is not already open, or obtain the input stream
 * opened by the StateID.
 *
 * @param stateID
 * @param fs
 * @param fileHandle
 * @return FSDataInputStream for reading
 * @throws NFS4Exception if the file is already open for write, the open is
 *         not confirmed, or the file handle is stale.
 * @throws IOException if the file open throws an IOException
 */
public synchronized FSDataInputStream forRead(StateID stateID, FileSystem fs, FileHandle fileHandle)
        throws NFS4Exception, IOException {
    FileHolder fileHolder = mFileHandleMap.get(fileHandle);
    if (fileHolder != null) {
        if (fileHolder.isOpenForWrite()) {
            throw new NFS4Exception(NFS4ERR_FILE_OPEN); // TODO lock unavailable should be _LOCK?
        }
        Path path = new Path(fileHolder.getPath());
        OpenFile<FSDataInputStream> file = fileHolder.getFSDataInputStream(stateID);
        if (file != null) {
            if (!file.isConfirmed()) {
                throw new NFS4Exception(NFS4ERR_DENIED);
            }
            return file.get();
        }
        FileStatus status = fs.getFileStatus(path);
        if (status.isDir()) {
            throw new NFS4Exception(NFS4ERR_ISDIR);
        }
        FSDataInputStream in = fs.open(path);
        this.incrementMetric("FILES_OPENED_READ", 1);
        fileHolder.putFSDataInputStream(stateID, in);
        return in;
    }
    throw new NFS4Exception(NFS4ERR_STALE);
}
From source file:com.cloudera.hadoop.hdfs.nfs.nfs4.state.HDFSState.java
License:Apache License
/**
 * Open a file for read.
 *
 * @param stateID
 * @param fileHandle
 * @return HDFSInputStream resource allocated
 * @throws NFS4Exception
 * @throws IOException
 */
public synchronized HDFSInputStream openForRead(FileSystem fs, StateID stateID, FileHandle fileHandle)
        throws NFS4Exception, IOException {
    HDFSFile hdfsFile = mOpenFilesMap.get(fileHandle);
    if ((hdfsFile != null) && hdfsFile.isOpenForWrite()) {
        throw new NFS4Exception(NFS4ERR_FILE_OPEN); // TODO lock unavailable should be _LOCK?
    }
    INode inode = mFileHandleINodeMap.getINodeByFileHandle(fileHandle);
    if (inode == null) {
        throw new NFS4Exception(NFS4ERR_STALE);
    }
    Path path = new Path(inode.getPath());
    FileStatus status = fs.getFileStatus(path);
    if (status.isDir()) {
        throw new NFS4Exception(NFS4ERR_ISDIR);
    }
    HDFSInputStream in = new HDFSInputStream(fs.open(path));
    mMetrics.incrementMetric(FILES_OPENED_READ, 1);
    if (hdfsFile == null) {
        hdfsFile = new HDFSFile(fileHandle, inode.getPath(), inode.getNumber());
        mOpenFilesMap.put(fileHandle, hdfsFile);
    }
    hdfsFile.putInputStream(stateID, in);
    return in;
}
From source file:com.cloudera.HdfsClientHeapBench.java
License:Apache License
public static void main(String[] args) throws Exception {
    System.out.println("running HdfsClientHeapBench: benchmarks "
            + "input stream size in Hadoop...\n");
    final int NUM_OPENS = 50000;
    if (args.length < 1) {
        System.err.println("You must specify a single argument: the URI "
                + "of a directory to test.\n"
                + "Examples: file:///tmp, hdfs:///\n");
        System.exit(1);
    }
    final String uri = args[0];
    Configuration conf = new Configuration();
    conf.setBoolean("dfs.client.read.shortcircuit", false);
    FSDataInputStream[] streams = new FSDataInputStream[NUM_OPENS];
    try {
        FileSystem dfs = FileSystem.get(new URI(uri), conf);
        final Path TEST_PATH = new Path("/testFile");
        createFile(dfs, TEST_PATH, 131072);
        for (int i = 0; i < NUM_OPENS; i++) {
            streams[i] = dfs.open(TEST_PATH);
            System.out.println("opening file " + i + "...");
            if (0 != streams[i].read()) {
                throw new IOException("failed to read a byte from stream " + i + ": unexpected EOF.");
            }
            streams[i].unbuffer();
        }
        // Sleep for a long time so we can run jmap to get a heap dump
        Thread.sleep(9000000L);
    } finally {
        for (FSDataInputStream stream : streams) {
            try {
                if (stream != null) {
                    stream.close();
                }
            } catch (IOException e) {
                System.out.println("error closing stream: " + e.getMessage());
            }
        }
    }
}