List of usage examples for org.apache.hadoop.fs FileSystem open
public FSDataInputStream open(Path f) throws IOException
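Before the project examples below, here is a minimal sketch of the typical call pattern: resolve the FileSystem that owns a Path, open it, and read lines from the returned FSDataInputStream. The path string and class name are hypothetical placeholders, not taken from the examples.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class OpenExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path path = new Path("hdfs:///tmp/example.txt"); // hypothetical path
        // Resolve the FileSystem for this path, then open it for reading.
        FileSystem fs = path.getFileSystem(conf);
        try (FSDataInputStream in = fs.open(path);
                BufferedReader reader = new BufferedReader(
                        new InputStreamReader(in, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        }
    }
}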
From source file:com.jkoolcloud.tnt4j.streams.inputs.HdfsFileLineStreamTest.java
License:Apache License
@Test()
public void test() throws Exception {
    FileSystem fs = mock(FileSystem.class);
    HdfsFileLineStream stream = new HdfsFileLineStream();

    TestFileList files = new TestFileList(false);

    final String fileName = ("file:////" + files.get(0).getParentFile() + File.separator + files.getPrefix() // NON-NLS
            + "*.TST").replace("\\", "/"); // NON-NLS

    Map<String, String> props = new HashMap<>(2);
    props.put(StreamProperties.PROP_FILENAME, fileName);
    props.put(StreamProperties.PROP_RESTORE_STATE, "false"); // NON-NLS

    when(fs.open(any(Path.class))).thenReturn(new FSDataInputStream(new TestInputStreamStub()));

    final FileStatus fileStatusMock = mock(FileStatus.class);
    final FileStatus[] array = new FileStatus[10];
    Arrays.fill(array, fileStatusMock);
    when(fs.listStatus(any(Path.class), any(PathFilter.class))).thenReturn(array);
    when(fileStatusMock.getModificationTime()).thenReturn(1L, 2L, 3L);
    when(fileStatusMock.getPath()).thenReturn(mock(Path.class));
    when(fs.getContentSummary(any(Path.class))).thenReturn(mock(ContentSummary.class));

    Method m = FileSystem.class.getDeclaredMethod("addFileSystemForTesting", URI.class, Configuration.class, // NON-NLS
            FileSystem.class);
    m.setAccessible(true);
    m.invoke(FileSystem.class, URI.create(fileName), new Configuration(), fs);

    StreamThread st = mock(StreamThread.class);
    st.setName("HdfsFileLineStreamTestThreadName"); // NON-NLS
    stream.setOwnerThread(st);

    stream.setProperties(props.entrySet());
    stream.startStream();

    verify(fileStatusMock, atLeastOnce()).getModificationTime();
    verify(fileStatusMock, atLeastOnce()).getPath();
    verify(fs, atLeastOnce()).listStatus(any(Path.class), any(PathFilter.class));

    stream.cleanup();
}
From source file:com.kadwa.hadoop.DistExec.java
License:Open Source License
private static List<Path> fetchFileList(Configuration conf, Path srcList) throws IOException {
    List<Path> result = new ArrayList<Path>();
    FileSystem fs = srcList.getFileSystem(conf);
    BufferedReader input = null;
    try {
        input = new BufferedReader(new InputStreamReader(fs.open(srcList)));
        String line = input.readLine();
        while (line != null) {
            result.add(new Path(line));
            line = input.readLine();
        }
    } finally {
        checkAndClose(input);
    }
    return result;
}
From source file:com.knewton.mapreduce.SSTableRecordReader.java
License:Apache License
/**
 * Copies a remote path to the local filesystem, while updating hadoop that we're making
 * progress. Doesn't support directories.
 */
@VisibleForTesting
void copyToLocalFile(FileSystem remoteFS, FileSystem localFS, Path remote, Path local) throws IOException {
    // don't support transferring from remote directories
    FileStatus remoteStat = remoteFS.getFileStatus(remote);
    Preconditions.checkArgument(!remoteStat.isDirectory(), String.format("Path %s is directory!", remote));
    // if local is a dir, copy to inside that dir, like 'cp /path/file /tmp/' would do
    if (localFS.exists(local)) {
        FileStatus localStat = localFS.getFileStatus(local);
        if (localStat.isDirectory()) {
            local = new Path(local, remote.getName());
        }
    }
    long remoteFileSize = remoteStat.getLen();
    // do actual copy
    InputStream in = null;
    OutputStream out = null;
    try {
        long startTime = System.currentTimeMillis();
        long lastLogTime = 0;
        long bytesCopied = 0;
        in = remoteFS.open(remote);
        out = localFS.create(local, true);
        int buffSize = this.conf.getInt(CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_KEY,
                CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_DEFAULT);
        byte[] buf = new byte[buffSize];
        int bytesRead = in.read(buf);
        while (bytesRead >= 0) {
            long now = System.currentTimeMillis();
            // log transfer rate once per min, starting 1 min after transfer began
            if (now - lastLogTime > 60000L && now - startTime > 60000L) {
                double elapsedSec = (now - startTime) / 1000D;
                double bytesPerSec = bytesCopied / elapsedSec;
                LOG.info("Transferred {} of {} bytes at {} bytes per second", bytesCopied, remoteFileSize,
                        bytesPerSec);
                lastLogTime = now;
            }
            this.ctx.progress();
            out.write(buf, 0, bytesRead);
            bytesCopied += bytesRead;
            bytesRead = in.read(buf);
        }
        // try to close these outside of finally so we receive exception on failure
        out.close();
        out = null;
        in.close();
        in = null;
    } finally {
        // make sure everything's closed
        IOUtils.closeStream(out);
        IOUtils.closeStream(in);
    }
}
From source file:com.knewton.mapreduce.SSTableRecordReaderTest.java
License:Apache License
/**
 * Tests to see if tables can be correctly copied locally
 */
@Test
public void testCopyTablesToLocal() throws Exception {
    TaskAttemptContext context = getTaskAttemptContext(true, true, true);
    ssTableColumnRecordReader.initialize(inputSplit, context);

    doCallRealMethod().when(ssTableColumnRecordReader).copyTablesToLocal(any(FileSystem.class),
            any(FileSystem.class), any(Path.class), any(TaskAttemptContext.class));

    FileSystem remoteFS = mock(FileSystem.class);
    FileSystem localFS = mock(FileSystem.class);

    byte[] data = new byte[] { 0xA };
    FSDataInputStream fsIn = new FSDataInputStream(new MemoryDataInputStream(data));
    FSDataOutputStream fsOut = mock(FSDataOutputStream.class);

    when(remoteFS.open(any(Path.class))).thenReturn(fsIn);
    when(localFS.create(any(Path.class), anyBoolean())).thenReturn(fsOut);

    Path dataTablePath = inputSplit.getPath();
    FileStatus fileStatus = mock(FileStatus.class);
    when(fileStatus.getLen()).thenReturn(10L);
    when(fileStatus.isDirectory()).thenReturn(false);
    when(remoteFS.getFileStatus(any(Path.class))).thenReturn(fileStatus);

    ssTableColumnRecordReader.copyTablesToLocal(remoteFS, localFS, dataTablePath, context);

    verify(remoteFS).getFileStatus(dataTablePath);
    ssTableColumnRecordReader.close();
    verify(fsOut).write(any(byte[].class), eq(0), eq(data.length));
    assertEquals(2, ssTableColumnRecordReader.getComponentSize());
}
From source file:com.knewton.mapreduce.SSTableRecordReaderTest.java
License:Apache License
/**
 * Tests to see if tables can be correctly copied locally including the compression info table
 */
@Test
public void testCopyTablesToLocalWithCompressionInfo() throws Exception {
    TaskAttemptContext context = getTaskAttemptContext(true, true, true);
    ssTableColumnRecordReader.initialize(inputSplit, context);

    doCallRealMethod().when(ssTableColumnRecordReader).copyTablesToLocal(any(FileSystem.class),
            any(FileSystem.class), any(Path.class), any(TaskAttemptContext.class));

    FileSystem remoteFS = mock(FileSystem.class);
    FileSystem localFS = mock(FileSystem.class);

    byte[] data = new byte[] { 0xA };
    FSDataInputStream fsIn = new FSDataInputStream(new MemoryDataInputStream(data));
    FSDataOutputStream fsOut = mock(FSDataOutputStream.class);

    when(remoteFS.open(any(Path.class))).thenReturn(fsIn);
    when(localFS.create(any(Path.class), anyBoolean())).thenReturn(fsOut);

    Path dataTablePath = inputSplit.getPath();
    FileStatus fileStatus = mock(FileStatus.class);
    when(fileStatus.getLen()).thenReturn(10L);
    when(fileStatus.isDirectory()).thenReturn(false);
    when(remoteFS.getFileStatus(any(Path.class))).thenReturn(fileStatus);

    String str = ssTableColumnRecordReader.getDescriptor().filenameFor(Component.COMPRESSION_INFO);
    when(remoteFS.exists(new Path(str))).thenReturn(true);

    ssTableColumnRecordReader.copyTablesToLocal(remoteFS, localFS, dataTablePath, context);

    verify(remoteFS).getFileStatus(dataTablePath);
    ssTableColumnRecordReader.close();
    verify(fsOut).write(any(byte[].class), eq(0), eq(data.length));
    assertEquals(3, ssTableColumnRecordReader.getComponentSize());
}
From source file:com.knewton.mrtool.io.JsonRecordReader.java
License:Apache License
/**
 * Get the line reader to be used for the file. A <code>LineReader</code> can read a file line
 * by line. This separate method helps with testing too.
 *
 * @param fileSplit
 * @param conf
 * @return
 * @throws IOException
 */
protected LineReader initLineReader(FileSplit fileSplit, Configuration conf) throws IOException {
    final Path file = fileSplit.getPath();
    final CompressionCodec codec = compressionCodecs.getCodec(file);
    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream fileIn = fs.open(fileSplit.getPath());
    seekableIn = fileIn;
    boolean skipFirstLine = false;
    LineReader lineReader;
    if (codec != null) {
        lineReader = new LineReader(codec.createInputStream(fileIn), conf);
    } else {
        // if the start is not the beginning of the file then skip the first line to get the
        // next complete json record. The previous json record will be read by the record reader
        // that got assigned the previous InputSplit.
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        lineReader = new LineReader(fileIn, conf);
    }
    if (skipFirstLine) {
        start += lineReader.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    return lineReader;
}
From source file:com.kse.bigdata.file.SequenceSampler.java
License:Apache License
public LinkedList<Sequence> getRandomSample() {
    System.out.println("Sampling Start...");
    System.out.println("Sample Size is " + SAMPLE_SIZE);

    try {
        FileSystem fs = FileSystem.get(new Configuration());
        BufferedReader fileReader = new BufferedReader(new InputStreamReader(fs.open(sampleFile)));
        LinkedList<Double> deque = new LinkedList<Double>();
        String line;
        int[] sampleIndexes = getRandomSampleIndexArray();
        int counter = -1;

        while ((line = fileReader.readLine()) != null) {
            counter++;
            deque.add(extractValidInformation(line));
            if (deque.size() == Sequence.SIZE_OF_SEQUENCE) {
                for (int sampleIndex : sampleIndexes)
                    if (sampleIndex == counter)
                        randomSamples.add(new Sequence(deque));
                deque.removeFirst();
            }
            if (randomSamples.size() == SAMPLE_SIZE)
                return randomSamples;
        }
    } catch (IOException e) {
        e.printStackTrace();
    }

    return this.randomSamples;
}
From source file:com.kse.bigdata.main.Driver.java
License:Apache License
public static void main(String[] args) throws Exception {
    /**********************************************************************************
     ** Merge the source files into one.                                             **
     ** Should change the directories of each file before executing the program.    **
     **********************************************************************************/
    //        String inputFileDirectory = "/media/bk/??/BigData_Term_Project/Debug";
    //        String resultFileDirectory = "/media/bk/??/BigData_Term_Project/debug.csv";
    //        File resultFile = new File(resultFileDirectory);
    //        if (!resultFile.exists())
    //            new SourceFileMerger(inputFileDirectory, resultFileDirectory).mergeFiles();

    /**********************************************************************************
     * Hadoop Operation.
     * Before starting, check the length of the sequence we want to predict.
     **********************************************************************************/

    Configuration conf = new Configuration();

    // Enable MapReduce intermediate compression as Snappy
    conf.setBoolean("mapred.compress.map.output", true);
    conf.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.SnappyCodec");

    // Enable Profiling
    // conf.setBoolean("mapred.task.profile", true);

    String testPath = null;
    String inputPath = null;
    String outputPath = null;

    int sampleSize = 1;
    ArrayList<String> results = new ArrayList<String>();

    for (int index = 0; index < args.length; index++) {

        /*
         * Mandatory command
         */
        // Extract input path string from command line.
        if (args[index].equals("-in"))
            inputPath = args[index + 1];

        // Extract output path string from command line.
        if (args[index].equals("-out"))
            outputPath = args[index + 1];

        // Extract test data path string from command line.
        if (args[index].equals("-test"))
            testPath = args[index + 1];

        /*
         * Optional command
         */
        // Extract a number of neighbors.
        if (args[index].equals("-nn"))
            conf.setInt(Reduce.NUMBER_OF_NEAREAST_NEIGHBOR, Integer.parseInt(args[index + 1]));

        // Whether job uses normalization or not.
        if (args[index].equals("-norm"))
            conf.setBoolean(Map.NORMALIZATION, true);

        // Extract the number of sample size to test.
        if (args[index].equals("-s"))
            sampleSize = Integer.valueOf(args[index + 1]);

        // Whether job uses mean or median
        // [Default : mean]
        if (args[index].equals("-med"))
            conf.setBoolean(Reduce.MEDIAN, true);
    }

    String outputFileName = "part-r-00000";

    SequenceSampler sampler = new SequenceSampler(testPath, sampleSize);
    LinkedList<Sequence> testSequences = sampler.getRandomSample();

    //        Test Sequence
    //        String testSeqString = "13.591-13.674-13.778-13.892-13.958-14.049-14.153-14.185-14.169-14.092-13.905-13.702-13.438-13.187-13.0-12.914-12.868-12.766-12.62-12.433-12.279-12.142-12.063-12.025-100";
    //        Sequence testSeq = new Sequence(testSeqString);
    //        LinkedList<Sequence> testSequences = new LinkedList<>();
    //        testSequences.add(testSeq);

    for (Sequence seq : testSequences) {

        /* ********************  Hadoop Launch  *********************** */

        System.out.println(seq.getTailString());

        conf.set(Map.INPUT_SEQUENCE, seq.toString());

        Job job = new Job(conf);
        job.setJarByClass(Driver.class);
        job.setJobName("term-project-driver");

        job.setMapperClass(Map.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(Text.class);

        // Should think another way to implement the combiner class
        // Current Implementation is not helpful to Job.
        // job.setCombinerClass(Combiner.class);

        // Set 1 for number of reduce tasks for keeping 100 most neighbors in sorted set.
        job.setNumReduceTasks(1);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        job.waitForCompletion(true);

        /*
         * if job finishes, get result of the job and store it in results(list).
         */
        try {
            FileSystem hdfs = FileSystem.get(new Configuration());
            BufferedReader fileReader = new BufferedReader(
                    new InputStreamReader(hdfs.open(new Path(outputPath + "/" + outputFileName))));

            String line;
            while ((line = fileReader.readLine()) != null) {
                results.add(seq.getSeqString() + " " + line);
            }

            fileReader.close();

            hdfs.delete(new Path(outputPath), true);
            hdfs.close();
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
        }
    }

    /*
     * if all jobs finish, store results of jobs to output/result.txt file.
     */
    String finalOutputPath = "output/result.csv";
    try {
        FileSystem hdfs = FileSystem.get(new Configuration());
        Path file = new Path(finalOutputPath);
        if (hdfs.exists(file)) {
            hdfs.delete(file, true);
        }

        OutputStream os = hdfs.create(file);
        PrintWriter printWriter = new PrintWriter(new OutputStreamWriter(os, "UTF-8"));

        // CSV File Header
        printWriter.println("Actual,Predicted,MER,MAE");
        printWriter.flush();

        for (String result : results) {
            String[] tokens = result.split("\\s+");
            printWriter.println(tokens[0] + "," + tokens[1] + "," + tokens[2] + "," + tokens[3]);
            printWriter.flush();
        }

        printWriter.close();
        hdfs.close();
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
    }
}
From source file:com.kxen.han.projection.giraph.BspCase.java
License:Apache License
/**
 * Read all parts- files in the output and count their lines.
 * This works only for textual output!
 *
 * @param conf Configuration
 * @param outputPath Output path
 * @return Number of output lines
 * @throws IOException
 */
public int getNumResults(Configuration conf, Path outputPath) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    int numResults = 0;
    for (FileStatus status : fs.listStatus(outputPath, PARTS_FILTER)) {
        FSDataInputStream in = null;
        BufferedReader reader = null;
        try {
            in = fs.open(status.getPath());
            reader = new BufferedReader(new InputStreamReader(in, Charsets.UTF_8));
            while (reader.readLine() != null) {
                numResults++;
            }
        } finally {
            Closeables.closeQuietly(in);
            Closeables.closeQuietly(reader);
        }
    }
    return numResults;
}
From source file:com.kylinolap.common.persistence.HBaseResourceStore.java
License:Apache License
@Override
protected InputStream getResourceImpl(String resPath) throws IOException {
    Result r = getByScan(resPath, B_FAMILY, B_COLUMN);
    if (r == null)
        return null;

    byte[] value = r.getValue(B_FAMILY, B_COLUMN);
    if (value.length == 0) {
        Path redirectPath = bigCellHDFSPath(resPath);
        Configuration hconf = HadoopUtil.getCurrentConfiguration();
        FileSystem fileSystem = FileSystem.get(hconf);

        return fileSystem.open(redirectPath);
    } else {
        return new ByteArrayInputStream(value);
    }
}