List of usage examples for org.apache.hadoop.fs FileSystem exists
public boolean exists(Path f) throws IOException
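Before the project-specific examples below, a minimal sketch of the call itself: exists() returns true only if the path is reachable on the FileSystem the Path resolves against, and it is commonly used to guard an open() or getFileStatus() that would otherwise throw FileNotFoundException. The path and class name here are hypothetical.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ExistsExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();          // picks up core-site.xml / fs.defaultFS
        FileSystem fs = FileSystem.get(conf);
        Path input = new Path("/tmp/example/input.txt");   // hypothetical path
        if (fs.exists(input)) {
            System.out.println(input + " exists, " + fs.getFileStatus(input).getLen() + " bytes");
        } else {
            System.out.println(input + " does not exist");
        }
        fs.close();
    }
}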
From source file:com.inmobi.databus.purge.DataPurgerServiceTest.java
License:Apache License
private void createTestPurgefiles(FileSystem fs, Cluster cluster, Calendar date) throws Exception {
    for (String streamname : cluster.getSourceStreams()) {
        String[] files = new String[NUM_OF_FILES];
        String datapath = Cluster.getDateAsYYYYMMDDHHMNPath(date.getTime());
        String commitpath = cluster.getLocalFinalDestDirRoot() + File.separator + streamname + File.separator
                + datapath;
        String mergecommitpath = cluster.getFinalDestDirRoot() + File.separator + streamname + File.separator
                + datapath;
        String trashpath = cluster.getTrashPath() + File.separator + CalendarHelper.getDateAsString(date)
                + File.separator;
        fs.mkdirs(new Path(commitpath));

        for (int j = 0; j < NUM_OF_FILES; ++j) {
            files[j] = new String(cluster.getName() + "-"
                    + TestLocalStreamService.getDateAsYYYYMMDDHHmm(new Date()) + "_" + idFormat.format(j));
            {
                Path path = new Path(commitpath + File.separator + files[j]);
                // LOG.info("Creating streams_local File " + path.getName());
                FSDataOutputStream streamout = fs.create(path);
                streamout.writeBytes("Creating Test data for teststream " + path.toString());
                streamout.close();
                Assert.assertTrue(fs.exists(path));
            }
            {
                Path path = new Path(mergecommitpath + File.separator + files[j]);
                // LOG.info("Creating streams File " + path.getName());
                FSDataOutputStream streamout = fs.create(path);
                streamout.writeBytes("Creating Test data for teststream " + path.toString());
                streamout.close();
                Assert.assertTrue(fs.exists(path));
            }
            {
                Path path = new Path(trashpath + File.separator
                        + String.valueOf(date.get(Calendar.HOUR_OF_DAY)) + File.separator + files[j]);
                // LOG.info("Creating trash File " + path.toString());
                FSDataOutputStream streamout = fs.create(path);
                streamout.writeBytes("Creating Test trash data for teststream " + path.getName());
                streamout.close();
                Assert.assertTrue(fs.exists(path));
            }
        }
    }
}
From source file:com.inmobi.grid.fs.s4fs.NativeS4FileSystem.java
License:Apache License
/**
 * fs.default.name in conf is the HDFS store that holds the credential file for
 * s3n under /user/<name>, named <bucket name>.crd.
 * The .crd file contains access:secret on a single line.
 */
@Override
public void initialize(URI uri, Configuration conf) throws IOException {
    this.uri = uri;
    if (new Path(conf.get("fs.default.name")).toUri().getScheme().equals("s4")) {
        // currently illegal to set fs.default.name to s4;
        // without this, below code causes recursive call.
        return;
    }
    FileSystem fs = FileSystem.get(conf);
    Path nnWorkingDir = fs.getHomeDirectory();
    if (!fs.exists(nnWorkingDir)) {
        throw new IOException("Users home directory does not exist: " + fs.getWorkingDirectory());
    }

    String scheme = uri.getScheme();
    String bucket = uri.getAuthority();
    Path credFile = new Path(nnWorkingDir, bucket + ".crd");
    if (!fs.exists(credFile)) {
        throw new IOException(credFile.toString() + " does not exist");
    }

    StringBuilder sb = new StringBuilder(getCredentialFromFile(fs, credFile)).append("@").append(bucket);
    String bucketWithAccess = uri.toString().replaceFirst(scheme, "s3n");
    bucketWithAccess = bucketWithAccess.replaceFirst(bucket, sb.toString());
    super.initialize(new Path(bucketWithAccess).toUri(), conf);
}
From source file:com.kadwa.hadoop.DistExec.java
License:Open Source License
/**
 * Sanity check for srcPath
 */
private static void checkSrcPath(JobConf jobConf, List<Path> srcPaths) throws IOException {
    List<IOException> rslt = new ArrayList<IOException>();
    Path[] ps = new Path[srcPaths.size()];
    ps = srcPaths.toArray(ps);
    TokenCache.obtainTokensForNamenodes(jobConf.getCredentials(), ps, jobConf);

    for (Path p : srcPaths) {
        FileSystem fs = p.getFileSystem(jobConf);
        if (!fs.exists(p)) {
            rslt.add(new IOException("Input source " + p + " does not exist."));
        }
    }
    if (!rslt.isEmpty()) {
        throw new InvalidInputException(rslt);
    }
}
From source file:com.kadwa.hadoop.DistExec.java
License:Open Source License
/**
 * Initialize ExecFilesMapper specific job-configuration.
 *
 * @param conf    : The dfs/mapred configuration.
 * @param jobConf : The handle to the jobConf object to be initialized.
 * @param args    Arguments
 * @return true if it is necessary to launch a job.
 */
private static boolean setup(Configuration conf, JobConf jobConf, final Arguments args) throws IOException {
    jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());
    jobConf.set(EXEC_CMD_LABEL, args.execCmd);

    // set boolean values
    jobConf.setBoolean(Options.REDIRECT_ERROR_TO_OUT.propertyname,
            args.flags.contains(Options.REDIRECT_ERROR_TO_OUT));

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobConf);
    Path stagingArea;
    try {
        stagingArea = JobSubmissionFiles.getStagingDir(jClient, conf);
    } catch (InterruptedException e) {
        throw new IOException(e);
    }
    Path jobDirectory = new Path(stagingArea + NAME + "_" + randomId);
    FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JOB_DIR_PERMISSION);
    FileSystem.mkdirs(FileSystem.get(jobDirectory.toUri(), conf), jobDirectory, mapredSysPerms);
    jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

    FileSystem dstfs = args.dst.getFileSystem(conf);

    // get tokens for all the required FileSystems..
    TokenCache.obtainTokensForNamenodes(jobConf.getCredentials(), new Path[] { args.dst }, conf);

    boolean dstExists = dstfs.exists(args.dst);
    boolean dstIsDir = false;
    if (dstExists) {
        dstIsDir = dstfs.getFileStatus(args.dst).isDir();
    }

    // default logPath
    Path logPath = args.log;
    if (logPath == null) {
        String filename = "_" + NAME + "_logs_" + randomId;
        if (!dstExists || !dstIsDir) {
            Path parent = args.dst.getParent();
            if (!dstfs.exists(parent)) {
                dstfs.mkdirs(parent);
            }
            logPath = new Path(parent, filename);
        } else {
            logPath = new Path(args.dst, filename);
        }
    }
    FileOutputFormat.setOutputPath(jobConf, logPath);

    // create src list, dst list
    FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

    Path srcfilelist = new Path(jobDirectory, "_" + NAME + "_src_files");
    jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
    SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist, LongWritable.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    Path dstfilelist = new Path(jobDirectory, "_" + NAME + "_dst_files");
    SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist, Text.class,
            Text.class, SequenceFile.CompressionType.NONE);

    Path dstdirlist = new Path(jobDirectory, "_" + NAME + "_dst_dirs");
    jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
    SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist, Text.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    // handle the case where the destination directory doesn't exist
    // and we've only a single src directory.
    final boolean special = (args.srcs.size() == 1 && !dstExists);

    int srcCount = 0, cnsyncf = 0, dirsyn = 0;
    long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
    try {
        for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
            final Path src = srcItr.next();
            FileSystem srcfs = src.getFileSystem(conf);
            FileStatus srcfilestat = srcfs.getFileStatus(src);
            Path root = special && srcfilestat.isDir() ? src : src.getParent();
            if (srcfilestat.isDir()) {
                ++srcCount;
            }

            Stack<FileStatus> pathstack = new Stack<FileStatus>();
            for (pathstack.push(srcfilestat); !pathstack.empty();) {
                FileStatus cur = pathstack.pop();
                FileStatus[] children = srcfs.listStatus(cur.getPath());
                for (int i = 0; i < children.length; i++) {
                    boolean skipfile = false;
                    final FileStatus child = children[i];
                    final String dst = makeRelative(root, child.getPath());
                    ++srcCount;

                    if (child.isDir()) {
                        pathstack.push(child);
                    } else {
                        if (!skipfile) {
                            ++fileCount;
                            byteCount += child.getLen();

                            if (LOG.isTraceEnabled()) {
                                LOG.trace("adding file " + child.getPath());
                            }

                            ++cnsyncf;
                            cbsyncs += child.getLen();
                            if (cnsyncf > SYNC_FILE_MAX || cbsyncs > BYTES_PER_MAP) {
                                src_writer.sync();
                                dst_writer.sync();
                                cnsyncf = 0;
                                cbsyncs = 0L;
                            }
                        }
                    }

                    if (!skipfile) {
                        src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()),
                                new FilePair(child, dst));
                    }

                    dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
                }

                if (cur.isDir()) {
                    String dst = makeRelative(root, cur.getPath());
                    dir_writer.append(new Text(dst), new FilePair(cur, dst));
                    if (++dirsyn > SYNC_FILE_MAX) {
                        dirsyn = 0;
                        dir_writer.sync();
                    }
                }
            }
        }
    } finally {
        checkAndClose(src_writer);
        checkAndClose(dst_writer);
        checkAndClose(dir_writer);
    }

    FileStatus dststatus = null;
    try {
        dststatus = dstfs.getFileStatus(args.dst);
    } catch (FileNotFoundException fnfe) {
        LOG.info(args.dst + " does not exist.");
    }

    // create dest path dir if copying > 1 file
    if (dststatus == null) {
        if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
            throw new IOException("Failed to create " + args.dst);
        }
    }

    final Path sorted = new Path(jobDirectory, "_" + NAME + "_sorted");
    checkDuplication(jobfs, dstfilelist, sorted, conf);

    Path tmpDir = new Path(
            (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst,
            "_" + NAME + "_tmp_" + randomId);
    jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());
    LOG.info("sourcePathsCount=" + srcCount);
    LOG.info("filesToExecCount=" + fileCount);
    LOG.info("bytesToExecCount=" + StringUtils.humanReadableInt(byteCount));
    jobConf.setInt(SRC_COUNT_LABEL, srcCount);
    jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
    setMapCount(fileCount, jobConf);
    return fileCount > 0;
}
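The setup() method above probes the destination two different ways: an up-front dstfs.exists() check, and a later getFileStatus() wrapped in a FileNotFoundException handler. A minimal sketch of both idioms in isolation, with hypothetical class, method, and path names:

import java.io.FileNotFoundException;
import java.io.IOException;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DestinationCheck {

    // Idiom 1: ask first. Two round trips when the path exists (exists + getFileStatus).
    static boolean isExistingDirectory(FileSystem fs, Path dst) throws IOException {
        return fs.exists(dst) && fs.getFileStatus(dst).isDirectory();
    }

    // Idiom 2: request the status directly and treat FileNotFoundException as "absent".
    // One round trip either way; returns null where exists() would return false.
    static FileStatus statusOrNull(FileSystem fs, Path dst) throws IOException {
        try {
            return fs.getFileStatus(dst);
        } catch (FileNotFoundException fnfe) {
            return null;
        }
    }
}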
From source file:com.knewton.mapreduce.SSTableRecordReader.java
License:Apache License
/**
 * Moves all the minimum required tables for the table reader to work to local disk.
 *
 * @param split The table to work on.
 */
@VisibleForTesting
void copyTablesToLocal(FileSystem remoteFS, FileSystem localFS, Path dataTablePath, TaskAttemptContext context)
        throws IOException {
    Configuration conf = context.getConfiguration();
    String hdfsDataTablePathStr = dataTablePath.toUri().getPath();
    String localDataTablePathStr = dataTablePath.toUri().getHost() + File.separator
            + dataTablePath.toUri().getPath();
    // Make path relative due to EMR permissions
    if (localDataTablePathStr.startsWith("/")) {
        String mapTaskId = conf.get("mapreduce.task.attempt.id");
        String mapTempDir = conf.get("mapreduce.cluster.temp.dir");
        String taskWorkDir = mapTempDir + File.separator + mapTaskId;
        LOG.info("Appending {} to {}", taskWorkDir, localDataTablePathStr);
        localDataTablePathStr = taskWorkDir + localDataTablePathStr;
    }
    Path localDataTablePath = new Path(localDataTablePathStr);
    LOG.info("Copying hdfs file from {} to local disk at {}.", dataTablePath.toUri(),
            localDataTablePath.toUri());
    copyToLocalFile(remoteFS, localFS, dataTablePath, localDataTablePath);

    boolean isCompressed = conf.getBoolean(PropertyConstants.COMPRESSION_ENABLED.txt, false);
    if (isCompressed) {
        decompress(localDataTablePath, context);
    }

    components.add(Component.DATA);
    desc = Descriptor.fromFilename(localDataTablePathStr);
    Descriptor hdfsDesc = Descriptor.fromFilename(hdfsDataTablePathStr);
    String indexPathStr = hdfsDesc.filenameFor(Component.PRIMARY_INDEX);
    components.add(Component.PRIMARY_INDEX);
    Path localIdxPath = new Path(desc.filenameFor(Component.PRIMARY_INDEX));
    LOG.info("Copying hdfs file from {} to local disk at {}.", indexPathStr, localIdxPath);
    copyToLocalFile(remoteFS, localFS, new Path(indexPathStr), localIdxPath);
    if (isCompressed) {
        decompress(localIdxPath, context);
    }

    String compressionTablePathStr = hdfsDesc.filenameFor(Component.COMPRESSION_INFO.name());
    Path compressionTablePath = new Path(compressionTablePathStr);
    if (remoteFS.exists(compressionTablePath)) {
        Path localCompressionPath = new Path(desc.filenameFor(Component.COMPRESSION_INFO.name()));
        LOG.info("Copying hdfs file from {} to local disk at {}.", compressionTablePath.toUri(),
                localCompressionPath);
        copyToLocalFile(remoteFS, localFS, compressionTablePath, localCompressionPath);
        if (isCompressed) {
            decompress(localCompressionPath, context);
        }
        components.add(Component.COMPRESSION_INFO);
    }
}
From source file:com.knewton.mapreduce.SSTableRecordReader.java
License:Apache License
/**
 * Copies a remote path to the local filesystem, while updating hadoop that we're making
 * progress. Doesn't support directories.
 */
@VisibleForTesting
void copyToLocalFile(FileSystem remoteFS, FileSystem localFS, Path remote, Path local) throws IOException {
    // don't support transferring from remote directories
    FileStatus remoteStat = remoteFS.getFileStatus(remote);
    Preconditions.checkArgument(!remoteStat.isDirectory(), String.format("Path %s is directory!", remote));
    // if local is a dir, copy to inside that dir, like 'cp /path/file /tmp/' would do
    if (localFS.exists(local)) {
        FileStatus localStat = localFS.getFileStatus(local);
        if (localStat.isDirectory()) {
            local = new Path(local, remote.getName());
        }
    }
    long remoteFileSize = remoteStat.getLen();
    // do actual copy
    InputStream in = null;
    OutputStream out = null;
    try {
        long startTime = System.currentTimeMillis();
        long lastLogTime = 0;
        long bytesCopied = 0;
        in = remoteFS.open(remote);
        out = localFS.create(local, true);
        int buffSize = this.conf.getInt(CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_KEY,
                CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_DEFAULT);
        byte[] buf = new byte[buffSize];
        int bytesRead = in.read(buf);
        while (bytesRead >= 0) {
            long now = System.currentTimeMillis();
            // log transfer rate once per min, starting 1 min after transfer began
            if (now - lastLogTime > 60000L && now - startTime > 60000L) {
                double elapsedSec = (now - startTime) / 1000D;
                double bytesPerSec = bytesCopied / elapsedSec;
                LOG.info("Transferred {} of {} bytes at {} bytes per second", bytesCopied, remoteFileSize,
                        bytesPerSec);
                lastLogTime = now;
            }
            this.ctx.progress();
            out.write(buf, 0, bytesRead);
            bytesCopied += bytesRead;
            bytesRead = in.read(buf);
        }
        // try to close these outside of finally so we receive exception on failure
        out.close();
        out = null;
        in.close();
        in = null;
    } finally {
        // make sure everything's closed
        IOUtils.closeStream(out);
        IOUtils.closeStream(in);
    }
}
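copyToLocalFile() above streams bytes by hand so it can call ctx.progress() during long transfers. When progress reporting is not needed, FileSystem's built-in copyToLocalFile(Path, Path) does the same copy in one call. A minimal sketch with hypothetical paths; the exists() guard mirrors the usage this page documents:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SimpleCopyToLocal {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem remoteFS = FileSystem.get(conf);
        Path remote = new Path("/data/sstables/example-Data.db");  // hypothetical remote file
        Path local = new Path("file:///tmp/example-Data.db");      // hypothetical local target
        if (remoteFS.exists(remote)) {
            // Built-in copy; no per-buffer progress() callbacks, unlike the loop above.
            remoteFS.copyToLocalFile(remote, local);
        }
        remoteFS.close();
    }
}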
From source file:com.knewton.mapreduce.SSTableRecordReaderTest.java
License:Apache License
/**
 * Tests to see if tables can be correctly copied locally including the compression info table
 */
@Test
public void testCopyTablesToLocalWithCompressionInfo() throws Exception {
    TaskAttemptContext context = getTaskAttemptContext(true, true, true);
    ssTableColumnRecordReader.initialize(inputSplit, context);

    doCallRealMethod().when(ssTableColumnRecordReader).copyTablesToLocal(any(FileSystem.class),
            any(FileSystem.class), any(Path.class), any(TaskAttemptContext.class));

    FileSystem remoteFS = mock(FileSystem.class);
    FileSystem localFS = mock(FileSystem.class);

    byte[] data = new byte[] { 0xA };
    FSDataInputStream fsIn = new FSDataInputStream(new MemoryDataInputStream(data));
    FSDataOutputStream fsOut = mock(FSDataOutputStream.class);

    when(remoteFS.open(any(Path.class))).thenReturn(fsIn);
    when(localFS.create(any(Path.class), anyBoolean())).thenReturn(fsOut);

    Path dataTablePath = inputSplit.getPath();
    FileStatus fileStatus = mock(FileStatus.class);
    when(fileStatus.getLen()).thenReturn(10L);
    when(fileStatus.isDirectory()).thenReturn(false);
    when(remoteFS.getFileStatus(any(Path.class))).thenReturn(fileStatus);

    String str = ssTableColumnRecordReader.getDescriptor().filenameFor(Component.COMPRESSION_INFO);
    when(remoteFS.exists(new Path(str))).thenReturn(true);

    ssTableColumnRecordReader.copyTablesToLocal(remoteFS, localFS, dataTablePath, context);

    verify(remoteFS).getFileStatus(dataTablePath);
    ssTableColumnRecordReader.close();
    verify(fsOut).write(any(byte[].class), eq(0), eq(data.length));
    assertEquals(3, ssTableColumnRecordReader.getComponentSize());
}
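The test above stubs remoteFS.exists() with Mockito so copyTablesToLocal() sees the compression-info component as present without touching a real filesystem. A minimal sketch of that stubbing pattern in isolation; the class and path names are hypothetical, and only the exact stubbed Path returns true because unstubbed boolean methods on a Mockito mock default to false:

import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

public class ExistsStubbingTest {
    @Test
    public void stubbedExistsOnlyMatchesTheStubbedPath() throws Exception {
        FileSystem fs = mock(FileSystem.class);
        Path present = new Path("/tables/example-CompressionInfo.db");  // hypothetical component path
        when(fs.exists(present)).thenReturn(true);

        assertTrue(fs.exists(present));                        // stubbed path reports as existing
        assertFalse(fs.exists(new Path("/tables/other.db")));  // unstubbed calls default to false
    }
}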
From source file:com.kse.bigdata.main.Driver.java
License:Apache License
public static void main(String[] args) throws Exception {
    /**********************************************************************************
     ** Merge the source files into one.                                             **
     ** Should change the directories of each file before executing the program.    **
     **********************************************************************************/
    //        String inputFileDirectory = "/media/bk/??/BigData_Term_Project/Debug";
    //        String resultFileDirectory = "/media/bk/??/BigData_Term_Project/debug.csv";
    //        File resultFile = new File(resultFileDirectory);
    //        if(!resultFile.exists())
    //            new SourceFileMerger(inputFileDirectory, resultFileDirectory).mergeFiles();

    /**********************************************************************************
     * Hadoop Operation.
     * Before starting, check the length of the sequence we want to predict.
     **********************************************************************************/

    Configuration conf = new Configuration();

    // Enable MapReduce intermediate compression as Snappy
    conf.setBoolean("mapred.compress.map.output", true);
    conf.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.SnappyCodec");

    // Enable Profiling
    // conf.setBoolean("mapred.task.profile", true);

    String testPath = null;
    String inputPath = null;
    String outputPath = null;

    int sampleSize = 1;
    ArrayList<String> results = new ArrayList<String>();

    for (int index = 0; index < args.length; index++) {

        /*
         * Mandatory command
         */
        // Extract input path string from command line.
        if (args[index].equals("-in"))
            inputPath = args[index + 1];

        // Extract output path string from command line.
        if (args[index].equals("-out"))
            outputPath = args[index + 1];

        // Extract test data path string from command line.
        if (args[index].equals("-test"))
            testPath = args[index + 1];

        /*
         * Optional command
         */
        // Extract a number of neighbors.
        if (args[index].equals("-nn"))
            conf.setInt(Reduce.NUMBER_OF_NEAREAST_NEIGHBOR, Integer.parseInt(args[index + 1]));

        // Whether job uses normalization or not.
        if (args[index].equals("-norm"))
            conf.setBoolean(Map.NORMALIZATION, true);

        // Extract the number of sample size to test.
        if (args[index].equals("-s"))
            sampleSize = Integer.valueOf(args[index + 1]);

        // Whether job uses mean or median
        // [Default : mean]
        if (args[index].equals("-med"))
            conf.setBoolean(Reduce.MEDIAN, true);
    }

    String outputFileName = "part-r-00000";

    SequenceSampler sampler = new SequenceSampler(testPath, sampleSize);
    LinkedList<Sequence> testSequences = sampler.getRandomSample();

    //        Test Sequence
    //        String testSeqString = "13.591-13.674-13.778-13.892-13.958-14.049-14.153-14.185-14.169-14.092-13.905-13.702-13.438-13.187-13.0-12.914-12.868-12.766-12.62-12.433-12.279-12.142-12.063-12.025-100";
    //        Sequence testSeq = new Sequence(testSeqString);
    //        LinkedList<Sequence> testSequences = new LinkedList<>();
    //        testSequences.add(testSeq);

    for (Sequence seq : testSequences) {

        /* ********************  Hadoop Launch  *********************** */

        System.out.println(seq.getTailString());

        conf.set(Map.INPUT_SEQUENCE, seq.toString());

        Job job = new Job(conf);
        job.setJarByClass(Driver.class);
        job.setJobName("term-project-driver");

        job.setMapperClass(Map.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(Text.class);

        // Should think another way to implement the combiner class
        // Current Implementation is not helpful to Job.
        // job.setCombinerClass(Combiner.class);

        // Set 1 for number of reduce tasks to keep the 100 nearest neighbors in a sorted set.
        job.setNumReduceTasks(1);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        job.waitForCompletion(true);

        /*
         * if job finishes, get result of the job and store it in results(list).
         */
        try {
            FileSystem hdfs = FileSystem.get(new Configuration());
            BufferedReader fileReader = new BufferedReader(
                    new InputStreamReader(hdfs.open(new Path(outputPath + "/" + outputFileName))));

            String line;
            while ((line = fileReader.readLine()) != null) {
                results.add(seq.getSeqString() + " " + line);
            }

            fileReader.close();

            hdfs.delete(new Path(outputPath), true);
            hdfs.close();

        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
        }
    }

    /*
     * if all jobs finish, store results of jobs to output/result.csv file.
     */
    String finalOutputPath = "output/result.csv";

    try {
        FileSystem hdfs = FileSystem.get(new Configuration());
        Path file = new Path(finalOutputPath);
        if (hdfs.exists(file)) {
            hdfs.delete(file, true);
        }

        OutputStream os = hdfs.create(file);
        PrintWriter printWriter = new PrintWriter(new OutputStreamWriter(os, "UTF-8"));

        // CSV File Header
        printWriter.println("Actual,Predicted,MER,MAE");
        printWriter.flush();

        for (String result : results) {
            String[] tokens = result.split("\\s+");
            printWriter.println(tokens[0] + "," + tokens[1] + "," + tokens[2] + "," + tokens[3]);
            printWriter.flush();
        }

        printWriter.close();
        hdfs.close();

    } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
    }
}
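The driver above deletes output/result.csv when it already exists before writing a fresh copy. A minimal standalone sketch of that exists()-then-delete()-then-create() idiom; the output path mirrors the example and is otherwise hypothetical, and create(path) overwrites by default, so the explicit check mainly makes the intent visible:

import java.io.OutputStreamWriter;
import java.io.PrintWriter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ResultFileWriter {
    public static void main(String[] args) throws Exception {
        FileSystem hdfs = FileSystem.get(new Configuration());
        Path out = new Path("output/result.csv");   // mirrors the driver above, otherwise hypothetical
        if (hdfs.exists(out)) {
            hdfs.delete(out, true);                 // remove a stale result before writing a fresh one
        }
        FSDataOutputStream os = hdfs.create(out);
        try (PrintWriter printWriter = new PrintWriter(new OutputStreamWriter(os, "UTF-8"))) {
            printWriter.println("Actual,Predicted,MER,MAE");  // CSV header
        }
        hdfs.close();
    }
}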
From source file:com.kylinolap.common.persistence.HBaseResourceStore.java
License:Apache License
private Path writeLargeCellToHdfs(String resPath, byte[] largeColumn, HTableInterface table) throws IOException {
    Path redirectPath = bigCellHDFSPath(resPath);
    Configuration hconf = HadoopUtil.getCurrentConfiguration();
    FileSystem fileSystem = FileSystem.get(hconf);

    if (fileSystem.exists(redirectPath)) {
        fileSystem.delete(redirectPath, true);
    }

    FSDataOutputStream out = fileSystem.create(redirectPath);

    try {
        out.write(largeColumn);
    } finally {
        IOUtils.closeQuietly(out);
    }

    return redirectPath;
}
From source file:com.kylinolap.common.persistence.HBaseResourceStoreTest.java
License:Apache License
@Test
public void testHBaseStoreWithLargeCell() throws Exception {
    String path = "/cube/_test_large_cell.json";
    String largeContent = "THIS_IS_A_LARGE_CELL";
    StringEntity content = new StringEntity(largeContent);
    KylinConfig config = KylinConfig.getInstanceFromEnv();
    int origSize = config.getHBaseKeyValueSize();

    ResourceStore store = ResourceStore.getStore(KylinConfig.getInstanceFromEnv());

    try {
        config.setProperty("kylin.hbase.client.keyvalue.maxsize", String.valueOf(largeContent.length() - 1));

        store.deleteResource(path);

        store.putResource(path, content, StringEntity.serializer);
        assertTrue(store.exists(path));
        StringEntity t = store.getResource(path, StringEntity.class, StringEntity.serializer);
        assertEquals(content, t);

        Path redirectPath = ((HBaseResourceStore) store).bigCellHDFSPath(path);
        Configuration hconf = HadoopUtil.getCurrentConfiguration();
        FileSystem fileSystem = FileSystem.get(hconf);
        assertTrue(fileSystem.exists(redirectPath));

        FSDataInputStream in = fileSystem.open(redirectPath);
        assertEquals(largeContent, in.readUTF());
        in.close();

        store.deleteResource(path);
    } finally {
        config.setProperty("kylin.hbase.client.keyvalue.maxsize", "" + origSize);
        store.deleteResource(path);
    }
}