List of usage examples for org.apache.hadoop.fs.FileSystem#mkdirs
public boolean mkdirs(Path f) throws IOException
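Before the harvested examples below, here is a minimal, self-contained sketch of the call. mkdirs creates the directory and any missing parents (Unix mkdir -p semantics) and returns true if the directory was created or already exists. The fs.defaultFS value and the path are placeholders, not taken from any example on this page:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MkdirsExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Placeholder URI; in the tests below this usually comes from a MiniDFSCluster.
        conf.set("fs.defaultFS", "hdfs://localhost:8020");
        FileSystem fs = FileSystem.get(conf);
        Path dir = new Path("/tmp/example/year/month/day");
        // Creates all missing parent directories in one call; returns false on failure,
        // so callers typically check the result rather than rely on an exception.
        if (!fs.mkdirs(dir)) {
            throw new IOException("Unable to create " + dir);
        }
    }
}

Most of the examples below follow exactly this pattern: call mkdirs, then either assert on or branch on the boolean result.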
From source file:com.streamsets.pipeline.stage.origin.hdfs.cluster.ClusterHDFSSourceIT.java
License:Apache License
@Test
public void testWrongHDFSDirLocation() throws Exception {
    ClusterHdfsConfigBean conf = new ClusterHdfsConfigBean();
    conf.hdfsUri = miniDFS.getURI().toString();
    conf.hdfsDirLocations = Arrays.asList(dir.toUri().getPath());
    conf.hdfsConfigs = new HashMap<>();
    conf.hdfsConfigs.put("x", "X");
    conf.dataFormat = DataFormat.TEXT;
    conf.dataFormatConfig.textMaxLineLen = 1024;
    conf.hdfsUri = "/pathwithnoschemeorauthority";
    ClusterHdfsSource clusterHdfsSource = createSource(conf);
    try {
        List<ConfigIssue> issues = clusterHdfsSource.init(null, ContextInfoCreator
            .createSourceContext("myInstance", false, OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_02"));

        conf.hdfsUri = "file://localhost:8020/";
        clusterHdfsSource = createSource(conf);
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
            OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_12"));

        conf.hdfsUri = "hdfs:///noauthority";
        clusterHdfsSource = createSource(conf);
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
            OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_13"));

        conf.hdfsUri = "hdfs://localhost:50000";
        clusterHdfsSource = createSource(conf);
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
            OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_11"));

        conf.hdfsUri = miniDFS.getURI().toString();
        conf.hdfsDirLocations = Arrays.asList("/pathdoesnotexist");
        clusterHdfsSource = createSource(conf);
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
            OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_10"));

        conf.hdfsUri = miniDFS.getURI().toString();
        conf.hdfsDirLocations = Arrays.asList(dir.toUri().getPath());
        FileSystem fs = miniDFS.getFileSystem();
        Path someFile = new Path(new Path(dir.toUri()), "/someFile");
        fs.create(someFile).close();
        clusterHdfsSource = createSource(conf);
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
            OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 0, issues.size());

        conf.hdfsUri = null;
        conf.hdfsConfigs.put(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, miniDFS.getURI().toString());
        someFile = new Path(new Path(dir.toUri()), "/someFile2");
        fs.create(someFile).close();
        clusterHdfsSource = createSource(conf);
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
            OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 0, issues.size());

        Path dummyFile = new Path(new Path(dir.toUri()), "/dummyFile");
        fs.create(dummyFile).close();
        conf.hdfsUri = miniDFS.getURI().toString();
        conf.hdfsDirLocations = Arrays.asList(dummyFile.toUri().getPath());
        clusterHdfsSource = createSource(conf);
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
            OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_15"));

        Path emptyDir = new Path(dir.toUri().getPath(), "emptyDir");
        fs.mkdirs(emptyDir);
        conf.hdfsUri = miniDFS.getURI().toString();
        conf.hdfsDirLocations = Arrays.asList(emptyDir.toUri().getPath());
        clusterHdfsSource = createSource(conf);
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
            OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_16"));

        Path path1 = new Path(emptyDir, "path1");
        fs.create(path1).close();
        conf.hdfsUri = miniDFS.getURI().toString();
        conf.hdfsDirLocations = Arrays.asList(emptyDir.toUri().getPath());
        clusterHdfsSource = createSource(conf);
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
            OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 0, issues.size());
    } finally {
        clusterHdfsSource.destroy();
    }
}
From source file:com.streamsets.pipeline.stage.origin.hdfs.cluster.TestClusterHDFSSource.java
License:Apache License
@BeforeClass
public static void setUpBeforeClass() throws Exception {
    File minidfsDir = new File("target/minidfs-" + UUID.randomUUID()).getAbsoluteFile();
    minidfsDir.mkdirs();
    Assert.assertTrue(minidfsDir.exists());
    System.setProperty(MiniDFSCluster.PROP_TEST_BUILD_DATA, minidfsDir.getPath());
    Configuration conf = new HdfsConfiguration();
    conf.set("dfs.namenode.fs-limits.min-block-size", String.valueOf(32));
    EditLogFileOutputStream.setShouldSkipFsyncForTesting(true);
    miniDFS = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
    dir = new Path(miniDFS.getURI() + "/dir");
    FileSystem fs = miniDFS.getFileSystem();
    fs.mkdirs(dir);
    writeFile(fs, new Path(dir + "/forAllTests/" + "path"), 1000);
    dummyEtc = new File(minidfsDir, "dummy-etc");
    dummyEtc.mkdirs();
    Assert.assertTrue(dummyEtc.exists());
    Configuration dummyConf = new Configuration(false);
    for (String file : new String[] { "core", "hdfs", "mapred", "yarn" }) {
        File siteXml = new File(dummyEtc, file + "-site.xml");
        FileOutputStream out = new FileOutputStream(siteXml);
        dummyConf.writeXml(out);
        out.close();
    }
    resourcesDir = minidfsDir.getAbsolutePath();
    hadoopConfDir = dummyEtc.getName();
    System.setProperty("sdc.resources.dir", resourcesDir);
}
From source file:com.streamsets.pipeline.stage.origin.hdfs.cluster.TestClusterHDFSSource.java
License:Apache License
@Test
public void testWrongHDFSDirLocation() throws Exception {
    ClusterHdfsDSource dSource = new ForTestClusterHdfsDSource();
    configure(dSource, dir.toUri().getPath());
    dSource.hdfsUri = "/pathwithnoschemeorauthority";
    ClusterHdfsSource clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
    try {
        List<ConfigIssue> issues = clusterHdfsSource.init(null, ContextInfoCreator
            .createSourceContext("myInstance", false, OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_02"));

        dSource.hdfsUri = "file://localhost:8020/";
        clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
            OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_12"));

        dSource.hdfsUri = "hdfs:///noauthority";
        clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
            OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_13"));

        dSource.hdfsUri = "hdfs://localhost:8020";
        clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
            OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_11"));

        dSource.hdfsUri = miniDFS.getURI().toString();
        dSource.hdfsDirLocations = Arrays.asList("/pathdoesnotexist");
        clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
            OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_10"));

        dSource.hdfsUri = miniDFS.getURI().toString();
        dSource.hdfsDirLocations = Arrays.asList(dir.toUri().getPath());
        FileSystem fs = miniDFS.getFileSystem();
        Path someFile = new Path(new Path(dir.toUri()), "/someFile");
        fs.create(someFile).close();
        clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
            OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 0, issues.size());

        dSource.hdfsUri = null;
        dSource.hdfsConfigs.put(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, miniDFS.getURI().toString());
        someFile = new Path(new Path(dir.toUri()), "/someFile2");
        fs.create(someFile).close();
        clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
            OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 0, issues.size());

        Path dummyFile = new Path(new Path(dir.toUri()), "/dummyFile");
        fs.create(dummyFile).close();
        dSource.hdfsUri = miniDFS.getURI().toString();
        dSource.hdfsDirLocations = Arrays.asList(dummyFile.toUri().getPath());
        clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
            OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_15"));

        Path emptyDir = new Path(dir.toUri().getPath(), "emptyDir");
        fs.mkdirs(emptyDir);
        dSource.hdfsUri = miniDFS.getURI().toString();
        dSource.hdfsDirLocations = Arrays.asList(emptyDir.toUri().getPath());
        clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
            OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 1, issues.size());
        assertTrue(String.valueOf(issues), issues.get(0).toString().contains("HADOOPFS_16"));

        Path path1 = new Path(emptyDir, "path1");
        fs.create(path1).close();
        dSource.hdfsUri = miniDFS.getURI().toString();
        dSource.hdfsDirLocations = Arrays.asList(emptyDir.toUri().getPath());
        clusterHdfsSource = (ClusterHdfsSource) dSource.createSource();
        issues = clusterHdfsSource.init(null, ContextInfoCreator.createSourceContext("myInstance", false,
            OnRecordError.TO_ERROR, ImmutableList.of("lane")));
        assertEquals(String.valueOf(issues), 0, issues.size());
    } finally {
        clusterHdfsSource.destroy();
    }
}
From source file:com.TCG.Nutch_DNS.HostDb.java
License:Apache License
public static void install(JobConf job, Path crawlDb) throws IOException {
    boolean preserveBackup = job.getBoolean("db.preserve.backup", true);
    Path newCrawlDb = FileOutputFormat.getOutputPath(job);
    FileSystem fs = new JobClient(job).getFs();
    Path old = new Path(crawlDb, "old");
    Path current = new Path(crawlDb, CURRENT_NAME);
    if (fs.exists(current)) {
        if (fs.exists(old)) {
            fs.delete(old, true);
        }
        fs.rename(current, old);
    }
    fs.mkdirs(crawlDb);
    fs.rename(newCrawlDb, current);
    if (!preserveBackup && fs.exists(old)) {
        fs.delete(old, true);
    }
    Path lock = new Path(crawlDb, LOCK_NAME);
    LockUtil.removeLockFile(fs, lock);
}
From source file:com.tdunning.plume.local.lazy.MapRedExecutor.java
License:Apache License
/**
 * Executes a {@link PlumeWorkflow} using the Hadoop Map-Reduce implementation. It builds the
 * execution tree, optimizes it, and converts each MSCR step into a MapRed job. MSCR jobs are
 * launched in parallel via a thread pool whenever the plan allows it; if one MSCR fails, the
 * whole work flow is canceled. Because results are stored in a temporary folder, the final
 * result is flushed to the output parameter only if the work flow executed successfully.
 *
 * @param workFlow The {@link PlumeWorkflow} to execute
 * @param outputTo Output folder where the result of the work flow will be stored if executed successfully
 * @throws IOException If the work flow had to be canceled
 * @throws InterruptedException
 */
public void execute(PlumeWorkflow workFlow, String outputTo) throws IOException, InterruptedException {
    Optimizer optimizer = new Optimizer();
    ExecutionStep step = optimizer.optimize(workFlow);
    int nStep = 0;
    final String workFlowId = workFlow.getClass().getName() + "-" + System.currentTimeMillis();
    do {
        nStep++;
        log.info("Begin execution step " + nStep + " for workflow " + workFlow.getClass().getName());
        // Create a latch to mark the end of a concurrent step where all MSCRs can be executed in parallel
        final CountDownLatch latch = new CountDownLatch(step.mscrSteps.size());
        // Create a signal that can be flagged if one of the MSCRs fails, to abort the whole workFlow.
        // An AtomicBoolean is used in case this flag needs to be re-set to false under some circumstance.
        final AtomicBoolean abort = new AtomicBoolean(false);
        // For each MSCR that can be executed concurrently...
        for (final MSCR mscr : step.mscrSteps) {
            final String workFlowOutputPath = tmpOutputFolder + "/" + workFlowId;
            final String jobId = workFlowId + "/" + mscr.getId();
            final String jobOutputPath = tmpOutputFolder + "/" + jobId;
            log.info("Triggering execution of jobId " + jobId + ". Its output will be saved to " + jobOutputPath);
            // ... get its MapRed job
            final Job job = getMapRed(mscr, workFlow, workFlowOutputPath, jobOutputPath);
            final FileSystem fS = FileSystem.getLocal(job.getConfiguration());
            // ... and submit it to the ThreadPool
            executor.submit(new Runnable() {
                @Override
                public void run() {
                    try {
                        job.waitForCompletion(true);
                        // Job completed successfully - materialize outputs
                        log.info("jobId " + jobId + " completed successfully, now materializing outputs.");
                        for (Map.Entry<PCollection<?>, Integer> entry : mscr.getNumberedChannels().entrySet()) {
                            LazyCollection<?> oCol =
                                (LazyCollection<?>) mscr.getOutputChannels().get(entry.getKey()).output;
                            // Move this output to somewhere recognizable: this executor's tmp folder + this
                            // PCollection's Plume Id. This way, mappers that read unmaterialized collections
                            // will know where to find intermediate states.
                            FileStatus[] files = fS.listStatus(new Path(jobOutputPath));
                            Path materializedPath = new Path(workFlowOutputPath + "/" + oCol.getPlumeId());
                            fS.mkdirs(materializedPath);
                            for (FileStatus file : files) {
                                if (file.getPath().getName().startsWith(entry.getValue() + "-r-")) {
                                    FileUtil.copy(fS, file.getPath(), fS, materializedPath, false,
                                        job.getConfiguration());
                                    oCol.setFile(materializedPath.toString());
                                }
                            }
                            log.info("Materialized plume output " + oCol.getPlumeId() + " to " + oCol.getFile());
                        }
                    } catch (IOException e) {
                        log.warn("One Job failed: " + jobId + ", current Workflow will be aborted ", e);
                        abort.set(true); // Flag the premature end of this workflow
                    } catch (InterruptedException e) {
                        log.warn("One Job failed: " + jobId + ", current Workflow will be aborted ", e);
                        abort.set(true); // Flag the premature end of this workflow
                    } catch (ClassNotFoundException e) {
                        log.warn("One Job failed: " + jobId + ", current Workflow will be aborted ", e);
                        abort.set(true); // Flag the premature end of this workflow
                    } finally {
                        latch.countDown(); // Count down under any circumstance
                    }
                }
            });
        }
        latch.await(); // Wait until all MSCRs from this step are completed
        if (abort.get()) {
            throw new IOException("Current Workflow was aborted");
        }
        step = step.nextStep;
    } while (step != null);
    log.info("Workflow ended correctly.");
    // Move temporary result to where the API user wants it. WARN: local-specific implementation.
    Files.move(new File(tmpOutputFolder + "/" + workFlowId), new File(outputTo));
}
From source file:com.test.PiEstimatorKrb.java
License:Apache License
/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException {
    // Set up the job conf
    jobConf.setJobName(PiEstimatorKrb.class.getSimpleName());
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputKeyClass(BooleanWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    jobConf.setMapperClass(PiMapper.class);
    jobConf.setNumMapTasks(numMaps);
    jobConf.setReducerClass(PiReducer.class);
    jobConf.setNumReduceTasks(1);
    // Turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobConf.setSpeculativeExecution(false);

    // Set up input/output directories
    final Path inDir = new Path(TMP_DIR, "in");
    final Path outDir = new Path(TMP_DIR, "out");
    FileInputFormat.setInputPaths(jobConf, inDir);
    FileOutputFormat.setOutputPath(jobConf, outDir);

    final FileSystem fs = FileSystem.get(jobConf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException(
            "Tmp directory " + fs.makeQualified(TMP_DIR) + " already exists. Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        // Generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, file,
                LongWritable.class, LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            sLogger.info("Wrote input for Map #" + i);
        }

        // Start a map/reduce job
        sLogger.info("Starting Job");
        final long startTime = System.currentTimeMillis();
        if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
            jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
        }
        JobClient.runJob(jobConf);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        sLogger.info("Job Finished in " + duration + " seconds");

        // Read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        // Compute the estimated value: 4 * numInside / (numMaps * numPoints)
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
            .divide(BigDecimal.valueOf(numMaps)).divide(BigDecimal.valueOf(numPoints));
    } finally {
        fs.delete(TMP_DIR, true);
    }
}
From source file:com.trace.hadoop.TestDFSRename.java
License:Apache License
public void testRename() throws Exception {
    FileSystem fs = cluster.getFileSystem();
    assertTrue(fs.mkdirs(dir));
    { // test lease
        Path a = new Path(dir, "a");
        Path aa = new Path(dir, "aa");
        Path b = new Path(dir, "b");
        createFile(fs, a);
        // should not have any lease
        assertEquals(0, countLease(cluster));
        createFile(fs, aa);
        DataOutputStream aa_out = fs.create(aa);
        aa_out.writeBytes("something");
        // should have 1 lease
        assertEquals(1, countLease(cluster));
        list(fs, "rename0");
        fs.rename(a, b);
        list(fs, "rename1");
        aa_out.writeBytes(" more");
        aa_out.close();
        list(fs, "rename2");
        // should not have any lease
        assertEquals(0, countLease(cluster));
    }
    { // test non-existent destination
        Path dstPath = new Path("/c/d");
        assertFalse(fs.exists(dstPath));
        assertFalse(fs.rename(dir, dstPath));
    }
    { // dst cannot be a file or directory under src
        // test rename /a/b/foo to /a/b/c
        Path src = new Path("/a/b");
        Path dst = new Path("/a/b/c");
        createFile(fs, new Path(src, "foo"));
        // dst cannot be a file under src
        assertFalse(fs.rename(src, dst));
        // dst cannot be a directory under src
        assertFalse(fs.rename(src.getParent(), dst.getParent()));
    }
    { // dst can start with src, if it is not a directory or file under src
        // test rename /testPrefix to /testPrefixfile
        Path src = new Path("/testPrefix");
        Path dst = new Path("/testPrefixfile");
        createFile(fs, src);
        assertTrue(fs.rename(src, dst));
    }
    { // dst may be the same as src: test rename /a/b/c to /a/b/c
        Path src = new Path("/a/b/c");
        createFile(fs, src);
        assertTrue(fs.rename(src, src));
        assertFalse(fs.rename(new Path("/a/b"), new Path("/a/b/")));
        assertTrue(fs.rename(src, new Path("/a/b/c/")));
    }
    fs.delete(dir, true);
}
From source file:com.twitter.elephanttwin.lucene.indexing.AbstractLuceneIndexingReducer.java
License:Apache License
@Override
public void cleanup(Reducer<KIN, VIN, NullWritable, NullWritable>.Context context) throws IOException {
    // This may take a while...
    indexer.close();
    LOG.info("Done finalizing index!");
    LOG.info(cnt + " records added to the index");
    LOG.info(skipped + " records skipped");

    // Copy from local back to HDFS.
    Path destination = new Path(context.getConfiguration().get(HDFS_INDEX_LOCATION));
    LOG.info("final index destination: " + destination);
    LOG.info("copying from " + tmpIndex + " to " + destination);
    FileSystem fs = FileSystem.get(context.getConfiguration());
    if (!fs.exists(destination)) {
        fs.mkdirs(destination);
    }
    fs.copyFromLocalFile(new Path(tmpIndex.getAbsolutePath()), destination);
    LOG.info("copying complete!");

    // Clean up local tmp directory.
    FileUtil.fullyDelete(tmpIndex);
    LOG.info("local directory " + tmpIndex + " removed!");
    heartbeatThread.interrupt();
}
From source file:com.twitter.hraven.etl.JobFilePartitioner.java
License:Apache License
/**
 * @param hdfs
 *          FileSystem handle
 * @param outputPath
 *          base directory where files are to be written
 * @param fileModTime
 *          modification time of the file that needs to be moved/copied to hdfs
 * @return the path in HDFS to write the file to. Will be created if it does not exist.
 * @throws IOException
 *           if the year/month/day directory cannot be created in outputPath.
 */
private Path getTargetDirectory(FileSystem hdfs, Path outputPath, long fileModTime) throws IOException {
    String year = YEAR_FORMAT.format(new Date(fileModTime));
    String month = MONTH_FORMAT.format(new Date(fileModTime));
    String day = DAY_FORMAT.format(new Date(fileModTime));

    Path yearDir = new Path(outputPath, year);
    Path monthDir = new Path(yearDir, month);
    Path dayDir = new Path(monthDir, day);
    // Check if the day directory already exists; if not, create it (mkdirs also
    // creates the intermediate year/month directories).
    if (!hdfs.exists(dayDir)) {
        if (hdfs.mkdirs(dayDir)) {
            LOG.info("Created: " + dayDir.toString());
        } else {
            throw new IOException("Unable to create target directory with date: " + dayDir.getName());
        }
    }
    return dayDir;
}
From source file:com.twitter.hraven.etl.TestFileLister.java
License:Apache License
@Test
public void testPruneFileListBySize() throws IOException {
    long maxFileSize = 20L;
    FileStatus[] origList = new FileStatus[2];
    FileSystem hdfs = FileSystem.get(UTIL.getConfiguration());
    Path inputPath = new Path("/inputdir_filesize");
    boolean os = hdfs.mkdirs(inputPath);
    assertTrue(os);
    assertTrue(hdfs.exists(inputPath));

    final String JOB_HISTORY_FILE_NAME =
        "src/test/resources/job_1329348432655_0001-1329348443227-user-Sleep+job-1329348468601-10-1-SUCCEEDED-default.jhist";
    File jobHistoryfile = new File(JOB_HISTORY_FILE_NAME);
    Path srcPath = new Path(jobHistoryfile.toURI());
    hdfs.copyFromLocalFile(srcPath, inputPath);
    Path expPath = new Path(inputPath.toUri() + "/" + srcPath.getName());
    assertTrue(hdfs.exists(expPath));
    origList[0] = hdfs.getFileStatus(expPath);

    final String JOB_CONF_FILE_NAME = "src/test/resources/job_1329348432655_0001_conf.xml";
    File jobConfFile = new File(JOB_CONF_FILE_NAME);
    srcPath = new Path(jobConfFile.toURI());
    hdfs.copyFromLocalFile(srcPath, inputPath);
    expPath = new Path(inputPath.toUri() + "/" + srcPath.getName());
    assertTrue(hdfs.exists(expPath));
    origList[1] = hdfs.getFileStatus(expPath);

    FileStatus[] prunedList = FileLister.pruneFileListBySize(maxFileSize, origList, hdfs, inputPath);
    assertNotNull(prunedList);
    assertTrue(prunedList.length == 0);

    Path emptyFile = new Path(
        inputPath.toUri() + "/" + "job_1329341111111_0101-1329111113227-user2-Sleep.jhist");
    os = hdfs.createNewFile(emptyFile);
    assertTrue(os);
    assertTrue(hdfs.exists(emptyFile));
    origList[0] = hdfs.getFileStatus(emptyFile);

    Path emptyConfFile = new Path(inputPath.toUri() + "/" + "job_1329341111111_0101_conf.xml");
    os = hdfs.createNewFile(emptyConfFile);
    assertTrue(os);
    assertTrue(hdfs.exists(emptyConfFile));
    origList[1] = hdfs.getFileStatus(emptyConfFile);

    prunedList = FileLister.pruneFileListBySize(maxFileSize, origList, hdfs, inputPath);
    assertNotNull(prunedList);
    assertTrue(prunedList.length == 2);
}