List of usage examples for org.apache.hadoop.fs.FileSystem mkdirs
public boolean mkdirs(Path f) throws IOException
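mkdirs behaves like `mkdir -p`: it creates the directory and all missing parents, and returns true on success (including when the directory already exists). Before the real-world examples below, here is a minimal sketch of the call, assuming a default Configuration; the paths are illustrative only, and the FsPermission overload shown is the standard companion signature in the FileSystem API.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;

public class MkdirsSketch {
    public static void main(String[] args) throws Exception {
        // Default Configuration resolves to whatever fs.defaultFS is set to
        // (the local file system unless configured otherwise).
        FileSystem fs = FileSystem.get(new Configuration());

        // Creates the directory and all missing parents, like `mkdir -p`.
        Path dir = new Path("/tmp/example/nested/dir"); // illustrative path
        boolean created = fs.mkdirs(dir);
        System.out.println("mkdirs returned " + created);

        // Overload that applies an explicit permission to the created directories.
        fs.mkdirs(new Path("/tmp/example/with-perms"), new FsPermission((short) 0755));
    }
}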
From source file:com.twitter.hraven.etl.TestFileLister.java
License:Apache License
/**
 * Removes a conf file which has already been put in the pruned list.
 *
 * @throws IOException
 */
@Test
public void testPruneFileListRemovingConfFromPruneList() throws IOException {
    long maxFileSize = 20L;
    FileStatus[] origList = new FileStatus[2];
    FileSystem hdfs = FileSystem.get(UTIL.getConfiguration());
    Path inputPath = new Path("/inputdir_filesize_pruneList");
    boolean os = hdfs.mkdirs(inputPath);
    assertTrue(os);
    assertTrue(hdfs.exists(inputPath));

    Path relocationPath = new Path("/relocation_filesize_pruneList");
    os = hdfs.mkdirs(relocationPath);
    assertTrue(os);
    assertTrue(hdfs.exists(relocationPath));

    Path emptyConfFile = new Path(inputPath.toUri() + "/" + "job_1329348432655_0001_conf.xml");
    os = hdfs.createNewFile(emptyConfFile);
    assertTrue(os);
    assertTrue(hdfs.exists(emptyConfFile));
    origList[0] = hdfs.getFileStatus(emptyConfFile);

    final String JOB_HISTORY_FILE_NAME =
        "src/test/resources/job_1329348432655_0001-1329348443227-user-Sleep+job-1329348468601-10-1-SUCCEEDED-default.jhist";
    File jobHistoryfile = new File(JOB_HISTORY_FILE_NAME);
    Path srcPath = new Path(jobHistoryfile.toURI());
    hdfs.copyFromLocalFile(srcPath, inputPath);
    Path expPath = new Path(inputPath.toUri() + "/" + srcPath.getName());
    assertTrue(hdfs.exists(expPath));
    origList[1] = hdfs.getFileStatus(expPath);

    FileStatus[] prunedList = FileLister.pruneFileListBySize(maxFileSize, origList, hdfs, inputPath);
    assertNotNull(prunedList);
    assertTrue(prunedList.length == 0);
}
From source file:com.twitter.hraven.etl.TestFileLister.java
License:Apache License
/**
 * Tests the case when several files are spread out in the dir and need to be removed.
 *
 * @throws IOException
 */
@Test
public void testPruneFileListMultipleFilesAlreadyMovedCases() throws IOException {
    long maxFileSize = 20L;
    FileStatus[] origList = new FileStatus[12];
    FileSystem hdfs = FileSystem.get(UTIL.getConfiguration());
    Path inputPath = new Path("/inputdir_filesize_multiple");
    boolean os = hdfs.mkdirs(inputPath);
    assertTrue(os);
    assertTrue(hdfs.exists(inputPath));

    Path relocationPath = new Path("/relocation_filesize_multiple");
    os = hdfs.mkdirs(relocationPath);
    assertTrue(os);
    assertTrue(hdfs.exists(relocationPath));

    Path emptyFile = new Path(inputPath.toUri() + "/" + "job_1329341111111_0101-1329111113227-user2-Sleep.jhist");
    os = hdfs.createNewFile(emptyFile);
    assertTrue(os);
    assertTrue(hdfs.exists(emptyFile));
    origList[0] = hdfs.getFileStatus(emptyFile);

    Path emptyConfFile = new Path(inputPath.toUri() + "/" + "job_1329341111111_0101_conf.xml");
    os = hdfs.createNewFile(emptyConfFile);
    assertTrue(os);
    assertTrue(hdfs.exists(emptyConfFile));
    origList[1] = hdfs.getFileStatus(emptyConfFile);

    final String JOB_HISTORY_FILE_NAME =
        "src/test/resources/job_1329348432655_0001-1329348443227-user-Sleep+job-1329348468601-10-1-SUCCEEDED-default.jhist";
    File jobHistoryfile = new File(JOB_HISTORY_FILE_NAME);
    Path srcPath = new Path(jobHistoryfile.toURI());
    hdfs.copyFromLocalFile(srcPath, inputPath);
    Path expPath = new Path(inputPath.toUri() + "/" + srcPath.getName());
    assertTrue(hdfs.exists(expPath));
    origList[2] = hdfs.getFileStatus(expPath);

    final String JOB_CONF_FILE_NAME = "src/test/resources/job_1329348432655_0001_conf.xml";
    File jobConfFile = new File(JOB_CONF_FILE_NAME);
    srcPath = new Path(jobConfFile.toURI());
    hdfs.copyFromLocalFile(srcPath, inputPath);
    expPath = new Path(inputPath.toUri() + "/" + srcPath.getName());
    assertTrue(hdfs.exists(expPath));
    origList[3] = hdfs.getFileStatus(expPath);

    Path inputPath2 = new Path(inputPath.toUri() + "/"
        + "job_1311222222255_0221-1311111143227-user10101-WordCount-1-SUCCEEDED-default.jhist");
    hdfs.copyFromLocalFile(srcPath, inputPath2);
    assertTrue(hdfs.exists(inputPath2));
    origList[4] = hdfs.getFileStatus(inputPath2);

    Path inputPath3 = new Path(inputPath.toUri() + "/"
        + "job_1399999999155_0991-1311111143227-user3321-TeraGen-1-SUCCEEDED-default.jhist");
    hdfs.copyFromLocalFile(srcPath, inputPath3);
    assertTrue(hdfs.exists(inputPath3));
    origList[5] = hdfs.getFileStatus(inputPath3);

    Path inputPath4 = new Path(inputPath.toUri() + "/"
        + "job_1399977777177_0771-1311111143227-user3321-TeraSort-1-SUCCEEDED-default.jhist");
    hdfs.copyFromLocalFile(srcPath, inputPath4);
    assertTrue(hdfs.exists(inputPath4));
    origList[6] = hdfs.getFileStatus(inputPath4);

    Path emptyFile2 = new Path(inputPath.toUri() + "/" + "job_1329343333333_5551-1329111113227-user2-SomethingElse.jhist");
    os = hdfs.createNewFile(emptyFile2);
    assertTrue(os);
    assertTrue(hdfs.exists(emptyFile2));
    origList[7] = hdfs.getFileStatus(emptyFile2);

    Path emptyConfFile2 = new Path(inputPath.toUri() + "/" + "job_1329343333333_5551_conf.xml");
    os = hdfs.createNewFile(emptyConfFile2);
    assertTrue(os);
    assertTrue(hdfs.exists(emptyConfFile2));
    origList[8] = hdfs.getFileStatus(emptyConfFile2);

    // this is an empty file which tests the toBeRemovedFileList
    // at the end of function pruneFileListBySize
    Path emptyConfFile3 = new Path(inputPath.toUri() + "/" + "job_1399999999155_0991_conf.xml");
    os = hdfs.createNewFile(emptyConfFile3);
    assertTrue(os);
    assertTrue(hdfs.exists(emptyConfFile3));
    origList[9] = hdfs.getFileStatus(emptyConfFile3);

    Path inputConfPath2 = new Path(inputPath.toUri() + "/" + "job_1311222222255_0221_conf.xml");
    srcPath = new Path(jobConfFile.toURI());
    hdfs.copyFromLocalFile(srcPath, inputConfPath2);
    assertTrue(hdfs.exists(inputConfPath2));
    origList[10] = hdfs.getFileStatus(inputConfPath2);

    // this is an empty file which tests the toBeRemovedFileList
    // at the end of function pruneFileListBySize
    Path emptyConfFile4 = new Path(inputPath.toUri() + "/" + "job_1399977777177_0771_conf.xml");
    os = hdfs.createNewFile(emptyConfFile4);
    assertTrue(os);
    assertTrue(hdfs.exists(emptyConfFile4));
    origList[11] = hdfs.getFileStatus(emptyConfFile4);

    FileStatus[] prunedList = FileLister.pruneFileListBySize(maxFileSize, origList, hdfs, inputPath);
    assertNotNull(prunedList);
    assertTrue(prunedList.length == 4);
}
From source file:com.twitter.pig.backend.hadoop.executionengine.tez.TezJobControlCompiler.java
License:Apache License
/**
 * Walks the temporary directory structure to move (rename) files
 * to their final location.
 */
private void moveResults(Path p, String rem, FileSystem fs) throws IOException {
    for (FileStatus fstat : fs.listStatus(p)) {
        Path src = fstat.getPath();
        if (fstat.isDir()) {
            log.info("mkdir: " + src);
            fs.mkdirs(removePart(src, rem));
            moveResults(fstat.getPath(), rem, fs);
        } else {
            Path dst = removePart(src, rem);
            log.info("mv: " + src + " " + dst);
            fs.rename(src, dst);
        }
    }
}
From source file:com.uber.hoodie.common.model.HoodieTableMetadata.java
License:Apache License
private HoodieTableMetadata(FileSystem fs, String basePath, String tableName, boolean initOnMissing) {
    this.fs = fs;
    this.basePath = basePath;
    try {
        Path basePathDir = new Path(this.basePath);
        if (!fs.exists(basePathDir)) {
            if (initOnMissing) {
                fs.mkdirs(basePathDir);
            } else {
                throw new DatasetNotFoundException(this.basePath);
            }
        }
        if (!fs.isDirectory(new Path(basePath))) {
            throw new DatasetNotFoundException(this.basePath);
        }

        this.metadataFolder = new Path(this.basePath, METAFOLDER_NAME);
        Path propertyPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE);
        if (!fs.exists(propertyPath)) {
            if (initOnMissing) {
                // create .hoodie folder if it does not exist.
                createHoodieProperties(metadataFolder, tableName);
            } else {
                throw new InvalidDatasetException(this.basePath);
            }
        }

        // Load metadata
        this.commits = new HoodieCommits(scanCommits(COMMIT_FILE_SUFFIX));
        this.inflightCommits = scanCommits(INFLIGHT_FILE_SUFFIX);
        this.properties = readHoodieProperties();
        log.info("All commits: " + commits);
    } catch (IOException e) {
        throw new HoodieIOException("Could not load HoodieMetadata from path " + basePath, e);
    }
}
From source file:com.uber.hoodie.common.model.HoodieTestUtils.java
License:Apache License
public static final String createNewLogFile(FileSystem fs, String basePath, String partitionPath,
        String commitTime, String fileID, Optional<Integer> version) throws IOException {
    String folderPath = basePath + "/" + partitionPath + "/";
    boolean makeDir = fs.mkdirs(new Path(folderPath));
    if (!makeDir) {
        throw new IOException("cannot create directory for path " + folderPath);
    }
    boolean createFile = fs.createNewFile(new Path(folderPath
        + FSUtils.makeLogFileName(fileID, ".log", commitTime, version.orElse(DEFAULT_LOG_VERSION),
            HoodieLogFormat.UNKNOWN_WRITE_TOKEN)));
    if (!createFile) {
        throw new IOException(
            StringUtils.format("cannot create data file for commit %s and fileId %s", commitTime, fileID));
    }
    return fileID;
}
From source file:com.uber.hoodie.common.table.HoodieTableConfig.java
License:Apache License
/**
 * Initialize the hoodie meta directory and any necessary files inside the meta
 * (including hoodie.properties).
 */
public static void createHoodieProperties(FileSystem fs, Path metadataFolder, Properties properties)
        throws IOException {
    if (!fs.exists(metadataFolder)) {
        fs.mkdirs(metadataFolder);
    }
    Path propertyPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE);
    try (FSDataOutputStream outputStream = fs.create(propertyPath)) {
        if (!properties.containsKey(HOODIE_TABLE_NAME_PROP_NAME)) {
            throw new IllegalArgumentException(HOODIE_TABLE_NAME_PROP_NAME + " property needs to be specified");
        }
        if (!properties.containsKey(HOODIE_TABLE_TYPE_PROP_NAME)) {
            properties.setProperty(HOODIE_TABLE_TYPE_PROP_NAME, DEFAULT_TABLE_TYPE.name());
        }
        // Compare with equals(), not ==: the property value is a String.
        if (HoodieTableType.MERGE_ON_READ.name().equals(properties.getProperty(HOODIE_TABLE_TYPE_PROP_NAME))
                && !properties.containsKey(HOODIE_PAYLOAD_CLASS_PROP_NAME)) {
            properties.setProperty(HOODIE_PAYLOAD_CLASS_PROP_NAME, DEFAULT_PAYLOAD_CLASS);
        }
        if (!properties.containsKey(HOODIE_ARCHIVELOG_FOLDER_PROP_NAME)) {
            properties.setProperty(HOODIE_ARCHIVELOG_FOLDER_PROP_NAME, DEFAULT_ARCHIVELOG_FOLDER);
        }
        properties.store(outputStream, "Properties saved on " + new Date(System.currentTimeMillis()));
    }
}
From source file:com.uber.hoodie.common.table.HoodieTableMetaClient.java
License:Apache License
/**
 * Helper method to initialize a given path as a hoodie dataset with configs passed in as
 * Properties.
 *
 * @return Instance of HoodieTableMetaClient
 */
public static HoodieTableMetaClient initializePathAsHoodieDataset(Configuration hadoopConf, String basePath,
        Properties props) throws IOException {
    log.info("Initializing " + basePath + " as hoodie dataset " + basePath);
    Path basePathDir = new Path(basePath);
    final FileSystem fs = FSUtils.getFs(basePath, hadoopConf);
    if (!fs.exists(basePathDir)) {
        fs.mkdirs(basePathDir);
    }
    Path metaPathDir = new Path(basePath, METAFOLDER_NAME);
    if (!fs.exists(metaPathDir)) {
        fs.mkdirs(metaPathDir);
    }

    // if anything other than the default archive log folder is specified, create that too
    String archiveLogPropVal = props.getProperty(HoodieTableConfig.HOODIE_ARCHIVELOG_FOLDER_PROP_NAME,
        HoodieTableConfig.DEFAULT_ARCHIVELOG_FOLDER);
    if (!archiveLogPropVal.equals(HoodieTableConfig.DEFAULT_ARCHIVELOG_FOLDER)) {
        Path archiveLogDir = new Path(metaPathDir, archiveLogPropVal);
        if (!fs.exists(archiveLogDir)) {
            fs.mkdirs(archiveLogDir);
        }
    }

    // Always create the temporary folder, which is needed for finalizeWrite for Hoodie tables
    final Path temporaryFolder = new Path(basePath, HoodieTableMetaClient.TEMPFOLDER_NAME);
    if (!fs.exists(temporaryFolder)) {
        fs.mkdirs(temporaryFolder);
    }

    // Always create the auxiliary folder, which is needed to track compaction workloads
    // (stats and any metadata in the future)
    final Path auxiliaryFolder = new Path(basePath, HoodieTableMetaClient.AUXILIARYFOLDER_NAME);
    if (!fs.exists(auxiliaryFolder)) {
        fs.mkdirs(auxiliaryFolder);
    }

    HoodieTableConfig.createHoodieProperties(fs, metaPathDir, props);

    // We should not use fs.getConf as this might be different from the original configuration
    // used to create the fs in unit tests
    HoodieTableMetaClient metaClient = new HoodieTableMetaClient(hadoopConf, basePath);
    log.info("Finished initializing Table of type " + metaClient.getTableConfig().getTableType() + " from "
        + basePath);
    return metaClient;
}
From source file:com.uber.hoodie.common.table.log.HoodieLogFormatAppendFailureTest.java
License:Apache License
@Test(timeout = 60000)
public void testFailedToGetAppendStreamFromHDFSNameNode()
        throws IOException, URISyntaxException, InterruptedException, TimeoutException {
    // This test runs against a mini HDFS cluster and kills the DataNodes holding the
    // last block, forcing the next append attempt to fail.
    String uuid = UUID.randomUUID().toString();
    Path localPartitionPath = new Path("/tmp/");
    FileSystem fs = cluster.getFileSystem();
    Path testPath = new Path(localPartitionPath, uuid);
    fs.mkdirs(testPath);

    // Some data & append.
    List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 10);
    Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
    header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
    header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
    HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header);
    Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(testPath)
        .withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits" + ".archive")
        .overBaseCommit("").withFs(fs).build();
    writer = writer.appendBlock(dataBlock);
    // get the current log file version to compare later
    int logFileVersion = writer.getLogFile().getLogVersion();
    Path logFilePath = writer.getLogFile().getPath();
    writer.close();

    // Wait for 3x replication of the file
    DFSTestUtil.waitReplication(fs, logFilePath, (short) 3);

    // Shut down all DNs that have the last block location for the file
    LocatedBlocks lbs = cluster.getFileSystem().getClient().getNamenode()
        .getBlockLocations("/tmp/" + uuid + "/" + logFilePath.getName(), 0, Long.MAX_VALUE);
    List<DataNode> dnsOfCluster = cluster.getDataNodes();
    DatanodeInfo[] dnsWithLocations = lbs.getLastLocatedBlock().getLocations();
    for (DataNode dn : dnsOfCluster) {
        for (DatanodeInfo loc : dnsWithLocations) {
            if (dn.getDatanodeId().equals(loc)) {
                dn.shutdown();
                cluster.stopDataNode(dn.getDisplayName());
                DFSTestUtil.waitForDatanodeDeath(dn);
            }
        }
    }

    // Wait for the replication of this file to go down to 0
    DFSTestUtil.waitReplication(fs, logFilePath, (short) 0);

    // Opening a new Writer right now will throw IOException. The code should handle this,
    // roll over the log file and return a new writer with a bumped-up logVersion.
    writer = HoodieLogFormat.newWriterBuilder().onParentPath(testPath)
        .withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits" + ".archive")
        .overBaseCommit("").withFs(fs).build();
    // The log version should be different for this new writer
    Assert.assertFalse(writer.getLogFile().getLogVersion() == logFileVersion);
}
From source file:com.uber.hoodie.common.table.log.HoodieLogFormatTest.java
License:Apache License
/**
 @Test
 public void testLeaseRecovery() throws IOException, URISyntaxException, InterruptedException {
     Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
         .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
         .overBaseCommit("100").withFs(fs).build();
     List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 100);
     Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
     header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
     header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
     HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header);
     writer = writer.appendBlock(dataBlock);
     long size1 = writer.getCurrentSize();

     // do not close this writer - this simulates a data node appending to a log dying
     // without closing the file
     // writer.close();

     writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
         .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1").overBaseCommit("100")
         .withFs(fs).build();
     records = SchemaTestUtil.generateTestRecords(0, 100);
     header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
     dataBlock = new HoodieAvroDataBlock(records, header);
     writer = writer.appendBlock(dataBlock);
     long size2 = writer.getCurrentSize();
     assertTrue("We just wrote a new block - size2 should be > size1", size2 > size1);
     assertEquals("Write should be auto-flushed. The size reported by FileStatus and the writer should match",
         size2, fs.getFileStatus(writer.getLogFile().getPath()).getLen());
     writer.close();
 }
 **/

@Test
public void testAppendNotSupported() throws IOException, URISyntaxException, InterruptedException {
    // Use an fs like LocalFileSystem, which does not support appends
    Path localPartitionPath = new Path("file://" + partitionPath);
    FileSystem localFs = FSUtils.getFs(localPartitionPath.toString(), HoodieTestUtils.getDefaultHadoopConf());
    Path testPath = new Path(localPartitionPath, "append_test");
    localFs.mkdirs(testPath);

    // Some data & append two times.
    List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 100);
    Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
    header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
    header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
    HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header);
    for (int i = 0; i < 2; i++) {
        HoodieLogFormat.newWriterBuilder().onParentPath(testPath)
            .withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits.archive")
            .overBaseCommit("").withFs(localFs).build().appendBlock(dataBlock).close();
    }

    // ensure there are two log file versions, with the same data.
    FileStatus[] statuses = localFs.listStatus(testPath);
    assertEquals(2, statuses.length);
}
From source file:com.uber.hoodie.common.util.FSUtils.java
License:Apache License
public static void createPathIfNotExists(FileSystem fs, Path partitionPath) throws IOException {
    if (!fs.exists(partitionPath)) {
        fs.mkdirs(partitionPath);
    }
}
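A closing note on the exists()-then-mkdirs pattern seen throughout these examples: it is not strictly required for correctness, since mkdirs behaves like `mkdir -p` and returns true when the path already exists as a directory (on HDFS and LocalFileSystem; behavior when the path exists as a regular file varies by implementation). A hypothetical simplification of the helper above, under that assumption:

// Hypothetical simplification (not from the source project): relies on mkdirs
// succeeding as a no-op when the directory already exists, and treats a false
// return value as a failure.
public static void createPathIfNotExists(FileSystem fs, Path partitionPath) throws IOException {
    if (!fs.mkdirs(partitionPath)) {
        throw new IOException("Could not create directory " + partitionPath);
    }
}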