Example usage for org.apache.hadoop.fs FileSystem mkdirs

List of usage examples for org.apache.hadoop.fs FileSystem mkdirs

Introduction

This page collects usage examples for org.apache.hadoop.fs FileSystem mkdirs.

Prototype

public boolean mkdirs(Path f) throws IOException 

Source Link

Document

Call #mkdirs(Path, FsPermission) with default permission.
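
Before the project excerpts below, here is a minimal, self-contained sketch of both overloads; the configuration and paths are illustrative placeholders, not taken from any of the projects quoted on this page.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;

public class MkdirsExample {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());

        // Single-argument form: creates the directory and any missing parents
        // (mkdir -p semantics), using the default permission.
        boolean created = fs.mkdirs(new Path("/tmp/mkdirs_example"));

        // Equivalent explicit-permission form.
        boolean createdWithPerm = fs.mkdirs(new Path("/tmp/mkdirs_example_perm"),
                new FsPermission("755"));

        System.out.println(created + " " + createdWithPerm);
    }
}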

Usage

From source file:com.twitter.hraven.etl.TestFileLister.java

License:Apache License

/**
 * removes conf file which has already been put in prunedList
 *
 * @throws IOException
 */
@Test
public void testPruneFileListRemovingConfFromPruneList() throws IOException {

    long maxFileSize = 20L;
    FileStatus[] origList = new FileStatus[2];
    FileSystem hdfs = FileSystem.get(UTIL.getConfiguration());
    Path inputPath = new Path("/inputdir_filesize_pruneList");
    boolean os = hdfs.mkdirs(inputPath);
    assertTrue(os);
    assertTrue(hdfs.exists(inputPath));

    Path relocationPath = new Path("/relocation_filesize_pruneList");
    os = hdfs.mkdirs(relocationPath);
    assertTrue(os);
    assertTrue(hdfs.exists(relocationPath));

    Path emptyConfFile = new Path(inputPath.toUri() + "/" + "job_1329348432655_0001_conf.xml");
    os = hdfs.createNewFile(emptyConfFile);
    assertTrue(os);
    assertTrue(hdfs.exists(emptyConfFile));
    origList[0] = hdfs.getFileStatus(emptyConfFile);

    final String JOB_HISTORY_FILE_NAME = "src/test/resources/job_1329348432655_0001-1329348443227-user-Sleep+job-1329348468601-10-1-SUCCEEDED-default.jhist";
    File jobHistoryfile = new File(JOB_HISTORY_FILE_NAME);
    Path srcPath = new Path(jobHistoryfile.toURI());
    hdfs.copyFromLocalFile(srcPath, inputPath);
    Path expPath = new Path(inputPath.toUri() + "/" + srcPath.getName());
    assertTrue(hdfs.exists(expPath));
    origList[1] = hdfs.getFileStatus(expPath);

    FileStatus[] prunedList = FileLister.pruneFileListBySize(maxFileSize, origList, hdfs, inputPath);
    assertNotNull(prunedList);
    assertTrue(prunedList.length == 0);
}

From source file:com.twitter.hraven.etl.TestFileLister.java

License:Apache License

/**
 * tests the case when several files are spread out in the dir and need to be removed
 *
 * @throws IOException
 */
@Test
public void testPruneFileListMultipleFilesAlreadyMovedCases() throws IOException {

    long maxFileSize = 20L;
    FileStatus[] origList = new FileStatus[12];
    FileSystem hdfs = FileSystem.get(UTIL.getConfiguration());
    Path inputPath = new Path("/inputdir_filesize_multiple");
    boolean os = hdfs.mkdirs(inputPath);
    assertTrue(os);
    assertTrue(hdfs.exists(inputPath));

    Path relocationPath = new Path("/relocation_filesize_multiple");
    os = hdfs.mkdirs(relocationPath);
    assertTrue(os);
    assertTrue(hdfs.exists(relocationPath));

    Path emptyFile = new Path(
            inputPath.toUri() + "/" + "job_1329341111111_0101-1329111113227-user2-Sleep.jhist");
    os = hdfs.createNewFile(emptyFile);
    assertTrue(os);
    assertTrue(hdfs.exists(emptyFile));
    origList[0] = hdfs.getFileStatus(emptyFile);

    Path emptyConfFile = new Path(inputPath.toUri() + "/" + "job_1329341111111_0101_conf.xml");
    os = hdfs.createNewFile(emptyConfFile);

    assertTrue(os);
    assertTrue(hdfs.exists(emptyConfFile));
    origList[1] = hdfs.getFileStatus(emptyConfFile);

    final String JOB_HISTORY_FILE_NAME = "src/test/resources/job_1329348432655_0001-1329348443227-user-Sleep+job-1329348468601-10-1-SUCCEEDED-default.jhist";
    File jobHistoryfile = new File(JOB_HISTORY_FILE_NAME);
    Path srcPath = new Path(jobHistoryfile.toURI());
    hdfs.copyFromLocalFile(srcPath, inputPath);
    Path expPath = new Path(inputPath.toUri() + "/" + srcPath.getName());
    assertTrue(hdfs.exists(expPath));
    origList[2] = hdfs.getFileStatus(expPath);

    final String JOB_CONF_FILE_NAME = "src/test/resources/job_1329348432655_0001_conf.xml";
    File jobConfFile = new File(JOB_CONF_FILE_NAME);
    srcPath = new Path(jobConfFile.toURI());
    hdfs.copyFromLocalFile(srcPath, inputPath);
    expPath = new Path(inputPath.toUri() + "/" + srcPath.getName());
    assertTrue(hdfs.exists(expPath));
    origList[3] = hdfs.getFileStatus(expPath);

    Path inputPath2 = new Path(inputPath.toUri() + "/"
            + "job_1311222222255_0221-1311111143227-user10101-WordCount-1-SUCCEEDED-default.jhist");
    hdfs.copyFromLocalFile(srcPath, inputPath2);
    assertTrue(hdfs.exists(inputPath2));
    origList[4] = hdfs.getFileStatus(inputPath2);

    Path inputPath3 = new Path(inputPath.toUri() + "/"
            + "job_1399999999155_0991-1311111143227-user3321-TeraGen-1-SUCCEEDED-default.jhist");
    hdfs.copyFromLocalFile(srcPath, inputPath3);
    assertTrue(hdfs.exists(inputPath3));
    origList[5] = hdfs.getFileStatus(inputPath3);

    Path inputPath4 = new Path(inputPath.toUri() + "/"
            + "job_1399977777177_0771-1311111143227-user3321-TeraSort-1-SUCCEEDED-default.jhist");
    hdfs.copyFromLocalFile(srcPath, inputPath4);
    assertTrue(hdfs.exists(inputPath4));
    origList[6] = hdfs.getFileStatus(inputPath4);

    Path emptyFile2 = new Path(
            inputPath.toUri() + "/" + "job_1329343333333_5551-1329111113227-user2-SomethingElse.jhist");
    os = hdfs.createNewFile(emptyFile2);
    assertTrue(os);
    assertTrue(hdfs.exists(emptyFile2));
    origList[7] = hdfs.getFileStatus(emptyFile2);

    Path emptyConfFile2 = new Path(inputPath.toUri() + "/" + "job_1329343333333_5551_conf.xml");
    os = hdfs.createNewFile(emptyConfFile2);
    assertTrue(os);
    assertTrue(hdfs.exists(emptyConfFile2));
    origList[8] = hdfs.getFileStatus(emptyConfFile2);

    // this is an empty file which tests the toBeRemovedFileList
    // at the end of function pruneFileListBySize
    Path emptyConfFile3 = new Path(inputPath.toUri() + "/" + "job_1399999999155_0991_conf.xml");
    os = hdfs.createNewFile(emptyConfFile3);
    assertTrue(os);
    assertTrue(hdfs.exists(emptyConfFile3));
    origList[9] = hdfs.getFileStatus(emptyConfFile3);

    Path inputConfPath2 = new Path(inputPath.toUri() + "/" + "job_1311222222255_0221_conf.xml");
    srcPath = new Path(jobConfFile.toURI());
    hdfs.copyFromLocalFile(srcPath, inputConfPath2);
    assertTrue(hdfs.exists(inputConfPath2));
    origList[10] = hdfs.getFileStatus(inputConfPath2);

    // this is an empty file which tests the toBeRemovedFileList
    // at the end of function pruneFileListBySize
    Path emptyConfFile4 = new Path(inputPath.toUri() + "/" + "job_1399977777177_0771_conf.xml");
    os = hdfs.createNewFile(emptyConfFile4);
    assertTrue(os);
    assertTrue(hdfs.exists(emptyConfFile4));
    origList[11] = hdfs.getFileStatus(emptyConfFile4);

    FileStatus[] prunedList = FileLister.pruneFileListBySize(maxFileSize, origList, hdfs, inputPath);
    assertNotNull(prunedList);
    assertTrue(prunedList.length == 4);
}

From source file:com.twitter.pig.backend.hadoop.executionengine.tez.TezJobControlCompiler.java

License:Apache License

/**
 * Walks the temporary directory structure to move (rename) files
 * to their final location.
 */
private void moveResults(Path p, String rem, FileSystem fs) throws IOException {
    for (FileStatus fstat : fs.listStatus(p)) {
        Path src = fstat.getPath();
        if (fstat.isDirectory()) {
            log.info("mkdir: " + src);
            fs.mkdirs(removePart(src, rem));
            moveResults(fstat.getPath(), rem, fs);
        } else {
            Path dst = removePart(src, rem);
            log.info("mv: " + src + " " + dst);
            fs.rename(src, dst);
        }
    }
}
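
The removePart helper is not part of this excerpt. The sketch below is a guess at its behavior, inferred only from the call sites above (strip the temporary segment rem from the path); it is not the quoted project's actual code.

// Hypothetical reconstruction of removePart, inferred from the call sites above.
private Path removePart(Path src, String rem) {
    // Drop the temporary segment "rem" from the path string, e.g.
    // "/out/_tmp/part-r-00000" with rem "_tmp/" -> "/out/part-r-00000".
    return new Path(src.toUri().getPath().replace(rem, ""));
}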

From source file:com.uber.hoodie.common.model.HoodieTableMetadata.java

License:Apache License

private HoodieTableMetadata(FileSystem fs, String basePath, String tableName, boolean initOnMissing) {
    this.fs = fs;
    this.basePath = basePath;

    try {
        Path basePathDir = new Path(this.basePath);
        if (!fs.exists(basePathDir)) {
            if (initOnMissing) {
                fs.mkdirs(basePathDir);
            } else {
                throw new DatasetNotFoundException(this.basePath);
            }
        }

        if (!fs.isDirectory(basePathDir)) {
            throw new DatasetNotFoundException(this.basePath);
        }

        this.metadataFolder = new Path(this.basePath, METAFOLDER_NAME);
        Path propertyPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE);
        if (!fs.exists(propertyPath)) {
            if (initOnMissing) {
                // create .hoodie folder if it does not exist.
                createHoodieProperties(metadataFolder, tableName);
            } else {
                throw new InvalidDatasetException(this.basePath);
            }
        }

        // Load meta data
        this.commits = new HoodieCommits(scanCommits(COMMIT_FILE_SUFFIX));
        this.inflightCommits = scanCommits(INFLIGHT_FILE_SUFFIX);
        this.properties = readHoodieProperties();
        log.info("All commits :" + commits);
    } catch (IOException e) {
        throw new HoodieIOException("Could not load HoodieMetadata from path " + basePath, e);
    }
}

From source file:com.uber.hoodie.common.model.HoodieTestUtils.java

License:Apache License

public static final String createNewLogFile(FileSystem fs, String basePath, String partitionPath,
        String commitTime, String fileID, Optional<Integer> version) throws IOException {
    String folderPath = basePath + "/" + partitionPath + "/";
    boolean makeDir = fs.mkdirs(new Path(folderPath));
    if (!makeDir) {
        throw new IOException("cannot create directory for path " + folderPath);
    }
    boolean createFile = fs.createNewFile(new Path(folderPath + FSUtils.makeLogFileName(fileID, ".log",
            commitTime, version.orElse(DEFAULT_LOG_VERSION), HoodieLogFormat.UNKNOWN_WRITE_TOKEN)));
    if (!createFile) {
        throw new IOException(
                StringUtils.format("cannot create data file for commit %s and fileId %s", commitTime, fileID));
    }
    return fileID;
}

From source file:com.uber.hoodie.common.table.HoodieTableConfig.java

License:Apache License

/**
 * Initialize the hoodie meta directory and any necessary files inside the meta (including the
 * hoodie.properties)
 */
public static void createHoodieProperties(FileSystem fs, Path metadataFolder, Properties properties)
        throws IOException {
    if (!fs.exists(metadataFolder)) {
        fs.mkdirs(metadataFolder);
    }
    Path propertyPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE);
    try (FSDataOutputStream outputStream = fs.create(propertyPath)) {
        if (!properties.containsKey(HOODIE_TABLE_NAME_PROP_NAME)) {
            throw new IllegalArgumentException(HOODIE_TABLE_NAME_PROP_NAME + " property needs to be specified");
        }
        if (!properties.containsKey(HOODIE_TABLE_TYPE_PROP_NAME)) {
            properties.setProperty(HOODIE_TABLE_TYPE_PROP_NAME, DEFAULT_TABLE_TYPE.name());
        }
        if (HoodieTableType.MERGE_ON_READ.name().equals(properties.getProperty(HOODIE_TABLE_TYPE_PROP_NAME))
                && !properties.containsKey(HOODIE_PAYLOAD_CLASS_PROP_NAME)) {
            properties.setProperty(HOODIE_PAYLOAD_CLASS_PROP_NAME, DEFAULT_PAYLOAD_CLASS);
        }
        if (!properties.containsKey(HOODIE_ARCHIVELOG_FOLDER_PROP_NAME)) {
            properties.setProperty(HOODIE_ARCHIVELOG_FOLDER_PROP_NAME, DEFAULT_ARCHIVELOG_FOLDER);
        }
        properties.store(outputStream, "Properties saved on " + new Date(System.currentTimeMillis()));
    }
}
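
A hedged usage sketch of createHoodieProperties: only the table name is strictly required by the checks above. The paths are placeholders, and the sketch assumes the property-name constants are publicly accessible, as the call from HoodieTableMetaClient below suggests.

// Illustrative invocation; paths and table name are placeholders.
Properties props = new Properties();
props.setProperty(HoodieTableConfig.HOODIE_TABLE_NAME_PROP_NAME, "example_table");
FileSystem fs = FSUtils.getFs("/tmp/hoodie_example", new Configuration());
HoodieTableConfig.createHoodieProperties(fs, new Path("/tmp/hoodie_example/.hoodie"), props);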

From source file:com.uber.hoodie.common.table.HoodieTableMetaClient.java

License:Apache License

/**
 * Helper method to initialize a given path as a hoodie dataset with configs passed in as
 * Properties
 *
 * @return Instance of HoodieTableMetaClient
 */
public static HoodieTableMetaClient initializePathAsHoodieDataset(Configuration hadoopConf, String basePath,
        Properties props) throws IOException {
    log.info("Initializing " + basePath + " as hoodie dataset " + basePath);
    Path basePathDir = new Path(basePath);
    final FileSystem fs = FSUtils.getFs(basePath, hadoopConf);
    if (!fs.exists(basePathDir)) {
        fs.mkdirs(basePathDir);
    }
    Path metaPathDir = new Path(basePath, METAFOLDER_NAME);
    if (!fs.exists(metaPathDir)) {
        fs.mkdirs(metaPathDir);
    }

    // if anything other than default archive log folder is specified, create that too
    String archiveLogPropVal = props.getProperty(HoodieTableConfig.HOODIE_ARCHIVELOG_FOLDER_PROP_NAME,
            HoodieTableConfig.DEFAULT_ARCHIVELOG_FOLDER);
    if (!archiveLogPropVal.equals(HoodieTableConfig.DEFAULT_ARCHIVELOG_FOLDER)) {
        Path archiveLogDir = new Path(metaPathDir, archiveLogPropVal);
        if (!fs.exists(archiveLogDir)) {
            fs.mkdirs(archiveLogDir);
        }
    }

    // Always create temporaryFolder which is needed for finalizeWrite for Hoodie tables
    final Path temporaryFolder = new Path(basePath, HoodieTableMetaClient.TEMPFOLDER_NAME);
    if (!fs.exists(temporaryFolder)) {
        fs.mkdirs(temporaryFolder);
    }

    // Always create auxiliary folder which is needed to track compaction workloads (stats and any metadata in future)
    final Path auxiliaryFolder = new Path(basePath, HoodieTableMetaClient.AUXILIARYFOLDER_NAME);
    if (!fs.exists(auxiliaryFolder)) {
        fs.mkdirs(auxiliaryFolder);
    }

    HoodieTableConfig.createHoodieProperties(fs, metaPathDir, props);
    // We should not use fs.getConf as this might be different from the original configuration
    // used to create the fs in unit tests
    HoodieTableMetaClient metaClient = new HoodieTableMetaClient(hadoopConf, basePath);
    log.info("Finished initializing Table of type " + metaClient.getTableConfig().getTableType() + " from "
            + basePath);
    return metaClient;
}
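
For comparison, a sketch that overrides the archive log folder, exercising the extra-directory branch above; all names and paths are placeholders.

// Illustrative: a non-default archive log folder makes initializePathAsHoodieDataset
// create that directory under the meta folder as well.
Properties props = new Properties();
props.setProperty(HoodieTableConfig.HOODIE_TABLE_NAME_PROP_NAME, "example_table");
props.setProperty(HoodieTableConfig.HOODIE_ARCHIVELOG_FOLDER_PROP_NAME, "archived");
HoodieTableMetaClient metaClient = HoodieTableMetaClient
        .initializePathAsHoodieDataset(new Configuration(), "/tmp/hoodie_example_ds", props);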

From source file:com.uber.hoodie.common.table.log.HoodieLogFormatAppendFailureTest.java

License:Apache License

@Test(timeout = 60000)
public void testFailedToGetAppendStreamFromHDFSNameNode()
        throws IOException, URISyntaxException, InterruptedException, TimeoutException {

    // Create a unique test directory on the MiniDFSCluster's HDFS
    String uuid = UUID.randomUUID().toString();
    Path localPartitionPath = new Path("/tmp/");
    FileSystem fs = cluster.getFileSystem();
    Path testPath = new Path(localPartitionPath, uuid);
    fs.mkdirs(testPath);

    // Some data & append.
    List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 10);
    Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
    header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
    header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
    HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header);

    Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(testPath)
            .withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits" + ".archive")
            .overBaseCommit("").withFs(fs).build();

    writer = writer.appendBlock(dataBlock);
    // get the current log file version to compare later
    int logFileVersion = writer.getLogFile().getLogVersion();
    Path logFilePath = writer.getLogFile().getPath();
    writer.close();

    // Wait for 3 times replication of file
    DFSTestUtil.waitReplication(fs, logFilePath, (short) 3);
    // Shut down all DNs that have the last block location for the file
    LocatedBlocks lbs = cluster.getFileSystem().getClient().getNamenode()
            .getBlockLocations("/tmp/" + uuid + "/" + logFilePath.getName(), 0, Long.MAX_VALUE);
    List<DataNode> dnsOfCluster = cluster.getDataNodes();
    DatanodeInfo[] dnsWithLocations = lbs.getLastLocatedBlock().getLocations();
    for (DataNode dn : dnsOfCluster) {
        for (DatanodeInfo loc : dnsWithLocations) {
            if (dn.getDatanodeId().equals(loc)) {
                dn.shutdown();
                cluster.stopDataNode(dn.getDisplayName());
                DFSTestUtil.waitForDatanodeDeath(dn);
            }
        }
    }
    // Wait for the replication of this file to go down to 0
    DFSTestUtil.waitReplication(fs, logFilePath, (short) 0);

    // Opening a new Writer right now will throw IOException. The code should handle this, rollover the logfile and
    // return a new writer with a bumped up logVersion
    writer = HoodieLogFormat.newWriterBuilder().onParentPath(testPath)
            .withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits" + ".archive")
            .overBaseCommit("").withFs(fs).build();
    // The log version should be different for this new writer
    Assert.assertFalse(writer.getLogFile().getLogVersion() == logFileVersion);
}

From source file:com.uber.hoodie.common.table.log.HoodieLogFormatTest.java

License:Apache License

/**
@Test
public void testLeaseRecovery() throws IOException, URISyntaxException, InterruptedException {
  Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
.overBaseCommit("100").withFs(fs).build();
  List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 100);
  Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
  header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
  header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
  HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header);
  writer = writer.appendBlock(dataBlock);
  long size1 = writer.getCurrentSize();
  // do not close this writer - this simulates a data node appending to a log and dying without closing the file
  // writer.close();
        
  writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1").overBaseCommit("100")
.withFs(fs).build();
  records = SchemaTestUtil.generateTestRecords(0, 100);
  header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
  dataBlock = new HoodieAvroDataBlock(records, header);
  writer = writer.appendBlock(dataBlock);
  long size2 = writer.getCurrentSize();
  assertTrue("We just wrote a new block - size2 should be > size1", size2 > size1);
  assertEquals("Write should be auto-flushed. The size reported by FileStatus and the writer should match", size2,
fs.getFileStatus(writer.getLogFile().getPath()).getLen());
  writer.close();
}
**/

@Test
public void testAppendNotSupported() throws IOException, URISyntaxException, InterruptedException {
    // Use some fs like LocalFileSystem, that does not support appends
    Path localPartitionPath = new Path("file://" + partitionPath);
    FileSystem localFs = FSUtils.getFs(localPartitionPath.toString(), HoodieTestUtils.getDefaultHadoopConf());
    Path testPath = new Path(localPartitionPath, "append_test");
    localFs.mkdirs(testPath);

    // Some data & append two times.
    List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 100);
    Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
    header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
    header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
    HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header);

    for (int i = 0; i < 2; i++) {
        HoodieLogFormat.newWriterBuilder().onParentPath(testPath)
                .withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits.archive")
                .overBaseCommit("").withFs(localFs).build().appendBlock(dataBlock).close();
    }

    // ensure there are two log file versions, with same data.
    FileStatus[] statuses = localFs.listStatus(testPath);
    assertEquals(2, statuses.length);
}

From source file:com.uber.hoodie.common.util.FSUtils.java

License:Apache License

public static void createPathIfNotExists(FileSystem fs, Path partitionPath) throws IOException {
    if (!fs.exists(partitionPath)) {
        fs.mkdirs(partitionPath);
    }
}
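
Worth noting: mkdirs itself succeeds when the directory already exists (mkdir -p semantics), so the exists() guard above is a readability choice rather than a correctness requirement. A minimal usage sketch, with a placeholder path:

// Illustrative call; the partition path is a placeholder.
FileSystem fs = FileSystem.get(new Configuration());
FSUtils.createPathIfNotExists(fs, new Path("/tmp/hoodie/partition_2016_01_01"));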