Example usage for org.apache.hadoop.fs Path getParent

List of usage examples for org.apache.hadoop.fs Path getParent

Introduction

On this page you can find example usages of org.apache.hadoop.fs.Path.getParent().

Prototype

public Path getParent() 

Source Link

Document

Returns the parent of a path or null if at root.
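
A minimal, self-contained sketch (the paths below are illustrative, not drawn from the examples on this page) shows how getParent() strips the last path component and returns null once the root is reached:

import org.apache.hadoop.fs.Path;

public class GetParentExample {
    public static void main(String[] args) {
        Path file = new Path("/user/hive/warehouse/part-00000");
        System.out.println(file.getParent());             // /user/hive/warehouse
        System.out.println(file.getParent().getParent()); // /user/hive
        System.out.println(new Path("/").getParent());    // null - already at the root
    }
}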

Usage

From source file:org.apache.hama.pipes.TestPipes.java

License:Apache License

static void runProgram(BSPJob bsp, Path program, Path inputPath, Path outputPath, int numBspTasks,
        int numOfGroom) throws IOException, ClassNotFoundException, InterruptedException {
    HamaConfiguration conf = (HamaConfiguration) bsp.getConfiguration();
    bsp.setJobName("Test Hama Pipes " + program.getName());
    bsp.setBspClass(PipesBSP.class);

    FileInputFormat.setInputPaths(bsp, inputPath);
    FileOutputFormat.setOutputPath(bsp, outputPath);

    BSPJobClient jobClient = new BSPJobClient(conf);

    // Set bspTaskNum
    ClusterStatus cluster = jobClient.getClusterStatus(false);
    assertEquals(numOfGroom, cluster.getGroomServers());
    bsp.setNumBspTask(numBspTasks);

    // Copy binary to DFS
    Path testExec = new Path(EXAMPLE_TMP_OUTPUT + "testing/bin/application");
    fs.delete(testExec.getParent(), true);
    fs.copyFromLocalFile(program, testExec);

    // Set Executable
    Submitter.setExecutable(conf, fs.makeQualified(testExec).toString());

    // Run bspJob
    Submitter.runJob(bsp);

    LOG.info("Client finishes execution job");

    // check output
    FileStatus[] listStatus = fs.listStatus(outputPath);
    // TODO check whether you can force the number of tasks.
    System.out.println(listStatus.length + ", " + numBspTasks);
}

From source file:org.apache.hcatalog.mapreduce.FileOutputCommitterContainer.java

License:Apache License

private String getPartitionRootLocation(String ptnLocn, int numPtnKeys) {
    if (ptnRootLocation == null) {
        // we only need to calculate it once; it'll be the same for the other partitions in this job.
        Path ptnRoot = new Path(ptnLocn);
        for (int i = 0; i < numPtnKeys; i++) {
            //          LOG.info("Getting parent of "+ptnRoot.getName());
            ptnRoot = ptnRoot.getParent();
        }
        ptnRootLocation = ptnRoot.toString();
    }
    //      LOG.info("Returning final parent : "+ptnRootLocation);
    return ptnRootLocation;
}

From source file:org.apache.hcatalog.mapreduce.FileOutputCommitterContainer.java

License:Apache License

/**
 * Move all of the files from the temp directory to the final location
 * @param fs the output file system
 * @param file the file to move
 * @param srcDir the source directory
 * @param destDir the target directory
 * @param dryRun - a flag that simply tests if this move would succeed or not based
 *                 on whether other files exist where we're trying to copy
 * @throws java.io.IOException
 */
private void moveTaskOutputs(FileSystem fs, Path file, Path srcDir, Path destDir, final boolean dryRun)
        throws IOException {

    if (file.getName().equals(TEMP_DIR_NAME) || file.getName().equals(LOGS_DIR_NAME)
            || file.getName().equals(SUCCEEDED_FILE_NAME)) {
        return;
    }
    final Path finalOutputPath = getFinalPath(file, srcDir, destDir);
    if (fs.isFile(file)) {
        if (dryRun) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Testing if moving file: [" + file + "] to [" + finalOutputPath
                        + "] would cause a problem");
            }
            if (fs.exists(finalOutputPath)) {
                throw new HCatException(ErrorType.ERROR_MOVE_FAILED,
                        "Data already exists in " + finalOutputPath + ", duplicate publish not possible.");
            }
        } else {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Moving file: [ " + file + "] to [" + finalOutputPath + "]");
            }
            // Make sure the parent directory exists.  It is not an error
            // to recreate an existing directory
            fs.mkdirs(finalOutputPath.getParent());
            if (!fs.rename(file, finalOutputPath)) {
                if (!fs.delete(finalOutputPath, true)) {
                    throw new HCatException(ErrorType.ERROR_MOVE_FAILED,
                            "Failed to delete existing path " + finalOutputPath);
                }
                if (!fs.rename(file, finalOutputPath)) {
                    throw new HCatException(ErrorType.ERROR_MOVE_FAILED,
                            "Failed to move output to " + finalOutputPath);
                }
            }
        }
    } else if (fs.getFileStatus(file).isDir()) {
        FileStatus[] children = fs.listStatus(file);
        FileStatus firstChild = null;
        if (children != null) {
            int index = 0;
            while (index < children.length) {
                if (!children[index].getPath().getName().equals(TEMP_DIR_NAME)
                        && !children[index].getPath().getName().equals(LOGS_DIR_NAME)
                        && !children[index].getPath().getName().equals(SUCCEEDED_FILE_NAME)) {
                    firstChild = children[index];
                    break;
                }
                index++;
            }
        }
        if (firstChild != null && firstChild.isDir()) {
            // If the first child is a directory, the rest will be directories too, per the HCatalog dir structure;
            // recurse in that case
            for (FileStatus child : children) {
                moveTaskOutputs(fs, child.getPath(), srcDir, destDir, dryRun);
            }
        } else {

            if (!dryRun) {
                if (dynamicPartitioningUsed) {
                    // Optimization: if the first child is a file, we have reached the leaf directory; move the parent directory itself
                    // instead of moving each file under it. See HCATALOG-538

                    final Path parentDir = finalOutputPath.getParent();
                    // Create the directory
                    Path placeholder = new Path(parentDir, "_placeholder");
                    if (fs.mkdirs(parentDir)) {
                        // It is weird, but we need a placeholder;
                        // otherwise rename cannot move the file to the right place
                        fs.create(placeholder).close();
                    }
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Moving directory: " + file + " to " + parentDir);
                    }
                    if (!fs.rename(file, parentDir)) {
                        final String msg = "Failed to move file: " + file + " to " + parentDir;
                        LOG.error(msg);
                        throw new HCatException(ErrorType.ERROR_MOVE_FAILED, msg);
                    }
                    fs.delete(placeholder, false);
                } else {
                    // If no partitioning is used, we have to move each file
                    for (FileStatus child : children) {
                        moveTaskOutputs(fs, child.getPath(), srcDir, destDir, dryRun);
                    }
                }
            } else {
                if (fs.exists(finalOutputPath)) {
                    throw new HCatException(ErrorType.ERROR_MOVE_FAILED,
                            "Data already exists in " + finalOutputPath + ", duplicate publish not possible.");
                }
            }
        }
    } else {
        // Should never happen
        final String msg = "Unknown file type being asked to be moved, erroring out";
        throw new HCatException(ErrorType.ERROR_MOVE_FAILED, msg);
    }
}

From source file:org.apache.hcatalog.mapreduce.TestHCatInputFormat.java

License:Apache License

/**
 * Create an input sequence file with 100 records; every 10th record is bad.
 * Load this table into Hive.
 */
@Before
@Override
public void setUp() throws Exception {
    super.setUp();
    if (setUpComplete) {
        return;
    }

    Path intStringSeq = new Path(TEST_DATA_DIR + "/data/intString.seq");
    LOG.info("Creating data file: " + intStringSeq);
    SequenceFile.Writer seqFileWriter = SequenceFile.createWriter(intStringSeq.getFileSystem(hiveConf),
            hiveConf, intStringSeq, NullWritable.class, BytesWritable.class);

    ByteArrayOutputStream out = new ByteArrayOutputStream();
    TIOStreamTransport transport = new TIOStreamTransport(out);
    TBinaryProtocol protocol = new TBinaryProtocol(transport);

    for (int i = 1; i <= 100; i++) {
        if (i % 10 == 0) {
            seqFileWriter.append(NullWritable.get(), new BytesWritable("bad record".getBytes()));
        } else {
            out.reset();
            IntString intString = new IntString(i, Integer.toString(i), i);
            intString.write(protocol);
            BytesWritable bytesWritable = new BytesWritable(out.toByteArray());
            seqFileWriter.append(NullWritable.get(), bytesWritable);
        }
    }

    seqFileWriter.close();

    // Now let's load this file into a new Hive table.
    Assert.assertEquals(0, driver.run("drop table if exists test_bad_records").getResponseCode());
    Assert.assertEquals(0,
            driver.run("create table test_bad_records "
                    + "row format serde 'org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer' "
                    + "with serdeproperties ( "
                    + "  'serialization.class'='org.apache.hadoop.hive.serde2.thrift.test.IntString', "
                    + "  'serialization.format'='org.apache.thrift.protocol.TBinaryProtocol') " + "stored as"
                    + "  inputformat 'org.apache.hadoop.mapred.SequenceFileInputFormat'"
                    + "  outputformat 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'")
                    .getResponseCode());
    Assert.assertEquals(0,
            driver.run("load data local inpath '" + intStringSeq.getParent() + "' into table test_bad_records")
                    .getResponseCode());

    setUpComplete = true;
}

From source file:org.apache.hcatalog.mapreduce.TestHCatMultiOutputFormat.java

License:Apache License

/**
 * Simple test case.
 * <ol>
 * <li>Submits a mapred job which writes out one fixed line to each of the tables</li>
 * <li>uses hive fetch task to read the data and see if it matches what was written</li>
 * </ol>
 *
 * @throws Exception if any error occurs
 */
@Test
public void testOutputFormat() throws Throwable {
    HashMap<String, String> partitionValues = new HashMap<String, String>();
    partitionValues.put("ds", "1");
    partitionValues.put("cluster", "ag");
    ArrayList<OutputJobInfo> infoList = new ArrayList<OutputJobInfo>();
    infoList.add(OutputJobInfo.create("default", tableNames[0], partitionValues));
    infoList.add(OutputJobInfo.create("default", tableNames[1], partitionValues));
    infoList.add(OutputJobInfo.create("default", tableNames[2], partitionValues));

    Job job = new Job(hiveConf, "SampleJob");

    job.setMapperClass(MyMapper.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(MultiOutputFormat.class);
    job.setNumReduceTasks(0);

    JobConfigurer configurer = MultiOutputFormat.createConfigurer(job);

    for (int i = 0; i < tableNames.length; i++) {
        configurer.addOutputFormat(tableNames[i], HCatOutputFormat.class, BytesWritable.class,
                HCatRecord.class);
        HCatOutputFormat.setOutput(configurer.getJob(tableNames[i]), infoList.get(i));
        HCatOutputFormat.setSchema(configurer.getJob(tableNames[i]), schemaMap.get(tableNames[i]));
    }
    configurer.configure();

    Path filePath = createInputFile();
    FileInputFormat.addInputPath(job, filePath);
    Assert.assertTrue(job.waitForCompletion(true));

    ArrayList<String> outputs = new ArrayList<String>();
    for (String tbl : tableNames) {
        outputs.add(getTableData(tbl, "default").get(0));
    }
    Assert.assertEquals("Comparing output of table " + tableNames[0] + " is not correct", outputs.get(0),
            "a,a,1,ag");
    Assert.assertEquals("Comparing output of table " + tableNames[1] + " is not correct", outputs.get(1),
            "a,1,ag");
    Assert.assertEquals("Comparing output of table " + tableNames[2] + " is not correct", outputs.get(2),
            "a,a,extra,1,ag");

    // Check permissions on the partition dirs and files created
    for (int i = 0; i < tableNames.length; i++) {
        Path partitionFile = new Path(warehousedir + "/" + tableNames[i] + "/ds=1/cluster=ag/part-m-00000");
        FileSystem fs = partitionFile.getFileSystem(mrConf);
        Assert.assertEquals("File permissions of table " + tableNames[i] + " is not correct",
                fs.getFileStatus(partitionFile).getPermission(), new FsPermission(tablePerms[i]));
        Assert.assertEquals("File permissions of table " + tableNames[i] + " is not correct",
                fs.getFileStatus(partitionFile.getParent()).getPermission(), new FsPermission(tablePerms[i]));
        Assert.assertEquals("File permissions of table " + tableNames[i] + " is not correct",
                fs.getFileStatus(partitionFile.getParent().getParent()).getPermission(),
                new FsPermission(tablePerms[i]));

    }
    LOG.info("File permissions verified");
}

From source file:org.apache.hcatalog.security.HdfsAuthorizationProvider.java

License:Apache License

/**
 * Checks the permissions for the given path and the current user on the Hadoop FS. If the given path
 * does not exist, it checks its parent folder instead.
 */
protected static void checkPermissions(final Configuration conf, final Path path,
        final EnumSet<FsAction> actions) throws IOException, LoginException {

    if (path == null) {
        throw new IllegalArgumentException("path is null");
    }

    HadoopShims shims = ShimLoader.getHadoopShims();
    final UserGroupInformation ugi;
    if (conf.get(PROXY_USER_NAME) != null) {
        ugi = UserGroupInformation.createRemoteUser(conf.get(PROXY_USER_NAME));
    } else {
        ugi = shims.getUGIForConf(conf);
    }
    final String user = shims.getShortUserName(ugi);

    final FileSystem fs = path.getFileSystem(conf);

    if (fs.exists(path)) {
        checkPermissions(fs, path, actions, user, ugi.getGroupNames());
    } else if (path.getParent() != null) {
        // find the nearest ancestor that exists and check its permissions
        Path par = path.getParent();
        while (par != null) {
            if (fs.exists(par)) {
                break;
            }
            par = par.getParent();
        }

        checkPermissions(fs, par, actions, user, ugi.getGroupNames());
    }
}

From source file:org.apache.hive.hcatalog.mapreduce.FileOutputCommitterContainer.java

License:Apache License

private String getPartitionRootLocation(String ptnLocn, int numPtnKeys) {
    if (customDynamicLocationUsed) {
        return null;
    }

    if (ptnRootLocation == null) {
        // we only need to calculate it once; it'll be the same for the other partitions in this job.
        Path ptnRoot = new Path(ptnLocn);
        for (int i = 0; i < numPtnKeys; i++) {
            //          LOG.info("Getting parent of "+ptnRoot.getName());
            ptnRoot = ptnRoot.getParent();
        }
        ptnRootLocation = ptnRoot.toString();
    }
    //      LOG.info("Returning final parent : "+ptnRootLocation);
    return ptnRootLocation;
}

From source file:org.apache.hive.hcatalog.mapreduce.FileOutputCommitterContainer.java

License:Apache License

/**
 * Move all of the files from the temp directory to the final location
 * @param fs the output file system
 * @param file the file to move
 * @param srcDir the source directory
 * @param destDir the target directory
 * @param dryRun - a flag that simply tests if this move would succeed or not based
 *                 on whether other files exist where we're trying to copy
 * @throws java.io.IOException
 */
private void moveTaskOutputs(FileSystem fs, Path file, Path srcDir, Path destDir, final boolean dryRun,
        boolean immutable) throws IOException {
    if (LOG.isDebugEnabled()) {
        LOG.debug("moveTaskOutputs " + file + " from: " + srcDir + " to: " + destDir + " dry: " + dryRun
                + " immutable: " + immutable);
    }

    if (dynamicPartitioningUsed) {
        immutable = true; // Making sure we treat dynamic partitioning jobs as if they were immutable.
    }

    if (file.getName().equals(TEMP_DIR_NAME) || file.getName().equals(LOGS_DIR_NAME)
            || file.getName().equals(SUCCEEDED_FILE_NAME)) {
        return;
    }

    final Path finalOutputPath = getFinalPath(fs, file, srcDir, destDir, immutable);
    FileStatus fileStatus = fs.getFileStatus(file);

    if (!fileStatus.isDir()) {
        if (dryRun) {
            if (immutable) {
                // Dry-run checks are meaningless for a mutable table; we should always succeed
                // unless there is a runtime IOException.
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Testing if moving file: [" + file + "] to [" + finalOutputPath
                            + "] would cause a problem");
                }
                if (fs.exists(finalOutputPath)) {
                    throw new HCatException(ErrorType.ERROR_MOVE_FAILED,
                            "Data already exists in " + finalOutputPath + ", duplicate publish not possible.");
                }
            }
        } else {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Moving file: [ " + file + "] to [" + finalOutputPath + "]");
            }
            // Make sure the parent directory exists.  It is not an error
            // to recreate an existing directory
            fs.mkdirs(finalOutputPath.getParent());
            if (!fs.rename(file, finalOutputPath)) {
                if (!fs.delete(finalOutputPath, true)) {
                    throw new HCatException(ErrorType.ERROR_MOVE_FAILED,
                            "Failed to delete existing path " + finalOutputPath);
                }
                if (!fs.rename(file, finalOutputPath)) {
                    throw new HCatException(ErrorType.ERROR_MOVE_FAILED,
                            "Failed to move output to " + finalOutputPath);
                }
            }
        }
    } else {

        FileStatus[] children = fs.listStatus(file);
        FileStatus firstChild = null;
        if (children != null) {
            int index = 0;
            while (index < children.length) {
                if (!children[index].getPath().getName().equals(TEMP_DIR_NAME)
                        && !children[index].getPath().getName().equals(LOGS_DIR_NAME)
                        && !children[index].getPath().getName().equals(SUCCEEDED_FILE_NAME)) {
                    firstChild = children[index];
                    break;
                }
                index++;
            }
        }
        if (firstChild != null && firstChild.isDir()) {
            // If the first child is a directory, the rest will be directories too, per the HCatalog dir structure;
            // recurse in that case
            for (FileStatus child : children) {
                moveTaskOutputs(fs, child.getPath(), srcDir, destDir, dryRun, immutable);
            }
        } else {

            if (!dryRun) {
                if (dynamicPartitioningUsed) {

                    // Optimization: if the first child is a file, we have reached the leaf directory; move the parent directory itself
                    // instead of moving each file under it. See HCATALOG-538
                    // Note for a future Append implementation: this optimization is another reason dynamic
                    // partitioning is currently incompatible with append on mutable tables.

                    final Path parentDir = finalOutputPath.getParent();
                    // Create the directory
                    Path placeholder = new Path(parentDir, "_placeholder");
                    if (fs.mkdirs(parentDir)) {
                        // It is weird, but we need a placeholder;
                        // otherwise rename cannot move the file to the right place
                        fs.create(placeholder).close();
                    }
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Moving directory: " + file + " to " + parentDir);
                    }

                    // If custom dynamic location provided, need to rename to final output path
                    Path dstPath = !customDynamicLocationUsed ? parentDir : finalOutputPath;
                    if (!fs.rename(file, dstPath)) {
                        final String msg = "Failed to move file: " + file + " to " + dstPath;
                        LOG.error(msg);
                        throw new HCatException(ErrorType.ERROR_MOVE_FAILED, msg);
                    }
                    fs.delete(placeholder, false);
                } else {

                    // If no partitioning is used, we have to move each file
                    for (FileStatus child : children) {
                        moveTaskOutputs(fs, child.getPath(), srcDir, destDir, dryRun, immutable);
                    }

                }

            } else {
                if (immutable && fs.exists(finalOutputPath)
                        && !MetaStoreUtils.isDirEmpty(fs, finalOutputPath)) {

                    throw new HCatException(ErrorType.ERROR_DUPLICATE_PARTITION,
                            "Data already exists in " + finalOutputPath + ", duplicate publish not possible.");
                }

            }
        }
    }
}

From source file:org.apache.hive.hcatalog.mapreduce.HCatFileUtil.java

License:Apache License

public static void setCustomPath(String customPathFormat, OutputJobInfo jobInfo) {
    // find the root of all custom paths from the custom pattern. The root is the
    // largest prefix of the input pattern string that doesn't match customPathPattern
    Path customPath = new Path(customPathFormat);
    URI customURI = customPath.toUri();
    while (customPath != null && !customPath.toString().isEmpty()) {
        Matcher m = customPathPattern.matcher(customPath.toString());
        if (!m.find()) {
            break;
        }
        customPath = customPath.getParent();
    }

    URI rootURI = customPath.toUri();
    URI childURI = rootURI.relativize(customURI);
    jobInfo.setCustomDynamicLocation(rootURI.getPath(), childURI.getPath());
}

From source file:org.apache.hive.hcatalog.streaming.TestStreaming.java

License:Apache License

private void corruptDataFile(final String file, final Configuration conf, final int addRemoveBytes)
        throws Exception {
    Path bPath = new Path(file);
    Path cPath = new Path(bPath.getParent(), bPath.getName() + ".corrupt");
    FileSystem fs = bPath.getFileSystem(conf);
    FileStatus fileStatus = fs.getFileStatus(bPath);
    int len = addRemoveBytes == Integer.MIN_VALUE ? 0 : (int) fileStatus.getLen() + addRemoveBytes;
    byte[] buffer = new byte[len];
    FSDataInputStream fdis = fs.open(bPath);
    fdis.readFully(0, buffer, 0, (int) Math.min(fileStatus.getLen(), buffer.length));
    fdis.close();
    FSDataOutputStream fdos = fs.create(cPath, true);
    fdos.write(buffer, 0, buffer.length);
    fdos.close();
    fs.delete(bPath, false);
    fs.rename(cPath, bPath);
}