List of usage examples for org.apache.hadoop.fs Path getParent
public Path getParent()
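Before the real-world examples, a minimal sketch of the method's semantics (the class and path below are made up for illustration, not taken from any of the sources): getParent() strips the last component of a path and returns null once the root is reached.

import org.apache.hadoop.fs.Path;

public class GetParentDemo {
  public static void main(String[] args) {
    Path p = new Path("/user/hive/warehouse/part-m-00000");
    // Prints each ancestor in turn: the file, its directory, and so on up to "/".
    while (p != null) {
      System.out.println(p);
      p = p.getParent(); // returns null once p is the root
    }
  }
}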
From source file:org.apache.hama.pipes.TestPipes.java
License:Apache License
static void runProgram(BSPJob bsp, Path program, Path inputPath, Path outputPath, int numBspTasks,
    int numOfGroom) throws IOException, ClassNotFoundException, InterruptedException {
  HamaConfiguration conf = (HamaConfiguration) bsp.getConfiguration();
  bsp.setJobName("Test Hama Pipes " + program.getName());
  bsp.setBspClass(PipesBSP.class);

  FileInputFormat.setInputPaths(bsp, inputPath);
  FileOutputFormat.setOutputPath(bsp, outputPath);

  BSPJobClient jobClient = new BSPJobClient(conf);

  // Set bspTaskNum
  ClusterStatus cluster = jobClient.getClusterStatus(false);
  assertEquals(numOfGroom, cluster.getGroomServers());
  bsp.setNumBspTask(numBspTasks);

  // Copy binary to DFS
  Path testExec = new Path(EXAMPLE_TMP_OUTPUT + "testing/bin/application");
  fs.delete(testExec.getParent(), true);
  fs.copyFromLocalFile(program, testExec);

  // Set executable
  Submitter.setExecutable(conf, fs.makeQualified(testExec).toString());

  // Run bspJob
  Submitter.runJob(bsp);
  LOG.info("Client finishes execution job");

  // Check output
  FileStatus[] listStatus = fs.listStatus(outputPath);
  // TODO: check whether you can force the number of tasks.
  System.out.println(listStatus.length + ", " + numBspTasks);
}
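Here getParent() turns the executable's path into its staging directory (.../testing/bin), so fs.delete(testExec.getParent(), true) clears any leftovers from earlier runs before the binary is copied to DFS.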
From source file:org.apache.hcatalog.mapreduce.FileOutputCommitterContainer.java
License:Apache License
private String getPartitionRootLocation(String ptnLocn, int numPtnKeys) {
  if (ptnRootLocation == null) {
    // we only need to calculate it once, it'll be the same for other partitions in this job.
    Path ptnRoot = new Path(ptnLocn);
    for (int i = 0; i < numPtnKeys; i++) {
      // LOG.info("Getting parent of " + ptnRoot.getName());
      ptnRoot = ptnRoot.getParent();
    }
    ptnRootLocation = ptnRoot.toString();
  }
  // LOG.info("Returning final parent : " + ptnRootLocation);
  return ptnRootLocation;
}
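A standalone sketch of the same climb (the class and warehouse location below are hypothetical, not HCatalog code): since each partition key contributes one path segment such as ds=1, calling getParent() once per key walks from a partition directory back up to the table root.

import org.apache.hadoop.fs.Path;

public class PartitionRootDemo {
  // Strip numPtnKeys trailing partition segments (e.g. ds=1/cluster=ag) from a partition location.
  static Path partitionRoot(Path ptnLocation, int numPtnKeys) {
    Path root = ptnLocation;
    for (int i = 0; i < numPtnKeys; i++) {
      root = root.getParent(); // one level per partition key
    }
    return root;
  }

  public static void main(String[] args) {
    // Prints /warehouse/t
    System.out.println(partitionRoot(new Path("/warehouse/t/ds=1/cluster=ag"), 2));
  }
}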
From source file:org.apache.hcatalog.mapreduce.FileOutputCommitterContainer.java
License:Apache License
/**
 * Move all of the files from the temp directory to the final location.
 * @param fs the output file system
 * @param file the file to move
 * @param srcDir the source directory
 * @param destDir the target directory
 * @param dryRun a flag that simply tests whether this move would succeed, based
 *               on whether other files exist where we're trying to copy
 * @throws java.io.IOException
 */
private void moveTaskOutputs(FileSystem fs, Path file, Path srcDir, Path destDir, final boolean dryRun)
    throws IOException {
  if (file.getName().equals(TEMP_DIR_NAME) || file.getName().equals(LOGS_DIR_NAME)
      || file.getName().equals(SUCCEEDED_FILE_NAME)) {
    return;
  }
  final Path finalOutputPath = getFinalPath(file, srcDir, destDir);
  if (fs.isFile(file)) {
    if (dryRun) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Testing if moving file: [" + file + "] to [" + finalOutputPath
            + "] would cause a problem");
      }
      if (fs.exists(finalOutputPath)) {
        throw new HCatException(ErrorType.ERROR_MOVE_FAILED,
            "Data already exists in " + finalOutputPath + ", duplicate publish not possible.");
      }
    } else {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Moving file: [" + file + "] to [" + finalOutputPath + "]");
      }
      // Make sure the parent directory exists. It is not an error
      // to recreate an existing directory.
      fs.mkdirs(finalOutputPath.getParent());
      if (!fs.rename(file, finalOutputPath)) {
        if (!fs.delete(finalOutputPath, true)) {
          throw new HCatException(ErrorType.ERROR_MOVE_FAILED,
              "Failed to delete existing path " + finalOutputPath);
        }
        if (!fs.rename(file, finalOutputPath)) {
          throw new HCatException(ErrorType.ERROR_MOVE_FAILED,
              "Failed to move output to " + finalOutputPath);
        }
      }
    }
  } else if (fs.getFileStatus(file).isDir()) {
    FileStatus[] children = fs.listStatus(file);
    FileStatus firstChild = null;
    if (children != null) {
      int index = 0;
      while (index < children.length) {
        if (!children[index].getPath().getName().equals(TEMP_DIR_NAME)
            && !children[index].getPath().getName().equals(LOGS_DIR_NAME)
            && !children[index].getPath().getName().equals(SUCCEEDED_FILE_NAME)) {
          firstChild = children[index];
          break;
        }
        index++;
      }
    }
    if (firstChild != null && firstChild.isDir()) {
      // If the first child is a directory, then the rest are directories too,
      // according to the HCatalog dir structure; recurse in that case.
      for (FileStatus child : children) {
        moveTaskOutputs(fs, child.getPath(), srcDir, destDir, dryRun);
      }
    } else {
      if (!dryRun) {
        if (dynamicPartitioningUsed) {
          // Optimization: if the first child is a file, we have reached the leaf
          // directory; move the parent directory itself instead of moving each
          // file under the directory. See HCATALOG-538.
          final Path parentDir = finalOutputPath.getParent();
          // Create the directory
          Path placeholder = new Path(parentDir, "_placeholder");
          if (fs.mkdirs(parentDir)) {
            // It is weird, but we need a placeholder;
            // otherwise rename cannot move the file to the right place.
            fs.create(placeholder).close();
          }
          if (LOG.isDebugEnabled()) {
            LOG.debug("Moving directory: " + file + " to " + parentDir);
          }
          if (!fs.rename(file, parentDir)) {
            final String msg = "Failed to move file: " + file + " to " + parentDir;
            LOG.error(msg);
            throw new HCatException(ErrorType.ERROR_MOVE_FAILED, msg);
          }
          fs.delete(placeholder, false);
        } else {
          // In case of no partition, we have to move each file.
          for (FileStatus child : children) {
            moveTaskOutputs(fs, child.getPath(), srcDir, destDir, dryRun);
          }
        }
      } else {
        if (fs.exists(finalOutputPath)) {
          throw new HCatException(ErrorType.ERROR_MOVE_FAILED,
              "Data already exists in " + finalOutputPath + ", duplicate publish not possible.");
        }
      }
    }
  } else {
    // Should never happen
    final String msg = "Unknown file type being asked to be moved, erroring out";
    throw new HCatException(ErrorType.ERROR_MOVE_FAILED, msg);
  }
}
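Both getParent() calls in this method matter: fs.mkdirs(finalOutputPath.getParent()) guarantees the destination's parent exists before a file rename, while parentDir lets a dynamically partitioned leaf directory be moved with a single rename rather than file by file (the HCATALOG-538 optimization referenced in the comment).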
From source file:org.apache.hcatalog.mapreduce.TestHCatInputFormat.java
License:Apache License
/**
 * Create an input sequence file with 100 records; every 10th record is bad.
 * Load this table into Hive.
 */
@Before
@Override
public void setUp() throws Exception {
  super.setUp();
  if (setUpComplete) {
    return;
  }

  Path intStringSeq = new Path(TEST_DATA_DIR + "/data/intString.seq");
  LOG.info("Creating data file: " + intStringSeq);
  SequenceFile.Writer seqFileWriter = SequenceFile.createWriter(intStringSeq.getFileSystem(hiveConf),
      hiveConf, intStringSeq, NullWritable.class, BytesWritable.class);

  ByteArrayOutputStream out = new ByteArrayOutputStream();
  TIOStreamTransport transport = new TIOStreamTransport(out);
  TBinaryProtocol protocol = new TBinaryProtocol(transport);

  for (int i = 1; i <= 100; i++) {
    if (i % 10 == 0) {
      seqFileWriter.append(NullWritable.get(), new BytesWritable("bad record".getBytes()));
    } else {
      out.reset();
      IntString intString = new IntString(i, Integer.toString(i), i);
      intString.write(protocol);
      BytesWritable bytesWritable = new BytesWritable(out.toByteArray());
      seqFileWriter.append(NullWritable.get(), bytesWritable);
    }
  }

  seqFileWriter.close();

  // Now let's load this file into a new Hive table.
  Assert.assertEquals(0, driver.run("drop table if exists test_bad_records").getResponseCode());
  Assert.assertEquals(0, driver.run(
      "create table test_bad_records "
          + "row format serde 'org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer' "
          + "with serdeproperties ( "
          + "  'serialization.class'='org.apache.hadoop.hive.serde2.thrift.test.IntString', "
          + "  'serialization.format'='org.apache.thrift.protocol.TBinaryProtocol') "
          + "stored as"
          + " inputformat 'org.apache.hadoop.mapred.SequenceFileInputFormat'"
          + " outputformat 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'")
      .getResponseCode());
  Assert.assertEquals(0,
      driver.run("load data local inpath '" + intStringSeq.getParent() + "' into table test_bad_records")
          .getResponseCode());

  setUpComplete = true;
}
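Note that the LOAD DATA statement passes intStringSeq.getParent(), the directory containing the sequence file, rather than the file itself; Hive then loads the files found under that directory into the table.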
From source file:org.apache.hcatalog.mapreduce.TestHCatMultiOutputFormat.java
License:Apache License
/**
 * Simple test case.
 * <ol>
 * <li>Submits a mapred job which writes out one fixed line to each of the tables</li>
 * <li>uses a hive fetch task to read the data and see if it matches what was written</li>
 * </ol>
 *
 * @throws Exception if any error occurs
 */
@Test
public void testOutputFormat() throws Throwable {
  HashMap<String, String> partitionValues = new HashMap<String, String>();
  partitionValues.put("ds", "1");
  partitionValues.put("cluster", "ag");
  ArrayList<OutputJobInfo> infoList = new ArrayList<OutputJobInfo>();
  infoList.add(OutputJobInfo.create("default", tableNames[0], partitionValues));
  infoList.add(OutputJobInfo.create("default", tableNames[1], partitionValues));
  infoList.add(OutputJobInfo.create("default", tableNames[2], partitionValues));

  Job job = new Job(hiveConf, "SampleJob");

  job.setMapperClass(MyMapper.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(MultiOutputFormat.class);
  job.setNumReduceTasks(0);

  JobConfigurer configurer = MultiOutputFormat.createConfigurer(job);

  for (int i = 0; i < tableNames.length; i++) {
    configurer.addOutputFormat(tableNames[i], HCatOutputFormat.class, BytesWritable.class,
        HCatRecord.class);
    HCatOutputFormat.setOutput(configurer.getJob(tableNames[i]), infoList.get(i));
    HCatOutputFormat.setSchema(configurer.getJob(tableNames[i]), schemaMap.get(tableNames[i]));
  }
  configurer.configure();

  Path filePath = createInputFile();
  FileInputFormat.addInputPath(job, filePath);
  Assert.assertTrue(job.waitForCompletion(true));

  ArrayList<String> outputs = new ArrayList<String>();
  for (String tbl : tableNames) {
    outputs.add(getTableData(tbl, "default").get(0));
  }
  Assert.assertEquals("Comparing output of table " + tableNames[0] + " is not correct",
      outputs.get(0), "a,a,1,ag");
  Assert.assertEquals("Comparing output of table " + tableNames[1] + " is not correct",
      outputs.get(1), "a,1,ag");
  Assert.assertEquals("Comparing output of table " + tableNames[2] + " is not correct",
      outputs.get(2), "a,a,extra,1,ag");

  // Check permissions on the partition dirs and files created
  for (int i = 0; i < tableNames.length; i++) {
    Path partitionFile = new Path(warehousedir + "/" + tableNames[i] + "/ds=1/cluster=ag/part-m-00000");
    FileSystem fs = partitionFile.getFileSystem(mrConf);
    Assert.assertEquals("File permissions of table " + tableNames[i] + " is not correct",
        fs.getFileStatus(partitionFile).getPermission(), new FsPermission(tablePerms[i]));
    Assert.assertEquals("File permissions of table " + tableNames[i] + " is not correct",
        fs.getFileStatus(partitionFile.getParent()).getPermission(), new FsPermission(tablePerms[i]));
    Assert.assertEquals("File permissions of table " + tableNames[i] + " is not correct",
        fs.getFileStatus(partitionFile.getParent().getParent()).getPermission(),
        new FsPermission(tablePerms[i]));
  }
  LOG.info("File permissions verified");
}
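The permission checks walk up the partition tree with chained getParent() calls: partitionFile is the part-m-00000 file itself, partitionFile.getParent() is the cluster=ag directory, and partitionFile.getParent().getParent() is the enclosing ds=1 directory, and all three must carry the table's configured permissions.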
From source file:org.apache.hcatalog.security.HdfsAuthorizationProvider.java
License:Apache License
/**
 * Checks the permissions for the given path and the current user on the Hadoop FS. If the
 * given path does not exist, it checks for its parent folder.
 */
protected static void checkPermissions(final Configuration conf, final Path path,
    final EnumSet<FsAction> actions) throws IOException, LoginException {

  if (path == null) {
    throw new IllegalArgumentException("path is null");
  }

  HadoopShims shims = ShimLoader.getHadoopShims();
  final UserGroupInformation ugi;
  if (conf.get(PROXY_USER_NAME) != null) {
    ugi = UserGroupInformation.createRemoteUser(conf.get(PROXY_USER_NAME));
  } else {
    ugi = shims.getUGIForConf(conf);
  }
  final String user = shims.getShortUserName(ugi);

  final FileSystem fs = path.getFileSystem(conf);

  if (fs.exists(path)) {
    checkPermissions(fs, path, actions, user, ugi.getGroupNames());
  } else if (path.getParent() != null) {
    // find the ancestor which exists to check its permissions
    Path par = path.getParent();
    while (par != null) {
      if (fs.exists(par)) {
        break;
      }
      par = par.getParent();
    }
    checkPermissions(fs, par, actions, user, ugi.getGroupNames());
  }
}
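The ancestor walk above is a reusable idiom. A minimal sketch under assumed names (firstExistingAncestor is illustrative, not an HCatalog method) that climbs with getParent() until it reaches a path that exists:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class AncestorDemo {
  // Return the deepest existing ancestor of 'path', or null if even the root is missing.
  static Path firstExistingAncestor(FileSystem fs, Path path) throws IOException {
    Path cur = path;
    while (cur != null && !fs.exists(cur)) {
      cur = cur.getParent(); // becomes null once we climb past the root
    }
    return cur;
  }

  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.getLocal(new Configuration());
    // On a typical machine this prints /tmp, the deepest component that exists.
    System.out.println(firstExistingAncestor(fs, new Path("/tmp/does/not/exist")));
  }
}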
From source file:org.apache.hive.hcatalog.mapreduce.FileOutputCommitterContainer.java
License:Apache License
private String getPartitionRootLocation(String ptnLocn, int numPtnKeys) {
  if (customDynamicLocationUsed) {
    return null;
  }
  if (ptnRootLocation == null) {
    // we only need to calculate it once, it'll be the same for other partitions in this job.
    Path ptnRoot = new Path(ptnLocn);
    for (int i = 0; i < numPtnKeys; i++) {
      // LOG.info("Getting parent of " + ptnRoot.getName());
      ptnRoot = ptnRoot.getParent();
    }
    ptnRootLocation = ptnRoot.toString();
  }
  // LOG.info("Returning final parent : " + ptnRootLocation);
  return ptnRootLocation;
}
From source file:org.apache.hive.hcatalog.mapreduce.FileOutputCommitterContainer.java
License:Apache License
/**
 * Move all of the files from the temp directory to the final location.
 * @param fs the output file system
 * @param file the file to move
 * @param srcDir the source directory
 * @param destDir the target directory
 * @param dryRun a flag that simply tests whether this move would succeed, based
 *               on whether other files exist where we're trying to copy
 * @throws java.io.IOException
 */
private void moveTaskOutputs(FileSystem fs, Path file, Path srcDir, Path destDir, final boolean dryRun,
    boolean immutable) throws IOException {
  if (LOG.isDebugEnabled()) {
    LOG.debug("moveTaskOutputs " + file + " from: " + srcDir + " to: " + destDir + " dry: " + dryRun
        + " immutable: " + immutable);
  }

  if (dynamicPartitioningUsed) {
    immutable = true; // Making sure we treat dynamic partitioning jobs as if they were immutable.
  }

  if (file.getName().equals(TEMP_DIR_NAME) || file.getName().equals(LOGS_DIR_NAME)
      || file.getName().equals(SUCCEEDED_FILE_NAME)) {
    return;
  }

  final Path finalOutputPath = getFinalPath(fs, file, srcDir, destDir, immutable);
  FileStatus fileStatus = fs.getFileStatus(file);

  if (!fileStatus.isDir()) {
    if (dryRun) {
      if (immutable) {
        // Dry-run checks are meaningless for a mutable table - we should always succeed
        // unless there is a runtime IOException.
        if (LOG.isDebugEnabled()) {
          LOG.debug("Testing if moving file: [" + file + "] to [" + finalOutputPath
              + "] would cause a problem");
        }
        if (fs.exists(finalOutputPath)) {
          throw new HCatException(ErrorType.ERROR_MOVE_FAILED,
              "Data already exists in " + finalOutputPath + ", duplicate publish not possible.");
        }
      }
    } else {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Moving file: [" + file + "] to [" + finalOutputPath + "]");
      }
      // Make sure the parent directory exists. It is not an error
      // to recreate an existing directory.
      fs.mkdirs(finalOutputPath.getParent());
      if (!fs.rename(file, finalOutputPath)) {
        if (!fs.delete(finalOutputPath, true)) {
          throw new HCatException(ErrorType.ERROR_MOVE_FAILED,
              "Failed to delete existing path " + finalOutputPath);
        }
        if (!fs.rename(file, finalOutputPath)) {
          throw new HCatException(ErrorType.ERROR_MOVE_FAILED,
              "Failed to move output to " + finalOutputPath);
        }
      }
    }
  } else {
    FileStatus[] children = fs.listStatus(file);
    FileStatus firstChild = null;
    if (children != null) {
      int index = 0;
      while (index < children.length) {
        if (!children[index].getPath().getName().equals(TEMP_DIR_NAME)
            && !children[index].getPath().getName().equals(LOGS_DIR_NAME)
            && !children[index].getPath().getName().equals(SUCCEEDED_FILE_NAME)) {
          firstChild = children[index];
          break;
        }
        index++;
      }
    }
    if (firstChild != null && firstChild.isDir()) {
      // If the first child is a directory, then the rest are directories too,
      // according to the HCatalog dir structure; recurse in that case.
      for (FileStatus child : children) {
        moveTaskOutputs(fs, child.getPath(), srcDir, destDir, dryRun, immutable);
      }
    } else {
      if (!dryRun) {
        if (dynamicPartitioningUsed) {
          // Optimization: if the first child is a file, we have reached the leaf
          // directory; move the parent directory itself instead of moving each
          // file under the directory. See HCATALOG-538.
          // Note for a future Append implementation: this optimization is another reason
          // dynamic partitioning is currently incompatible with append on mutable tables.
          final Path parentDir = finalOutputPath.getParent();
          // Create the directory
          Path placeholder = new Path(parentDir, "_placeholder");
          if (fs.mkdirs(parentDir)) {
            // It is weird, but we need a placeholder;
            // otherwise rename cannot move the file to the right place.
            fs.create(placeholder).close();
          }
          if (LOG.isDebugEnabled()) {
            LOG.debug("Moving directory: " + file + " to " + parentDir);
          }
          // If a custom dynamic location is provided, we need to rename to the final output path
          Path dstPath = !customDynamicLocationUsed ? parentDir : finalOutputPath;
          if (!fs.rename(file, dstPath)) {
            final String msg = "Failed to move file: " + file + " to " + dstPath;
            LOG.error(msg);
            throw new HCatException(ErrorType.ERROR_MOVE_FAILED, msg);
          }
          fs.delete(placeholder, false);
        } else {
          // In case of no partition, we have to move each file.
          for (FileStatus child : children) {
            moveTaskOutputs(fs, child.getPath(), srcDir, destDir, dryRun, immutable);
          }
        }
      } else {
        if (immutable && fs.exists(finalOutputPath)
            && !MetaStoreUtils.isDirEmpty(fs, finalOutputPath)) {
          throw new HCatException(ErrorType.ERROR_DUPLICATE_PARTITION,
              "Data already exists in " + finalOutputPath + ", duplicate publish not possible.");
        }
      }
    }
  }
}
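This later org.apache.hive.hcatalog variant adds immutable-table handling and support for custom dynamic locations, but its getParent() usage mirrors the org.apache.hcatalog version above: ensuring the rename target's parent directory exists, and computing the leaf's final parent directory for the bulk directory move.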
From source file:org.apache.hive.hcatalog.mapreduce.HCatFileUtil.java
License:Apache License
public static void setCustomPath(String customPathFormat, OutputJobInfo jobInfo) {
  // find the root of all custom paths from the custom pattern. The root is the
  // largest prefix in the input pattern string that doesn't match customPathPattern
  Path customPath = new Path(customPathFormat);
  URI customURI = customPath.toUri();
  while (customPath != null && !customPath.toString().isEmpty()) {
    Matcher m = customPathPattern.matcher(customPath.toString());
    if (!m.find()) {
      break;
    }
    customPath = customPath.getParent();
  }

  URI rootURI = customPath.toUri();
  URI childURI = rootURI.relativize(customURI);
  jobInfo.setCustomDynamicLocation(rootURI.getPath(), childURI.getPath());
}
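The loop uses getParent() for root discovery: it keeps trimming the last path segment while the remaining prefix still matches customPathPattern, then recovers the dynamic suffix by relativizing the original URI against the discovered root.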
From source file:org.apache.hive.hcatalog.streaming.TestStreaming.java
License:Apache License
private void corruptDataFile(final String file, final Configuration conf, final int addRemoveBytes)
    throws Exception {
  Path bPath = new Path(file);
  Path cPath = new Path(bPath.getParent(), bPath.getName() + ".corrupt");
  FileSystem fs = bPath.getFileSystem(conf);
  FileStatus fileStatus = fs.getFileStatus(bPath);
  int len = addRemoveBytes == Integer.MIN_VALUE ? 0 : (int) fileStatus.getLen() + addRemoveBytes;
  byte[] buffer = new byte[len];
  FSDataInputStream fdis = fs.open(bPath);
  fdis.readFully(0, buffer, 0, (int) Math.min(fileStatus.getLen(), buffer.length));
  fdis.close();
  FSDataOutputStream fdos = fs.create(cPath, true);
  fdos.write(buffer, 0, buffer.length);
  fdos.close();
  fs.delete(bPath, false);
  fs.rename(cPath, bPath);
}
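Note the sibling-path construction: new Path(bPath.getParent(), bPath.getName() + ".corrupt") places the working copy in the same directory as the original, so the final fs.rename(cPath, bPath) can swap the corrupted bytes back in under the original name.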