List of usage examples for org.apache.hadoop.fs.FileStatus.getPath()
public Path getPath()
From source file:colossal.pipe.ColFile.java
License:Apache License
/**
 * Clears the location named by {@code path} so that output can be written there.
 *
 * <p>If the location exists, every child directory is inspected first: a child
 * directory whose path does not end in {@code /_logs} or {@code /_temporary}
 * aborts the operation, so that nested data is never removed by accident.
 * If the location does not exist it is created. In both cases the location is
 * then deleted recursively.
 *
 * @param conf configuration used to resolve the file system of {@code path}
 * @throws IllegalArgumentException if the location contains a child directory
 *         other than {@code _logs} / {@code _temporary}
 * @throws RuntimeException wrapping any {@link IOException} raised by the file system
 */
public void clearAndPrepareOutput(Configuration conf) {
    try {
        final Path target = new Path(path);
        final FileSystem fileSystem = target.getFileSystem(conf);
        if (!fileSystem.exists(target)) {
            fileSystem.mkdirs(target);
        } else {
            for (FileStatus child : fileSystem.listStatus(target)) {
                if (!child.isDir()) {
                    continue;
                }
                final String childPath = child.getPath().toString();
                // presumably the bookkeeping directories a Hadoop job leaves behind
                final boolean removable = childPath.endsWith("/_logs") || childPath.endsWith("/_temporary");
                if (!removable) {
                    throw new IllegalArgumentException(
                            "Trying to overwrite directory with child directories: " + path);
                }
            }
        }
        fileSystem.delete(target, true);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
From source file:com.ailk.oci.ocnosql.tools.load.mutiple.MutipleColumnImportTsv.java
License:Apache License
public boolean execute(Connection conn, OciTableRef table) { if (conn == null) { msg = "Connection object must not be null"; retMap.put(FAILED_REASON, msg);//w w w. ja v a 2 s .c om LOG.error(msg); throw new ClientRuntimeException(msg); } Configuration conf = conn.getConf(); if (table == null) { msg = "table must not be null"; retMap.put(FAILED_REASON, msg); LOG.error(msg); throw new ClientRuntimeException(msg); } String tableName = table.getName(); String column = table.getColumns(); String seperator = table.getSeperator(); String inputPath = table.getInputPath(); String tmpOutPut = table.getImportTmpOutputPath(); String skipBadLine = table.getSkipBadLine(); String compressor = table.getCompressor(); String rowkeyUnique = table.getRowKeyUnique(); String algoColumn = table.getAlgoColumn(); String rowkeyGenerator = table.getRowkeyGenerator(); String rowkeyColumn = table.getRowkeyColumn(); String callback = table.getCallback(); if (StringUtils.isEmpty(tableName)) { msg = "No " + CommonConstants.TABLE_NAME + " specified. Please check config,then try again after refreshing cache"; retMap.put(FAILED_REASON, msg); LOG.error(msg); throw new ConfigException(msg); } conf.set(CommonConstants.TABLE_NAME, tableName); if (StringUtils.isEmpty(seperator)) { msg = "No " + CommonConstants.SEPARATOR + " specified. Please check config,then try again after refreshing cache"; retMap.put(FAILED_REASON, msg); LOG.error(msg); throw new ConfigException(msg); } conf.set(CommonConstants.SEPARATOR, seperator); // Make sure columns are specified String columns[] = StringUtils.splitByWholeSeparatorPreserveAllTokens(column, ","); if (columns == null) { msg = "No " + CommonConstants.COLUMNS + " specified. 
Please check config,then try again after refreshing cache"; retMap.put(FAILED_REASON, msg); LOG.error(msg); throw new ConfigException(msg); } conf.set(CommonConstants.COLUMNS, column); // int rowkeysFound = 0; // for (String col : columns) { // if (col.equals(CommonConstants.ROW_KEY)) // rowkeysFound++; // } // if (rowkeysFound != 1) { // msg = "Must specify exactly one column as " + CommonConstants.ROW_KEY + ". Please check config,then again after refreshing cache"; // retMap.put(FAILED_REASON, msg); // LOG.error(msg); // throw new ConfigException(msg); // } if (columns.length < 2) { msg = "One or more columns in addition to the row key are required. Please check config,then try again after refreshing cache"; retMap.put(FAILED_REASON, msg); LOG.error(msg); throw new ConfigException(msg); } String[] columnTmp = null; for (int i = 0; i < columns.length; i++) { columnTmp = columns[i].split(":"); if (columnTmp != null && columnTmp.length == 2) { break; } } conf.set(CommonConstants.SINGLE_FAMILY, columnTmp[0]); if (!StringUtils.isEmpty(skipBadLine)) { conf.set(CommonConstants.SKIPBADLINE, skipBadLine); } //? conf.set(CommonConstants.COMPRESSOR, (compressor == null) ? 
DEFAULT_COMPRESSOR : compressor); conf.set(CommonConstants.ALGOCOLUMN, algoColumn); conf.set(CommonConstants.ROWKEY_GENERATOR, rowkeyGenerator); conf.set(CommonConstants.ROWKEYCOLUMN, rowkeyColumn); conf.set(CommonConstants.ROWKEYCALLBACK, callback); boolean ret = false; Counter failCounter = null; try { hbaseAdmin = new HBaseAdmin(conf); TableConfiguration.getInstance().writeTableConfiguration(tableName, column, seperator, conf); conf.set(CommonConstants.TABLE_NAME, tableName); String hdfs_url = conf.get(CommonConstants.HDFS_URL); FileSystem fs = FileSystem.get(URI.create(hdfs_url), conf); FileStatus[] fileStatusArr = fs.listStatus(new Path(hdfs_url + inputPath)); if (fileStatusArr != null && fileStatusArr.length > 0) { if (fileStatusArr[0].isFile()) { ret = (Boolean) runJob(conf, tableName, inputPath, tmpOutPut)[0]; } int inputPathNum = 0; for (FileStatus everyInputPath : fileStatusArr) { Path inputPathStr = everyInputPath.getPath(); String absoluteInputPathStr = inputPath + "/" + inputPathStr.getName(); boolean retCode = (Boolean) runJob(conf, tableName, absoluteInputPathStr, tmpOutPut + "/" + inputPathStr.getName())[0]; if (retCode) { String base64Seperator = conf.get(CommonConstants.SEPARATOR); conf.set(CommonConstants.SEPARATOR, new String(Base64.decode(base64Seperator))); //?separator if (inputPathNum == fileStatusArr.length - 1) { ret = true; } inputPathNum++; continue; } else { // ret = false; inputPathNum++; break; } } } } catch (Exception e) { msg = "job execute failed,nested exception is " + e; retMap.put(FAILED_REASON, msg); LOG.error(msg); throw new ClientRuntimeException(msg); } if (!ret) { msg = "execute job failed,please check map/reduce log in jobtracker page"; retMap.put(FAILED_REASON, msg); return false; } return true; }
From source file:com.ailk.oci.ocnosql.tools.load.mutiple.MutipleColumnImportTsv.java
License:Apache License
/** * Main entry point./*ww w .j av a 2 s .com*/ * * @param args The command line parameters. * @throws Exception When running the job fails. */ public static void main(String[] args) throws Exception { long inputLineNum = 0L; long badLineNum = 0L; long outputLineNum = 0L; Configuration conf = HBaseConfiguration.create(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length < 2) { usage("Wrong number of arguments: " + otherArgs.length); System.exit(-1); } // Make sure columns are specified String columns = conf.get(CommonConstants.COLUMNS); if (columns == null) { usage("No columns specified. Please specify with -D" + CommonConstants.COLUMNS + "=..."); System.exit(-1); } String seperator = conf.get(CommonConstants.SEPARATOR); if (StringUtils.isEmpty(seperator)) { conf.set(CommonConstants.SEPARATOR, CommonConstants.DEFAULT_SEPARATOR); seperator = CommonConstants.DEFAULT_SEPARATOR; } // Make sure one or more columns are specified if (columns.split(",").length < 2) { usage("One or more columns in addition to the row key are required"); System.exit(-1); } //make sure tableName and columns are upper to used by phoenix. 
columns = columns.toUpperCase(); String notNeedLoadColumnsStr = conf.get(CommonConstants.NOTNEEDLOADCOLUMNS); String notNeedLoadColumns = null; if (!StringUtils.isEmpty(notNeedLoadColumnsStr)) { notNeedLoadColumns = notNeedLoadColumnsStr.toUpperCase(); conf.set(CommonConstants.NOTNEEDLOADCOLUMNS, notNeedLoadColumns); } String writeTableConfigColumns = getWriteConfigColumn(columns, notNeedLoadColumns); hbaseAdmin = new HBaseAdmin(conf); String tableName = otherArgs[0].toUpperCase(); String inputPath = otherArgs[1]; String tmpOutputPath = conf.get(CommonConstants.IMPORT_TMP_OUTPUT); conf.set(CommonConstants.TABLE_NAME, tableName); conf.set(CommonConstants.COLUMNS, columns); String pathStr = conf.get(CommonConstants.HDFS_URL) + inputPath; FileSystem fs = FileSystem.get(URI.create(conf.get(CommonConstants.HDFS_URL)), conf); FileStatus[] fileStatusArr = fs.listStatus(new Path(pathStr)); if (fileStatusArr != null && fileStatusArr.length > 0) { TableConfiguration.getInstance().writeTableConfiguration(tableName, writeTableConfigColumns, seperator, conf); if (fileStatusArr[0].isFile()) { Object[] resObjs = runJob(conf, tableName, inputPath, tmpOutputPath); inputLineNum = (Long) resObjs[1]; outputLineNum = (Long) resObjs[2]; badLineNum = (Long) resObjs[3]; LOG.info("Bulkload Result={inputLine:" + inputLineNum + ",outputLine:" + outputLineNum + ",badLine:" + badLineNum + "}"); boolean result = (Boolean) resObjs[0]; if (result) { System.exit(0); } System.exit(-1); } for (FileStatus everyInputPath : fileStatusArr) { Path inputPathStr = everyInputPath.getPath(); String absoluteInputPathStr = inputPath + "/" + inputPathStr.getName(); FileStatus[] subFileStatusArr = fs .listStatus(new Path(conf.get(CommonConstants.HDFS_URL) + absoluteInputPathStr)); if (subFileStatusArr == null || subFileStatusArr.length == 0)//?job continue; Object[] resObjs = runJob(conf, tableName, absoluteInputPathStr, tmpOutputPath + "/" + inputPathStr.getName()); boolean ret = (Boolean) resObjs[0]; if (ret) 
{ inputLineNum += (Long) resObjs[1]; outputLineNum += (Long) resObjs[2]; badLineNum += (Long) resObjs[3]; String seperatorStr = conf.get(CommonConstants.SEPARATOR); conf.set(CommonConstants.SEPARATOR, new String(Base64.decode(seperatorStr))); //?separator continue; } else { // LOG.error("Bulkload Result={inputLine:" + inputLineNum + ",outputLine:" + outputLineNum + ",badLine:" + badLineNum + "}"); System.exit(-1); } } LOG.info("Bulkload Result={inputLine:" + inputLineNum + ",outputLine:" + outputLineNum + ",badLine:" + badLineNum + "}"); } LOG.info("Bulkload Result={inputLine:" + inputLineNum + ",outputLine:" + outputLineNum + ",badLine:" + badLineNum + "}"); System.exit(0);// }
From source file:com.ailk.oci.ocnosql.tools.load.single.SingleColumnImportTsv.java
License:Apache License
public boolean execute(Connection conn, OciTableRef table) { if (conn == null) { msg = "Connection object must not be null"; retMap.put(FAILED_REASON, msg);/* w w w. ja v a 2 s. co m*/ LOG.error(msg); throw new ClientRuntimeException(msg); } Configuration conf = conn.getConf(); if (table == null) { msg = "table must not be null"; retMap.put(FAILED_REASON, msg); LOG.error(msg); throw new ClientRuntimeException(msg); } String tableName = table.getName(); String column = table.getColumns(); String seperator = table.getSeperator(); String inputPath = table.getInputPath(); String tmpOutPut = table.getImportTmpOutputPath(); String skipBadLine = table.getSkipBadLine(); String compressor = table.getCompressor(); String rowkeyUnique = table.getRowKeyUnique(); String algoColumn = table.getAlgoColumn(); String rowkeyGenerator = table.getRowkeyGenerator(); String rowkeyColumn = table.getRowkeyColumn(); String callback = table.getCallback(); if (StringUtils.isEmpty(tableName)) { msg = "No " + CommonConstants.TABLE_NAME + " specified. Please check config,then try again after refreshing cache"; retMap.put(FAILED_REASON, msg); LOG.error(msg); throw new ConfigException(msg); } conf.set(CommonConstants.TABLE_NAME, tableName); // if(StringUtils.isEmpty(seperator)){ // msg = "No " + CommonConstants.SEPARATOR + " specified. Please check config,then try again after refreshing cache"; // retMap.put(FAILED_REASON, msg); // LOG.error(msg); // throw new ConfigException(msg); // } // conf.set(CommonConstants.SEPARATOR, seperator); if (StringUtils.isEmpty(seperator)) { conf.set(CommonConstants.SEPARATOR, CommonConstants.DEFAULT_SEPARATOR); } // Make sure columns are specified, splited by "," String columns[] = StringUtils.splitByWholeSeparatorPreserveAllTokens(column, ","); if (columns == null) { msg = "No " + CommonConstants.COLUMNS + " specified. 
Please check config,then try again after refreshing cache"; retMap.put(FAILED_REASON, msg); LOG.error(msg); throw new ConfigException(msg); } conf.set(CommonConstants.COLUMNS, column); if (StringUtils.isEmpty(rowkeyColumn) && StringUtils.isEmpty(algoColumn)) { msg = "No " + CommonConstants.ROW_KEY + " rule specified. Please check config,then try again after refreshing cache"; retMap.put(FAILED_REASON, msg); LOG.error(msg); throw new ConfigException(msg); } conf.set(CommonConstants.SEPARATOR, seperator); // int rowkeysFound = 0; // for (String col : columns) { // if (col.equals(CommonConstants.ROW_KEY)) // rowkeysFound++; // } // //HBASE_ROW_KEY? // if (rowkeysFound != 1) { // msg = "Must specify exactly one column as " + CommonConstants.ROW_KEY + ". Please check config,then again after refreshing cache"; // retMap.put(FAILED_REASON, msg); // LOG.error(msg); // throw new ConfigException(msg); // } //HBASE_ROW_KEY?column if (columns.length < 2) { msg = "One or more columns in addition to the row key are required. Please check config,then try again after refreshing cache"; retMap.put(FAILED_REASON, msg); LOG.error(msg); throw new ConfigException(msg); } //":" String[] columnTmp = null; for (int i = 0; i < columns.length; i++) { columnTmp = columns[i].split(":"); if (columnTmp != null && columnTmp.length == 2) { break; } } //??? conf.set(CommonConstants.SINGLE_FAMILY, columnTmp[0]); //? if (!StringUtils.isEmpty(skipBadLine)) { conf.set(CommonConstants.SKIPBADLINE, skipBadLine); } //? conf.set(CommonConstants.COMPRESSOR, (compressor == null) ? 
DEFAULT_COMPRESSOR : compressor); conf.set(CommonConstants.ALGOCOLUMN, algoColumn); conf.set(CommonConstants.ROWKEY_GENERATOR, rowkeyGenerator); conf.set(CommonConstants.ROWKEYCOLUMN, rowkeyColumn); conf.set(CommonConstants.ROWKEYCALLBACK, callback); boolean ret = false; // Counter failCounter = null; try { hbaseAdmin = new HBaseAdmin(conf); TableConfiguration.getInstance().writeTableConfiguration(tableName, column, seperator, conf); // Job job = createSubmittableJob(conf, tableName, inputPath, tmpOutPut); // //job // ret = job.waitForCompletion(true); // Counters counters = job.getCounters(); // for (String groupName : counters.getGroupNames()) { // failCounter = counters.findCounter(groupName, "NUM_FAILED_MAPS"); // if(failCounter != null){ // break; // } // } conf.set(CommonConstants.TABLE_NAME, tableName); String hdfs_url = conf.get(CommonConstants.HDFS_URL); FileSystem fs = FileSystem.get(URI.create(hdfs_url), conf); FileStatus[] fileStatusArr = fs.listStatus(new Path(hdfs_url + inputPath)); if (fileStatusArr != null && fileStatusArr.length > 0) { if (fileStatusArr[0].isFile()) { ret = runJob(conf, tableName, inputPath, tmpOutPut); } int inputPathNum = 0; for (FileStatus everyInputPath : fileStatusArr) { Path inputPathStr = everyInputPath.getPath(); String absoluteInputPathStr = inputPath + "/" + inputPathStr.getName(); boolean retCode = runJob(conf, tableName, absoluteInputPathStr, tmpOutPut + "/" + inputPathStr.getName()); if (retCode) { String base64Seperator = conf.get(CommonConstants.SEPARATOR); conf.set(CommonConstants.SEPARATOR, new String(Base64.decode(base64Seperator))); //?separator if (inputPathNum == fileStatusArr.length - 1) { ret = true; } inputPathNum++; continue; } else { // ret = false; inputPathNum++; break; } } } } catch (Exception e) { msg = "job execute failed,nested exception is " + e; retMap.put(FAILED_REASON, msg); LOG.error(msg); throw new ClientRuntimeException(msg); } boolean result = true; if (!ret) { msg = "execute job 
failed,please check map/reduce log in jobtracker page"; retMap.put(FAILED_REASON, msg); result = false; } /* else { String[] params = new String[2]; params[0] = tmpOutPut; params[1] = tableName; int retrunCode = -1; try { //bulkload complete retrunCode = ToolRunner.run(new LoadIncrementalHFiles(conf), params); } catch (Exception e) { msg = "job execute failed,nested exception is " + e; retMap.put(FAILED_REASON, msg); LOG.error(msg); throw new ClientRuntimeException(msg); } if(retrunCode != 0) result = false; } */ return result; }
From source file:com.ailk.oci.ocnosql.tools.load.single.SingleColumnImportTsv.java
License:Apache License
/** * Main entry point.// w ww . j a v a2 s. com * * @param args The command line parameters. * @throws Exception When running the job fails. */ public static void main(String[] args) throws Exception { Map<String, String> map = getProperty(); if (map == null || map.size() == 0) { System.err.println("Error: read conf file " + CONF_FILE + " occur error."); System.exit(0); } Configuration conf = Connection.getInstance().getConf(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length < 2) { usage("Wrong number of arguments: " + otherArgs.length); System.exit(-1); } // Make sure columns are specified String columns = conf.get(CommonConstants.COLUMNS); if (columns == null) { usage("No columns specified. Please specify with -D" + CommonConstants.COLUMNS + "=..."); System.exit(-1); } String seperator = conf.get(CommonConstants.SEPARATOR); if (seperator == null) { conf.set(CommonConstants.SEPARATOR, CommonConstants.DEFAULT_SEPARATOR); seperator = CommonConstants.DEFAULT_SEPARATOR; } // Make sure one or more columns are specified if (columns.split(",").length < 2) { usage("One or more columns in addition to the row key are required"); System.exit(-1); } //make sure tableName and columns are upper to used by phoenix. columns = columns.toUpperCase(); String tableName = otherArgs[0].toUpperCase(); String inputPath = otherArgs[1]; hbaseAdmin = new HBaseAdmin(conf); String tmpOutputPath = conf.get(CommonConstants.IMPORT_TMP_OUTPUT); conf.set(CommonConstants.TABLE_NAME, tableName); conf.set(CommonConstants.COLUMNS, columns); String hdfs_url = conf.get(CommonConstants.HDFS_URL); FileSystem fs = FileSystem.get(URI.create(hdfs_url), conf); FileStatus[] fileStatusArr = fs.listStatus(new Path(hdfs_url + inputPath)); if (fileStatusArr != null && fileStatusArr.length > 0) { TableConfiguration.getInstance().writeTableConfiguration(tableName, columns, seperator, conf); if (fileStatusArr[0].isFile()) { //?? 
boolean result = runJob(conf, tableName, inputPath, tmpOutputPath); if (result) { System.exit(0); } System.exit(-1); } for (FileStatus everyInputPath : fileStatusArr) { //?? Path inputPathStr = everyInputPath.getPath(); String absoluteInputPathStr = inputPath + "/" + inputPathStr.getName(); FileStatus[] subFileStatusArr = fs.listStatus(new Path(hdfs_url + absoluteInputPathStr)); if (subFileStatusArr == null || subFileStatusArr.length == 0)//?job continue; boolean ret = runJob(conf, tableName, absoluteInputPathStr, tmpOutputPath + "/" + inputPathStr.getName()); if (ret) { String base64Seperator = conf.get(CommonConstants.SEPARATOR); conf.set(CommonConstants.SEPARATOR, new String(Base64.decode(base64Seperator))); //?separator continue; } else // System.exit(-1); } } System.exit(0); // }
From source file:com.alexholmes.hadooputils.combine.avro.mapred.CombineAvroInputFormat.java
License:Apache License
/**
 * Lists the job's input files, keeping only those whose file name ends with
 * {@code AvroOutputFormat.EXT}.
 *
 * @param job the job configuration passed on to the superclass listing
 * @return the matching entries, in the order the superclass returned them
 * @throws IOException if the superclass listing fails
 */
@Override
protected FileStatus[] listStatus(JobConf job) throws IOException {
    final List<FileStatus> avroFiles = new ArrayList<FileStatus>();
    final FileStatus[] candidates = super.listStatus(job);
    for (int idx = 0; idx < candidates.length; idx++) {
        final FileStatus candidate = candidates[idx];
        final String fileName = candidate.getPath().getName();
        if (!fileName.endsWith(AvroOutputFormat.EXT)) {
            continue;
        }
        avroFiles.add(candidate);
    }
    return avroFiles.toArray(new FileStatus[avroFiles.size()]);
}
From source file:com.alexholmes.hadooputils.combine.seqfile.mapred.CombineSequenceFileInputFormat.java
License:Apache License
@Override protected FileStatus[] listStatus(JobConf job) throws IOException { FileStatus[] files = super.listStatus(job); for (int i = 0; i < files.length; i++) { FileStatus file = files[i]; if (file.isDir()) { // it's a MapFile Path dataFile = new Path(file.getPath(), MapFile.DATA_FILE_NAME); FileSystem fs = file.getPath().getFileSystem(job); // use the data file files[i] = fs.getFileStatus(dataFile); }//w ww . j a v a2 s . co m } return files; }
From source file:com.alexholmes.hadooputils.combine.seqfile.mapreduce.CombineSequenceFileInputFormat.java
License:Apache License
@Override @SuppressWarnings("unchecked") protected List<FileStatus> listStatus(JobContext job) throws IOException { List<FileStatus> files = super.listStatus(job); int len = files.size(); for (int i = 0; i < len; ++i) { FileStatus file = files.get(i); if (file.isDir()) { // it's a MapFile Path p = file.getPath(); FileSystem fs = p.getFileSystem(job.getConfiguration()); // use the data file files.set(i, fs.getFileStatus(new Path(p, MapFile.DATA_FILE_NAME))); }/*from w w w . java 2 s. c o m*/ } return files; }
From source file:com.alexholmes.hadooputils.test.TextIOJobBuilder.java
License:Apache License
/**
 * Called after the MapReduce job has completed, to verify that the outputs
 * generated by the MapReduce job align with the expected outputs that were
 * set with calls to {@link #addExpectedOutput(String)} and
 * {@link #addExpectedOutput(String...)}.
 *
 * <p>Lines are read from every file under the output path whose name starts
 * with {@code part}, in the order the file system lists them, and compared
 * one by one with the expected outputs.
 *
 * @return a reference to this object
 * @throws IOException if something goes wrong
 * @throws AssertionError if a line differs, or if the job produced more or
 *         fewer lines than expected
 */
public TextIOJobBuilder verifyResults() throws IOException {
    FileStatus[] outputFiles = fs.listStatus(outputPath, new PathFilter() {
        @Override
        public boolean accept(final Path path) {
            return path.getName().startsWith("part");
        }
    });

    System.out.println("Output files: " + StringUtils.join(outputFiles));

    int i = 0;
    for (FileStatus file : outputFiles) {
        List<String> actualLines = FileUtils.readLines(fs, file.getPath());
        for (String actualLine : actualLines) {
            // Surplus output must fail as an assertion with context, not as an
            // IndexOutOfBoundsException from the expected-output lookup.
            if (i >= expectedOutputs.size()) {
                throw new AssertionError("Unexpected extra output line in " + file.getPath() + ": '"
                        + actualLine + "' (only " + expectedOutputs.size() + " lines were expected)");
            }
            String expectedLine = expectedOutputs.get(i++);
            assertEquals(expectedLine, actualLine);
        }
    }

    // Catches the opposite case: fewer lines produced than expected.
    assertEquals(expectedOutputs.size(), i);

    return this;
}
From source file:com.alexholmes.hdfsslurper.FileSystemManager.java
License:Apache License
public FileStatus getInboundFile() throws IOException, InterruptedException { try {/* w ww. j av a 2s . c om*/ inboundDirLock.lockInterruptibly(); for (FileStatus fs : config.getSrcFs().listStatus(config.getSrcDir())) { if (!fs.isDir()) { if (fs.getPath().getName().startsWith(".")) { log.debug("event#Ignoring hidden file '" + fs.getPath() + "'"); continue; } // move file into work directory // Path workPath = new Path(config.getWorkDir(), fs.getPath().getName()); config.getSrcFs().rename(fs.getPath(), workPath); return config.getSrcFs().getFileStatus(workPath); } } return null; } finally { inboundDirLock.unlock(); } }