Usage examples for org.apache.hadoop.fs.Path.SEPARATOR
public static final String SEPARATOR = "/"
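Path.SEPARATOR is always "/", independent of the local platform (unlike java.io.File.separator), so it is safe for building HDFS path strings by concatenation. A minimal sketch of the join pattern that recurs in the examples below; the class name and file names are hypothetical:

import org.apache.hadoop.fs.Path;

public class SeparatorExample {
    public static void main(String[] args) {
        String base = "/tmp/output"; // hypothetical base directory
        // Join the base directory and a file name with the HDFS separator "/".
        Path part = new Path(base + Path.SEPARATOR + "part-00000");
        System.out.println(part); // prints /tmp/output/part-00000
    }
}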
From source file:jp.ac.u.tokyo.m.dpc.pig.udf.load.path.DirectoryNameElements.java
License:Apache License
// Split a path string into its last component and (recursively) its parent
// directories, using Path.SEPARATOR as the delimiter.
public DirectoryNameElements(String aFileNameAndParentDirectoriesString,
        LinkedHashMap<String, String> aReplaceWordsOriginal,
        LinkedHashMap<String, String> aReplaceWordsQuoted) {
    mReplaceWordsSequence = new ArrayList<String>();
    int tIndexOfLastDirectoryEnd = aFileNameAndParentDirectoriesString.lastIndexOf(Path.SEPARATOR);
    if (tIndexOfLastDirectoryEnd == -1) {
        // No separator left: this is the leaf name.
        mCurrentContentNameOriginal = aFileNameAndParentDirectoriesString;
        setReplaceWordSequence(aFileNameAndParentDirectoriesString, aReplaceWordsOriginal,
                mReplaceWordsSequence);
        mCurrentContentName = StringUtil.replaceWords(aFileNameAndParentDirectoriesString, aReplaceWordsQuoted);
        mParentDirectory = null;
    } else {
        String tCurrentContentNameOriginal = aFileNameAndParentDirectoriesString
                .substring(tIndexOfLastDirectoryEnd + 1);
        mCurrentContentNameOriginal = tCurrentContentNameOriginal;
        setReplaceWordSequence(tCurrentContentNameOriginal, aReplaceWordsOriginal, mReplaceWordsSequence);
        mCurrentContentName = StringUtil.replaceWords(tCurrentContentNameOriginal, aReplaceWordsQuoted);
        // Recurse on everything before the last separator.
        mParentDirectory = new DirectoryNameElements(
                aFileNameAndParentDirectoriesString.substring(0, tIndexOfLastDirectoryEnd),
                aReplaceWordsOriginal, aReplaceWordsQuoted);
    }
}
From source file:kafka.etl.impl.DataGenerator.java
License:Apache License
// Write a single KafkaETLRequest into <offsets dir>/1.dat as an uncompressed SequenceFile.
protected void generateOffsets() throws Exception {
    JobConf conf = new JobConf();
    conf.set("hadoop.job.ugi", _props.getProperty("hadoop.job.ugi"));
    conf.setCompressMapOutput(false);
    Path outPath = new Path(_offsetsDir + Path.SEPARATOR + "1.dat");
    FileSystem fs = outPath.getFileSystem(conf);
    if (fs.exists(outPath))
        fs.delete(outPath);
    KafkaETLRequest request = new KafkaETLRequest(_topic, "tcp://" + _uri.getHost() + ":" + _uri.getPort(), 0);
    System.out.println("Dump " + request.toString() + " to " + outPath.toUri().toString());
    byte[] bytes = request.toString().getBytes("UTF-8");
    KafkaETLKey dummyKey = new KafkaETLKey();
    SequenceFile.setCompressionType(conf, SequenceFile.CompressionType.NONE);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, outPath, KafkaETLKey.class,
            BytesWritable.class);
    writer.append(dummyKey, new BytesWritable(bytes));
    writer.close();
}
From source file:kafka.etl.KafkaETLUtils.java
License:Apache License
// Return the last path component, i.e. the file name.
public static String getFileName(Path path) throws IOException {
    String fullname = path.toUri().toString();
    String[] parts = fullname.split(Path.SEPARATOR);
    if (parts.length < 1)
        throw new IOException("Invalid path " + fullname);
    return parts[parts.length - 1];
}
From source file:kafka.etl.tweet.producer.TweetProducer.java
License:Apache License
// Same pattern as DataGenerator.generateOffsets above: dump one KafkaETLRequest
// to <offsets dir>/1.dat as an uncompressed SequenceFile.
protected void generateOffsets() throws Exception {
    JobConf conf = new JobConf();
    java.util.Date date = new java.util.Date();
    conf.set("hadoop.job.ugi", _props.getProperty("hadoop.job.ugi"));
    conf.setCompressMapOutput(false);
    Calendar cal = Calendar.getInstance();
    Path outPath = new Path(_offsetsDir + Path.SEPARATOR + "1.dat");
    FileSystem fs = outPath.getFileSystem(conf);
    if (fs.exists(outPath))
        fs.delete(outPath);
    KafkaETLRequest request = new KafkaETLRequest(_topic, "tcp://" + _uri.getHost() + ":" + _uri.getPort(), 0);
    System.out.println("Dump " + request.toString() + " to " + outPath.toUri().toString());
    byte[] bytes = request.toString().getBytes("UTF-8");
    KafkaETLKey dummyKey = new KafkaETLKey();
    SequenceFile.setDefaultCompressionType(conf, SequenceFile.CompressionType.NONE);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, outPath, KafkaETLKey.class,
            BytesWritable.class);
    writer.append(dummyKey, new BytesWritable(bytes));
    writer.close();
}
From source file:ml.shifu.guagua.hadoop.ZooKeeperMasterInterceptor.java
License:Apache License
// Build the default ZooKeeper server folder "tmp/_guagua/<appId>/", joining the
// components with Path.SEPARATOR; a configured folder overrides the default.
private String getZookeeperServerFolder(MasterContext<MASTER_RESULT, WORKER_RESULT> context) {
    String defaultZooKeeperServePath = new StringBuilder(200).append("tmp").append(Path.SEPARATOR)
            .append("_guagua").append(Path.SEPARATOR).append(context.getAppId()).append(Path.SEPARATOR)
            .toString();
    String hdfsZookeeperServerPath = context.getProps()
            .getProperty(GuaguaConstants.GUAGUA_ZK_CLUSTER_SERVER_FOLDER, defaultZooKeeperServePath);
    return hdfsZookeeperServerPath;
}
From source file:ml.shifu.guagua.hadoop.ZooKeeperWorkerInterceptor.java
License:Apache License
// Worker-side counterpart of ZooKeeperMasterInterceptor.getZookeeperServerFolder.
private String getZookeeperServerFolder(WorkerContext<MASTER_RESULT, WORKER_RESULT> context) {
    String defaultZooKeeperServePath = new StringBuilder(200).append("tmp").append(Path.SEPARATOR)
            .append("_guagua").append(Path.SEPARATOR).append(context.getAppId()).append(Path.SEPARATOR)
            .toString();
    String hdfsZookeeperServerPath = context.getProps()
            .getProperty(GuaguaConstants.GUAGUA_ZK_CLUSTER_SERVER_FOLDER, defaultZooKeeperServePath);
    return hdfsZookeeperServerPath;
}
From source file:ml.shifu.guagua.yarn.util.InputSplitUtils.java
License:Apache License
/**
 * Expand a folder to "<folder>/*" so that all files in that folder are used as input.
 */
public static String expandInputFolder(Configuration conf) throws IOException {
    Path path = new Path(conf.get(GuaguaYarnConstants.GUAGUA_YARN_INPUT_DIR));
    String newPath = path.toString();
    return FileSystem.get(conf).getFileStatus(path).isDirectory() ? newPath + Path.SEPARATOR + "*" : newPath;
}
From source file:ml.shifu.shifu.core.processor.ExportModelProcessor.java
License:Apache License
private double getColumnMetric(ColumnConfig config, PostCorrelationMetric metric) throws IOException {
    if (metric == null || metric.equals(PostCorrelationMetric.IV)) {
        // default is iv, if no PostCorrelationMetric specified
        return (config.getIv() == null ? Double.NaN : config.getIv());
    } else if (metric.equals(PostCorrelationMetric.KS)) {
        return (config.getKs() == null ? Double.NaN : config.getKs());
    } else if (metric.equals(PostCorrelationMetric.SE)) {
        if (this.seStatsMap == null) {
            SourceType source = this.modelConfig.getDataSet().getSource();
            String varSelectMSEOutputPath = pathFinder.getVarSelectMSEOutputPath(source);
            this.seStatsMap = readSEValuesToMap(
                    varSelectMSEOutputPath + Path.SEPARATOR + Constants.SHIFU_VARSELECT_SE_OUTPUT_NAME + "-*",
                    source);
        }
        return this.seStatsMap.get(config.getColumnNum()).getRms();
    }
    return -1.0d;
}
From source file:ml.shifu.shifu.core.processor.InitModelProcessor.java
License:Apache License
private Map<Integer, Long> getDistinctCountMap(SourceType source, String autoTypePath) throws IOException {
    String outputFilePattern = autoTypePath + Path.SEPARATOR + "part-*";
    if (!ShifuFileUtils.isFileExists(outputFilePattern, source)) {
        throw new RuntimeException("Auto type checking output file not exist.");
    }
    Map<Integer, Long> distinctCountMap = new HashMap<Integer, Long>();
    List<Scanner> scanners = null;
    try {
        // here only works for 1 reducer
        FileStatus[] globStatus = ShifuFileUtils.getFileSystemBySourceType(source)
                .globStatus(new Path(outputFilePattern));
        if (globStatus == null || globStatus.length == 0) {
            throw new RuntimeException("Auto type checking output file not exist.");
        }
        scanners = ShifuFileUtils.getDataScanners(globStatus[0].getPath().toString(), source);
        Scanner scanner = scanners.get(0);
        String str = null;
        while (scanner.hasNext()) {
            str = scanner.nextLine().trim();
            if (str.contains(TAB_STR)) {
                String[] splits = str.split(TAB_STR);
                distinctCountMap.put(Integer.valueOf(splits[0]), Long.valueOf(splits[1]));
            }
        }
        return distinctCountMap;
    } finally {
        if (scanners != null) {
            for (Scanner scanner : scanners) {
                if (scanner != null) {
                    scanner.close();
                }
            }
        }
    }
}
From source file:ml.shifu.shifu.core.processor.VarSelectModelProcessor.java
License:Apache License
private void postProcess4SEVarSelect(SourceType source, String varSelectMSEOutputPath) throws IOException {
    String outputFilePattern = varSelectMSEOutputPath + Path.SEPARATOR + "part-r-*";
    if (!ShifuFileUtils.isFileExists(outputFilePattern, source)) {
        throw new RuntimeException("Var select MSE stats output file not exist.");
    }
    int selectCnt = 0;
    for (ColumnConfig config : super.columnConfigList) {
        if (config.isFinalSelect()) {
            config.setFinalSelect(false);
        }
        // enable ForceSelect
        if (config.isForceSelect()) {
            config.setFinalSelect(true);
            selectCnt++;
            log.info("Variable {} is selected, since it is in ForceSelect list.", config.getColumnName());
        }
    }
    List<Scanner> scanners = null;
    try {
        // here only works for 1 reducer
        FileStatus[] globStatus = ShifuFileUtils.getFileSystemBySourceType(source)
                .globStatus(new Path(outputFilePattern));
        if (globStatus == null || globStatus.length == 0) {
            throw new RuntimeException("Var select MSE stats output file not exist.");
        }
        scanners = ShifuFileUtils.getDataScanners(globStatus[0].getPath().toString(), source);
        String str = null;
        int targetCnt = 0; // total variable count that user want to select
        List<Integer> candidateColumnIdList = new ArrayList<Integer>();
        Scanner scanner = scanners.get(0);
        while (scanner.hasNext()) {
            ++targetCnt;
            str = scanner.nextLine().trim();
            candidateColumnIdList.add(Integer.parseInt(str));
        }
        int i = 0;
        // try to select another (targetCnt - selectCnt) variables, but we need to exclude those
        // force-selected variables
        while (selectCnt < targetCnt && i < targetCnt) {
            Integer columnId = candidateColumnIdList.get(i++);
            ColumnConfig columnConfig = this.columnConfigList.get(columnId);
            if (!columnConfig.isForceSelect() && !columnConfig.isForceRemove()) {
                columnConfig.setFinalSelect(true);
                selectCnt++;
                log.info("Variable {} is selected.", columnConfig.getColumnName());
            }
        }
        log.info("{} variables are selected.", selectCnt);
        log.info(
                "Sensitivity analysis report is in {}/{}-* file(s) with format 'column_index\tcolumn_name\tmean\trms\tvariance'.",
                varSelectMSEOutputPath, Constants.SHIFU_VARSELECT_SE_OUTPUT_NAME);
    } finally {
        if (scanners != null) {
            for (Scanner scanner : scanners) {
                if (scanner != null) {
                    scanner.close();
                }
            }
        }
    }
    this.saveColumnConfigListAndColumnStats();
    this.syncDataToHdfs(this.modelConfig.getDataSet().getSource());
}