Example usage for org.apache.hadoop.fs Path SEPARATOR

List of usage examples for org.apache.hadoop.fs Path SEPARATOR

Introduction

On this page you can find example usage for org.apache.hadoop.fs Path SEPARATOR.

Prototype

public static final String SEPARATOR

Document

The directory separator, a slash.
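
Path.SEPARATOR is the String "/" (its companion constant Path.SEPARATOR_CHAR is the char '/'). A minimal sketch of joining path segments with it; the directory and file names below are made up for illustration:

import org.apache.hadoop.fs.Path;

public class SeparatorDemo {
    public static void main(String[] args) {
        // Join segments with the HDFS separator rather than File.separator,
        // which varies by platform.
        String joined = "data" + Path.SEPARATOR + "input" + Path.SEPARATOR + "part-00000";
        System.out.println(joined);           // data/input/part-00000
        System.out.println(new Path(joined)); // the same string as a Path
    }
}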

Usage

From source file:jp.ac.u.tokyo.m.dpc.pig.udf.load.path.DirectoryNameElements.java

License:Apache License

public DirectoryNameElements(String aFileNameAndParentDirectoriesString,
        LinkedHashMap<String, String> aReplaceWordsOriginal,
        LinkedHashMap<String, String> aReplaceWordsQuoted) {
    mReplaceWordsSequence = new ArrayList<String>();
    int tIndexOfLastDirectoryEnd = aFileNameAndParentDirectoriesString.lastIndexOf(Path.SEPARATOR);
    if (tIndexOfLastDirectoryEnd == -1) {
        mCurrentContentNameOriginal = aFileNameAndParentDirectoriesString;
        setReplaceWordSequence(aFileNameAndParentDirectoriesString, aReplaceWordsOriginal,
                mReplaceWordsSequence);
        mCurrentContentName = StringUtil.replaceWords(aFileNameAndParentDirectoriesString, aReplaceWordsQuoted);
        mParentDirectory = null;
    } else {
        String tCurrentContentNameOriginal = aFileNameAndParentDirectoriesString
                .substring(tIndexOfLastDirectoryEnd + 1);
        mCurrentContentNameOriginal = tCurrentContentNameOriginal;
        setReplaceWordSequence(tCurrentContentNameOriginal, aReplaceWordsOriginal, mReplaceWordsSequence);
        mCurrentContentName = StringUtil.replaceWords(tCurrentContentNameOriginal, aReplaceWordsQuoted);
        mParentDirectory = new DirectoryNameElements(
                aFileNameAndParentDirectoriesString.substring(0, tIndexOfLastDirectoryEnd),
                aReplaceWordsOriginal, aReplaceWordsQuoted);
    }
}

From source file:kafka.etl.impl.DataGenerator.java

License:Apache License

protected void generateOffsets() throws Exception {
    JobConf conf = new JobConf();
    conf.set("hadoop.job.ugi", _props.getProperty("hadoop.job.ugi"));
    conf.setCompressMapOutput(false);
    Path outPath = new Path(_offsetsDir + Path.SEPARATOR + "1.dat");
    FileSystem fs = outPath.getFileSystem(conf);
    if (fs.exists(outPath))
        fs.delete(outPath, true); // true = recursive; the one-argument delete(Path) is deprecated

    KafkaETLRequest request = new KafkaETLRequest(_topic, "tcp://" + _uri.getHost() + ":" + _uri.getPort(), 0);

    System.out.println("Dump " + request.toString() + " to " + outPath.toUri().toString());
    byte[] bytes = request.toString().getBytes("UTF-8");
    KafkaETLKey dummyKey = new KafkaETLKey();
    SequenceFile.setCompressionType(conf, SequenceFile.CompressionType.NONE);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, outPath, KafkaETLKey.class,
            BytesWritable.class);
    writer.append(dummyKey, new BytesWritable(bytes));
    writer.close();
}
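
Not part of the original source: a minimal sketch of reading the offset file back with a SequenceFile.Reader, reusing the fs, conf, and outPath values from generateOffsets() above:

    SequenceFile.Reader reader = new SequenceFile.Reader(fs, outPath, conf);
    try {
        KafkaETLKey key = new KafkaETLKey();
        BytesWritable value = new BytesWritable();
        while (reader.next(key, value)) {
            // The value holds the serialized KafkaETLRequest written above.
            System.out.println(
                    new String(value.getBytes(), 0, value.getLength(), java.nio.charset.StandardCharsets.UTF_8));
        }
    } finally {
        reader.close();
    }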

From source file:kafka.etl.KafkaETLUtils.java

License:Apache License

public static String getFileName(Path path) throws IOException {
    String fullname = path.toUri().toString();
    String[] parts = fullname.split(Path.SEPARATOR);
    if (parts.length < 1)
        throw new IOException("Invalid path " + fullname);
    return parts[parts.length - 1];
}
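
Note that Hadoop's own Path#getName() returns the component after the last separator, so it yields the same result as the split above. A quick comparison, using a hypothetical URI:

    Path p = new Path("hdfs://namenode:8020/kafka/offsets/1.dat"); // hypothetical path
    System.out.println(p.getName());                  // 1.dat
    System.out.println(KafkaETLUtils.getFileName(p)); // 1.dat, via the split on Path.SEPARATOR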

From source file:kafka.etl.tweet.producer.TweetProducer.java

License:Apache License

protected void generateOffsets() throws Exception {
    JobConf conf = new JobConf();
    conf.set("hadoop.job.ugi", _props.getProperty("hadoop.job.ugi"));
    conf.setCompressMapOutput(false);
    Path outPath = new Path(_offsetsDir + Path.SEPARATOR + "1.dat");
    FileSystem fs = outPath.getFileSystem(conf);
    if (fs.exists(outPath))
        fs.delete(outPath, true); // true = recursive; the one-argument delete(Path) is deprecated

    KafkaETLRequest request = new KafkaETLRequest(_topic, "tcp://" + _uri.getHost() + ":" + _uri.getPort(), 0);

    System.out.println("Dump " + request.toString() + " to " + outPath.toUri().toString());

    byte[] bytes = request.toString().getBytes("UTF-8");
    KafkaETLKey dummyKey = new KafkaETLKey();
    SequenceFile.setDefaultCompressionType(conf, SequenceFile.CompressionType.NONE);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, outPath, KafkaETLKey.class,
            BytesWritable.class);
    writer.append(dummyKey, new BytesWritable(bytes));
    writer.close();
}

From source file:ml.shifu.guagua.hadoop.ZooKeeperMasterInterceptor.java

License:Apache License

private String getZookeeperServerFolder(MasterContext<MASTER_RESULT, WORKER_RESULT> context) {
    String defaultZooKeeperServePath = new StringBuilder(200).append("tmp").append(Path.SEPARATOR)
            .append("_guagua").append(Path.SEPARATOR).append(context.getAppId()).append(Path.SEPARATOR)
            .toString();
    String hdfsZookeeperServerPath = context.getProps()
            .getProperty(GuaguaConstants.GUAGUA_ZK_CLUSTER_SERVER_FOLDER, defaultZooKeeperServePath);
    return hdfsZookeeperServerPath;
}

From source file:ml.shifu.guagua.hadoop.ZooKeeperWorkerInterceptor.java

License:Apache License

private String getZookeeperServerFolder(WorkerContext<MASTER_RESULT, WORKER_RESULT> context) {
    String defaultZooKeeperServePath = new StringBuilder(200).append("tmp").append(Path.SEPARATOR)
            .append("_guagua").append(Path.SEPARATOR).append(context.getAppId()).append(Path.SEPARATOR)
            .toString();
    String hdfsZookeeperServerPath = context.getProps()
            .getProperty(GuaguaConstants.GUAGUA_ZK_CLUSTER_SERVER_FOLDER, defaultZooKeeperServePath);
    return hdfsZookeeperServerPath;
}
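
For reference (not from the original source), with a hypothetical application id the default built by both interceptors above resolves to a relative folder like this:

    // Hypothetical appId; the real value comes from context.getAppId().
    String appId = "application_1400000000000_0001";
    String folder = "tmp" + Path.SEPARATOR + "_guagua" + Path.SEPARATOR + appId + Path.SEPARATOR;
    // folder == "tmp/_guagua/application_1400000000000_0001/"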

From source file:ml.shifu.guagua.yarn.util.InputSplitUtils.java

License:Apache License

/**
 * Expand a folder path to a glob pattern that matches all files in that folder.
 */
public static String expandInputFolder(Configuration conf) throws IOException {
    Path path = new Path(conf.get(GuaguaYarnConstants.GUAGUA_YARN_INPUT_DIR));
    String newPath = path.toString();
    return FileSystem.get(conf).getFileStatus(path).isDirectory() ? newPath + Path.SEPARATOR + "*" : newPath;
}
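
A minimal sketch of consuming the expanded pattern, with a hypothetical input folder; FileSystem#globStatus resolves the trailing "/*" glob to the concrete files:

    Configuration conf = new Configuration();
    conf.set(GuaguaYarnConstants.GUAGUA_YARN_INPUT_DIR, "/user/demo/input"); // hypothetical folder
    String pattern = InputSplitUtils.expandInputFolder(conf); // "/user/demo/input/*" for a directory
    FileStatus[] matches = FileSystem.get(conf).globStatus(new Path(pattern));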

From source file:ml.shifu.shifu.core.processor.ExportModelProcessor.java

License:Apache License

private double getColumnMetric(ColumnConfig config, PostCorrelationMetric metric) throws IOException {
    if (metric == null || metric.equals(PostCorrelationMetric.IV)) {
        // default to IV if no PostCorrelationMetric is specified
        return (config.getIv() == null ? Double.NaN : config.getIv());
    } else if (metric.equals(PostCorrelationMetric.KS)) {
        return (config.getKs() == null ? Double.NaN : config.getKs());
    } else if (metric.equals(PostCorrelationMetric.SE)) {
        if (this.seStatsMap == null) {
            SourceType source = this.modelConfig.getDataSet().getSource();
            String varSelectMSEOutputPath = pathFinder.getVarSelectMSEOutputPath(source);
            this.seStatsMap = readSEValuesToMap(
                    varSelectMSEOutputPath + Path.SEPARATOR + Constants.SHIFU_VARSELECT_SE_OUTPUT_NAME + "-*",
                source);
        }

        return this.seStatsMap.get(config.getColumnNum()).getRms();
    }
    return -1.0d;
}

From source file:ml.shifu.shifu.core.processor.InitModelProcessor.java

License:Apache License

private Map<Integer, Long> getDistinctCountMap(SourceType source, String autoTypePath) throws IOException {
    String outputFilePattern = autoTypePath + Path.SEPARATOR + "part-*";
    if (!ShifuFileUtils.isFileExists(outputFilePattern, source)) {
        throw new RuntimeException("Auto type checking output file not exist.");
    }/* w  w  w .  j  av a 2  s  .c o  m*/

    Map<Integer, Long> distinctCountMap = new HashMap<Integer, Long>();
    List<Scanner> scanners = null;
    try {
        // note: this only handles a single reducer output file
        FileStatus[] globStatus = ShifuFileUtils.getFileSystemBySourceType(source)
                .globStatus(new Path(outputFilePattern));
        if (globStatus == null || globStatus.length == 0) {
            throw new RuntimeException("Auto type checking output file not exist.");
        }
        scanners = ShifuFileUtils.getDataScanners(globStatus[0].getPath().toString(), source);
        Scanner scanner = scanners.get(0);
        String str = null;
        while (scanner.hasNext()) {
            str = scanner.nextLine().trim();
            if (str.contains(TAB_STR)) {
                String[] splits = str.split(TAB_STR);
                distinctCountMap.put(Integer.valueOf(splits[0]), Long.valueOf(splits[1]));
            }
        }
        return distinctCountMap;
    } finally {
        if (scanners != null) {
            for (Scanner scanner : scanners) {
                if (scanner != null) {
                    scanner.close();
                }
            }
        }
    }
}

From source file:ml.shifu.shifu.core.processor.VarSelectModelProcessor.java

License:Apache License

private void postProcess4SEVarSelect(SourceType source, String varSelectMSEOutputPath) throws IOException {
    String outputFilePattern = varSelectMSEOutputPath + Path.SEPARATOR + "part-r-*";
    if (!ShifuFileUtils.isFileExists(outputFilePattern, source)) {
        throw new RuntimeException("Var select MSE stats output file not exist.");
    }// ww  w . ja v a2  s  . c om

    int selectCnt = 0;
    for (ColumnConfig config : super.columnConfigList) {
        if (config.isFinalSelect()) {
            config.setFinalSelect(false);
        }

        // enable ForceSelect
        if (config.isForceSelect()) {
            config.setFinalSelect(true);
            selectCnt++;
            log.info("Variable {} is selected, since it is in ForceSelect list.", config.getColumnName());
        }
    }

    List<Scanner> scanners = null;
    try {
        // note: this only handles a single reducer output file
        FileStatus[] globStatus = ShifuFileUtils.getFileSystemBySourceType(source)
                .globStatus(new Path(outputFilePattern));
        if (globStatus == null || globStatus.length == 0) {
            throw new RuntimeException("Var select MSE stats output file not exist.");
        }
        scanners = ShifuFileUtils.getDataScanners(globStatus[0].getPath().toString(), source);
        String str = null;
        int targetCnt = 0; // total variable count that the user wants to select
        List<Integer> candidateColumnIdList = new ArrayList<Integer>();
        Scanner scanner = scanners.get(0);
        while (scanner.hasNext()) {
            ++targetCnt;
            str = scanner.nextLine().trim();
            candidateColumnIdList.add(Integer.parseInt(str));
        }

        int i = 0;
        // try to select another (targetCnt - selectCnt) variables, skipping
        // force-selected and force-removed columns
        while (selectCnt < targetCnt && i < targetCnt) {
            Integer columnId = candidateColumnIdList.get(i++);
            ColumnConfig columnConfig = this.columnConfigList.get(columnId);
            if (!columnConfig.isForceSelect() && !columnConfig.isForceRemove()) {
                columnConfig.setFinalSelect(true);
                selectCnt++;
                log.info("Variable {} is selected.", columnConfig.getColumnName());
            }
        }

        log.info("{} variables are selected.", selectCnt);
        log.info(
                "Sensitivity analysis report is in {}/{}-* file(s) with format 'column_index\tcolumn_name\tmean\trms\tvariance'.",
                varSelectMSEOutputPath, Constants.SHIFU_VARSELECT_SE_OUTPUT_NAME);
    } finally {
        if (scanners != null) {
            for (Scanner scanner : scanners) {
                if (scanner != null) {
                    scanner.close();
                }
            }
        }
    }

    this.saveColumnConfigListAndColumnStats();
    this.syncDataToHdfs(this.modelConfig.getDataSet().getSource());
}