Example usage for org.apache.hadoop.fs Path getName

List of usage examples for org.apache.hadoop.fs Path getName

Introduction

On this page you can find usage examples for the org.apache.hadoop.fs.Path method getName().

Prototype

public String getName() 

Document

Returns the final component of this path.
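
Before the individual examples, here is a minimal, self-contained sketch (the HDFS URI below is hypothetical and used only for illustration) showing what getName() returns:

import org.apache.hadoop.fs.Path;

public class PathGetNameDemo {
    public static void main(String[] args) {
        // Hypothetical path, used purely for illustration.
        Path file = new Path("hdfs://namenode:8020/user/hadoop/logs/events.log");

        // getName() keeps only the final path component.
        System.out.println(file.getName()); // prints "events.log"

        // Scheme, authority, and parent directories are all stripped.
        System.out.println(file.getParent().getName()); // prints "logs"
    }
}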

Usage

From source file:co.nubetech.hiho.mapreduce.lib.input.FileStreamRecordReader.java

License:Apache License
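
This record reader uses getName() to record the file name of the input split before opening a stream on it.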

@Override
public boolean nextKeyValue() throws IOException {
    logger.debug("Inside nextKeyValue");
    if (!isRead) {
        Path file = split.getPath();
        logger.debug("Path is " + file);
        fileName = file.getName();
        FileSystem fs = file.getFileSystem(context.getConfiguration());
        stream = fs.open(file);
        logger.debug("Opened stream");
        isRead = true;
        return true;
    }
    return false;
}

From source file:co.nubetech.hiho.mapreduce.lib.output.FTPTextOutputFormat.java

License:Apache License
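
Here getName() yields the name of the task's work file, which is then opened for appending on a remote FTP server.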

@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {

    Configuration conf = job.getConfiguration();

    String ip = conf.get(HIHOConf.FTP_ADDRESS);
    String portno = conf.get(HIHOConf.FTP_PORT);
    String usr = conf.get(HIHOConf.FTP_USER);
    String pwd = conf.get(HIHOConf.FTP_PASSWORD);
    String dir = getOutputPath(job).toString();
    System.out.println("\n\ninside ftpoutputformat" + ip + " " + portno + " " + usr + " " + pwd + " " + dir);
    String keyValueSeparator = conf.get("mapred.textoutputformat.separator", "\t");
    FTPClient f = new FTPClient();
    f.connect(ip, Integer.parseInt(portno));
    f.login(usr, pwd);
    f.changeWorkingDirectory(dir);
    f.setFileType(FTP.BINARY_FILE_TYPE);

    boolean isCompressed = getCompressOutput(job);
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    String filename = file.getName();
    if (!isCompressed) {
        // FSDataOutputStream fileOut = fs.create(file, false);
        OutputStream os = f.appendFileStream(filename);
        DataOutputStream fileOut = new DataOutputStream(os);
        return new FTPLineRecordWriter<K, V>(fileOut, new String(keyValueSeparator), f);

    } else {
        // FSDataOutputStream fileOut = fs.create(file, false);
        OutputStream os = f.appendFileStream(filename);
        DataOutputStream fileOut = new DataOutputStream(os);
        return new FTPLineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                keyValueSeparator, f);
    }
}

From source file:com.ailk.oci.ocnosql.tools.load.mutiple.MutipleColumnImportTsv.java

License:Apache License
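
This bulk-load driver appends the final component of each input path, obtained via getName(), to the job input and temporary output directories when launching one job per file.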

public boolean execute(Connection conn, OciTableRef table) {
    if (conn == null) {
        msg = "Connection object must not be null";
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ClientRuntimeException(msg);
    }
    Configuration conf = conn.getConf();
    if (table == null) {
        msg = "table must not be null";
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ClientRuntimeException(msg);
    }

    String tableName = table.getName();
    String column = table.getColumns();
    String seperator = table.getSeperator();
    String inputPath = table.getInputPath();
    String tmpOutPut = table.getImportTmpOutputPath();
    String skipBadLine = table.getSkipBadLine();
    String compressor = table.getCompressor();
    String rowkeyUnique = table.getRowKeyUnique();
    String algoColumn = table.getAlgoColumn();
    String rowkeyGenerator = table.getRowkeyGenerator();
    String rowkeyColumn = table.getRowkeyColumn();
    String callback = table.getCallback();

    if (StringUtils.isEmpty(tableName)) {
        msg = "No " + CommonConstants.TABLE_NAME
                + " specified. Please check config,then try again after refreshing cache";
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ConfigException(msg);
    }
    conf.set(CommonConstants.TABLE_NAME, tableName);

    if (StringUtils.isEmpty(seperator)) {
        msg = "No " + CommonConstants.SEPARATOR
                + " specified. Please check config,then try again after refreshing cache";
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ConfigException(msg);
    }
    conf.set(CommonConstants.SEPARATOR, seperator);

    // Make sure columns are specified
    String columns[] = StringUtils.splitByWholeSeparatorPreserveAllTokens(column, ",");
    if (columns == null) {
        msg = "No " + CommonConstants.COLUMNS
                + " specified. Please check config,then try again after refreshing cache";
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ConfigException(msg);
    }
    conf.set(CommonConstants.COLUMNS, column);

    //      int rowkeysFound = 0;
    //      for (String col : columns) {
    //         if (col.equals(CommonConstants.ROW_KEY))
    //            rowkeysFound++;
    //      }
    //      if (rowkeysFound != 1) {
    //         msg = "Must specify exactly one column as " + CommonConstants.ROW_KEY + ". Please check config,then again after refreshing cache";
    //         retMap.put(FAILED_REASON, msg);
    //         LOG.error(msg);
    //         throw new ConfigException(msg);
    //      }

    if (columns.length < 2) {
        msg = "One or more columns in addition to the row key are required. Please check config,then try again after refreshing cache";
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ConfigException(msg);
    }

    String[] columnTmp = null;
    for (int i = 0; i < columns.length; i++) {
        columnTmp = columns[i].split(":");
        if (columnTmp != null && columnTmp.length == 2) {
            break;
        }
    }
    conf.set(CommonConstants.SINGLE_FAMILY, columnTmp[0]);
    if (!StringUtils.isEmpty(skipBadLine)) {
        conf.set(CommonConstants.SKIPBADLINE, skipBadLine);
    }
    // fall back to the default compressor when none is configured
    conf.set(CommonConstants.COMPRESSOR, (compressor == null) ? DEFAULT_COMPRESSOR : compressor);
    conf.set(CommonConstants.ALGOCOLUMN, algoColumn);
    conf.set(CommonConstants.ROWKEY_GENERATOR, rowkeyGenerator);
    conf.set(CommonConstants.ROWKEYCOLUMN, rowkeyColumn);
    conf.set(CommonConstants.ROWKEYCALLBACK, callback);

    boolean ret = false;
    Counter failCounter = null;
    try {
        hbaseAdmin = new HBaseAdmin(conf);
        TableConfiguration.getInstance().writeTableConfiguration(tableName, column, seperator, conf);
        conf.set(CommonConstants.TABLE_NAME, tableName);
        String hdfs_url = conf.get(CommonConstants.HDFS_URL);
        FileSystem fs = FileSystem.get(URI.create(hdfs_url), conf);
        FileStatus[] fileStatusArr = fs.listStatus(new Path(hdfs_url + inputPath));
        if (fileStatusArr != null && fileStatusArr.length > 0) {
            if (fileStatusArr[0].isFile()) {
                ret = (Boolean) runJob(conf, tableName, inputPath, tmpOutPut)[0];
            }
            int inputPathNum = 0;
            for (FileStatus everyInputPath : fileStatusArr) {
                Path inputPathStr = everyInputPath.getPath();
                String absoluteInputPathStr = inputPath + "/" + inputPathStr.getName();
                boolean retCode = (Boolean) runJob(conf, tableName, absoluteInputPathStr,
                        tmpOutPut + "/" + inputPathStr.getName())[0];
                if (retCode) {
                    String base64Seperator = conf.get(CommonConstants.SEPARATOR);
                    conf.set(CommonConstants.SEPARATOR, new String(Base64.decode(base64Seperator))); // restore the Base64-decoded separator
                    if (inputPathNum == fileStatusArr.length - 1) {
                        ret = true;
                    }
                    inputPathNum++;
                    continue;
                } else { // job failed: stop processing the remaining input paths
                    ret = false;
                    inputPathNum++;
                    break;
                }
            }
        }
    } catch (Exception e) {
        msg = "job execute failed,nested exception is " + e;
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ClientRuntimeException(msg);
    }

    if (!ret) {
        msg = "execute job failed,please check map/reduce log in jobtracker page";
        retMap.put(FAILED_REASON, msg);
        return false;
    }
    return true;
}

From source file:com.ailk.oci.ocnosql.tools.load.mutiple.MutipleColumnImportTsv.java

License:Apache License
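
main() follows the same per-directory pattern, using getName() to derive each sub-job's input and temporary output paths.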

/**
 * Main entry point.
 *
 * @param args  The command line parameters.
 * @throws Exception When running the job fails.
 */
public static void main(String[] args) throws Exception {
    long inputLineNum = 0L;
    long badLineNum = 0L;
    long outputLineNum = 0L;
    Configuration conf = HBaseConfiguration.create();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        usage("Wrong number of arguments: " + otherArgs.length);
        System.exit(-1);
    }
    // Make sure columns are specified
    String columns = conf.get(CommonConstants.COLUMNS);
    if (columns == null) {
        usage("No columns specified. Please specify with -D" + CommonConstants.COLUMNS + "=...");
        System.exit(-1);
    }
    String seperator = conf.get(CommonConstants.SEPARATOR);
    if (StringUtils.isEmpty(seperator)) {
        conf.set(CommonConstants.SEPARATOR, CommonConstants.DEFAULT_SEPARATOR);
        seperator = CommonConstants.DEFAULT_SEPARATOR;
    }
    // Make sure one or more columns are specified
    if (columns.split(",").length < 2) {
        usage("One or more columns in addition to the row key are required");
        System.exit(-1);
    }
    // make sure tableName and columns are upper-case, as required by Phoenix
    columns = columns.toUpperCase();
    String notNeedLoadColumnsStr = conf.get(CommonConstants.NOTNEEDLOADCOLUMNS);
    String notNeedLoadColumns = null;
    if (!StringUtils.isEmpty(notNeedLoadColumnsStr)) {
        notNeedLoadColumns = notNeedLoadColumnsStr.toUpperCase();
        conf.set(CommonConstants.NOTNEEDLOADCOLUMNS, notNeedLoadColumns);
    }

    String writeTableConfigColumns = getWriteConfigColumn(columns, notNeedLoadColumns);
    hbaseAdmin = new HBaseAdmin(conf);
    String tableName = otherArgs[0].toUpperCase();
    String inputPath = otherArgs[1];
    String tmpOutputPath = conf.get(CommonConstants.IMPORT_TMP_OUTPUT);
    conf.set(CommonConstants.TABLE_NAME, tableName);
    conf.set(CommonConstants.COLUMNS, columns);
    String pathStr = conf.get(CommonConstants.HDFS_URL) + inputPath;
    FileSystem fs = FileSystem.get(URI.create(conf.get(CommonConstants.HDFS_URL)), conf);
    FileStatus[] fileStatusArr = fs.listStatus(new Path(pathStr));
    if (fileStatusArr != null && fileStatusArr.length > 0) {
        TableConfiguration.getInstance().writeTableConfiguration(tableName, writeTableConfigColumns, seperator,
                conf);
        if (fileStatusArr[0].isFile()) {
            Object[] resObjs = runJob(conf, tableName, inputPath, tmpOutputPath);
            inputLineNum = (Long) resObjs[1];
            outputLineNum = (Long) resObjs[2];
            badLineNum = (Long) resObjs[3];
            LOG.info("Bulkload Result={inputLine:" + inputLineNum + ",outputLine:" + outputLineNum + ",badLine:"
                    + badLineNum + "}");
            boolean result = (Boolean) resObjs[0];
            if (result) {
                System.exit(0);
            }
            System.exit(-1);
        }
        for (FileStatus everyInputPath : fileStatusArr) {
            Path inputPathStr = everyInputPath.getPath();
            String absoluteInputPathStr = inputPath + "/" + inputPathStr.getName();
            FileStatus[] subFileStatusArr = fs
                    .listStatus(new Path(conf.get(CommonConstants.HDFS_URL) + absoluteInputPathStr));
            if (subFileStatusArr == null || subFileStatusArr.length == 0) // empty directory, no job to run
                continue;
            Object[] resObjs = runJob(conf, tableName, absoluteInputPathStr,
                    tmpOutputPath + "/" + inputPathStr.getName());
            boolean ret = (Boolean) resObjs[0];
            if (ret) {
                inputLineNum += (Long) resObjs[1];
                outputLineNum += (Long) resObjs[2];
                badLineNum += (Long) resObjs[3];
                String seperatorStr = conf.get(CommonConstants.SEPARATOR);
                conf.set(CommonConstants.SEPARATOR, new String(Base64.decode(seperatorStr))); // restore the Base64-decoded separator
                continue;
            } else { // a job failed: log the counts and exit
                LOG.error("Bulkload Result={inputLine:" + inputLineNum + ",outputLine:" + outputLineNum
                        + ",badLine:" + badLineNum + "}");
                System.exit(-1);
            }
        }
        LOG.info("Bulkload Result={inputLine:" + inputLineNum + ",outputLine:" + outputLineNum + ",badLine:"
                + badLineNum + "}");
    }
    LOG.info("Bulkload Result={inputLine:" + inputLineNum + ",outputLine:" + outputLineNum + ",badLine:"
            + badLineNum + "}");
    System.exit(0); // all jobs completed successfully
}

From source file:com.ailk.oci.ocnosql.tools.load.single.SingleColumnImportTsv.java

License:Apache License
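
The single-column loader uses getName() in the same way, building per-file input and temporary output paths for each bulk-load job.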

public boolean execute(Connection conn, OciTableRef table) {
    if (conn == null) {
        msg = "Connection object must not be null";
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ClientRuntimeException(msg);
    }
    Configuration conf = conn.getConf();
    if (table == null) {
        msg = "table must not be null";
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ClientRuntimeException(msg);
    }

    String tableName = table.getName();
    String column = table.getColumns();
    String seperator = table.getSeperator();
    String inputPath = table.getInputPath();
    String tmpOutPut = table.getImportTmpOutputPath();
    String skipBadLine = table.getSkipBadLine();
    String compressor = table.getCompressor();
    String rowkeyUnique = table.getRowKeyUnique();
    String algoColumn = table.getAlgoColumn();
    String rowkeyGenerator = table.getRowkeyGenerator();
    String rowkeyColumn = table.getRowkeyColumn();
    String callback = table.getCallback();

    if (StringUtils.isEmpty(tableName)) {
        msg = "No " + CommonConstants.TABLE_NAME
                + " specified. Please check config,then try again after refreshing cache";
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ConfigException(msg);
    }
    conf.set(CommonConstants.TABLE_NAME, tableName);

    //      if(StringUtils.isEmpty(seperator)){
    //         msg = "No " + CommonConstants.SEPARATOR + " specified. Please check config,then try again after refreshing cache";
    //         retMap.put(FAILED_REASON, msg);
    //         LOG.error(msg);
    //         throw new ConfigException(msg);
    //      }
    //      conf.set(CommonConstants.SEPARATOR, seperator);

    if (StringUtils.isEmpty(seperator)) {
        conf.set(CommonConstants.SEPARATOR, CommonConstants.DEFAULT_SEPARATOR);
    }

    // Make sure columns are specified, split by ","
    String columns[] = StringUtils.splitByWholeSeparatorPreserveAllTokens(column, ",");
    if (columns == null) {
        msg = "No " + CommonConstants.COLUMNS
                + " specified. Please check config,then try again after refreshing cache";
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ConfigException(msg);
    }
    conf.set(CommonConstants.COLUMNS, column);

    if (StringUtils.isEmpty(rowkeyColumn) && StringUtils.isEmpty(algoColumn)) {
        msg = "No " + CommonConstants.ROW_KEY
                + " rule specified. Please check config,then try again after refreshing cache";
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ConfigException(msg);
    }
    conf.set(CommonConstants.SEPARATOR, seperator);

    //      int rowkeysFound = 0;
    //      for (String col : columns) {
    //         if (col.equals(CommonConstants.ROW_KEY))
    //            rowkeysFound++;
    //      }
    //      //HBASE_ROW_KEY?
    //      if (rowkeysFound != 1) {
    //         msg = "Must specify exactly one column as " + CommonConstants.ROW_KEY + ". Please check config,then again after refreshing cache";
    //         retMap.put(FAILED_REASON, msg);
    //         LOG.error(msg);
    //         throw new ConfigException(msg);
    //      }

    // besides HBASE_ROW_KEY, at least one data column is required
    if (columns.length < 2) {
        msg = "One or more columns in addition to the row key are required. Please check config,then try again after refreshing cache";
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ConfigException(msg);
    }

    //":"
    String[] columnTmp = null;
    for (int i = 0; i < columns.length; i++) {
        columnTmp = columns[i].split(":");
        if (columnTmp != null && columnTmp.length == 2) {
            break;
        }
    }

    // record the column family of the first "family:qualifier" column
    conf.set(CommonConstants.SINGLE_FAMILY, columnTmp[0]);

    // whether bad input lines should be skipped
    if (!StringUtils.isEmpty(skipBadLine)) {
        conf.set(CommonConstants.SKIPBADLINE, skipBadLine);
    }
    // fall back to the default compressor when none is configured
    conf.set(CommonConstants.COMPRESSOR, (compressor == null) ? DEFAULT_COMPRESSOR : compressor);
    conf.set(CommonConstants.ALGOCOLUMN, algoColumn);
    conf.set(CommonConstants.ROWKEY_GENERATOR, rowkeyGenerator);
    conf.set(CommonConstants.ROWKEYCOLUMN, rowkeyColumn);
    conf.set(CommonConstants.ROWKEYCALLBACK, callback);

    boolean ret = false;
    //      Counter failCounter = null;
    try {
        hbaseAdmin = new HBaseAdmin(conf);
        TableConfiguration.getInstance().writeTableConfiguration(tableName, column, seperator, conf);
        //         Job job = createSubmittableJob(conf, tableName, inputPath, tmpOutPut);
        //         //job
        //         ret = job.waitForCompletion(true);
        //         Counters counters = job.getCounters();
        //         for (String groupName : counters.getGroupNames()) {
        //            failCounter = counters.findCounter(groupName, "NUM_FAILED_MAPS");
        //            if(failCounter != null){
        //               break;
        //            }
        //         }
        conf.set(CommonConstants.TABLE_NAME, tableName);
        String hdfs_url = conf.get(CommonConstants.HDFS_URL);
        FileSystem fs = FileSystem.get(URI.create(hdfs_url), conf);
        FileStatus[] fileStatusArr = fs.listStatus(new Path(hdfs_url + inputPath));
        if (fileStatusArr != null && fileStatusArr.length > 0) {
            if (fileStatusArr[0].isFile()) {
                ret = runJob(conf, tableName, inputPath, tmpOutPut);
            }
            int inputPathNum = 0;
            for (FileStatus everyInputPath : fileStatusArr) {
                Path inputPathStr = everyInputPath.getPath();
                String absoluteInputPathStr = inputPath + "/" + inputPathStr.getName();
                boolean retCode = runJob(conf, tableName, absoluteInputPathStr,
                        tmpOutPut + "/" + inputPathStr.getName());
                if (retCode) {
                    String base64Seperator = conf.get(CommonConstants.SEPARATOR);
                    conf.set(CommonConstants.SEPARATOR, new String(Base64.decode(base64Seperator))); // restore the Base64-decoded separator
                    if (inputPathNum == fileStatusArr.length - 1) {
                        ret = true;
                    }
                    inputPathNum++;
                    continue;
                } else { // job failed: stop processing the remaining input paths
                    ret = false;
                    inputPathNum++;
                    break;
                }
            }
        }

    } catch (Exception e) {
        msg = "job execute failed,nested exception is " + e;
        retMap.put(FAILED_REASON, msg);
        LOG.error(msg);
        throw new ClientRuntimeException(msg);
    }

    boolean result = true;
    if (!ret) {
        msg = "execute job failed,please check map/reduce log in jobtracker page";
        retMap.put(FAILED_REASON, msg);
        result = false;
    }
    /*
    else {
     String[] params = new String[2];
     params[0] = tmpOutPut;
     params[1] = tableName;
     int retrunCode = -1;
     try {
    //bulkload complete
    retrunCode = ToolRunner.run(new LoadIncrementalHFiles(conf),
          params);
     } catch (Exception e) {
    msg = "job execute failed,nested exception is " + e;
    retMap.put(FAILED_REASON, msg);
    LOG.error(msg);
    throw new ClientRuntimeException(msg);
     }
     if(retrunCode != 0) result = false;
    }
    */
    return result;
}

From source file:com.ailk.oci.ocnosql.tools.load.single.SingleColumnImportTsv.java

License:Apache License
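
Again, getName() supplies the final path component used to construct each sub-job's input and temporary output paths.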

/**
 * Main entry point.
 *
 * @param args  The command line parameters.
 * @throws Exception When running the job fails.
 */
public static void main(String[] args) throws Exception {
    Map<String, String> map = getProperty();
    if (map == null || map.size() == 0) {
        System.err.println("Error: read conf file " + CONF_FILE + " occur error.");
        System.exit(0);
    }
    Configuration conf = Connection.getInstance().getConf();

    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        usage("Wrong number of arguments: " + otherArgs.length);
        System.exit(-1);
    }

    // Make sure columns are specified
    String columns = conf.get(CommonConstants.COLUMNS);
    if (columns == null) {
        usage("No columns specified. Please specify with -D" + CommonConstants.COLUMNS + "=...");
        System.exit(-1);
    }
    String seperator = conf.get(CommonConstants.SEPARATOR);
    if (seperator == null) {
        conf.set(CommonConstants.SEPARATOR, CommonConstants.DEFAULT_SEPARATOR);
        seperator = CommonConstants.DEFAULT_SEPARATOR;
    }
    // Make sure one or more columns are specified
    if (columns.split(",").length < 2) {
        usage("One or more columns in addition to the row key are required");
        System.exit(-1);
    }
    // make sure tableName and columns are upper-case, as required by Phoenix
    columns = columns.toUpperCase();
    String tableName = otherArgs[0].toUpperCase();
    String inputPath = otherArgs[1];

    hbaseAdmin = new HBaseAdmin(conf);
    String tmpOutputPath = conf.get(CommonConstants.IMPORT_TMP_OUTPUT);
    conf.set(CommonConstants.TABLE_NAME, tableName);
    conf.set(CommonConstants.COLUMNS, columns);
    String hdfs_url = conf.get(CommonConstants.HDFS_URL);
    FileSystem fs = FileSystem.get(URI.create(hdfs_url), conf);
    FileStatus[] fileStatusArr = fs.listStatus(new Path(hdfs_url + inputPath));
    if (fileStatusArr != null && fileStatusArr.length > 0) {
        TableConfiguration.getInstance().writeTableConfiguration(tableName, columns, seperator, conf);
        if (fileStatusArr[0].isFile()) { // the input path is a single file: run one job
            boolean result = runJob(conf, tableName, inputPath, tmpOutputPath);
            if (result) {
                System.exit(0);
            }
            System.exit(-1);
        }
        for (FileStatus everyInputPath : fileStatusArr) { // the input path is a directory: run one job per entry
            Path inputPathStr = everyInputPath.getPath();
            String absoluteInputPathStr = inputPath + "/" + inputPathStr.getName();
            FileStatus[] subFileStatusArr = fs.listStatus(new Path(hdfs_url + absoluteInputPathStr));
            if (subFileStatusArr == null || subFileStatusArr.length == 0) // empty directory, no job to run
                continue;
            boolean ret = runJob(conf, tableName, absoluteInputPathStr,
                    tmpOutputPath + "/" + inputPathStr.getName());
            if (ret) {
                String base64Seperator = conf.get(CommonConstants.SEPARATOR);
            conf.set(CommonConstants.SEPARATOR, new String(Base64.decode(base64Seperator))); // restore the Base64-decoded separator
                continue;
            } else // job failed
                System.exit(-1);

        }
    }
    System.exit(0); // all jobs completed successfully
}

From source file:com.alexholmes.hadooputils.test.TextIOJobBuilder.java

License:Apache License
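
The PathFilter below uses getName() to keep only task output files, i.e. those whose names start with "part".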

/**
 * Called after the MapReduce job has completed, to verify that the outputs
 * generated by the MapReduce job align with the expected outputs that were
 * set with calls to {@link #addExpectedOutput(String)} and
 * {@link #addExpectedOutput(String...)}.
 *
 * @return a reference to this object
 * @throws IOException if something goes wrong
 */
public TextIOJobBuilder verifyResults() throws IOException {

    FileStatus[] outputFiles = fs.listStatus(outputPath, new PathFilter() {
        @Override
        public boolean accept(final Path path) {
            return path.getName().startsWith("part");
        }
    });

    System.out.println("Output files: " + StringUtils.join(outputFiles));

    int i = 0;
    for (FileStatus file : outputFiles) {
        List<String> actualLines = FileUtils.readLines(fs, file.getPath());

        for (String actualLine : actualLines) {
            String expectedLine = expectedOutputs.get(i++);
            assertEquals(expectedLine, actualLine);
        }
    }

    assertEquals(expectedOutputs.size(), i);

    return this;
}

From source file:com.alexholmes.hdfsslurper.WorkerThread.java

License:Apache License
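
This worker calls getName() on the destination path to decide whether a codec extension needs to be appended and whether an LZO index should be built.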

private void process(FileStatus srcFileStatus) throws IOException, InterruptedException {

    Path stagingFile = null;
    FileSystem destFs = null;
    String filenameBatchidDelimiter = config.getFileNameBatchIdDelimiter();

    try {
        FileSystem srcFs = srcFileStatus.getPath().getFileSystem(config.getConfig());

        // run a script which can change the name of the file as well as
        // write out a new version of the file
        //
        if (config.getWorkScript() != null) {
            Path newSrcFile = stageSource(srcFileStatus);
            srcFileStatus = srcFileStatus.getPath().getFileSystem(config.getConfig()).getFileStatus(newSrcFile);
        }

        Path srcFile = srcFileStatus.getPath();

        // get the target HDFS file
        //
        Path destFile = getHdfsTargetPath(srcFileStatus);

        if (config.getCodec() != null) {
            String ext = config.getCodec().getDefaultExtension();
            if (!destFile.getName().endsWith(ext)) {
                destFile = new Path(destFile.toString() + ext);
            }
        }

        destFs = destFile.getFileSystem(config.getConfig());

        // get the staging HDFS file
        //
        stagingFile = fileSystemManager.getStagingFile(srcFileStatus, destFile);
        String batchId = srcFile.toString().substring(
                srcFile.toString().lastIndexOf(filenameBatchidDelimiter) + 1, srcFile.toString().length());

        log.info("event#Copying source file '" + srcFile + "' to staging destination '" + stagingFile + "'"
                + "$batchId#" + batchId);

        // if the directory of the target file doesn't exist, attempt to
        // create it
        //
        Path destParentDir = destFile.getParent();
        if (!destFs.exists(destParentDir)) {
            log.info("event#Attempting creation of target directory: " + destParentDir.toUri());
            if (!destFs.mkdirs(destParentDir)) {
                throw new IOException("event#Failed to create target directory: " + destParentDir.toUri());
            }
        }

        // if the staging directory doesn't exist, attempt to create it
        //
        Path destStagingParentDir = stagingFile.getParent();
        if (!destFs.exists(destStagingParentDir)) {
            log.info("event#Attempting creation of staging directory: " + destStagingParentDir.toUri());
            if (!destFs.mkdirs(destStagingParentDir)) {
                throw new IOException("event#Failed to create staging directory: " + destParentDir.toUri());
            }
        }

        // copy the file
        //
        InputStream is = null;
        OutputStream os = null;
        CRC32 crc = new CRC32();
        try {
            is = new BufferedInputStream(srcFs.open(srcFile));
            if (config.isVerify()) {
                is = new CheckedInputStream(is, crc);
            }
            os = destFs.create(stagingFile);

            if (config.getCodec() != null) {
                os = config.getCodec().createOutputStream(os);
            }

            IOUtils.copyBytes(is, os, 4096, false);
        } finally {
            IOUtils.closeStream(is);
            IOUtils.closeStream(os);
        }

        long srcFileSize = srcFs.getFileStatus(srcFile).getLen();
        long destFileSize = destFs.getFileStatus(stagingFile).getLen();
        if (config.getCodec() == null && srcFileSize != destFileSize) {
            throw new IOException(
                    "event#File sizes don't match, source = " + srcFileSize + ", dest = " + destFileSize);
        }

        log.info("event#Local file size = " + srcFileSize + ", HDFS file size = " + destFileSize + "$batchId#"
                + batchId);

        if (config.isVerify()) {
            verify(stagingFile, crc.getValue());
        }

        if (destFs.exists(destFile)) {
            destFs.delete(destFile, false);
        }

        log.info("event#Moving staging file '" + stagingFile + "' to destination '" + destFile + "'"
                + "$batchId#" + batchId);
        if (!destFs.rename(stagingFile, destFile)) {
            throw new IOException("event#Failed to rename file");
        }

        if (config.isCreateLzopIndex() && destFile.getName().endsWith(lzopExt)) {
            Path lzoIndexPath = new Path(destFile.toString() + LzoIndex.LZO_INDEX_SUFFIX);
            if (destFs.exists(lzoIndexPath)) {
                log.info("event#Deleting index file as it already exists");
                destFs.delete(lzoIndexPath, false);
            }
            indexer.index(destFile);
        }

        fileSystemManager.fileCopyComplete(srcFileStatus);

    } catch (Throwable t) {
        log.error("event#Caught exception working on file " + srcFileStatus.getPath(), t);

        // delete the staging file if it still exists
        //
        try {
            if (destFs != null && destFs.exists(stagingFile)) {
                destFs.delete(stagingFile, false);
            }
        } catch (Throwable t2) {
            log.error("event#Failed to delete staging file " + stagingFile, t2);
        }

        fileSystemManager.fileCopyError(srcFileStatus);
    }

}

From source file:com.alibaba.jstorm.hdfs.blobstore.HdfsBlobStoreFile.java

License:Apache License
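
Here the blob key is derived from the final component of the base path via getName().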

public HdfsBlobStoreFile(Path base, String name, Configuration hconf) {
    if (BLOBSTORE_DATA_FILE.equals(name)) {
        _isTmp = false;
    } else {
        Matcher m = TMP_NAME_PATTERN.matcher(name);
        if (!m.matches()) {
            throw new IllegalArgumentException(
                    "File name does not match '" + name + "' !~ " + TMP_NAME_PATTERN);
        }
        _isTmp = true;
    }
    _hadoopConf = hconf;
    _key = base.getName();
    _path = new Path(base, name);
    _mustBeNew = false;
    try {
        _fs = _path.getFileSystem(_hadoopConf);
    } catch (IOException e) {
        throw new RuntimeException("Error getting filesystem for path: " + _path, e);
    }
}

From source file:com.alibaba.jstorm.hdfs.blobstore.HdfsBlobStoreFile.java

License:Apache License
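
As in the previous constructor, getName() on the base path supplies the blob key.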

public HdfsBlobStoreFile(Path base, boolean isTmp, boolean mustBeNew, Configuration hconf) {
    _key = base.getName();
    _hadoopConf = hconf;
    _isTmp = isTmp;
    _mustBeNew = mustBeNew;
    if (_isTmp) {
        _path = new Path(base, System.currentTimeMillis() + TMP_EXT);
    } else {
        _path = new Path(base, BLOBSTORE_DATA_FILE);
    }
    try {
        _fs = _path.getFileSystem(_hadoopConf);
    } catch (IOException e) {
        throw new RuntimeException("Error getting filesystem for path: " + _path, e);
    }
}