Example usage for org.apache.hadoop.conf Configuration getStrings

List of usage examples for org.apache.hadoop.conf Configuration getStrings

Introduction

On this page you can find example usage for org.apache.hadoop.conf Configuration getStrings.

Prototype

public String[] getStrings(String name) 

Document

Get the comma-delimited values of the name property as an array of Strings; if no such property is specified, null is returned.
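
As a minimal sketch of that behavior (the property name demo.colors is invented for illustration): getStrings splits the comma-delimited value into an array and returns null when the property is not set, so callers should null-check the result, as the examples below do.

import org.apache.hadoop.conf.Configuration;

public class GetStringsDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("demo.colors", "red,green,blue");

        // Splits on commas: ["red", "green", "blue"]
        String[] colors = conf.getStrings("demo.colors");
        for (String color : colors) {
            System.out.println(color);
        }

        // An unset property yields null, not an empty array
        String[] missing = conf.getStrings("demo.missing");
        System.out.println(missing == null); // prints true
    }
}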

Usage

From source file:ImportTsv.java

License:Apache License

/**
 * Sets up the actual job.
 *
 * @param conf  The current configuration.
 * @param args  The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args)
        throws IOException, ClassNotFoundException {
    Job job = null;
    try (Connection connection = ConnectionFactory.createConnection(conf)) {
        try (Admin admin = connection.getAdmin()) {
            // Support non-XML supported characters
            // by re-encoding the passed separator as a Base64 string.
            String actualSeparator = conf.get(SEPARATOR_CONF_KEY);
            if (actualSeparator != null) {
                conf.set(SEPARATOR_CONF_KEY, Base64.encodeBytes(actualSeparator.getBytes()));
            }

            // See if a non-default Mapper was set
            String mapperClassName = conf.get(MAPPER_CONF_KEY);
            Class mapperClass = mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER;

            TableName tableName = TableName.valueOf(args[0]);
            Path inputDir = new Path(args[1]);

            // set filter
            conf.set(EASTCOM_FILTER_PARAMS, args[3]);
            conf.set(EASTCOM_FILTER_DEFINE, args[4]);

            String jobName = conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName.getNameAsString());
            job = Job.getInstance(conf, jobName);
            job.setJarByClass(mapperClass);
            FileInputFormat.setInputPaths(job, inputDir);
            job.setInputFormatClass(TextInputFormat.class);
            job.setMapperClass(mapperClass);
            String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
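            // getStrings splits the comma-delimited COLUMNS_CONF_KEY value into one array entry per column spec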
            String columns[] = conf.getStrings(COLUMNS_CONF_KEY);
            if (StringUtils.isNotEmpty(conf.get(CREDENTIALS_LOCATION))) {
                String fileLoc = conf.get(CREDENTIALS_LOCATION);
                Credentials cred = Credentials.readTokenStorageFile(new File(fileLoc), conf);
                job.getCredentials().addAll(cred);
            }

            if (hfileOutPath != null) {
                if (!admin.tableExists(tableName)) {
                    String errorMsg = format("Table '%s' does not exist.", tableName);
                    if ("yes".equalsIgnoreCase(conf.get(CREATE_TABLE_CONF_KEY, "yes"))) {
                        LOG.warn(errorMsg);
                        // TODO: this is backwards. Instead of depending on the existence of a table,
                        // create a sane splits file for HFileOutputFormat based on data sampling.
                        createTable(admin, tableName, columns);
                    } else {
                        LOG.error(errorMsg);
                        throw new TableNotFoundException(errorMsg);
                    }
                }
                try (HTable table = (HTable) connection.getTable(tableName)) {
                    boolean noStrict = conf.getBoolean(NO_STRICT_COL_FAMILY, false);
                    // if no.strict is false then check column family
                    if (!noStrict) {
                        ArrayList<String> unmatchedFamilies = new ArrayList<String>();
                        Set<String> cfSet = getColumnFamilies(columns);
                        HTableDescriptor tDesc = table.getTableDescriptor();
                        for (String cf : cfSet) {
                            if (tDesc.getFamily(Bytes.toBytes(cf)) == null) {
                                unmatchedFamilies.add(cf);
                            }
                        }
                        if (unmatchedFamilies.size() > 0) {
                            ArrayList<String> familyNames = new ArrayList<String>();
                            for (HColumnDescriptor family : table.getTableDescriptor().getFamilies()) {
                                familyNames.add(family.getNameAsString());
                            }
                            String msg = "Column Families " + unmatchedFamilies + " specified in "
                                    + COLUMNS_CONF_KEY + " does not match with any of the table " + tableName
                                    + " column families " + familyNames + ".\n"
                                    + "To disable column family check, use -D" + NO_STRICT_COL_FAMILY
                                    + "=true.\n";
                            usage(msg);
                            System.exit(-1);
                        }
                    }
                    job.setReducerClass(PutSortReducer.class);
                    Path outputDir = new Path(hfileOutPath);
                    FileOutputFormat.setOutputPath(job, outputDir);
                    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
                    if (mapperClass.equals(TsvImporterTextMapper.class)) {
                        job.setMapOutputValueClass(Text.class);
                        job.setReducerClass(TextSortReducer.class);
                    } else {
                        job.setMapOutputValueClass(Put.class);
                        job.setCombinerClass(PutCombiner.class);
                    }
                    HFileOutputFormat2.configureIncrementalLoad(job, table, table);
                }
            } else {
                if (!admin.tableExists(tableName)) {
                    String errorMsg = format("Table '%s' does not exist.", tableName);
                    LOG.error(errorMsg);
                    throw new TableNotFoundException(errorMsg);
                }
                if (mapperClass.equals(TsvImporterTextMapper.class)) {
                    usage(TsvImporterTextMapper.class.toString()
                            + " should not be used for non bulkloading case. use "
                            + TsvImporterMapper.class.toString()
                            + " or custom mapper whose value type is Put.");
                    System.exit(-1);
                }
                // No reducers. Just write straight to table. Call initTableReducerJob
                // to set up the TableOutputFormat.
                TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
                job.setNumReduceTasks(0);
            }

            TableMapReduceUtil.addDependencyJars(job);
            TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
                    com.google.common.base.Function.class /* Guava used by TsvParser */);
        }
    }
    return job;
}

From source file:backup.namenode.NameNodeBackupBlockCheckProcessor.java

License:Apache License

public NameNodeBackupBlockCheckProcessor(Configuration conf, NameNodeRestoreProcessor processor,
        NameNode namenode, UserGroupInformation ugi) throws Exception {
    String[] nnStorageLocations = conf.getStrings(DFS_NAMENODE_NAME_DIR);
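    // dfs.namenode.name.dir may list several comma-delimited directories; only the first is used for the report path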
    URI uri = new URI(nnStorageLocations[0]);
    _reportPath = new File(new File(uri.getPath()).getParent(), "backup-reports");
    _reportPath.mkdirs();
    if (!_reportPath.exists()) {
        throw new IOException("Report path " + _reportPath + " does not exist");
    }

    this.ugi = ugi;
    this.namenode = namenode;
    this.conf = conf;
    this.processor = processor;
    backupStore = BackupStore.create(BackupUtil.convert(conf));
    this.fileSystem = (DistributedFileSystem) FileSystem.get(conf);
    this.ignorePath = conf.get(DFS_BACKUP_IGNORE_PATH_FILE_KEY, DFS_BACKUP_IGNORE_PATH_FILE_DEFAULT);
    this.batchSize = conf.getInt(DFS_BACKUP_REMOTE_BACKUP_BATCH_KEY, DFS_BACKUP_REMOTE_BACKUP_BATCH_DEFAULT);
    this.checkInterval = conf.getLong(DFS_BACKUP_NAMENODE_BLOCK_CHECK_INTERVAL_KEY,
            DFS_BACKUP_NAMENODE_BLOCK_CHECK_INTERVAL_DEFAULT);
    this.initInterval = conf.getLong(DFS_BACKUP_NAMENODE_BLOCK_CHECK_INTERVAL_DELAY_KEY,
            DFS_BACKUP_NAMENODE_BLOCK_CHECK_INTERVAL_DELAY_DEFAULT);
    start();
}

From source file:bulkload.ImportTsv.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        usage("Wrong number of arguments: " + args.length);
        return -1;
    }
    setConf(HBaseConfiguration.create(getConf()));
    Configuration conf = getConf();
    // Make sure columns are specified
    String columns[] = conf.getStrings(COLUMNS_CONF_KEY);
    if (columns == null) {
        usage("No columns specified. Please specify with -D" + COLUMNS_CONF_KEY + "=...");
        return -1;
    }
    // Make sure rowkey is specified
    String rowkey = conf.get(ROWKEY_CONF_KEY);
    if (StringUtil.isEmpty(rowkey)) {
        usage("No rowkey specified or rowkey is empty. Please specify with -D" + ROWKEY_CONF_KEY + "=...");
        return -1;
    }
    // Make sure rowkey handler is specified
    String rowKeyGenerator = conf.get(ROWKEY_GENERATOR_CONF_KEY);
    if (StringUtil.isEmpty(rowKeyGenerator)) {
        usage("No rowkey_handler specified or rowkey generator is empty. Please specify with -D"
                + ROWKEY_GENERATOR_CONF_KEY + "=...");
        return -1;
    }
    // Make sure they specify exactly one column as the row key
    int rowkeysFound = 0;
    for (String col : columns) {
        String[] parts = col.split(":", 3);
        if (parts.length > 1 && rowkey.equals(parts[1])) {
            rowkeysFound++;
        }
    }
    if (rowkeysFound != 1) {
        usage("Must specify exactly one column as " + rowkey);
        return -1;
    }
    // Make sure at least one column is specified
    if (columns.length < 1) {
        usage("One or more columns in addition to the row key are required");
        System.exit(-1);
    }

    Job job = createSubmittableJob(conf, args);
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.ailk.oci.ocnosql.tools.load.mutiple.MutipleColumnImportTsv.java

License:Apache License

private static void createTable(Configuration conf, String tableName) throws IOException {
    HTableDescriptor htd = new HTableDescriptor(tableName.getBytes());
    String columns[] = conf.getStrings(CommonConstants.COLUMNS);
    Set<String> cfSet = new HashSet<String>();
    for (String aColumn : columns) {
        // if (TsvParser.ROWKEY_COLUMN_SPEC.equals(aColumn)) continue;
        // we are only concerned with the first one (in case this is a cf:cq)
        cfSet.add(aColumn.split(":", 2)[0]);
    }
    for (String cf : cfSet) {
        HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toBytes(cf));
        htd.addFamily(hcd);
    }
    hbaseAdmin.createTable(htd);
}

From source file:com.ailk.oci.ocnosql.tools.load.single.SingleColumnImportTsv.java

License:Apache License

private static void createTable(Configuration conf, String tableName) throws IOException {
    HTableDescriptor htd = new HTableDescriptor(tableName.getBytes());
    String columns[] = conf.getStrings(CommonConstants.COLUMNS);
    Set<String> cfSet = new HashSet<String>();
    for (String aColumn : columns) {
        //if (TsvParser.ROWKEY_COLUMN_SPEC.equals(aColumn)) continue;
        // we are only concerned with the first one (in case this is a cf:cq)
        cfSet.add(aColumn.split(":", 2)[0]);
    }
    for (String cf : cfSet) {
        HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toBytes(cf));
        htd.addFamily(hcd);
    }
    hbaseAdmin.createTable(htd);
}

From source file:com.asakusafw.runtime.stage.resource.StageResourceDriver.java

License:Apache License

private static ArrayList<String> restoreStrings(Configuration conf, String key) {
    assert conf != null;
    assert key != null;
    ArrayList<String> results = new ArrayList<>();
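    // getStrings returns null when the key is absent, so guard before copying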
    String[] old = conf.getStrings(key);
    if (old != null && old.length >= 1) {
        Collections.addAll(results, old);
    }
    return results;
}

From source file:com.asp.tranlog.ImportTsv.java

License:Apache License

/**
 * Main entry point.
 * 
 * @param args
 *            The command line parameters.
 * @throws Exception
 *             When running the job fails.
 */
public static void main(String[] args) throws Exception {
    System.out.println("==============================================");
    Configuration conf = HBaseConfiguration.create();

    LOG.error(PRE + "conf.toString() == " + conf.toString());

    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        usage("Wrong number of arguments: " + otherArgs.length);
        System.exit(-1);
    }

    String columns[] = conf.getStrings(COLUMNS_CONF_KEY);

    if (columns == null) {
        usage("No columns specified. Please specify with -D" + COLUMNS_CONF_KEY + "=...");
        System.exit(-1);
    }

    // Make sure at least one column in addition to the row key is specified
    if (columns.length < 2) {
        usage("One or more columns in addition to the row key are required");
        System.exit(-1);
    }
    columns = conf.getStrings(COLUMNS_CONF_KEY);
    if (columns == null) {
        usage("One or more key columns are required");
        System.exit(-1);
    }

    Job job = createSubmittableJob(conf, otherArgs);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.asp.tranlog.TsvImporterMapper.java

License:Apache License

/**
 * Handles initializing this class with objects specific to it (i.e., the
 * parser). Common initialization that might be leveraged by a subclass is
 * done in <code>doSetup</code>. Hence a subclass may choose to override
 * this method and call <code>doSetup</code> as well before handling its
 * own custom params.
 * 
 * @param context
 */
@Override
protected void setup(Context context) {
    doSetup(context);

    Configuration conf = context.getConfiguration();

    charset = conf.get(ImportTsv.CHARSET_CONF_KEY);

    parser = new ImportTsv.TsvParser(conf.get(ImportTsv.COLUMNS_CONF_KEY),
            conf.getStrings(ImportTsv.KEYCOLUMNS_CONF_KEY), separator);
    keyColIndex = parser.getRowKeyColumnIndex();
    keyColLen = parser.getRowKeyColumnLen();
    if (keyColIndex == null) {
        throw new RuntimeException("No row key column specified");
    }
    columnTypes = parser.getColType();
    if (columnTypes != null) {
        colDatetimeFormater = new char[columnTypes.length];
        for (int i = 0; i < columnTypes.length; i++)
            colDatetimeFormater[i] = 0;
    }
}

From source file:com.atlantbh.nutch.filter.xpath.DOMContentUtils.java

License:Apache License

public void setConf(Configuration conf) {
    // forceTags is used to override configurable tag ignoring, later on
    Collection<String> forceTags = new ArrayList<String>(1);

    this.conf = conf;
    linkParams.clear();
    linkParams.put("a", new LinkParams("a", "href", 1));
    linkParams.put("area", new LinkParams("area", "href", 0));
    if (conf.getBoolean("parser.html.form.use_action", true)) {
        linkParams.put("form", new LinkParams("form", "action", 1));
        if (conf.get("parser.html.form.use_action") != null)
            forceTags.add("form");
    }
    linkParams.put("frame", new LinkParams("frame", "src", 0));
    linkParams.put("iframe", new LinkParams("iframe", "src", 0));
    linkParams.put("script", new LinkParams("script", "src", 0));
    linkParams.put("link", new LinkParams("link", "href", 0));
    linkParams.put("img", new LinkParams("img", "src", 0));

    // remove unwanted link tags from the linkParams map
    String[] ignoreTags = conf.getStrings("parser.html.outlinks.ignore_tags");
    for (int i = 0; ignoreTags != null && i < ignoreTags.length; i++) {
        if (!forceTags.contains(ignoreTags[i]))
            linkParams.remove(ignoreTags[i]);
    }
}

From source file:com.bah.culvert.hive.CulvertHiveUtils.java

License:Apache License

/**
 * Get the hive column names in the conf. Corresponds 1-1 to the mappings.
 *
 * @param conf The configuration to get the names out of.
 * @return The hive column names.
 */
public static String[] getHiveColumnNamesFromConf(Configuration conf) {
    return conf.getStrings(CULVERT_HIVE_COLUMN_NAMES_CONF_KEY);
}