Example usage for org.apache.hadoop.mapred JobConf get

List of usage examples for org.apache.hadoop.mapred JobConf get

Introduction

In this page you can find the example usage for org.apache.hadoop.mapred JobConf get.

Prototype

public String get(String name, String defaultValue) 

Source Link

Document

Get the value of the name.

Usage

From source file:edu.stolaf.cs.wmrserver.streaming.StreamUtil.java

License:Apache License

public static boolean isLocalJobTracker(JobConf job) {
    return job.get("mapred.job.tracker", "local").equals("local");
}

From source file:edu.uci.ics.fuzzyjoin.hadoop.FuzzyJoinDriver.java

License:Apache License

public static void run(JobConf job) throws IOException {
    job.setJarByClass(FuzzyJoinDriver.class);
    ///*from   w  ww.ja  va2  s .  c o m*/
    // print info
    //
    String ret = "FuzzyJoinDriver(" + job.getJobName() + ")\n" + "  Input Path:  {";
    Path inputs[] = FileInputFormat.getInputPaths(job);
    for (int ctr = 0; ctr < inputs.length; ctr++) {
        if (ctr > 0) {
            ret += "\n                ";
        }
        ret += inputs[ctr].toString();
    }
    ret += "}\n";
    ret += "  Output Path: " + FileOutputFormat.getOutputPath(job) + "\n" + "  Map Jobs:    "
            + job.getNumMapTasks() + "\n" + "  Reduce Jobs: " + job.getNumReduceTasks() + "\n"
            + "  Properties:  {";
    String[][] properties = new String[][] {
            new String[] { FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY, FuzzyJoinConfig.SIMILARITY_NAME_VALUE },
            new String[] { FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY,
                    "" + FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE },
            new String[] { FuzzyJoinConfig.TOKENIZER_PROPERTY, FuzzyJoinConfig.TOKENIZER_VALUE },
            new String[] { TOKENS_PACKAGE_PROPERTY, TOKENS_PACKAGE_VALUE },
            new String[] { TOKENS_LENGTHSTATS_PROPERTY, "" + TOKENS_LENGTHSTATS_VALUE },
            new String[] { RIDPAIRS_GROUP_CLASS_PROPERTY, RIDPAIRS_GROUP_CLASS_VALUE },
            new String[] { RIDPAIRS_GROUP_FACTOR_PROPERTY, "" + RIDPAIRS_GROUP_FACTOR_VALUE },
            new String[] { FuzzyJoinConfig.DATA_TOKENS_PROPERTY, "" },
            new String[] { DATA_JOININDEX_PROPERTY, "" }, };
    for (int crt = 0; crt < properties.length; crt++) {
        if (crt > 0) {
            ret += "\n                ";
        }
        ret += properties[crt][0] + "=" + job.get(properties[crt][0], properties[crt][1]);
    }
    ret += "}";
    System.out.println(ret);
    //
    // run job
    //
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    JobClient.runJob(job);
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println(
            "The job took " + (end_time.getTime() - startTime.getTime()) / (float) 1000.0 + " seconds.");
}

From source file:edu.uci.ics.fuzzyjoin.hadoop.recordpairs.MapBasicJoin.java

License:Apache License

@Override
public void configure(JobConf job) {
    ///*from  ww  w  . ja va 2  s .  co  m*/
    // get suffix for second relation
    //
    suffixSecond = job.get(FuzzyJoinDriver.DATA_SUFFIX_INPUT_PROPERTY, "")
            .split(FuzzyJoinDriver.SEPSARATOR_REGEX)[1];
}

From source file:edu.uci.ics.fuzzyjoin.hadoop.recordpairs.Reduce.java

License:Apache License

@Override
public void configure(JobConf job) {
    isSelfJoin = "".equals(job.get(FuzzyJoinDriver.DATA_SUFFIX_INPUT_PROPERTY, ""));
}

From source file:edu.uci.ics.fuzzyjoin.hadoop.ridpairs.ppjoin.MapJoin.java

License:Apache License

@Override
public void configure(JobConf job) {
    ////from   ww w .j a  v a2  s . com
    // set Tokenizer and SimilarityFilters
    //
    tokenizer = TokenizerFactory.getTokenizer(
            job.get(FuzzyJoinConfig.TOKENIZER_PROPERTY, FuzzyJoinConfig.TOKENIZER_VALUE),
            FuzzyJoinConfig.WORD_SEPARATOR_REGEX, FuzzyJoinConfig.TOKEN_SEPARATOR);
    String similarityName = job.get(FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_NAME_VALUE);
    float similarityThreshold = job.getFloat(FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE);
    similarityFilters = SimilarityFiltersFactory.getSimilarityFilters(similarityName, similarityThreshold);
    //
    // set TokenRank and TokenGroup
    //
    Path tokensPath;
    Path lengthstatsPath = null;
    try {
        Path[] cache = DistributedCache.getLocalCacheFiles(job);
        if (cache == null) {
            tokensPath = new Path(job.get(FuzzyJoinConfig.DATA_TOKENS_PROPERTY));
            try {
                lengthstatsPath = new Path(job.get(FuzzyJoinDriver.DATA_LENGTHSTATS_PROPERTY));
            } catch (IllegalArgumentException e) {
            }
        } else {
            tokensPath = cache[0];
            if (job.getBoolean(FuzzyJoinDriver.TOKENS_LENGTHSTATS_PROPERTY,
                    FuzzyJoinDriver.TOKENS_LENGTHSTATS_VALUE)) {
                lengthstatsPath = cache[1];
            }
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    String recordGroupClass = job.get(FuzzyJoinDriver.RIDPAIRS_GROUP_CLASS_PROPERTY,
            FuzzyJoinDriver.RIDPAIRS_GROUP_CLASS_VALUE);
    int recordGroupFactor = job.getInt(FuzzyJoinDriver.RIDPAIRS_GROUP_FACTOR_PROPERTY,
            FuzzyJoinDriver.RIDPAIRS_GROUP_FACTOR_VALUE);
    recordGroup = RecordGroupFactory.getRecordGroup(recordGroupClass,
            Math.max(1, job.getNumReduceTasks() * recordGroupFactor), similarityFilters, "" + lengthstatsPath);
    TokenLoad tokenLoad = new TokenLoad(tokensPath.toString(), tokenRank);
    tokenLoad.loadTokenRank();
    //
    // set dataColumn
    //
    dataColumns[0] = FuzzyJoinUtil
            .getDataColumns(job.get(FuzzyJoinConfig.RECORD_DATA_PROPERTY, FuzzyJoinConfig.RECORD_DATA_VALUE));
    dataColumns[1] = FuzzyJoinUtil
            .getDataColumns(job.get(FuzzyJoinConfig.RECORD_DATA1_PROPERTY, FuzzyJoinConfig.RECORD_DATA1_VALUE));
    //
    // get suffix for second relation
    //
    suffixSecond = job.get(FuzzyJoinDriver.DATA_SUFFIX_INPUT_PROPERTY, "")
            .split(FuzzyJoinDriver.SEPSARATOR_REGEX)[1];
}

From source file:edu.uci.ics.fuzzyjoin.hadoop.ridpairs.ppjoin.MapSelfJoin.java

License:Apache License

@Override
public void configure(JobConf job) {
    ///*ww w . j a  v a2  s.  co  m*/
    // set Tokenizer and SimilarityFilters
    //
    tokenizer = TokenizerFactory.getTokenizer(
            job.get(FuzzyJoinConfig.TOKENIZER_PROPERTY, FuzzyJoinConfig.TOKENIZER_VALUE),
            FuzzyJoinConfig.WORD_SEPARATOR_REGEX, FuzzyJoinConfig.TOKEN_SEPARATOR);
    String similarityName = job.get(FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_NAME_VALUE);
    float similarityThreshold = job.getFloat(FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE);
    similarityFilters = SimilarityFiltersFactory.getSimilarityFilters(similarityName, similarityThreshold);
    //
    // set TokenRank and TokenGroup
    //
    Path tokensPath;
    Path lengthstatsPath = null;
    try {
        Path[] cache = DistributedCache.getLocalCacheFiles(job);
        if (cache == null) {
            tokensPath = new Path(job.get(FuzzyJoinConfig.DATA_TOKENS_PROPERTY));
            try {
                lengthstatsPath = new Path(job.get(FuzzyJoinDriver.DATA_LENGTHSTATS_PROPERTY));
            } catch (IllegalArgumentException e) {
            }
        } else {
            tokensPath = cache[0];
            if (job.getBoolean(FuzzyJoinDriver.TOKENS_LENGTHSTATS_PROPERTY,
                    FuzzyJoinDriver.TOKENS_LENGTHSTATS_VALUE)) {
                lengthstatsPath = cache[1];
            }
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    String recordGroupClass = job.get(FuzzyJoinDriver.RIDPAIRS_GROUP_CLASS_PROPERTY,
            FuzzyJoinDriver.RIDPAIRS_GROUP_CLASS_VALUE);
    int recordGroupFactor = job.getInt(FuzzyJoinDriver.RIDPAIRS_GROUP_FACTOR_PROPERTY,
            FuzzyJoinDriver.RIDPAIRS_GROUP_FACTOR_VALUE);
    recordGroup = RecordGroupFactory.getRecordGroup(recordGroupClass,
            Math.max(1, job.getNumReduceTasks() * recordGroupFactor), similarityFilters, "" + lengthstatsPath);
    TokenLoad tokenLoad = new TokenLoad(tokensPath.toString(), tokenRank);
    tokenLoad.loadTokenRank();
    //
    // set dataColumn
    //
    dataColumns = FuzzyJoinUtil
            .getDataColumns(job.get(FuzzyJoinConfig.RECORD_DATA_PROPERTY, FuzzyJoinConfig.RECORD_DATA_VALUE));
}

From source file:edu.uci.ics.fuzzyjoin.hadoop.ridpairs.token.MapJoin.java

License:Apache License

@Override
public void configure(JobConf job) {
    ///*from w w w  . j a v a2 s.c o  m*/
    // set Tokenizer and SimilarityFilters
    //
    tokenizer = TokenizerFactory.getTokenizer(
            job.get(FuzzyJoinConfig.TOKENIZER_PROPERTY, FuzzyJoinConfig.TOKENIZER_VALUE),
            FuzzyJoinConfig.WORD_SEPARATOR_REGEX, FuzzyJoinConfig.TOKEN_SEPARATOR);
    String similarityName = job.get(FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_NAME_VALUE);
    float similarityThreshold = job.getFloat(FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE);
    similarityFilters = SimilarityFiltersFactory.getSimilarityFilters(similarityName, similarityThreshold);
    //
    // set TokenRank
    //
    Path tokensPath;
    try {
        Path[] cache = DistributedCache.getLocalCacheFiles(job);
        if (cache == null) {
            tokensPath = new Path(job.get(FuzzyJoinConfig.DATA_TOKENS_PROPERTY));
        } else {
            tokensPath = cache[0];
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    TokenLoad tokenLoad = new TokenLoad(tokensPath.toString(), tokenRank);
    tokenLoad.loadTokenRank();
    //
    // set dataColumn
    //
    dataColumns[0] = FuzzyJoinUtil
            .getDataColumns(job.get(FuzzyJoinConfig.RECORD_DATA_PROPERTY, FuzzyJoinConfig.RECORD_DATA_VALUE));
    dataColumns[1] = FuzzyJoinUtil
            .getDataColumns(job.get(FuzzyJoinConfig.RECORD_DATA1_PROPERTY, FuzzyJoinConfig.RECORD_DATA1_VALUE));
    //
    // get suffix for second relation
    //
    suffixSecond = job.get(FuzzyJoinDriver.DATA_SUFFIX_INPUT_PROPERTY, "")
            .split(FuzzyJoinDriver.SEPSARATOR_REGEX)[1];
}

From source file:edu.uci.ics.fuzzyjoin.hadoop.ridpairs.token.MapSelfJoin.java

License:Apache License

@Override
public void configure(JobConf job) {
    ////w  ww. j a v  a2s  . c  om
    // set Tokenizer and SimilarityFilters
    //
    tokenizer = TokenizerFactory.getTokenizer(
            job.get(FuzzyJoinConfig.TOKENIZER_PROPERTY, FuzzyJoinConfig.TOKENIZER_VALUE),
            FuzzyJoinConfig.WORD_SEPARATOR_REGEX, FuzzyJoinConfig.TOKEN_SEPARATOR);
    String similarityName = job.get(FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_NAME_VALUE);
    float similarityThreshold = job.getFloat(FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE);
    similarityFilters = SimilarityFiltersFactory.getSimilarityFilters(similarityName, similarityThreshold);
    //
    // set TokenRank
    //
    Path tokensPath;
    try {
        Path[] cache = DistributedCache.getLocalCacheFiles(job);
        if (cache == null) {
            tokensPath = new Path(job.get(FuzzyJoinConfig.DATA_TOKENS_PROPERTY));
        } else {
            tokensPath = cache[0];
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    TokenLoad tokenLoad = new TokenLoad(tokensPath.toString(), tokenRank);
    tokenLoad.loadTokenRank();
    //
    // set dataColumn
    //
    dataColumns = FuzzyJoinUtil
            .getDataColumns(job.get(FuzzyJoinConfig.RECORD_DATA_PROPERTY, FuzzyJoinConfig.RECORD_DATA_VALUE));
}

From source file:edu.uci.ics.fuzzyjoin.hadoop.ridpairs.token.ReduceVerifyListJoin.java

License:Apache License

@Override
public void configure(JobConf job) {
    ////  w  ww .j  a  v a2s.co  m
    // set SimilarityFilters
    //
    String similarityName = job.get(FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_NAME_VALUE);
    similarityThreshold = job.getFloat(FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE);
    similarityFilters = SimilarityFiltersFactory.getSimilarityFilters(similarityName, similarityThreshold);
    // similarityMetric = SimilarityMetricFactory
    // .getSimilarityMetric(similarityName);
}

From source file:edu.uci.ics.fuzzyjoin.hadoop.ridrecordpairs.ppjoin.MapJoin.java

License:Apache License

@Override
public void configure(JobConf job) {
    ////from w w  w  .jav  a2  s  .c  o m
    // set Tokenizer and SimilarityFilters
    //
    tokenizer = TokenizerFactory.getTokenizer(
            job.get(FuzzyJoinConfig.TOKENIZER_PROPERTY, FuzzyJoinConfig.TOKENIZER_VALUE),
            FuzzyJoinConfig.WORD_SEPARATOR_REGEX, FuzzyJoinConfig.TOKEN_SEPARATOR);
    String similarityName = job.get(FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_NAME_VALUE);
    float similarityThreshold = job.getFloat(FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE);
    similarityFilters = SimilarityFiltersFactory.getSimilarityFilters(similarityName, similarityThreshold);
    //
    // set TokenRank and TokenGroup
    //
    Path tokensPath;
    Path lengthstatsPath = null;
    try {
        Path[] cache = DistributedCache.getLocalCacheFiles(job);
        if (cache == null) {
            tokensPath = new Path(job.get(FuzzyJoinConfig.DATA_TOKENS_PROPERTY));
            try {
                lengthstatsPath = new Path(job.get(FuzzyJoinDriver.DATA_LENGTHSTATS_PROPERTY));
            } catch (IllegalArgumentException e) {
            }
        } else {
            tokensPath = cache[0];
            if (job.getBoolean(FuzzyJoinDriver.TOKENS_LENGTHSTATS_PROPERTY,
                    FuzzyJoinDriver.TOKENS_LENGTHSTATS_VALUE)) {
                lengthstatsPath = cache[1];
            }
        }
    } catch (IOException e) {
        throw new RuntimeException(e);

    }
    String recordGroupClass = job.get(FuzzyJoinDriver.RIDPAIRS_GROUP_CLASS_PROPERTY,
            FuzzyJoinDriver.RIDPAIRS_GROUP_CLASS_VALUE);
    int recordGroupFactor = job.getInt(FuzzyJoinDriver.RIDPAIRS_GROUP_FACTOR_PROPERTY,
            FuzzyJoinDriver.RIDPAIRS_GROUP_FACTOR_VALUE);
    recordGroup = RecordGroupFactory.getRecordGroup(recordGroupClass,
            Math.max(1, job.getNumReduceTasks() * recordGroupFactor), similarityFilters, "" + lengthstatsPath);
    TokenLoad tokenLoad = new TokenLoad(tokensPath.toString(), tokenRank);
    tokenLoad.loadTokenRank();
    //
    // set dataColumn
    //
    dataColumns[0] = FuzzyJoinUtil
            .getDataColumns(job.get(FuzzyJoinConfig.RECORD_DATA_PROPERTY, FuzzyJoinConfig.RECORD_DATA_VALUE));
    dataColumns[1] = FuzzyJoinUtil
            .getDataColumns(job.get(FuzzyJoinConfig.RECORD_DATA1_PROPERTY, FuzzyJoinConfig.RECORD_DATA1_VALUE));
    //
    // get suffix for second relation
    //
    suffixSecond = job.get(FuzzyJoinDriver.DATA_SUFFIX_INPUT_PROPERTY, "")
            .split(FuzzyJoinDriver.SEPSARATOR_REGEX)[1];
}