Example usage for org.apache.hadoop.mapred JobConf getFloat

List of usage examples for org.apache.hadoop.mapred JobConf getFloat

Introduction

On this page you can find example usage for org.apache.hadoop.mapred JobConf getFloat.

Prototype

public float getFloat(String name, float defaultValue) 

Document

Get the value of the name property as a float.
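
A minimal, self-contained sketch of the method in isolation (the property name my.demo.threshold and its values are made up for illustration):

import org.apache.hadoop.mapred.JobConf;

public class GetFloatDemo {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // The property is not set, so getFloat returns the supplied default.
        System.out.println(conf.getFloat("my.demo.threshold", 0.5f)); // prints 0.5

        // Once the property is set, getFloat returns the stored value instead.
        conf.setFloat("my.demo.threshold", 0.75f);
        System.out.println(conf.getFloat("my.demo.threshold", 0.5f)); // prints 0.75
    }
}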

Usage

From source file:StreamWikiDumpInputFormat.java

License:Apache License

public List<InputSplit> getSplits(JobConf job, FileStatus file, String pattern, long splitSize)
        throws IOException {
    NetworkTopology clusterMap = new NetworkTopology();
    List<InputSplit> splits = new ArrayList<InputSplit>();
    Path path = file.getPath();
    long length = file.getLen();
    FileSystem fs = file.getPath().getFileSystem(job);
    BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
    if ((length != 0) && isSplitable(fs, path)) {

        long bytesRemaining = length;
        SeekableInputStream in = SeekableInputStream.getInstance(path, 0, length, fs, this.compressionCodecs);
        long start = 0;
        long skip = 0;
        LOG.info("locations=" + Arrays.asList(blkLocations));
        FileSplit split = null;
        Set<Long> processedPageEnds = new HashSet<Long>();
        float factor = job.getFloat(KEY_SKIP_FACTOR, 1.2F);

        READLOOP: while (((double) bytesRemaining) / splitSize > factor && bytesRemaining > 0) {
            // prepare matcher
            ByteMatcher matcher;
            {
                long st = Math.min(start + skip + splitSize, length - 1);
                split = makeSplit(path, st, Math.min(splitSize, length - st), clusterMap, blkLocations);
                System.err.println("split move to: " + split);
                if (in != null)
                    in.close();
                if (split.getLength() <= 1) {
                    break;
                }
                in = SeekableInputStream.getInstance(split, fs, this.compressionCodecs);
                // SplitCompressionInputStream cin =
                // in.getSplitCompressionInputStream();
            }
            matcher = new ByteMatcher(in);

            // read until the next page end in the look-ahead split
            while (!matcher.readUntilMatch(pageEndPattern, null, split.getStart() + split.getLength())) {
                if (matcher.getPos() >= length || split.getLength() == length - split.getStart())
                    break READLOOP;
                split = makeSplit(path, split.getStart(),
                        Math.min(split.getLength() + splitSize, length - split.getStart()), clusterMap,
                        blkLocations);
                System.err.println("split extend to: " + split);
            }
            System.err.println(
                    path + ": #" + splits.size() + " " + pageEndPattern + " found: pos=" + matcher.getPos()
                            + " last=" + matcher.getLastUnmatchPos() + " read=" + matcher.getReadBytes()
                            + " current=" + start + " remaining=" + bytesRemaining + " split=" + split);
            if (matcher.getLastUnmatchPos() > 0 && matcher.getPos() > matcher.getLastUnmatchPos()
                    && !processedPageEnds.contains(matcher.getPos())) {
                splits.add(makeSplit(path, start, matcher.getPos() - start, clusterMap, blkLocations));
                processedPageEnds.add(matcher.getPos());
                long newstart = Math.max(matcher.getLastUnmatchPos(), start);
                bytesRemaining = length - newstart;
                start = newstart;
                skip = 0;
            } else {
                skip = matcher.getPos() - start;
            }
        }

        if (bytesRemaining > 0 && !processedPageEnds.contains(length)) {
            System.err.println(
                    pageEndPattern + " remaining: pos=" + (length - bytesRemaining) + " end=" + length);
            splits.add(makeSplit(path, length - bytesRemaining, bytesRemaining,
                    blkLocations[blkLocations.length - 1].getHosts()));
        }
        if (in != null)
            in.close();
    } else if (length != 0) {
        splits.add(makeSplit(path, 0, length, clusterMap, blkLocations));
    } else {
        // Create empty hosts array for zero length files
        splits.add(makeSplit(path, 0, length, new String[0]));
    }
    return splits;
}

From source file:FIMReducer.java

License:Apache License

@Override
public void configure(JobConf conf) {
    minFreqPercent = conf.getInt("PARMM.minFreqPercent", 20);
    sampleSize = conf.getInt("PARMM.sampleSize", 1000);
    epsilon = conf.getFloat("PARMM.epsilon", 0.05f);
    id = conf.getInt("mapred.task.partition", -1);
    set = false;
}
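
The defaults in configure only apply when the driver never set the corresponding properties. A hypothetical driver-side counterpart (the PARMM.* keys come from the example above; the class name and values are invented for illustration) would look like:

import org.apache.hadoop.mapred.JobConf;

public class FIMDriverSketch {
    public static void configureJob(JobConf conf) {
        conf.setInt("PARMM.minFreqPercent", 25);
        conf.setInt("PARMM.sampleSize", 5000);
        // Read back by FIMReducer.configure via getFloat.
        conf.setFloat("PARMM.epsilon", 0.01f);
    }
}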

From source file:com.ebay.erl.mobius.core.mapred.ConfigurableJob.java

License:Apache License

@Override
protected synchronized void submit() {
    JobConf jobConf = this.getJobConf();
    boolean isLocalHadoop = jobConf.get("mapred.job.tracker", "local").equals("local");

    // the default partitioner is {@link com.ebay.erl.mobius.core.datajoin.DataJoinKeyPartitioner}
    // which is hash based.
    //
    // If the user chooses the even partitioner, Mobius will use
    // {@link com.ebay.erl.mobius.core.datajoin.EvenlyPartitioner}, a
    // sampling-based partitioner that attempts to balance the load
    // across reducers.
    String partitioner = jobConf.get("mobius.partitioner", "default");

    if (!isLocalHadoop && jobConf.getNumReduceTasks() != 0 && partitioner.equals("even")) {
        // this job needs a reducer; sample the keys so that the
        // load on the reducers is almost evenly distributed.

        double freq = jobConf.getFloat("mobius.sampler.freq", 0.1F);
        int numSamples = jobConf.getInt("mobius.sampler.num.samples", 50000);
        int maxSplits = jobConf.getInt("mobius.sampler.max.slipts.sampled", 5);

        // log sampling parameters so that user knows.
        LOGGER.info("Sampling parameters { " + "mobius.sampler.freq:" + format.format(freq) + ", "
                + "mobius.sampler.num.samples:" + numSamples + ", " + "mobius.sampler.max.slipts.sampled:"
                + maxSplits + "}");

        InputSampler.Sampler<?, ?> sampler = new MobiusInputSampler(freq, numSamples, maxSplits);

        writePartitionFile(jobConf, sampler);

        // add to distributed cache
        try {
            URI partitionUri = new URI(TotalOrderPartitioner.getPartitionFile(jobConf) + "#_partitions");
            LOGGER.info("Adding partition uri to distributed cache:" + partitionUri.toString());

            DistributedCache.addCacheFile(partitionUri, jobConf);
            DistributedCache.createSymlink(jobConf);
            jobConf.setPartitionerClass(EvenlyPartitioner.class);

            LOGGER.info("Using " + EvenlyPartitioner.class.getCanonicalName()
                    + " to partiton the keys evenly among reducers.");
        } catch (URISyntaxException e) {
            LOGGER.error(e.getMessage(), e);
            throw new RuntimeException(e);
        }

        // adding -XX:-UseParallelOldGC, this will automatically set -XX:-UseParallelGC
        // according to Oracle's specification
        String jvmOpts = jobConf.get("mapred.child.java.opts", "");
        if (jvmOpts.isEmpty()) {
            jvmOpts = "-XX:-UseParallelOldGC";
        } else {
            if (jvmOpts.indexOf("-XX:-UseParallelOldGC") < 0) {
                // remove "
                jvmOpts = jvmOpts.replaceAll("\"", "");
                jvmOpts = jvmOpts.concat(" -XX:-UseParallelOldGC");
            }
        }
        jobConf.set("mapred.child.java.opts", jvmOpts);

        this.setJobConf(jobConf);
    }
    LOGGER.info("Submiting job:" + jobConf.getJobName());
    super.submit();
}
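
The sampling branch above only runs when the job is non-local, has reducers, and the even partitioner is selected. A hedged sketch of the driver-side settings, using only configuration keys that appear in the method (the class name and values are illustrative):

import org.apache.hadoop.mapred.JobConf;

public class MobiusEvenPartitionerSketch {
    public static void main(String[] args) {
        JobConf jobConf = new JobConf();
        jobConf.setNumReduceTasks(8);
        // Switch from the default hash-based partitioner to the even partitioner.
        jobConf.set("mobius.partitioner", "even");
        // Sampler tuning; read back in submit() via getFloat and getInt.
        jobConf.setFloat("mobius.sampler.freq", 0.05f);
        jobConf.setInt("mobius.sampler.num.samples", 100000);
        jobConf.setInt("mobius.sampler.max.slipts.sampled", 10); // key spelled as in the source
    }
}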

From source file:com.mellanox.hadoop.mapred.UdaPlugin.java

License:Apache License

public UdaPluginRT(UdaShuffleConsumerPluginShared udaShuffleConsumer, ReduceTask reduceTask, JobConf jobConf,
        Reporter reporter, int numMaps) throws IOException {
    super(jobConf);
    this.udaShuffleConsumer = udaShuffleConsumer;
    this.reduceTask = reduceTask;

    String totalRdmaSizeStr = jobConf.get("mapred.rdma.shuffle.total.size", "0"); // the default of 0 means this parameter is ignored and -Xmx plus mapred.job.shuffle.input.buffer.percent are used instead
    long totalRdmaSize = StringUtils.TraditionalBinaryPrefix.string2long(totalRdmaSizeStr);
    long maxRdmaBufferSize = jobConf.getLong("mapred.rdma.buf.size", 1024);
    long minRdmaBufferSize = jobConf.getLong("mapred.rdma.buf.size.min", 16);
    long shuffleMemorySize = totalRdmaSize;
    StringBuilder meminfoSb = new StringBuilder();
    meminfoSb.append("UDA: numMaps=").append(numMaps);
    meminfoSb.append(", maxRdmaBufferSize=").append(maxRdmaBufferSize);
    meminfoSb.append("KB, minRdmaBufferSize=").append(minRdmaBufferSize).append("KB");
    meminfoSb.append("KB, rdmaShuffleTotalSize=").append(totalRdmaSize);

    if (totalRdmaSize < 0) {
        LOG.warn("Illegal paramter value: mapred.rdma.shuffle.total.size=" + totalRdmaSize);
    }

    if (totalRdmaSize <= 0) {
        long maxHeapSize = Runtime.getRuntime().maxMemory();
        double shuffleInputBufferPercent = jobConf.getFloat("mapred.job.shuffle.input.buffer.percent",
                DEFAULT_SHUFFLE_INPUT_PERCENT);
        if ((shuffleInputBufferPercent < 0) || (shuffleInputBufferPercent > 1)) {
            LOG.warn("UDA: mapred.job.shuffle.input.buffer.percent is out of range - set to default: "
                    + DEFAULT_SHUFFLE_INPUT_PERCENT);
            shuffleInputBufferPercent = DEFAULT_SHUFFLE_INPUT_PERCENT;
        }
        shuffleMemorySize = (long) (maxHeapSize * shuffleInputBufferPercent);

        LOG.info("Using JAVA Xmx with mapred.job.shuffle.input.buffer.percent to limit UDA shuffle memory");

        meminfoSb.append(", maxHeapSize=").append(maxHeapSize).append("B");
        meminfoSb.append(", shuffleInputBufferPercent=").append(shuffleInputBufferPercent);
        meminfoSb.append("==> shuffleMemorySize=").append(shuffleMemorySize).append("B");

        LOG.info("RDMA shuffle memory is limited to " + shuffleMemorySize / 1024 / 1024 + "MB");
    } else {
        LOG.info("Using mapred.rdma.shuffle.total.size to limit UDA shuffle memory");
        LOG.info("RDMA shuffle memory is limited to " + totalRdmaSize / 1024 / 1024 + "MB");
    }

    LOG.debug(meminfoSb.toString());
    LOG.info("UDA: user prefer rdma.buf.size=" + maxRdmaBufferSize + "KB");
    LOG.info("UDA: minimum rdma.buf.size=" + minRdmaBufferSize + "KB");

    if (jobConf.getSpeculativeExecution()) { // (getMapSpeculativeExecution() || getReduceSpeculativeExecution())
        LOG.info("UDA has limited support for map task speculative execution");
    }

    LOG.info("UDA: number of segments to fetch: " + numMaps);

    /* init variables */
    init_kv_bufs();

    launchCppSide(true, this); // true: this is RT => we should execute NetMerger

    this.j2c_queue = new J2CQueue<K, V>();
    this.mTaskReporter = reporter;
    this.mMapsNeed = numMaps;

    /* send init message */
    TaskAttemptID reduceId = reduceTask.getTaskID();

    mParams.clear();
    mParams.add(Integer.toString(numMaps));
    mParams.add(reduceId.getJobID().toString());
    mParams.add(reduceId.toString());
    mParams.add(jobConf.get("mapred.netmerger.hybrid.lpq.size", "0"));
    mParams.add(Long.toString(maxRdmaBufferSize * 1024)); // in bytes - pass the raw value from the XML file (converted to bytes only)
    mParams.add(Long.toString(minRdmaBufferSize * 1024)); // in bytes - passed to check whether rdmaBuffer is still larger than minRdmaBuffer after alignment
    mParams.add(jobConf.getOutputKeyClass().getName());

    boolean compression = jobConf.getCompressMapOutput(); // whether map output compression is enabled
    String alg = null;
    if (compression) {
        alg = jobConf.get("mapred.map.output.compression.codec", null);
    }
    mParams.add(alg);

    String bufferSize = Integer.toString(256 * 1024);
    if (alg != null) {
        if (alg.contains("lzo.LzoCodec")) {
            bufferSize = jobConf.get("io.compression.codec.lzo.buffersize", bufferSize);
        } else if (alg.contains("SnappyCodec")) {
            bufferSize = jobConf.get("io.compression.codec.snappy.buffersize", bufferSize);
        }
    }
    mParams.add(bufferSize);
    mParams.add(Long.toString(shuffleMemorySize));

    String[] dirs = jobConf.getLocalDirs();
    ArrayList<String> dirsCanBeCreated = new ArrayList<String>();
    //checking if the directories can be created
    for (int i = 0; i < dirs.length; i++) {
        try {
            DiskChecker.checkDir(new File(dirs[i].trim()));
            //saving only the directories that can be created
            dirsCanBeCreated.add(dirs[i].trim());
        } catch (DiskErrorException e) {
            // skip directories that cannot be created
        }
    }
    //sending the directories
    int numDirs = dirsCanBeCreated.size();
    mParams.add(Integer.toString(numDirs));
    for (int i = 0; i < numDirs; i++) {
        mParams.add(dirsCanBeCreated.get(i));
    }

    LOG.info("mParams array is " + mParams);
    LOG.info("UDA: sending INIT_COMMAND");
    String msg = UdaCmd.formCmd(UdaCmd.INIT_COMMAND, mParams);
    UdaBridge.doCommand(msg);
    this.mProgress = new Progress();
    this.mProgress.set(0.5f);
}
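
The range check on mapred.job.shuffle.input.buffer.percent above is a common defensive pattern when reading a fraction with getFloat. A condensed, standalone restatement (the 0.70f default is an assumption matching classic Hadoop configurations, not taken from this source):

// Minimal sketch: read a fraction and clamp out-of-range values back to a default.
static float readShuffleInputPercent(JobConf conf) {
    final float DEFAULT_PERCENT = 0.70f; // assumed default
    float pct = conf.getFloat("mapred.job.shuffle.input.buffer.percent", DEFAULT_PERCENT);
    if (pct < 0f || pct > 1f) {
        pct = DEFAULT_PERCENT; // fall back rather than fail
    }
    return pct;
}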

From source file:edu.uci.ics.fuzzyjoin.hadoop.ridpairs.ppjoin.MapJoin.java

License:Apache License

@Override
public void configure(JobConf job) {
    //
    // set Tokenizer and SimilarityFilters
    //
    tokenizer = TokenizerFactory.getTokenizer(
            job.get(FuzzyJoinConfig.TOKENIZER_PROPERTY, FuzzyJoinConfig.TOKENIZER_VALUE),
            FuzzyJoinConfig.WORD_SEPARATOR_REGEX, FuzzyJoinConfig.TOKEN_SEPARATOR);
    String similarityName = job.get(FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_NAME_VALUE);
    float similarityThreshold = job.getFloat(FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE);
    similarityFilters = SimilarityFiltersFactory.getSimilarityFilters(similarityName, similarityThreshold);
    //
    // set TokenRank and TokenGroup
    //
    Path tokensPath;
    Path lengthstatsPath = null;
    try {
        Path[] cache = DistributedCache.getLocalCacheFiles(job);
        if (cache == null) {
            tokensPath = new Path(job.get(FuzzyJoinConfig.DATA_TOKENS_PROPERTY));
            try {
                lengthstatsPath = new Path(job.get(FuzzyJoinDriver.DATA_LENGTHSTATS_PROPERTY));
            } catch (IllegalArgumentException e) {
                // the length-stats property is optional; ignore if unset
            }
        } else {
            tokensPath = cache[0];
            if (job.getBoolean(FuzzyJoinDriver.TOKENS_LENGTHSTATS_PROPERTY,
                    FuzzyJoinDriver.TOKENS_LENGTHSTATS_VALUE)) {
                lengthstatsPath = cache[1];
            }
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    String recordGroupClass = job.get(FuzzyJoinDriver.RIDPAIRS_GROUP_CLASS_PROPERTY,
            FuzzyJoinDriver.RIDPAIRS_GROUP_CLASS_VALUE);
    int recordGroupFactor = job.getInt(FuzzyJoinDriver.RIDPAIRS_GROUP_FACTOR_PROPERTY,
            FuzzyJoinDriver.RIDPAIRS_GROUP_FACTOR_VALUE);
    recordGroup = RecordGroupFactory.getRecordGroup(recordGroupClass,
            Math.max(1, job.getNumReduceTasks() * recordGroupFactor), similarityFilters, "" + lengthstatsPath);
    TokenLoad tokenLoad = new TokenLoad(tokensPath.toString(), tokenRank);
    tokenLoad.loadTokenRank();
    //
    // set dataColumn
    //
    dataColumns[0] = FuzzyJoinUtil
            .getDataColumns(job.get(FuzzyJoinConfig.RECORD_DATA_PROPERTY, FuzzyJoinConfig.RECORD_DATA_VALUE));
    dataColumns[1] = FuzzyJoinUtil
            .getDataColumns(job.get(FuzzyJoinConfig.RECORD_DATA1_PROPERTY, FuzzyJoinConfig.RECORD_DATA1_VALUE));
    //
    // get suffix for second relation
    //
    suffixSecond = job.get(FuzzyJoinDriver.DATA_SUFFIX_INPUT_PROPERTY, "")
            .split(FuzzyJoinDriver.SEPSARATOR_REGEX)[1];
}

From source file:edu.uci.ics.fuzzyjoin.hadoop.ridpairs.ppjoin.MapSelfJoin.java

License:Apache License

@Override
public void configure(JobConf job) {
    //
    // set Tokenizer and SimilarityFilters
    //
    tokenizer = TokenizerFactory.getTokenizer(
            job.get(FuzzyJoinConfig.TOKENIZER_PROPERTY, FuzzyJoinConfig.TOKENIZER_VALUE),
            FuzzyJoinConfig.WORD_SEPARATOR_REGEX, FuzzyJoinConfig.TOKEN_SEPARATOR);
    String similarityName = job.get(FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_NAME_VALUE);
    float similarityThreshold = job.getFloat(FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE);
    similarityFilters = SimilarityFiltersFactory.getSimilarityFilters(similarityName, similarityThreshold);
    //
    // set TokenRank and TokenGroup
    //
    Path tokensPath;
    Path lengthstatsPath = null;
    try {
        Path[] cache = DistributedCache.getLocalCacheFiles(job);
        if (cache == null) {
            tokensPath = new Path(job.get(FuzzyJoinConfig.DATA_TOKENS_PROPERTY));
            try {
                lengthstatsPath = new Path(job.get(FuzzyJoinDriver.DATA_LENGTHSTATS_PROPERTY));
            } catch (IllegalArgumentException e) {
                // the length-stats property is optional; ignore if unset
            }
        } else {
            tokensPath = cache[0];
            if (job.getBoolean(FuzzyJoinDriver.TOKENS_LENGTHSTATS_PROPERTY,
                    FuzzyJoinDriver.TOKENS_LENGTHSTATS_VALUE)) {
                lengthstatsPath = cache[1];
            }
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    String recordGroupClass = job.get(FuzzyJoinDriver.RIDPAIRS_GROUP_CLASS_PROPERTY,
            FuzzyJoinDriver.RIDPAIRS_GROUP_CLASS_VALUE);
    int recordGroupFactor = job.getInt(FuzzyJoinDriver.RIDPAIRS_GROUP_FACTOR_PROPERTY,
            FuzzyJoinDriver.RIDPAIRS_GROUP_FACTOR_VALUE);
    recordGroup = RecordGroupFactory.getRecordGroup(recordGroupClass,
            Math.max(1, job.getNumReduceTasks() * recordGroupFactor), similarityFilters, "" + lengthstatsPath);
    TokenLoad tokenLoad = new TokenLoad(tokensPath.toString(), tokenRank);
    tokenLoad.loadTokenRank();
    //
    // set dataColumn
    //
    dataColumns = FuzzyJoinUtil
            .getDataColumns(job.get(FuzzyJoinConfig.RECORD_DATA_PROPERTY, FuzzyJoinConfig.RECORD_DATA_VALUE));
}

From source file:edu.uci.ics.fuzzyjoin.hadoop.ridpairs.ppjoin.ReduceJoin.java

License:Apache License

@Override
public void configure(JobConf job) {
    similarityThreshold = job.getFloat(FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE);
}

From source file:edu.uci.ics.fuzzyjoin.hadoop.ridpairs.token.MapJoin.java

License:Apache License

@Override
public void configure(JobConf job) {
    //
    // set Tokenizer and SimilarityFilters
    //
    tokenizer = TokenizerFactory.getTokenizer(
            job.get(FuzzyJoinConfig.TOKENIZER_PROPERTY, FuzzyJoinConfig.TOKENIZER_VALUE),
            FuzzyJoinConfig.WORD_SEPARATOR_REGEX, FuzzyJoinConfig.TOKEN_SEPARATOR);
    String similarityName = job.get(FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_NAME_VALUE);
    float similarityThreshold = job.getFloat(FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE);
    similarityFilters = SimilarityFiltersFactory.getSimilarityFilters(similarityName, similarityThreshold);
    //
    // set TokenRank
    //
    Path tokensPath;
    try {
        Path[] cache = DistributedCache.getLocalCacheFiles(job);
        if (cache == null) {
            tokensPath = new Path(job.get(FuzzyJoinConfig.DATA_TOKENS_PROPERTY));
        } else {
            tokensPath = cache[0];
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    TokenLoad tokenLoad = new TokenLoad(tokensPath.toString(), tokenRank);
    tokenLoad.loadTokenRank();
    //
    // set dataColumn
    //
    dataColumns[0] = FuzzyJoinUtil
            .getDataColumns(job.get(FuzzyJoinConfig.RECORD_DATA_PROPERTY, FuzzyJoinConfig.RECORD_DATA_VALUE));
    dataColumns[1] = FuzzyJoinUtil
            .getDataColumns(job.get(FuzzyJoinConfig.RECORD_DATA1_PROPERTY, FuzzyJoinConfig.RECORD_DATA1_VALUE));
    //
    // get suffix for second relation
    //
    suffixSecond = job.get(FuzzyJoinDriver.DATA_SUFFIX_INPUT_PROPERTY, "")
            .split(FuzzyJoinDriver.SEPSARATOR_REGEX)[1];
}

From source file:edu.uci.ics.fuzzyjoin.hadoop.ridpairs.token.MapSelfJoin.java

License:Apache License

@Override
public void configure(JobConf job) {
    //
    // set Tokenizer and SimilarityFilters
    //
    tokenizer = TokenizerFactory.getTokenizer(
            job.get(FuzzyJoinConfig.TOKENIZER_PROPERTY, FuzzyJoinConfig.TOKENIZER_VALUE),
            FuzzyJoinConfig.WORD_SEPARATOR_REGEX, FuzzyJoinConfig.TOKEN_SEPARATOR);
    String similarityName = job.get(FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_NAME_VALUE);
    float similarityThreshold = job.getFloat(FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE);
    similarityFilters = SimilarityFiltersFactory.getSimilarityFilters(similarityName, similarityThreshold);
    //
    // set TokenRank
    //
    Path tokensPath;
    try {
        Path[] cache = DistributedCache.getLocalCacheFiles(job);
        if (cache == null) {
            tokensPath = new Path(job.get(FuzzyJoinConfig.DATA_TOKENS_PROPERTY));
        } else {
            tokensPath = cache[0];
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    TokenLoad tokenLoad = new TokenLoad(tokensPath.toString(), tokenRank);
    tokenLoad.loadTokenRank();
    //
    // set dataColumn
    //
    dataColumns = FuzzyJoinUtil
            .getDataColumns(job.get(FuzzyJoinConfig.RECORD_DATA_PROPERTY, FuzzyJoinConfig.RECORD_DATA_VALUE));
}

From source file:edu.uci.ics.fuzzyjoin.hadoop.ridpairs.token.ReduceVerifyListJoin.java

License:Apache License

@Override
public void configure(JobConf job) {
    //
    // set SimilarityFilters
    //
    String similarityName = job.get(FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_NAME_VALUE);
    similarityThreshold = job.getFloat(FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE);
    similarityFilters = SimilarityFiltersFactory.getSimilarityFilters(similarityName, similarityThreshold);
    // similarityMetric = SimilarityMetricFactory
    // .getSimilarityMetric(similarityName);
}