Example usage for org.apache.hadoop.conf Configuration setFloat

Introduction

This page lists example usages of org.apache.hadoop.conf.Configuration.setFloat.

Prototype

public void setFloat(String name, float value) 

Document

Set the value of the name property to a float.
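 
All of the examples below follow the same round trip: setFloat stores the value as a string under the given key, and getFloat reads it back, returning its second argument when the key is unset. A minimal, self-contained sketch (the key names are made up for illustration):

import org.apache.hadoop.conf.Configuration;

public class SetFloatDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Stored internally as the string "0.75"
        conf.setFloat("demo.threshold", 0.75f);

        // getFloat returns the stored value, or the supplied default
        // when the key is absent
        float threshold = conf.getFloat("demo.threshold", 0.5f); // 0.75
        float fallback = conf.getFloat("demo.not.set", 0.5f);    // 0.5

        System.out.println(threshold + " " + fallback);
    }
}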

Usage

From source file: RunPageRankSchimmy.java

License: Apache License

private void phase2(String path, int i, int j, int n, float missing) throws Exception {
    Configuration conf = getConf();

    LOG.info("missing PageRank mass: " + missing);
    LOG.info("number of nodes: " + n);

    String in = path + "/iter" + FORMAT.format(j) + "t";
    String out = path + "/iter" + FORMAT.format(j);

    LOG.info("PageRankSchimmy: iteration " + j + ": Phase2");
    LOG.info(" - input: " + in);
    LOG.info(" - output: " + out);

    Job job = Job.getInstance(conf);
    job.setJobName("PageRankSchimmy:iteration" + j + ":Phase2");
    job.setJarByClass(RunPageRankSchimmy.class);
    job.setNumReduceTasks(0);

    FileInputFormat.setInputPaths(job, new Path(in));
    FileOutputFormat.setOutputPath(job, new Path(out));

    job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNode.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNode.class);

    job.setMapperClass(MapPageRankMassDistributionClass.class);

    conf.setFloat("MissingMass", (float) missing);
    conf.setInt("NodeCount", n);

    FileSystem.get(conf).delete(new Path(out), true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
}
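 
Phase 2 above publishes MissingMass and NodeCount through the job's configuration; the mapper it registers is not shown on this page. A hedged sketch of the task-side read (the class name is illustrative, and PageRankNode is the type from the example above, import omitted):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Mapper;

// Illustrative only: the real MapPageRankMassDistributionClass is not shown here.
public class MassDistributionMapper
        extends Mapper<IntWritable, PageRankNode, IntWritable, PageRankNode> {
    private float missingMass;
    private int nodeCount;

    @Override
    protected void setup(Context context) {
        Configuration conf = context.getConfiguration();
        // The defaults are only a fallback; phase2 always sets both keys.
        missingMass = conf.getFloat("MissingMass", 0.0f);
        nodeCount = conf.getInt("NodeCount", 0);
    }
}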

From source file: be.ugent.intec.halvade.utils.HalvadeConf.java

License: Open Source License

public static void setSCC(Configuration conf, double val) {
    conf.setFloat(scc, (float) val);
}

From source file: be.ugent.intec.halvade.utils.HalvadeConf.java

License: Open Source License

public static void setSEC(Configuration conf, double val) {
    conf.setFloat(sec, (float) val);
}
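 
Both setters narrow their double argument to a float before storing it, so anything read back has float precision at best. A plausible matching getter, sketched here as an assumption (the default value is a placeholder, not taken from the Halvade source):

// Hypothetical counterpart to setSCC/setSEC; the 0.0f default is a
// placeholder, not taken from the Halvade source.
public static double getSCC(Configuration conf) {
    return conf.getFloat(scc, 0.0f); // float precision at best
}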

From source file: cc.slda.AnnotateDocuments.java

License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));
    options.addOption(OptionBuilder.withArgName(PCUTOFF).hasArg()
            .withDescription("probability of topic assignment").create(PCUTOFF));
    options.addOption(OptionBuilder.withArgName(INDEX).hasArg()
            .withDescription("path to data directory containing term and title indices").create(INDEX));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(INDEX)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String indexPath = cmdline.getOptionValue(INDEX);
    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    float cutoff = 0.9f;
    if (cmdline.hasOption(PCUTOFF)) {
        cutoff = Float.parseFloat(cmdline.getOptionValue(PCUTOFF));
    }
    LOG.info("Tool: " + AnnotateDocuments.class.getSimpleName());
    LOG.info(" - indices path: " + indexPath);
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - number of reducers: " + reduceTasks);
    LOG.info(" - log(probCutoff): " + Math.log(cutoff));

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    Job job = Job.getInstance(conf);
    job.setJobName(AnnotateDocuments.class.getSimpleName());
    job.setJarByClass(AnnotateDocuments.class);

    String termIndex = indexPath + Path.SEPARATOR + TERM;
    String titleIndex = indexPath + Path.SEPARATOR + TITLE;

    Path termIndexPath = new Path(termIndex);
    Path titleIndexPath = new Path(titleIndex);

    Preconditions.checkArgument(fs.exists(termIndexPath), "Missing term index files... " + termIndexPath);
    DistributedCache.addCacheFile(termIndexPath.toUri(), job.getConfiguration());
    Preconditions.checkArgument(fs.exists(titleIndexPath), "Missing title index files... " + titleIndexPath);
    DistributedCache.addCacheFile(titleIndexPath.toUri(), job.getConfiguration());

    job.setNumReduceTasks(reduceTasks);
    // Use the job's configuration: Job.getInstance(conf) copied conf above,
    // so setting the cutoff on conf here would not reach the tasks.
    job.getConfiguration().setFloat(PCUTOFF, cutoff);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(HMapSIW.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(HMapSIW.class);

    job.setMapperClass(MyMapper.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file: com.citic.zxyjs.zwlscx.mapreduce.lib.input.HFileOutputFormatBase.java

License: Apache License

public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(final TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Get the path of the temporary output file
    final Path outputPath = FileOutputFormat.getOutputPath(context);
    final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
    final Path ignoreOutputPath = new Path(outputPath + "_ignore");

    final Configuration conf = context.getConfiguration();
    final FileSystem fs = outputdir.getFileSystem(conf);
    // These settings come from hbase-*.xml
    final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
    // Invented config; add to hbase-*.xml if using non-default compression.
    final String defaultCompression = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
    final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
            false);

    if (fs.exists(ignoreOutputPath)) {
        fs.delete(ignoreOutputPath, true);
        LOG.info("Deleted existing " + ignoreOutputPath + ".");
    }

    // create a map from column family to the compression algorithm
    final Map<byte[], String> compressionMap = createFamilyCompressionMap(conf);
    final Map<byte[], String> bloomTypeMap = createFamilyBloomMap(conf);
    final Map<byte[], String> blockSizeMap = createFamilyBlockSizeMap(conf);

    String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_CONF_KEY);
    final HFileDataBlockEncoder encoder;
    if (dataBlockEncodingStr == null) {
        encoder = NoOpDataBlockEncoder.INSTANCE;
    } else {
        try {
            encoder = new HFileDataBlockEncoderImpl(DataBlockEncoding.valueOf(dataBlockEncodingStr));
        } catch (IllegalArgumentException ex) {
            throw new RuntimeException("Invalid data block encoding type configured for the param "
                    + DATABLOCK_ENCODING_CONF_KEY + " : " + dataBlockEncodingStr);
        }
    }

    return new RecordWriter<ImmutableBytesWritable, KeyValue>() {
        // Map of families to writers and how much has been output on the
        // writer.
        private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
                Bytes.BYTES_COMPARATOR);
        private final FSDataOutputStream dos = fs.create(ignoreOutputPath);
        private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
        private final byte[] now = Bytes.toBytes(System.currentTimeMillis());
        private boolean rollRequested = false;

        public void write(ImmutableBytesWritable row, KeyValue kv) throws IOException {
            // null input == user explicitly wants to flush
            if (row == null && kv == null) {
                rollWriters();
                return;
            }

            byte[] rowKey = kv.getRow();
            long length = kv.getLength();
            byte[] family = kv.getFamily();

            if (ignore(kv)) {
                byte[] readBuf = rowKey;
                dos.write(readBuf, 0, readBuf.length);
                dos.write(Bytes.toBytes("\n"));
                return;
            }
            WriterLength wl = this.writers.get(family);

            // If this is a new column family, ensure its directory exists
            if (wl == null) {
                Path path = null;
                path = new Path(outputdir, Bytes.toString(family));
                fs.mkdirs(path);
            }

            // If any of the HFiles for the column families has reached
            // maxsize, we need to roll all the writers
            if (wl != null && wl.written + length >= maxsize) {
                this.rollRequested = true;
            }

            // This can only happen once a row is finished though
            if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                rollWriters();
            }

            // create a new HFile writer, if necessary
            if (wl == null || wl.writer == null) {
                wl = getNewWriter(family, conf);
            }

            // we now have the proper HFile writer. Full steam ahead
            kv.updateLatestStamp(this.now);
            wl.writer.append(kv);
            wl.written += length;

            // Copy the row so we know when a row transitions.
            this.previousRow = rowKey;
        }

        private void rollWriters() throws IOException {
            for (WriterLength wl : this.writers.values()) {
                if (wl.writer != null) {
                    LOG.info("Writer=" + wl.writer.getPath()
                            + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
                    close(wl.writer);
                }
                wl.writer = null;
                wl.written = 0;
            }
            this.rollRequested = false;
        }

        /*
         * Create a new StoreFile.Writer.
         * @param family
         * @return A WriterLength, containing a new StoreFile.Writer.
         * @throws IOException
         */
        private WriterLength getNewWriter(byte[] family, Configuration conf) throws IOException {
            WriterLength wl = new WriterLength();
            Path familydir = new Path(outputdir, Bytes.toString(family));
            String compression = compressionMap.get(family);
            compression = compression == null ? defaultCompression : compression;
            String bloomTypeStr = bloomTypeMap.get(family);
            BloomType bloomType = BloomType.NONE;
            if (bloomTypeStr != null) {
                bloomType = BloomType.valueOf(bloomTypeStr);
            }
            String blockSizeString = blockSizeMap.get(family);
            int blockSize = blockSizeString == null ? HConstants.DEFAULT_BLOCKSIZE
                    : Integer.parseInt(blockSizeString);
            Configuration tempConf = new Configuration(conf);
            tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
            wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs, blockSize)
                    .withOutputDir(familydir)
                    .withCompression(AbstractHFileWriter.compressionByName(compression))
                    .withBloomType(bloomType).withComparator(KeyValue.COMPARATOR).withDataBlockEncoder(encoder)
                    .withChecksumType(HStore.getChecksumType(conf))
                    .withBytesPerChecksum(HStore.getBytesPerChecksum(conf)).build();

            this.writers.put(family, wl);
            return wl;
        }

        private void close(final StoreFile.Writer w) throws IOException {
            if (w != null) {
                w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
                w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                        Bytes.toBytes(context.getTaskAttemptID().toString()));
                w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude));
                w.appendTrackedTimestampsToMetadata();
                w.close();
            }
        }

        public void close(TaskAttemptContext c) throws IOException, InterruptedException {
            dos.flush();
            dos.close();
            for (WriterLength wl : this.writers.values()) {
                close(wl.writer);
            }
        }
    };
}
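 
One detail worth noting in getNewWriter above: rather than mutating the job's shared configuration, it copies it and sets hfile.block.cache.size to 0 on the copy, so the StoreFile writers it opens do not allocate a block cache inside the task. The copy-then-override pattern works for any key; a minimal sketch (the key name is made up):

import org.apache.hadoop.conf.Configuration;

public class CopyOverrideDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.setFloat("demo.float.key", 0.25f);

        // Copy constructor: tempConf starts from conf's entries, then
        // diverges; the override never touches the original.
        Configuration tempConf = new Configuration(conf);
        tempConf.setFloat("demo.float.key", 0.0f);

        System.out.println(conf.getFloat("demo.float.key", -1f));     // 0.25
        System.out.println(tempConf.getFloat("demo.float.key", -1f)); // 0.0
    }
}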

From source file: com.facebook.hive.orc.OrcConf.java

License: Open Source License

public static void setFloatVar(Configuration conf, ConfVars var, float val) {
    conf.setFloat(var.varname, val);
}
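 
Presumably OrcConf pairs this setter with a symmetric getter; a hedged sketch, where the defaultFloatVal field is modeled on Hive's ConfVars and not verified against this source:

// Hypothetical counterpart; defaultFloatVal is modeled on Hive's
// ConfVars and not verified against the facebook.hive.orc source.
public static float getFloatVar(Configuration conf, ConfVars var) {
    return conf.getFloat(var.varname, var.defaultFloatVal);
}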

From source file: com.facebook.hiveio.conf.FloatConfOption.java

License: Apache License

/**
 * Set value
 * @param conf Configuration
 * @param value to set
 */
public void set(Configuration conf, float value) {
    conf.setFloat(getKey(), value);
}

From source file: com.facebook.hiveio.conf.FloatConfOption.java

License: Apache License

/**
 * Set value if it's not already present
 * @param conf Configuration
 * @param value to set
 */
public void setIfUnset(Configuration conf, float value) {
    if (conf.get(getKey()) == null) {
        conf.setFloat(getKey(), value);
    }
}
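 
Together, set and setIfUnset give the usual typed-option pattern: one constant owns the key and its default, and call sites never repeat the string. A sketch of how such an option might be declared and used (the constructor signature and key are assumptions, not taken from the hiveio source):

import org.apache.hadoop.conf.Configuration;
import com.facebook.hiveio.conf.FloatConfOption;

public class FloatOptionDemo {
    // Assumed declaration style; the real FloatConfOption
    // constructor may differ.
    private static final FloatConfOption SAMPLE_FRACTION =
            new FloatConfOption("hiveio.sample.fraction", 0.1f);

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        SAMPLE_FRACTION.setIfUnset(conf, 0.5f); // key absent: writes 0.5
        SAMPLE_FRACTION.setIfUnset(conf, 0.9f); // key present: no-op
        System.out.println(conf.getFloat("hiveio.sample.fraction", -1f)); // 0.5
    }
}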

From source file: com.github.ygf.pagerank.PageRank.java

License: Apache License

public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.out.println("Usage: PageRank <links-simple-sorted.txt> <titles-dir> <output-dir>");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }

    Path linksFile = new Path(args[0]);
    Path titlesDir = new Path(args[1]);
    Path outputDir = new Path(args[2]);

    Configuration conf = getConf();

    // Do not create _SUCCESS files: MapFileOutputFormat.getReaders would
    // otherwise try to read _SUCCESS as another MapFile directory.
    conf.set("mapreduce.fileoutputcommitter.marksuccessfuljobs", "false");

    // Default values of the parameters of the algorithm.
    conf.setInt("pagerank.block_size", conf.getInt("pagerank.block_size", 10000));
    conf.setInt("pagerank.max_iterations", conf.getInt("pagerank.max_iterations", 2));
    conf.setFloat("pagerank.damping_factor", conf.getFloat("pagerank.damping_factor", 0.85f));
    conf.setInt("pagerank.top_results", conf.getInt("pagerank.top_results", 100));

    conf.set("pagerank.titles_dir", titlesDir.toString());
    int numPages = getNumPages(conf, titlesDir);
    conf.setLong("pagerank.num_pages", numPages);

    createTransitionMatrix(conf, linksFile, outputDir);

    int maxIters = Integer.parseInt(conf.get("pagerank.max_iterations"));
    for (int iter = 1; iter <= maxIters; iter++) {
        conf.setInt("pagerank.iteration", iter);
        pageRankIteration(iter, conf, outputDir);
        cleanPreviousIteration(iter, conf, outputDir);
    }

    summarizeResults(maxIters, conf, outputDir);

    return 0;
}
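 
The setFloat(key, getFloat(key, default)) calls above are a pin-the-default idiom: a value the user already supplied (for example via -D on the command line) is kept, and otherwise the default is written explicitly so every later stage reads the same concrete number. The idiom in isolation:

import org.apache.hadoop.conf.Configuration;

public class PinDefaultDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Keep a user-supplied value if present; otherwise write the
        // default explicitly so downstream code sees a concrete setting.
        conf.setFloat("pagerank.damping_factor",
                conf.getFloat("pagerank.damping_factor", 0.85f));

        System.out.println(conf.getFloat("pagerank.damping_factor", 0f)); // 0.85
    }
}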

From source file: com.kakao.hbase.manager.command.Balance.java

License: Apache License

public static List<RegionPlan> makePlan(HBaseAdmin admin, Set<String> tableNameSet, BalanceFactor balanceFactor)
        throws IOException {
    Map<ServerName, List<HRegionInfo>> clusterState = CommandAdapter.initializeRegionMap(admin);

    for (Map.Entry<HRegionInfo, ServerName> entry : createRegionAssignmentMap(admin, tableNameSet).entrySet())
        clusterState.get(entry.getValue()).add(entry.getKey());

    Configuration conf = admin.getConfiguration();
    conf.setFloat("hbase.regions.slop", 0f);
    balanceFactor.setConf(conf);

    return CommandAdapter.makePlan(admin, clusterState, conf);
}
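 
A usage note: setting hbase.regions.slop to 0 tells the balancer to tolerate no deviation from the per-server region average, so makePlan produces a fully even assignment. Also, admin.getConfiguration() appears to return the live configuration backing the HBaseAdmin instance, in which case the slop override persists on it beyond this call.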