Example usage for the org.apache.hadoop.util.GenericOptionsParser constructor GenericOptionsParser(String[] args)

Introduction

On this page you can find example usages of the org.apache.hadoop.util.GenericOptionsParser constructor GenericOptionsParser(String[] args).

Prototype

public GenericOptionsParser(String[] args) throws IOException

Source Link

Document

Create an options parser to parse the args.

Usage

From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemCacheCleaner.java

License:Open Source License

/**
 * Command-line entry point: when the configuration selects a file-system-backed metadata
 * cache, runs {@code cleanCache} on the configured cache directory if that directory exists.
 *
 * @param args generic Hadoop options followed by any remaining arguments
 * @throws IOException declared by the option parser and the cache handling
 */
public static void main(String[] args) throws IOException {
    GenericOptionsParser optionsParser = new GenericOptionsParser(args);
    args = optionsParser.getRemainingArgs();
    Configuration conf = optionsParser.getConfiguration();

    // TODO: Wire out constants and defaults through GoogleHadoopFileSystemBase once submitted.
    String cacheType = conf.get("fs.gs.metadata.cache.type", "IN_MEMORY");
    boolean fileSystemBacked = "FILESYSTEM_BACKED".equals(cacheType);
    if (fileSystemBacked) {
        String cacheDirectory = conf.get("fs.gs.metadata.cache.directory", "");
        boolean directoryConfigured = !Strings.isNullOrEmpty(cacheDirectory);
        Preconditions.checkState(directoryConfigured);
        LOG.info("Performing GC on cache directory {}", cacheDirectory);

        // Only build the cache object when the directory is actually present.
        if (Files.exists(Paths.get(cacheDirectory))) {
            cleanCache(new FileSystemBackedDirectoryListCache(cacheDirectory));
        }
    }

    LOG.info("Done with GC.");
}

From source file:com.twitter.elephanttwin.lzo.retrieval.LZOBlockLevelIndexingJobs.java

License:Open Source License

/**
 * Command-line entry point: strips the generic Hadoop options from {@code args}, runs the
 * indexing tool with the parsed configuration and exits with the status the tool returned.
 *
 * @param args generic Hadoop options followed by the tool's own arguments
 * @throws Exception if option parsing or the tool run fails
 */
public static void main(String[] args) throws Exception {
    GenericOptionsParser optParser = new GenericOptionsParser(args);
    int exitCode = ToolRunner.run(optParser.getConfiguration(), new LZOBlockLevelIndexingJobs(),
            optParser.getRemainingArgs());
    // Propagate the tool's status so a failed run is visible to the calling shell or scheduler.
    System.exit(exitCode);
}

From source file:com.twitter.elephanttwin.lzo.retrieval.TestLzoIndexing.java

License:Open Source License

/**
 * first create a .lzo input file which contains ExciteLog data, then index it
 * to then create a .lzo.index file finally start an indexing job on
 * uid./*from   www  . j a va2 s .  c om*/
 *
 * @throws Exception
 */
@BeforeClass
public static void setUp() throws Exception {

    conf = new Configuration();

    conf.set("io.compression.codecs", "com.hadoop.compression.lzo.LzopCodec");
    conf.setInt(LzoCodec.LZO_BUFFER_SIZE_KEY, 256);
    codec = new LzopCodec();
    codec.setConf(conf);
    FileUtil.fullyDelete(new File(TESTDIR));
    inputDir.mkdirs();

    // close any FileySystem from previous tests:
    FileSystem.get(conf).close();

    //create 3 files to test globs and test on single lzo block in a split;
    //create File 1, which has only one lzo block.
    FileSystem fs = FileSystem.get(conf);
    String baseFilePath = TESTDIR + INPUTDIR;
    LzoIndex index;
    int repeatFactor1 = 1;
    createLZOFile(baseFilePath + "11.lzo", repeatFactor1, true);
    index = LzoIndex.readIndex(fs, new Path(baseFilePath + "11.lzo"));
    if (index.getNumberOfBlocks() > 1)
        throw new RuntimeException(baseFilePath + "11.lzo has more than one " + "lzo block");

    //create File 2, which has more than 1 lzo blocks.
    int repeatFactor2 = 10;
    createLZOFile(baseFilePath + "21.lzo", repeatFactor2, true);
    index = LzoIndex.readIndex(fs, new Path(baseFilePath + "21.lzo"));
    if (index.getNumberOfBlocks() < 2)
        throw new RuntimeException(baseFilePath + "21.lzo has only one lzo block");

    //create a new lzo file 3 to test combining lzo blocks.

    int repeatFactor3 = 30;
    createLZOFile(baseFilePath + "31.lzo", repeatFactor3, true); //b64 format
    index = LzoIndex.readIndex(fs, new Path(baseFilePath + "31.lzo"));
    if (index.getNumberOfBlocks() < 2)
        throw new RuntimeException(baseFilePath + "31.lzo has only one lzo block");

    int repeatFactor4 = 1;
    createLZOFile(baseFilePath + "b11.lzo", repeatFactor4, true);
    index = LzoIndex.readIndex(fs, new Path(baseFilePath + "b11.lzo"));
    if (index.getNumberOfBlocks() > 1)
        throw new RuntimeException(baseFilePath + "b11.lzo has more than one " + "lzo block");

    //create File 2, which has more than 1 lzo blocks.
    int repeatFactor5 = 10;
    createLZOFile(baseFilePath + "b21.lzo", repeatFactor5, true);
    index = LzoIndex.readIndex(fs, new Path(baseFilePath + "b21.lzo"));
    if (index.getNumberOfBlocks() < 2)
        throw new RuntimeException(baseFilePath + "b21.lzo has only one lzo block");

    int repeatFactor6 = 30;
    createLZOFile(baseFilePath + "b31.lzo", repeatFactor6, true);
    index = LzoIndex.readIndex(fs, new Path(baseFilePath + "b31.lzo"));
    if (index.getNumberOfBlocks() < 2)
        throw new RuntimeException(baseFilePath + "b31.lzo has only one lzo block");

    //index the created lzo files without combining lzo blocks;
    String[] args = new String[] { "-jobpoolsize=1", "-index=" + TESTDIR + INDEXDIR, "-input=" + baseFilePath,
            "-inputformat=com.twitter.elephantbird.mapreduce.input." + "LzoThriftB64LineInputFormat",
            "-value_class=com.twitter.elephanttwin.gen.ExciteLog", "-columnname=uid", "-num_partitions=1",
            "-sleeptime=10", "-overwrite=false", };

    GenericOptionsParser optParser = new GenericOptionsParser(args);
    ToolRunner.run(conf, new LZOBlockLevelIndexingJobs(), optParser.getRemainingArgs());

    // the number of each key appears in all files
    repeatFactor = repeatFactor1 + repeatFactor2 + repeatFactor3 + repeatFactor4 + repeatFactor5
            + repeatFactor6;
    // number of rows has the same unique key in two files matching *1.lzo globs
    globsCnt = repeatFactor;

    pigServer = new PigServer(ExecType.LOCAL);
    // set lzo codec:
    pigServer.getPigContext().getProperties().setProperty("io.compression.codecs",
            "com.hadoop.compression.lzo.LzopCodec");
    pigServer.getPigContext().getProperties().setProperty("io.compression.codec.lzo.class",
            "com.hadoop.compression.lzo.LzoCodec");

    System.err.println("ALL DONE SETTING UP");
    // Thread.sleep(500000);
}

From source file:com.twitter.elephanttwin.retrieval.ScanUsingIndexJob.java

License:Apache License

/**
 * Command-line entry point: strips the generic Hadoop options from {@code args}, runs the
 * scan job with the parsed configuration and exits with the status the job returned.
 *
 * @param args generic Hadoop options followed by the job's own arguments
 * @throws Exception if option parsing or the job run fails
 */
public static void main(String[] args) throws Exception {
    GenericOptionsParser optParser = new GenericOptionsParser(args);
    int exitCode = ToolRunner.run(optParser.getConfiguration(), new ScanUsingIndexJob(),
            optParser.getRemainingArgs());
    // Propagate the job's status so a failed run is visible to the calling shell or scheduler.
    System.exit(exitCode);
}

From source file:com.twitter.elephanttwin.retrieval.TestIndexedReader.java

License:Apache License

/**
 * Command-line entry point: strips the generic Hadoop options from {@code args}, runs this
 * tool with the parsed configuration and exits with the status the tool returned.
 *
 * @param args generic Hadoop options followed by the tool's own arguments
 * @throws Exception if option parsing or the tool run fails
 */
public static void main(String[] args) throws Exception {
    GenericOptionsParser optParser = new GenericOptionsParser(args);
    int exitCode = ToolRunner.run(optParser.getConfiguration(), new TestIndexedReader(),
            optParser.getRemainingArgs());
    // Propagate the tool's status so a failed run is visible to the calling shell or scheduler.
    System.exit(exitCode);
}

From source file:edu.berkeley.amplab.adam.AdamMain.java

License:Apache License

/**
 * The args comming in should look like...
 *
 * [generic options] moduleName [module options]
 *
 * @param args/*from   w ww.ja  v a 2 s  .c  o m*/
 * @throws Exception
 */
public static void main(String[] args) throws Exception {
    if (args.length == 0) {
        printHelpAndExit();
    }
    // Args after removing all generic args
    String[] nonGenericArgs = new GenericOptionsParser(args).getRemainingArgs();
    if (nonGenericArgs.length == 0) {
        // We need at least an module name
        printHelpAndExit();
    }
    // The module name should be the first arg after the generic options
    String moduleName = nonGenericArgs[0];
    AdamModule module = findApp(moduleName);
    if (module == null) {
        System.err.println(String.format("Unknown module name='%s'", moduleName));
        printHelpAndExit();
    } else {
        String[] argsWithoutProgramName = new String[args.length - 1];
        boolean nameFound = false;
        int i = 0;
        for (String arg : args) {
            if (!nameFound && moduleName.equals(arg)) {
                nameFound = true;
                continue;
            }
            argsWithoutProgramName[i++] = arg;
        }
        // We run the tool after removing the program name from the arguments
        // NOTE: the actual method that is run here is AdamModule.run()
        int res = ToolRunner.run(module, argsWithoutProgramName);
        System.exit(res);
    }
}

From source file:edu.umn.cs.spatialHadoop.core.HilbertCurvePartitioner.java

License:Open Source License

/**
 * Reads every shape of the input file, builds a Hilbert curve partitioner from the center
 * points of the shapes' MBRs and prints one {@code x,y,partition} row per point.
 *
 * @param args generic Hadoop options plus the operation parameters (input path, "mbr" shape)
 * @throws IOException if the input cannot be read
 */
public static void main(String[] args) throws IOException {
    OperationsParams params = new OperationsParams(new GenericOptionsParser(args));

    Path inPath = params.getInputPath();
    long length = inPath.getFileSystem(params).getFileStatus(inPath).getLen();
    final Vector<Point> points = new Vector<Point>();

    // A single split covering the whole file, so every record is visited.
    ShapeIterRecordReader reader = new ShapeIterRecordReader(params,
            new FileSplit(inPath, 0, length, new String[0]));
    try {
        Rectangle key = reader.createKey();
        ShapeIterator shapes = reader.createValue();
        while (reader.next(key, shapes)) {
            for (Shape s : shapes) {
                points.add(s.getMBR().getCenterPoint());
            }
        }
    } finally {
        // Release the underlying input stream even if reading fails part-way.
        reader.close();
    }

    Rectangle inMBR = (Rectangle) OperationsParams.getShape(params, "mbr");
    HilbertCurvePartitioner hcp = new HilbertCurvePartitioner();
    hcp.createFromPoints(inMBR, points.toArray(new Point[points.size()]), 10);

    System.out.println("x,y,partition");
    for (Point p : points) {
        int partition = hcp.overlapPartition(p);
        System.out.println(p.x + "," + p.y + "," + partition);
    }
}

From source file:edu.umn.cs.spatialHadoop.core.KdTreePartitioner.java

License:Open Source License

/**
 * Reads every shape of the input file, builds a k-d tree partitioner from the center points of
 * the shapes' MBRs and prints, after an {@code x,y,partition} header line, the number of
 * points that fell into each partition id as a comma-separated list.
 *
 * @param args generic Hadoop options plus the operation parameters (input path, "mbr" shape)
 * @throws IOException if the input cannot be read
 */
public static void main(String[] args) throws IOException {
    OperationsParams params = new OperationsParams(new GenericOptionsParser(args));

    Path inPath = params.getInputPath();
    long length = inPath.getFileSystem(params).getFileStatus(inPath).getLen();
    final Vector<Point> points = new Vector<Point>();

    // A single split covering the whole file, so every record is visited.
    ShapeIterRecordReader reader = new ShapeIterRecordReader(params,
            new FileSplit(inPath, 0, length, new String[0]));
    try {
        Rectangle key = reader.createKey();
        ShapeIterator shapes = reader.createValue();
        while (reader.next(key, shapes)) {
            for (Shape s : shapes) {
                points.add(s.getMBR().getCenterPoint());
            }
        }
    } finally {
        // Release the underlying input stream even if reading fails part-way.
        reader.close();
    }

    Rectangle inMBR = (Rectangle) OperationsParams.getShape(params, "mbr");

    KdTreePartitioner kdp = new KdTreePartitioner();
    kdp.createFromPoints(inMBR, points.toArray(new Point[points.size()]), 7);
    System.out.println("x,y,partition");
    // NOTE(review): the histogram is sized at twice the partition count; presumably partition
    // ids can exceed getPartitionCount() - confirm against overlapPartition().
    int[] sizes = new int[kdp.getPartitionCount() * 2];
    for (Point p : points) {
        int partition = kdp.overlapPartition(p);
        sizes[partition]++;
    }
    for (int i = 0; i < sizes.length; i++) {
        System.out.print(sizes[i] + ",");
    }
}

From source file:edu.umn.cs.spatialHadoop.core.QuadTreePartitioner.java

License:Open Source License

/**
 * Reads every shape of the input file, builds a quad tree partitioner from the center points
 * of the shapes' MBRs, prints one {@code x,y,partition} row per point and then the partition
 * count followed by the WKT of each partition.
 *
 * @param args generic Hadoop options plus the operation parameters (input path, "mbr" shape)
 * @throws IOException if the input cannot be read
 */
public static void main(String[] args) throws IOException {
    OperationsParams params = new OperationsParams(new GenericOptionsParser(args));

    Path inPath = params.getInputPath();
    long length = inPath.getFileSystem(params).getFileStatus(inPath).getLen();
    final Vector<Point> points = new Vector<Point>();

    // A single split covering the whole file, so every record is visited.
    ShapeIterRecordReader reader = new ShapeIterRecordReader(params,
            new FileSplit(inPath, 0, length, new String[0]));
    try {
        Rectangle key = reader.createKey();
        ShapeIterator shapes = reader.createValue();
        while (reader.next(key, shapes)) {
            for (Shape s : shapes) {
                points.add(s.getMBR().getCenterPoint());
            }
        }
    } finally {
        // Release the underlying input stream even if reading fails part-way.
        reader.close();
    }

    Rectangle inMBR = (Rectangle) OperationsParams.getShape(params, "mbr");

    QuadTreePartitioner qtp = new QuadTreePartitioner();
    qtp.createFromPoints(inMBR, points.toArray(new Point[points.size()]), 8);
    System.out.println("x,y,partition");
    for (Point p : points) {
        int partition = qtp.overlapPartition(p);
        System.out.println(p.x + "," + p.y + "," + partition);
    }

    System.out.println("Partition count " + qtp.getPartitionCount());
    for (int i = 0; i < qtp.getPartitionCount(); i++) {
        System.out.println(qtp.getPartitionAt(i).toWKT());
    }
}

From source file:edu.umn.cs.spatialHadoop.delaunay.DelaunayTriangulation.java

License:Open Source License

/**
 * @param args/*w w w.j  a v  a2 s.c  o m*/
 * @throws IOException 
 * @throws InterruptedException 
 * @throws ClassNotFoundException 
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    GenericOptionsParser parser = new GenericOptionsParser(args);
    OperationsParams params = new OperationsParams(parser);

    if (!params.checkInputOutput()) {
        printUsage();
        System.exit(1);
    }

    Path[] inFiles = params.getInputPaths();
    Path outFile = params.getOutputPath();

    long t1 = System.currentTimeMillis();
    Job job = delaunay(inFiles, outFile, params);
    long t2 = System.currentTimeMillis();
    System.out.println("Total time: " + (t2 - t1) + " millis");
    if (job != null) {
        System.out.println("Map final sites: "
                + job.getCounters().findCounter(DelaunayCounters.MAP_FINAL_SITES).getValue());
        System.out.println("Map non-final sites: "
                + job.getCounters().findCounter(DelaunayCounters.MAP_NONFINAL_SITES).getValue());
        System.out.println("Reduce final sites: "
                + job.getCounters().findCounter(DelaunayCounters.REDUCE_FINAL_SITES).getValue());
        System.out.println("Reduce non-final sites: "
                + job.getCounters().findCounter(DelaunayCounters.REDUCE_NONFINAL_SITES).getValue());
    }
}