Example usage for org.apache.hadoop.util GenericOptionsParser GenericOptionsParser

List of usage examples for org.apache.hadoop.util GenericOptionsParser GenericOptionsParser

Introduction

On this page you can find example usage for the org.apache.hadoop.util GenericOptionsParser(String[] args) constructor.

Prototype

public GenericOptionsParser(String[] args) throws IOException 

Document

Create an options parser to parse the args.
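
A minimal sketch of typical usage, assuming the Hadoop client libraries are on the classpath (the class name ParserSketch is hypothetical): the constructor consumes the generic Hadoop options from args, and the remaining application arguments and the resulting Configuration are then read back from the parser.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.GenericOptionsParser;

public class ParserSketch {
    public static void main(String[] args) throws IOException {
        // Parse the generic Hadoop options (-D, -conf, -fs, -files, -libjars, ...) out of args.
        GenericOptionsParser parser = new GenericOptionsParser(args);

        // Arguments that were not recognized as generic options.
        String[] remainingArgs = parser.getRemainingArgs();

        // Configuration populated from the generic options (e.g. -D key=value pairs).
        Configuration conf = parser.getConfiguration();

        System.out.println("Remaining args: " + remainingArgs.length);
        System.out.println("mapreduce.job.name = " + conf.get("mapreduce.job.name", "<unset>"));
    }
}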

Usage

From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemCacheCleaner.java

License:Open Source License

public static void main(String[] args) throws IOException {
    GenericOptionsParser parser = new GenericOptionsParser(args);
    args = parser.getRemainingArgs();
    Configuration configuration = parser.getConfiguration();

    // TODO: Wire out constants and defaults through GoogleHadoopFileSystemBase once submitted.
    if ("FILESYSTEM_BACKED".equals(configuration.get("fs.gs.metadata.cache.type", "IN_MEMORY"))) {
        String fsStringPath = configuration.get("fs.gs.metadata.cache.directory", "");
        Preconditions.checkState(!Strings.isNullOrEmpty(fsStringPath));
        LOG.info("Performing GC on cache directory {}", fsStringPath);

        Path path = Paths.get(fsStringPath);
        if (Files.exists(path)) {
            FileSystemBackedDirectoryListCache cache = new FileSystemBackedDirectoryListCache(fsStringPath);
            cleanCache(cache);
        }
    }

    LOG.info("Done with GC.");
}
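
In the example above, the properties read from the Configuration (such as fs.gs.metadata.cache.type) can be supplied on the command line, because GenericOptionsParser folds -D property=value pairs into the Configuration it returns. A small, self-contained sketch of that behavior (the class name GenericOptionsDemo and the argument values are hypothetical):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.GenericOptionsParser;

public class GenericOptionsDemo {
    public static void main(String[] unused) throws IOException {
        // Simulated command line: one -D generic option followed by one application argument.
        String[] args = { "-D", "fs.gs.metadata.cache.type=FILESYSTEM_BACKED", "someAppArg" };

        GenericOptionsParser parser = new GenericOptionsParser(args);
        Configuration conf = parser.getConfiguration();

        // The -D pair was consumed into the Configuration; only "someAppArg" remains.
        System.out.println(conf.get("fs.gs.metadata.cache.type")); // FILESYSTEM_BACKED
        System.out.println(parser.getRemainingArgs()[0]);          // someAppArg
    }
}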

From source file:com.twitter.elephanttwin.lzo.retrieval.LZOBlockLevelIndexingJobs.java

License:Open Source License

public static void main(String[] args) throws Exception {
    GenericOptionsParser optParser = new GenericOptionsParser(args);
    ToolRunner.run(optParser.getConfiguration(), new LZOBlockLevelIndexingJobs(), optParser.getRemainingArgs());
}
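
This example (and several below) hands the parsed Configuration and the remaining arguments to ToolRunner. A minimal, hypothetical Tool that fits the same pattern might look like the following sketch (the class name ExampleTool is an assumption, not part of the original source):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class ExampleTool extends Configured implements Tool {
    @Override
    public int run(String[] remainingArgs) throws Exception {
        // getConf() returns the Configuration handed to ToolRunner.run below.
        Configuration conf = getConf();
        System.out.println("fs.defaultFS = " + conf.get("fs.defaultFS"));
        System.out.println("First application arg: " + (remainingArgs.length > 0 ? remainingArgs[0] : "<none>"));
        return 0;
    }

    public static void main(String[] args) throws Exception {
        GenericOptionsParser optParser = new GenericOptionsParser(args);
        int exitCode = ToolRunner.run(optParser.getConfiguration(), new ExampleTool(), optParser.getRemainingArgs());
        System.exit(exitCode);
    }
}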

From source file:com.twitter.elephanttwin.lzo.retrieval.TestLzoIndexing.java

License:Open Source License

/**
 * First create a .lzo input file which contains ExciteLog data, then index it
 * to create a .lzo.index file, and finally start an indexing job on uid.
 *
 * @throws Exception
 */
@BeforeClass
public static void setUp() throws Exception {

    conf = new Configuration();

    conf.set("io.compression.codecs", "com.hadoop.compression.lzo.LzopCodec");
    conf.setInt(LzoCodec.LZO_BUFFER_SIZE_KEY, 256);
    codec = new LzopCodec();
    codec.setConf(conf);
    FileUtil.fullyDelete(new File(TESTDIR));
    inputDir.mkdirs();

    // close any FileSystem from previous tests:
    FileSystem.get(conf).close();

    //create 3 files to test globs and to test a single lzo block in a split;
    //create File 1, which has only one lzo block.
    FileSystem fs = FileSystem.get(conf);
    String baseFilePath = TESTDIR + INPUTDIR;
    LzoIndex index;
    int repeatFactor1 = 1;
    createLZOFile(baseFilePath + "11.lzo", repeatFactor1, true);
    index = LzoIndex.readIndex(fs, new Path(baseFilePath + "11.lzo"));
    if (index.getNumberOfBlocks() > 1)
        throw new RuntimeException(baseFilePath + "11.lzo has more than one " + "lzo block");

    //create File 2, which has more than one lzo block.
    int repeatFactor2 = 10;
    createLZOFile(baseFilePath + "21.lzo", repeatFactor2, true);
    index = LzoIndex.readIndex(fs, new Path(baseFilePath + "21.lzo"));
    if (index.getNumberOfBlocks() < 2)
        throw new RuntimeException(baseFilePath + "21.lzo has only one lzo block");

    //create a new lzo file 3 to test combining lzo blocks.

    int repeatFactor3 = 30;
    createLZOFile(baseFilePath + "31.lzo", repeatFactor3, true); //b64 format
    index = LzoIndex.readIndex(fs, new Path(baseFilePath + "31.lzo"));
    if (index.getNumberOfBlocks() < 2)
        throw new RuntimeException(baseFilePath + "31.lzo has only one lzo block");

    int repeatFactor4 = 1;
    createLZOFile(baseFilePath + "b11.lzo", repeatFactor4, true);
    index = LzoIndex.readIndex(fs, new Path(baseFilePath + "b11.lzo"));
    if (index.getNumberOfBlocks() > 1)
        throw new RuntimeException(baseFilePath + "b11.lzo has more than one " + "lzo block");

    //create file b21.lzo, which has more than one lzo block.
    int repeatFactor5 = 10;
    createLZOFile(baseFilePath + "b21.lzo", repeatFactor5, true);
    index = LzoIndex.readIndex(fs, new Path(baseFilePath + "b21.lzo"));
    if (index.getNumberOfBlocks() < 2)
        throw new RuntimeException(baseFilePath + "b21.lzo has only one lzo block");

    int repeatFactor6 = 30;
    createLZOFile(baseFilePath + "b31.lzo", repeatFactor6, true);
    index = LzoIndex.readIndex(fs, new Path(baseFilePath + "b31.lzo"));
    if (index.getNumberOfBlocks() < 2)
        throw new RuntimeException(baseFilePath + "b31.lzo has only one lzo block");

    //index the created lzo files without combining lzo blocks;
    String[] args = new String[] { "-jobpoolsize=1", "-index=" + TESTDIR + INDEXDIR, "-input=" + baseFilePath,
            "-inputformat=com.twitter.elephantbird.mapreduce.input." + "LzoThriftB64LineInputFormat",
            "-value_class=com.twitter.elephanttwin.gen.ExciteLog", "-columnname=uid", "-num_partitions=1",
            "-sleeptime=10", "-overwrite=false", };

    GenericOptionsParser optParser = new GenericOptionsParser(args);
    ToolRunner.run(conf, new LZOBlockLevelIndexingJobs(), optParser.getRemainingArgs());

    // the number of times each key appears across all files
    repeatFactor = repeatFactor1 + repeatFactor2 + repeatFactor3 + repeatFactor4 + repeatFactor5
            + repeatFactor6;
    // number of rows having the same unique key in the two files matching the *1.lzo glob
    globsCnt = repeatFactor;

    pigServer = new PigServer(ExecType.LOCAL);
    // set lzo codec:
    pigServer.getPigContext().getProperties().setProperty("io.compression.codecs",
            "com.hadoop.compression.lzo.LzopCodec");
    pigServer.getPigContext().getProperties().setProperty("io.compression.codec.lzo.class",
            "com.hadoop.compression.lzo.LzoCodec");

    System.err.println("ALL DONE SETTING UP");
    // Thread.sleep(500000);
}

From source file:com.twitter.elephanttwin.retrieval.ScanUsingIndexJob.java

License:Apache License

public static void main(String[] args) throws Exception {
    GenericOptionsParser optParser = new GenericOptionsParser(args);
    ToolRunner.run(optParser.getConfiguration(), new ScanUsingIndexJob(), optParser.getRemainingArgs());
}

From source file:com.twitter.elephanttwin.retrieval.TestIndexedReader.java

License:Apache License

public static void main(String[] args) throws Exception {
    GenericOptionsParser optParser = new GenericOptionsParser(args);
    ToolRunner.run(optParser.getConfiguration(), new TestIndexedReader(), optParser.getRemainingArgs());
}

From source file:edu.berkeley.amplab.adam.AdamMain.java

License:Apache License

/**
 * The args coming in should look like:
 *
 * [generic options] moduleName [module options]
 *
 * @param args
 * @throws Exception
 */
public static void main(String[] args) throws Exception {
    if (args.length == 0) {
        printHelpAndExit();
    }
    // Args after removing all generic args
    String[] nonGenericArgs = new GenericOptionsParser(args).getRemainingArgs();
    if (nonGenericArgs.length == 0) {
        // We need at least a module name
        printHelpAndExit();
    }
    // The module name should be the first arg after the generic options
    String moduleName = nonGenericArgs[0];
    AdamModule module = findApp(moduleName);
    if (module == null) {
        System.err.println(String.format("Unknown module name='%s'", moduleName));
        printHelpAndExit();
    } else {
        String[] argsWithoutProgramName = new String[args.length - 1];
        boolean nameFound = false;
        int i = 0;
        for (String arg : args) {
            if (!nameFound && moduleName.equals(arg)) {
                nameFound = true;
                continue;
            }
            argsWithoutProgramName[i++] = arg;
        }
        // We run the tool after removing the program name from the arguments
        // NOTE: the actual method that is run here is AdamModule.run()
        int res = ToolRunner.run(module, argsWithoutProgramName);
        System.exit(res);
    }
}

From source file:edu.umn.cs.spatialHadoop.core.HilbertCurvePartitioner.java

License:Open Source License

public static void main(String[] args) throws IOException {
    OperationsParams params = new OperationsParams(new GenericOptionsParser(args));

    Path inPath = params.getInputPath();
    long length = inPath.getFileSystem(params).getFileStatus(inPath).getLen();
    ShapeIterRecordReader reader = new ShapeIterRecordReader(params,
            new FileSplit(inPath, 0, length, new String[0]));
    Rectangle key = reader.createKey();
    ShapeIterator shapes = reader.createValue();
    final Vector<Point> points = new Vector<Point>();
    while (reader.next(key, shapes)) {
        for (Shape s : shapes) {
            points.add(s.getMBR().getCenterPoint());
        }
    }
    Rectangle inMBR = (Rectangle) OperationsParams.getShape(params, "mbr");
    HilbertCurvePartitioner hcp = new HilbertCurvePartitioner();
    hcp.createFromPoints(inMBR, points.toArray(new Point[points.size()]), 10);

    System.out.println("x,y,partition");
    for (Point p : points) {
        int partition = hcp.overlapPartition(p);
        System.out.println(p.x + "," + p.y + "," + partition);
    }
}

From source file:edu.umn.cs.spatialHadoop.core.KdTreePartitioner.java

License:Open Source License

public static void main(String[] args) throws IOException {
    OperationsParams params = new OperationsParams(new GenericOptionsParser(args));

    Path inPath = params.getInputPath();
    long length = inPath.getFileSystem(params).getFileStatus(inPath).getLen();
    ShapeIterRecordReader reader = new ShapeIterRecordReader(params,
            new FileSplit(inPath, 0, length, new String[0]));
    Rectangle key = reader.createKey();
    ShapeIterator shapes = reader.createValue();
    final Vector<Point> points = new Vector<Point>();
    while (reader.next(key, shapes)) {
        for (Shape s : shapes) {
            points.add(s.getMBR().getCenterPoint());
        }
    }
    Rectangle inMBR = (Rectangle) OperationsParams.getShape(params, "mbr");

    KdTreePartitioner kdp = new KdTreePartitioner();
    kdp.createFromPoints(inMBR, points.toArray(new Point[points.size()]), 7);
    System.out.println("x,y,partition");
    int[] sizes = new int[kdp.getPartitionCount() * 2];
    for (Point p : points) {
        int partition = kdp.overlapPartition(p);
        //System.out.println(p.x+","+p.y+","+partition);
        sizes[partition]++;
    }
    for (int i = 0; i < sizes.length; i++)
        System.out.print(sizes[i] + ",");
}

From source file:edu.umn.cs.spatialHadoop.core.QuadTreePartitioner.java

License:Open Source License

public static void main(String[] args) throws IOException {
    OperationsParams params = new OperationsParams(new GenericOptionsParser(args));

    Path inPath = params.getInputPath();
    long length = inPath.getFileSystem(params).getFileStatus(inPath).getLen();
    ShapeIterRecordReader reader = new ShapeIterRecordReader(params,
            new FileSplit(inPath, 0, length, new String[0]));
    Rectangle key = reader.createKey();
    ShapeIterator shapes = reader.createValue();
    final Vector<Point> points = new Vector<Point>();
    while (reader.next(key, shapes)) {
        for (Shape s : shapes) {
            points.add(s.getMBR().getCenterPoint());
        }
    }
    Rectangle inMBR = (Rectangle) OperationsParams.getShape(params, "mbr");

    QuadTreePartitioner qtp = new QuadTreePartitioner();
    qtp.createFromPoints(inMBR, points.toArray(new Point[points.size()]), 8);
    System.out.println("x,y,partition");
    for (Point p : points) {
        int partition = qtp.overlapPartition(p);
        System.out.println(p.x + "," + p.y + "," + partition);
    }

    System.out.println("Partition count " + qtp.getPartitionCount());
    for (int i = 0; i < qtp.getPartitionCount(); i++) {
        System.out.println(qtp.getPartitionAt(i).toWKT());
    }
}

From source file:edu.umn.cs.spatialHadoop.delaunay.DelaunayTriangulation.java

License:Open Source License

/**
 * @param args
 * @throws IOException 
 * @throws InterruptedException 
 * @throws ClassNotFoundException 
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    GenericOptionsParser parser = new GenericOptionsParser(args);
    OperationsParams params = new OperationsParams(parser);

    if (!params.checkInputOutput()) {
        printUsage();
        System.exit(1);
    }

    Path[] inFiles = params.getInputPaths();
    Path outFile = params.getOutputPath();

    long t1 = System.currentTimeMillis();
    Job job = delaunay(inFiles, outFile, params);
    long t2 = System.currentTimeMillis();
    System.out.println("Total time: " + (t2 - t1) + " millis");
    if (job != null) {
        System.out.println("Map final sites: "
                + job.getCounters().findCounter(DelaunayCounters.MAP_FINAL_SITES).getValue());
        System.out.println("Map non-final sites: "
                + job.getCounters().findCounter(DelaunayCounters.MAP_NONFINAL_SITES).getValue());
        System.out.println("Reduce final sites: "
                + job.getCounters().findCounter(DelaunayCounters.REDUCE_FINAL_SITES).getValue());
        System.out.println("Reduce non-final sites: "
                + job.getCounters().findCounter(DelaunayCounters.REDUCE_NONFINAL_SITES).getValue());
    }
}