List of usage examples for the org.apache.hadoop.util.GenericOptionsParser constructor GenericOptionsParser(String[])
public GenericOptionsParser(String[] args) throws IOException
From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemCacheCleaner.java
License:Open Source License
public static void main(String[] args) throws IOException { GenericOptionsParser parser = new GenericOptionsParser(args); args = parser.getRemainingArgs();//from w w w . ja v a 2 s .co m Configuration configuration = parser.getConfiguration(); // TODO: Wire out constants and defaults through GoogleHadoopFileSystemBase once submitted. if ("FILESYSTEM_BACKED".equals(configuration.get("fs.gs.metadata.cache.type", "IN_MEMORY"))) { String fsStringPath = configuration.get("fs.gs.metadata.cache.directory", ""); Preconditions.checkState(!Strings.isNullOrEmpty(fsStringPath)); LOG.info("Performing GC on cache directory {}", fsStringPath); Path path = Paths.get(fsStringPath); if (Files.exists(path)) { FileSystemBackedDirectoryListCache cache = new FileSystemBackedDirectoryListCache(fsStringPath); cleanCache(cache); } } LOG.info("Done with GC."); }
From source file:com.twitter.elephanttwin.lzo.retrieval.LZOBlockLevelIndexingJobs.java
License:Open Source License
/**
 * Command-line entry point: runs {@link LZOBlockLevelIndexingJobs} through {@code ToolRunner}.
 *
 * <p>Generic Hadoop options are parsed from {@code args}; the resulting configuration and the
 * remaining arguments are handed to the tool. A non-zero status returned by the tool becomes the
 * process exit code, so calling scripts can detect failure. On success the method returns
 * normally.
 *
 * @param args command-line arguments, including any generic Hadoop options
 * @throws Exception if option parsing or the tool itself throws
 */
public static void main(String[] args) throws Exception {
  GenericOptionsParser optParser = new GenericOptionsParser(args);
  int exitCode = ToolRunner.run(
      optParser.getConfiguration(),
      new LZOBlockLevelIndexingJobs(),
      optParser.getRemainingArgs());
  // Previously the status was dropped, so a failed job still exited with 0.
  if (exitCode != 0) {
    System.exit(exitCode);
  }
}
From source file:com.twitter.elephanttwin.lzo.retrieval.TestLzoIndexing.java
License:Open Source License
/** * first create a .lzo input file which contains ExciteLog data, then index it * to then create a .lzo.index file finally start an indexing job on * uid./*from www . j a va2 s . c om*/ * * @throws Exception */ @BeforeClass public static void setUp() throws Exception { conf = new Configuration(); conf.set("io.compression.codecs", "com.hadoop.compression.lzo.LzopCodec"); conf.setInt(LzoCodec.LZO_BUFFER_SIZE_KEY, 256); codec = new LzopCodec(); codec.setConf(conf); FileUtil.fullyDelete(new File(TESTDIR)); inputDir.mkdirs(); // close any FileySystem from previous tests: FileSystem.get(conf).close(); //create 3 files to test globs and test on single lzo block in a split; //create File 1, which has only one lzo block. FileSystem fs = FileSystem.get(conf); String baseFilePath = TESTDIR + INPUTDIR; LzoIndex index; int repeatFactor1 = 1; createLZOFile(baseFilePath + "11.lzo", repeatFactor1, true); index = LzoIndex.readIndex(fs, new Path(baseFilePath + "11.lzo")); if (index.getNumberOfBlocks() > 1) throw new RuntimeException(baseFilePath + "11.lzo has more than one " + "lzo block"); //create File 2, which has more than 1 lzo blocks. int repeatFactor2 = 10; createLZOFile(baseFilePath + "21.lzo", repeatFactor2, true); index = LzoIndex.readIndex(fs, new Path(baseFilePath + "21.lzo")); if (index.getNumberOfBlocks() < 2) throw new RuntimeException(baseFilePath + "21.lzo has only one lzo block"); //create a new lzo file 3 to test combining lzo blocks. 
int repeatFactor3 = 30; createLZOFile(baseFilePath + "31.lzo", repeatFactor3, true); //b64 format index = LzoIndex.readIndex(fs, new Path(baseFilePath + "31.lzo")); if (index.getNumberOfBlocks() < 2) throw new RuntimeException(baseFilePath + "31.lzo has only one lzo block"); int repeatFactor4 = 1; createLZOFile(baseFilePath + "b11.lzo", repeatFactor4, true); index = LzoIndex.readIndex(fs, new Path(baseFilePath + "b11.lzo")); if (index.getNumberOfBlocks() > 1) throw new RuntimeException(baseFilePath + "b11.lzo has more than one " + "lzo block"); //create File 2, which has more than 1 lzo blocks. int repeatFactor5 = 10; createLZOFile(baseFilePath + "b21.lzo", repeatFactor5, true); index = LzoIndex.readIndex(fs, new Path(baseFilePath + "b21.lzo")); if (index.getNumberOfBlocks() < 2) throw new RuntimeException(baseFilePath + "b21.lzo has only one lzo block"); int repeatFactor6 = 30; createLZOFile(baseFilePath + "b31.lzo", repeatFactor6, true); index = LzoIndex.readIndex(fs, new Path(baseFilePath + "b31.lzo")); if (index.getNumberOfBlocks() < 2) throw new RuntimeException(baseFilePath + "b31.lzo has only one lzo block"); //index the created lzo files without combining lzo blocks; String[] args = new String[] { "-jobpoolsize=1", "-index=" + TESTDIR + INDEXDIR, "-input=" + baseFilePath, "-inputformat=com.twitter.elephantbird.mapreduce.input." 
+ "LzoThriftB64LineInputFormat", "-value_class=com.twitter.elephanttwin.gen.ExciteLog", "-columnname=uid", "-num_partitions=1", "-sleeptime=10", "-overwrite=false", }; GenericOptionsParser optParser = new GenericOptionsParser(args); ToolRunner.run(conf, new LZOBlockLevelIndexingJobs(), optParser.getRemainingArgs()); // the number of each key appears in all files repeatFactor = repeatFactor1 + repeatFactor2 + repeatFactor3 + repeatFactor4 + repeatFactor5 + repeatFactor6; // number of rows has the same unique key in two files matching *1.lzo globs globsCnt = repeatFactor; pigServer = new PigServer(ExecType.LOCAL); // set lzo codec: pigServer.getPigContext().getProperties().setProperty("io.compression.codecs", "com.hadoop.compression.lzo.LzopCodec"); pigServer.getPigContext().getProperties().setProperty("io.compression.codec.lzo.class", "com.hadoop.compression.lzo.LzoCodec"); System.err.println("ALL DONE SETTING UP"); // Thread.sleep(500000); }
From source file:com.twitter.elephanttwin.retrieval.ScanUsingIndexJob.java
License:Apache License
/**
 * Command-line entry point: runs {@link ScanUsingIndexJob} through {@code ToolRunner}.
 *
 * <p>Generic Hadoop options are parsed from {@code args}; the resulting configuration and the
 * remaining arguments are handed to the tool. A non-zero status returned by the tool becomes the
 * process exit code, so calling scripts can detect failure. On success the method returns
 * normally.
 *
 * @param args command-line arguments, including any generic Hadoop options
 * @throws Exception if option parsing or the tool itself throws
 */
public static void main(String[] args) throws Exception {
  GenericOptionsParser optParser = new GenericOptionsParser(args);
  int exitCode = ToolRunner.run(
      optParser.getConfiguration(),
      new ScanUsingIndexJob(),
      optParser.getRemainingArgs());
  // Previously the status was dropped, so a failed job still exited with 0.
  if (exitCode != 0) {
    System.exit(exitCode);
  }
}
From source file:com.twitter.elephanttwin.retrieval.TestIndexedReader.java
License:Apache License
/**
 * Command-line entry point: runs {@link TestIndexedReader} through {@code ToolRunner}.
 *
 * <p>Generic Hadoop options are parsed from {@code args}; the resulting configuration and the
 * remaining arguments are handed to the tool. A non-zero status returned by the tool becomes the
 * process exit code, so calling scripts can detect failure. On success the method returns
 * normally.
 *
 * @param args command-line arguments, including any generic Hadoop options
 * @throws Exception if option parsing or the tool itself throws
 */
public static void main(String[] args) throws Exception {
  GenericOptionsParser optParser = new GenericOptionsParser(args);
  int exitCode = ToolRunner.run(
      optParser.getConfiguration(),
      new TestIndexedReader(),
      optParser.getRemainingArgs());
  // Previously the status was dropped, so a failed job still exited with 0.
  if (exitCode != 0) {
    System.exit(exitCode);
  }
}
From source file:edu.berkeley.amplab.adam.AdamMain.java
License:Apache License
/** * The args comming in should look like... * * [generic options] moduleName [module options] * * @param args/*from w ww.ja v a 2 s .c o m*/ * @throws Exception */ public static void main(String[] args) throws Exception { if (args.length == 0) { printHelpAndExit(); } // Args after removing all generic args String[] nonGenericArgs = new GenericOptionsParser(args).getRemainingArgs(); if (nonGenericArgs.length == 0) { // We need at least an module name printHelpAndExit(); } // The module name should be the first arg after the generic options String moduleName = nonGenericArgs[0]; AdamModule module = findApp(moduleName); if (module == null) { System.err.println(String.format("Unknown module name='%s'", moduleName)); printHelpAndExit(); } else { String[] argsWithoutProgramName = new String[args.length - 1]; boolean nameFound = false; int i = 0; for (String arg : args) { if (!nameFound && moduleName.equals(arg)) { nameFound = true; continue; } argsWithoutProgramName[i++] = arg; } // We run the tool after removing the program name from the arguments // NOTE: the actual method that is run here is AdamModule.run() int res = ToolRunner.run(module, argsWithoutProgramName); System.exit(res); } }
From source file:edu.umn.cs.spatialHadoop.core.HilbertCurvePartitioner.java
License:Open Source License
/**
 * Partitions the input file's points with a Hilbert-curve partitioner and prints the result.
 *
 * <p>The whole input file is read as a single split. The centre point of each shape's MBR is
 * collected, a {@link HilbertCurvePartitioner} is built from those points and the shape stored
 * under the {@code mbr} parameter, and one {@code x,y,partition} CSV line is printed per point.
 *
 * @param args command-line arguments understood by {@code OperationsParams}
 * @throws IOException if the input cannot be read
 */
public static void main(String[] args) throws IOException {
  OperationsParams params = new OperationsParams(new GenericOptionsParser(args));
  Path inPath = params.getInputPath();
  long length = inPath.getFileSystem(params).getFileStatus(inPath).getLen();
  ShapeIterRecordReader reader = new ShapeIterRecordReader(params,
      new FileSplit(inPath, 0, length, new String[0]));

  final Vector<Point> points = new Vector<Point>();
  try {
    Rectangle key = reader.createKey();
    ShapeIterator shapes = reader.createValue();
    while (reader.next(key, shapes)) {
      for (Shape s : shapes) {
        points.add(s.getMBR().getCenterPoint());
      }
    }
  } finally {
    // The reader was previously never closed, leaking the underlying input stream.
    reader.close();
  }

  Rectangle inMBR = (Rectangle) OperationsParams.getShape(params, "mbr");
  HilbertCurvePartitioner hcp = new HilbertCurvePartitioner();
  hcp.createFromPoints(inMBR, points.toArray(new Point[points.size()]), 10);

  System.out.println("x,y,partition");
  for (Point p : points) {
    int partition = hcp.overlapPartition(p);
    System.out.println(p.x + "," + p.y + "," + partition);
  }
}
From source file:edu.umn.cs.spatialHadoop.core.KdTreePartitioner.java
License:Open Source License
public static void main(String[] args) throws IOException { OperationsParams params = new OperationsParams(new GenericOptionsParser(args)); Path inPath = params.getInputPath(); long length = inPath.getFileSystem(params).getFileStatus(inPath).getLen(); ShapeIterRecordReader reader = new ShapeIterRecordReader(params, new FileSplit(inPath, 0, length, new String[0])); Rectangle key = reader.createKey(); ShapeIterator shapes = reader.createValue(); final Vector<Point> points = new Vector<Point>(); while (reader.next(key, shapes)) { for (Shape s : shapes) { points.add(s.getMBR().getCenterPoint()); }/*from w ww . j a va 2 s. c om*/ } Rectangle inMBR = (Rectangle) OperationsParams.getShape(params, "mbr"); KdTreePartitioner kdp = new KdTreePartitioner(); kdp.createFromPoints(inMBR, points.toArray(new Point[points.size()]), 7); System.out.println("x,y,partition"); int[] sizes = new int[kdp.getPartitionCount() * 2]; for (Point p : points) { int partition = kdp.overlapPartition(p); //System.out.println(p.x+","+p.y+","+partition); sizes[partition]++; } for (int i = 0; i < sizes.length; i++) System.out.print(sizes[i] + ","); }
From source file:edu.umn.cs.spatialHadoop.core.QuadTreePartitioner.java
License:Open Source License
/**
 * Partitions the input file's points with a quad-tree partitioner and prints the result.
 *
 * <p>The whole input file is read as a single split. The centre point of each shape's MBR is
 * collected, and a {@link QuadTreePartitioner} is built from those points and the shape stored
 * under the {@code mbr} parameter. One {@code x,y,partition} CSV line is printed per point,
 * followed by the partition count and the WKT of every partition.
 *
 * @param args command-line arguments understood by {@code OperationsParams}
 * @throws IOException if the input cannot be read
 */
public static void main(String[] args) throws IOException {
  OperationsParams params = new OperationsParams(new GenericOptionsParser(args));
  Path inPath = params.getInputPath();
  long length = inPath.getFileSystem(params).getFileStatus(inPath).getLen();
  ShapeIterRecordReader reader = new ShapeIterRecordReader(params,
      new FileSplit(inPath, 0, length, new String[0]));

  final Vector<Point> points = new Vector<Point>();
  try {
    Rectangle key = reader.createKey();
    ShapeIterator shapes = reader.createValue();
    while (reader.next(key, shapes)) {
      for (Shape s : shapes) {
        points.add(s.getMBR().getCenterPoint());
      }
    }
  } finally {
    // The reader was previously never closed, leaking the underlying input stream.
    reader.close();
  }

  Rectangle inMBR = (Rectangle) OperationsParams.getShape(params, "mbr");
  QuadTreePartitioner qtp = new QuadTreePartitioner();
  qtp.createFromPoints(inMBR, points.toArray(new Point[points.size()]), 8);

  System.out.println("x,y,partition");
  for (Point p : points) {
    int partition = qtp.overlapPartition(p);
    System.out.println(p.x + "," + p.y + "," + partition);
  }

  System.out.println("Partition count " + qtp.getPartitionCount());
  for (int i = 0; i < qtp.getPartitionCount(); i++) {
    System.out.println(qtp.getPartitionAt(i).toWKT());
  }
}
From source file:edu.umn.cs.spatialHadoop.delaunay.DelaunayTriangulation.java
License:Open Source License
/** * @param args/*w w w.j a v a2 s.c o m*/ * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { GenericOptionsParser parser = new GenericOptionsParser(args); OperationsParams params = new OperationsParams(parser); if (!params.checkInputOutput()) { printUsage(); System.exit(1); } Path[] inFiles = params.getInputPaths(); Path outFile = params.getOutputPath(); long t1 = System.currentTimeMillis(); Job job = delaunay(inFiles, outFile, params); long t2 = System.currentTimeMillis(); System.out.println("Total time: " + (t2 - t1) + " millis"); if (job != null) { System.out.println("Map final sites: " + job.getCounters().findCounter(DelaunayCounters.MAP_FINAL_SITES).getValue()); System.out.println("Map non-final sites: " + job.getCounters().findCounter(DelaunayCounters.MAP_NONFINAL_SITES).getValue()); System.out.println("Reduce final sites: " + job.getCounters().findCounter(DelaunayCounters.REDUCE_FINAL_SITES).getValue()); System.out.println("Reduce non-final sites: " + job.getCounters().findCounter(DelaunayCounters.REDUCE_NONFINAL_SITES).getValue()); } }