List of usage examples for com.google.common.base.Preconditions#checkArgument
public static void checkArgument(boolean expression, @Nullable Object errorMessage)
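For reference, a minimal sketch of how this overload is typically called. The helper method and parameter names below are illustrative only and do not come from any of the source files that follow; the overload throws IllegalArgumentException with String.valueOf(errorMessage) as its message when the expression is false.

import com.google.common.base.Preconditions;

public class CheckArgumentExample {

    // Hypothetical helper: rejects non-positive sizes before allocating.
    static int[] newBuffer(int size) {
        Preconditions.checkArgument(size > 0, "size must be positive, got " + size);
        return new int[size];
    }

    public static void main(String[] args) {
        newBuffer(8);   // fine
        newBuffer(-1);  // throws IllegalArgumentException: size must be positive, got -1
    }
}

Note that the message object is evaluated eagerly; when building it is expensive, Guava's template overload, e.g. checkArgument(size > 0, "size must be positive, got %s", size), defers the formatting until the check actually fails.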
From source file:ShuffleBlamedForParser.java
public static void main(String[] args) throws Exception {
    File inputFile = new File(args[0]);
    Preconditions.checkArgument(inputFile.exists(), "Please provide valid file; currentFile = " + inputFile);

    Pattern BLAMED_FOR = Pattern.compile("TaskAttemptImpl:(.*) blamed for read error from (.*) at");
    //HDP 2.3.4 (as the log file format changed)
    //BLAMED_FOR = Pattern.compile("TaskAttemptImpl\\|:(.*) blamed for read error from (.*) at");
    Pattern HOST_PATTERN = Pattern.compile("task=(.*), containerHost=(.*), localityMatchType");

    Map<String, String> hostMap = Maps.newHashMap();
    Map<String, Integer> fetcherFailure = Maps.newHashMap();

    try (BufferedReader reader = new BufferedReader(new FileReader(inputFile))) {
        while (reader.ready()) {
            String line = reader.readLine();
            if (line.contains("task") && line.contains("containerHost")) {
                Matcher matcher = HOST_PATTERN.matcher(line);
                while (matcher.find()) {
                    String attempt = matcher.group(1).trim();
                    String host = matcher.group(2).trim();
                    fetcherFailure.put(attempt, 0); // Just initializing
                    hostMap.put(attempt, host);
                }
            }
        }
    }

    Set<String> hosts = new HashSet<>(hostMap.values());
    System.out.println("Unique hosts : " + hosts.size());

    Set<String> srcMachines = new HashSet<String>();
    Set<String> fetcherMachines = new HashSet<String>();

    try (BufferedReader reader = new BufferedReader(new FileReader(inputFile))) {
        try (FileWriter writer = new FileWriter(new File(".", "output.txt"))) {
            while (reader.ready()) {
                String line = reader.readLine();
                if (line.contains("blamed for read error")) {
                    Matcher matcher = BLAMED_FOR.matcher(line);
                    while (matcher.find()) {
                        String srcAttempt = matcher.group(1).trim();
                        String fetcherAttempt = matcher.group(2).trim();
                        fetcherFailure.put(fetcherAttempt, fetcherFailure.get(fetcherAttempt) + 1);
                        if (hostMap.get(srcAttempt) == null) {
                            System.out.println("ISSUE");
                        }
                        String s = "src=" + srcAttempt
                                + ", srcMachine=" + hostMap.get(srcAttempt.trim())
                                + ", fetcher=" + fetcherAttempt
                                + ", fetcherMachine=" + hostMap.get(fetcherAttempt.trim())
                                //+ ", size=" + hostMap.size()
                                + ", failure=" + fetcherFailure.get(fetcherAttempt);
                        srcMachines.add(hostMap.get(srcAttempt.trim()));
                        fetcherMachines.add(hostMap.get(fetcherAttempt.trim()));
                        System.out.println(s);
                        writer.write(s + "\n");
                    }
                }
            }
        }
    }

    // Summary
    System.out.println();
    System.out.println();
    System.out.println("Source Machines being blamed for ");
    for (String src : srcMachines) {
        System.out.println("\t" + src);
    }
    System.out.println();
    System.out.println();
    System.out.println("Fetcher Machines");
    for (String fetcher : fetcherMachines) {
        System.out.println("\t" + fetcher);
    }
}
From source file:org.gradoop.examples.sna.SNABenchmark1.java
/**
 * Runs the example program.
 *
 * Needs a (possibly HDFS) input directory that contains
 *  - nodes.json
 *  - edges.json
 *  - graphs.json
 *
 * Needs a (possibly HDFS) output directory to write the resulting graph to.
 *
 * @param args args[0] = input dir, args[1] = output dir
 * @throws Exception
 */
@SuppressWarnings({ "unchecked", "Duplicates" })
public static void main(String[] args) throws Exception {
    Preconditions.checkArgument(args.length == 2, "input dir and output dir required");

    String inputDir = args[0];
    String outputDir = args[1];

    LogicalGraph epgmDatabase = readLogicalGraph(inputDir);
    LogicalGraph result = execute(epgmDatabase);
    writeLogicalGraph(result, outputDir);
}
From source file:com.cloudera.impala.datagenerator.JsonToParquetConverter.java
public static void main(String[] args) throws IOException {
    if (!(args.length == 3 || args.length == 4)) {
        System.err.println("Usage: [--legacy_collection_format] <schema path> <json path> <output path>");
        System.exit(1);
    }

    // "Parse" args
    int i = 0;
    boolean legacyCollectionFormat = false;
    if (args.length == 4) {
        legacyCollectionFormat = true;
        ++i;
    }
    File schemaPath = new File(args[i++]);
    File jsonPath = new File(args[i++]);
    Path outputPath = new Path("file://" + args[i++]);

    // Parse Avro schema
    Schema schema = new Schema.Parser().parse(schemaPath);

    // Parse JSON file
    ObjectMapper mapper = new ObjectMapper();
    JsonNode root = mapper.readValue(jsonPath, JsonNode.class);
    Preconditions.checkArgument(root.isArray(), "Input JSON should be an array of records");

    // Set configuration to use the legacy two-level collection format, or the modern
    // three-level collection format
    Configuration conf = new Configuration();
    if (legacyCollectionFormat) {
        conf.set("parquet.avro.write-old-list-structure", "true");
    } else {
        conf.set("parquet.avro.write-old-list-structure", "false");
    }

    // Write each JSON record to the parquet file
    // TODO: this ctor is deprecated, figure out how to create AvroWriteSupport
    // object instead of using 'schema' directly
    AvroParquetWriter<GenericRecord> writer = new AvroParquetWriter<GenericRecord>(outputPath, schema,
            AvroParquetWriter.DEFAULT_COMPRESSION_CODEC_NAME, AvroParquetWriter.DEFAULT_BLOCK_SIZE,
            AvroParquetWriter.DEFAULT_PAGE_SIZE, true, conf);
    try {
        for (JsonNode jsonRecord : root) {
            System.out.println("record: " + jsonRecord);
            GenericRecord record = (GenericRecord) JsonUtil.convertToAvro(GenericData.get(), jsonRecord, schema);
            writer.write(record);
        }
    } finally {
        writer.close();
    }
}
From source file:brooklyn.demo.RiakClusterExample.java
public static void main(String[] argv) {
    List<String> args = Lists.newArrayList(argv);
    String port = CommandLineUtil.getCommandLineOption(args, "--port", "8081+");
    String location = CommandLineUtil.getCommandLineOption(args, "--location", DEFAULT_LOCATION_SPEC);
    Preconditions.checkArgument(args.isEmpty(), "Unsupported args: " + args);

    BrooklynLauncher launcher = BrooklynLauncher.newInstance()
            .application(EntitySpec.create(StartableApplication.class, RiakClusterExample.class))
            .webconsolePort(port)
            .location(location)
            .start();

    Entities.dumpInfo(launcher.getApplications());
}
From source file:kn.uni.gis.dataimport.FormatStrangeFlickrFormat.java
public static void main(String[] args) throws IOException {
    Iterable<String> readLines = filterNulls(concatLines(Files.readLines(new File(INPUT), Charsets.UTF_8)));
    // BufferedReader reader = Files.newReader(new File(INPUT), Charsets.UTF_8);
    // 1,20,12
    Files.write(Joiner.on("\n").skipNulls().join(Iterables.transform(readLines, new Function<String, String>() {

        @Override
        public String apply(String input) {
            // System.out.println(input);
            String[] split = input.split(";");
            if (equalss(split[0], "524", "567", "2284", "2720")) {
                return null;
            }
            assertNumbers(split);
            String asdf = Joiner.on("\t").join(split[0], split[19], split[20], "Z", "M", split[3], "");
            System.out.println(asdf);
            return asdf;
        }

        private void assertNumbers(String[] split) {
            if (!split[0].equals("Field1")) {
                Preconditions.checkArgument(Double.valueOf(split[19].replace(',', '.')) > 13,
                        split[19] + Arrays.toString(split));
                Preconditions.checkArgument(Double.valueOf(split[20].replace(',', '.')) > 52,
                        split[20] + Arrays.toString(split));
            }
        }
    })).replaceAll(",", "."), new File(OUTPUT), Charsets.UTF_8);
}
From source file:mvm.rya.indexing.external.ExternalIndexMain.java
public static void main(String[] args) throws Exception {
    Preconditions.checkArgument(args.length == 6, "java " + ExternalIndexMain.class.getCanonicalName()
            + " sparqlFile cbinstance cbzk cbuser cbpassword rdfTablePrefix.");

    final String sparqlFile = args[0];

    instStr = args[1];
    zooStr = args[2];
    userStr = args[3];
    passStr = args[4];
    tablePrefix = args[5];

    String queryString = FileUtils.readFileToString(new File(sparqlFile));

    // Look for Extra Indexes
    Instance inst = new ZooKeeperInstance(instStr, zooStr);
    Connector c = inst.getConnector(userStr, passStr.getBytes());

    System.out.println("Searching for Indexes");
    Map<String, String> indexTables = Maps.newLinkedHashMap();
    for (String table : c.tableOperations().list()) {
        if (table.startsWith(tablePrefix + "INDEX_")) {
            Scanner s = c.createScanner(table, new Authorizations());
            s.setRange(Range.exact(new Text("~SPARQL")));
            for (Entry<Key, Value> e : s) {
                indexTables.put(table, e.getValue().toString());
            }
        }
    }

    List<ExternalTupleSet> index = Lists.newArrayList();

    if (indexTables.isEmpty()) {
        System.out.println("No Index found");
    } else {
        for (String table : indexTables.keySet()) {
            String indexSparqlString = indexTables.get(table);
            System.out.println("====================== INDEX FOUND ======================");
            System.out.println(" table : " + table);
            System.out.println(" sparql : ");
            System.out.println(indexSparqlString);
            index.add(new AccumuloIndexSet(indexSparqlString, c, table));
        }
    }

    // Connect to Rya
    Sail s = getRyaSail();
    SailRepository repo = new SailRepository(s);
    repo.initialize();

    // Perform Query
    CountingTupleQueryResultHandler count = new CountingTupleQueryResultHandler();

    SailRepositoryConnection conn;
    if (index.isEmpty()) {
        conn = repo.getConnection();
    } else {
        ExternalProcessor processor = new ExternalProcessor(index);
        Sail processingSail = new ExternalSail(s, processor);
        SailRepository smartSailRepo = new SailRepository(processingSail);
        smartSailRepo.initialize();
        conn = smartSailRepo.getConnection();
    }

    startTime = System.currentTimeMillis();
    lastTime = startTime;
    System.out.println("Query Started");
    conn.prepareTupleQuery(QueryLanguage.SPARQL, queryString).evaluate(count);

    System.out.println("Count of Results found : " + count.i);
    System.out.println("Total query time (s) : " + (System.currentTimeMillis() - startTime) / 1000.);
}
From source file:org.apache.drill.version.Generator.java
public static void main(String[] args) {
    String toReplace = "REPLACE_WITH_DRILL_VERSION";
    String template = "/**\n"
            + " * Licensed to the Apache Software Foundation (ASF) under one\n"
            + " * or more contributor license agreements. See the NOTICE file\n"
            + " * distributed with this work for additional information\n"
            + " * regarding copyright ownership. The ASF licenses this file\n"
            + " * to you under the Apache License, Version 2.0 (the\n"
            + " * \"License\"); you may not use this file except in compliance\n"
            + " * with the License. You may obtain a copy of the License at\n"
            + " *\n"
            + " * http://www.apache.org/licenses/LICENSE-2.0\n"
            + " *\n"
            + " * Unless required by applicable law or agreed to in writing, software\n"
            + " * distributed under the License is distributed on an \"AS IS\" BASIS,\n"
            + " * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n"
            + " * See the License for the specific language governing permissions and\n"
            + " * limitations under the License.\n"
            + " */\n"
            + "package org.apache.drill.common.util;\n"
            + "\n"
            + "/**\n"
            + " * Get access to the Drill Version\n"
            + " */\n"
            + "// File generated during build, DO NOT EDIT!!\n"
            + "public class DrillVersionInfo {\n"
            + "\n"
            + " /**\n"
            + " * Get the Drill version from the Manifest file\n"
            + " * @return the version number as x.y.z\n"
            + " */\n"
            + " public static String getVersion() {\n"
            + " return \"" + toReplace + "\";\n"
            + " }\n"
            + "}\n";

    Preconditions.checkArgument(args.length == 2,
            "Two arguments expected, the first is the target java source directory for the generated file"
                    + " and the second is the Drill version.");

    File srcFile = new File(args[0] + "/org/apache/drill/common/util/DrillVersionInfo.java");
    srcFile = srcFile.getAbsoluteFile();
    File parent = srcFile.getParentFile();
    if (!parent.exists()) {
        if (!parent.mkdirs()) {
            throw new RuntimeException(
                    "Error generating Drill version info class. Couldn't mkdirs for " + parent);
        }
    }

    final FileWriter writer;
    try {
        writer = new FileWriter(srcFile);
        writer.write(template.replace(toReplace, args[1]));
        writer.close();
    } catch (IOException e) {
        throw new RuntimeException("Error generating Drill version info class. "
                + "Couldn't open source file for writing: " + srcFile);
    }
}
From source file:org.obm.push.cassandra.dao.CQLSchemaGenerator.java
public static void main(String[] args) {
    appendInfoToStdOutput(MAVEN_SECTION_BREAK);
    appendInfoToStdOutput("Generating CQL schema");
    appendInfoToStdOutput(MAVEN_SECTION_BREAK);

    Version fromVersion = Version.of(VMArgumentsUtils.integerArgumentValue("fromVersion"));
    Version toVersion = Version.of(VMArgumentsUtils.integerArgumentValue("toVersion"));

    Preconditions.checkArgument(args.length >= NUMBER_OF_ARGUMENTS, "Not enough arguments");
    String buildDirectoryArgument = args[0];
    File buildDirectory = buildDirectory(buildDirectoryArgument);
    String resourcesDirectoryArgument = args[1];
    URL resourcesURL = resourcesURL(resourcesDirectoryArgument);

    CQLSchemaGenerator cqlSchemaGenerator =
            new CQLSchemaGenerator(resourcesURL, buildDirectory, fromVersion, toVersion);
    cqlSchemaGenerator.generate();
}
From source file:com.mapr.synth.Synth.java
public static void main(String[] args)
        throws IOException, CmdLineException, InterruptedException, ExecutionException {
    final Options opts = new Options();
    CmdLineParser parser = new CmdLineParser(opts);
    try {
        parser.parseArgument(args);
    } catch (CmdLineException e) {
        System.err.println("Usage: " + "[ -count <number>G|M|K ] " + "-schema schema-file "
                + "[-quote DOUBLE_QUOTE|BACK_SLASH|OPTIMISTIC] " + "[-format JSON|TSV|CSV|XML ] "
                + "[-threads n] " + "[-output output-directory-name] ");
        throw e;
    }

    Preconditions.checkArgument(opts.threads > 0 && opts.threads <= 2000,
            "Must have at least one thread and no more than 2000");
    if (opts.threads > 1) {
        Preconditions.checkArgument(!"-".equals(opts.output),
                "If more than one thread is used, you have to use -output to set the output directory");
    }

    File outputDir = new File(opts.output);
    if (!"-".equals(opts.output)) {
        if (!outputDir.exists()) {
            Preconditions.checkState(outputDir.mkdirs(),
                    String.format("Couldn't create output directory %s", opts.output));
        }
        Preconditions.checkArgument(outputDir.exists() && outputDir.isDirectory(),
                String.format("Couldn't create directory %s", opts.output));
    }

    if (opts.schema == null) {
        throw new IllegalArgumentException("Must specify schema file using [-schema filename] option");
    }

    final SchemaSampler sampler = new SchemaSampler(opts.schema);
    final AtomicLong rowCount = new AtomicLong();

    final List<ReportingWorker> tasks = Lists.newArrayList();
    int limit = (opts.count + opts.threads - 1) / opts.threads;
    int remaining = opts.count;
    for (int i = 0; i < opts.threads; i++) {
        final int count = Math.min(limit, remaining);
        remaining -= count;
        tasks.add(new ReportingWorker(opts, sampler, rowCount, count, i));
    }

    final double t0 = System.nanoTime() * 1e-9;
    ExecutorService pool = Executors.newFixedThreadPool(opts.threads);
    ScheduledExecutorService blinker = Executors.newScheduledThreadPool(1);
    final AtomicBoolean finalRun = new AtomicBoolean(false);

    final PrintStream sideLog = new PrintStream(new FileOutputStream("side-log"));
    Runnable blink = new Runnable() {
        public double oldT;
        private long oldN;

        @Override
        public void run() {
            double t = System.nanoTime() * 1e-9;
            long n = rowCount.get();
            System.err.printf("%s\t%d\t%.1f\t%d\t%.1f\t%.3f\n", finalRun.get() ? "F" : "R", opts.threads,
                    t - t0, n, n / (t - t0), (n - oldN) / (t - oldT));
            for (ReportingWorker task : tasks) {
                ReportingWorker.ThreadReport r = task.report();
                sideLog.printf("\t%d\t%.2f\t%.2f\t%.2f\t%.1f\t%.1f\n", r.fileNumber, r.threadTime, r.userTime,
                        r.wallTime, r.rows / r.threadTime, r.rows / r.wallTime);
            }
            oldN = n;
            oldT = t;
        }
    };
    if (!"-".equals(opts.output)) {
        blinker.scheduleAtFixedRate(blink, 0, 10, TimeUnit.SECONDS);
    }

    List<Future<Integer>> results = pool.invokeAll(tasks);

    int total = 0;
    for (Future<Integer> result : results) {
        total += result.get();
    }
    Preconditions.checkState(total == opts.count, String
            .format("Expected to generate %d lines of output, but actually generated %d", opts.count, total));

    pool.shutdownNow();
    blinker.shutdownNow();
    finalRun.set(true);
    sideLog.close();
    blink.run();
}
From source file:cosmos.example.BuildingPermitsExample.java
public static void main(String[] args) throws Exception {
    BuildingPermitsExample example = new BuildingPermitsExample();
    new JCommander(example, args);

    File inputFile = new File(example.fileName);

    Preconditions.checkArgument(inputFile.exists() && inputFile.isFile() && inputFile.canRead(),
            "Expected " + example.fileName + " to be a readable file");

    String zookeepers;
    String instanceName;
    Connector connector;
    MiniAccumuloCluster mac = null;
    File macDir = null;

    // Use the MiniAccumuloCluster if requested
    if (example.useMiniAccumuloCluster) {
        macDir = Files.createTempDir();
        String password = "password";
        MiniAccumuloConfig config = new MiniAccumuloConfig(macDir, password);
        config.setNumTservers(1);

        mac = new MiniAccumuloCluster(config);
        mac.start();

        zookeepers = mac.getZooKeepers();
        instanceName = mac.getInstanceName();

        ZooKeeperInstance instance = new ZooKeeperInstance(instanceName, zookeepers);
        connector = instance.getConnector("root", new PasswordToken(password));
    } else {
        // Otherwise connect to a running instance
        zookeepers = example.zookeepers;
        instanceName = example.instanceName;

        ZooKeeperInstance instance = new ZooKeeperInstance(instanceName, zookeepers);
        connector = instance.getConnector(example.username, new PasswordToken(example.password));
    }

    // Instantiate an instance of Cosmos
    Cosmos cosmos = new CosmosImpl(zookeepers);

    // Create a definition for the data we want to load
    Store id = Store.create(connector, new Authorizations(), AscendingIndexIdentitySet.create());

    // Register the definition with Cosmos so it can track its progress.
    cosmos.register(id);

    // Load all of the data from our inputFile
    LoadBuildingPermits loader = new LoadBuildingPermits(cosmos, id, inputFile);
    loader.run();

    // Finalize the SortableResult which will prevent future writes to the data set
    cosmos.finalize(id);

    // Flush the ingest traces to the backend so we can see the results
    id.sendTraces();

    // Get back the Set of Columns that we've ingested.
    Set<Column> schema = Sets.newHashSet(cosmos.columns(id));

    log.debug("\nColumns: " + schema);

    Iterator<Column> iter = schema.iterator();
    while (iter.hasNext()) {
        Column c = iter.next();
        // Remove the internal ID field and columns that begin with CONTRACTOR_
        if (c.equals(LoadBuildingPermits.ID) || c.name().startsWith("CONTRACTOR_")) {
            iter.remove();
        }
    }

    Iterable<Index> indices = Iterables.transform(schema, new Function<Column, Index>() {
        @Override
        public Index apply(Column col) {
            return Index.define(col);
        }
    });

    // Ensure that we have locality groups set as we expect
    log.info("Ensure locality groups are set");
    id.optimizeIndices(indices);

    // Compact down the data for this SortableResult
    log.info("Issuing compaction for relevant data");
    id.consolidate();

    final int numTopValues = 10;

    // Walk through each column in the result set
    for (Column c : schema) {
        Stopwatch sw = new Stopwatch();
        sw.start();

        // Get the number of times we've seen each value in a given column
        CloseableIterable<Entry<RecordValue<?>, Long>> groupingsInColumn = cosmos.groupResults(id, c);

        log.info(c.name() + ":");

        // Iterate over the counts, collecting the top N values in each column
        TreeMap<Long, RecordValue<?>> topValues = Maps.newTreeMap();

        for (Entry<RecordValue<?>, Long> entry : groupingsInColumn) {
            if (topValues.size() == numTopValues) {
                Entry<Long, RecordValue<?>> least = topValues.pollFirstEntry();
                if (least.getKey() < entry.getValue()) {
                    topValues.put(entry.getValue(), entry.getKey());
                } else {
                    topValues.put(least.getKey(), least.getValue());
                }
            } else if (topValues.size() < numTopValues) {
                topValues.put(entry.getValue(), entry.getKey());
            }
        }

        for (Long key : topValues.descendingKeySet()) {
            log.info(topValues.get(key).value() + " occurred " + key + " times");
        }

        sw.stop();
        log.info("Took " + sw.toString() + " to run query.\n");
    }

    log.info("Deleting records");

    // Delete the records we've ingested
    if (!example.useMiniAccumuloCluster) {
        // Because I'm lazy and don't want to wait around to run the BatchDeleter when we're just going
        // to rm -rf the directory in a few secs.
        cosmos.delete(id);
    }

    // And shut down Cosmos
    cosmos.close();

    log.info("Cosmos stopped");

    // If we were using MAC, also stop that
    if (example.useMiniAccumuloCluster && null != mac) {
        mac.stop();
        if (null != macDir) {
            FileUtils.deleteDirectory(macDir);
        }
    }
}