List of usage examples for org.apache.hadoop.fs.FileSystem.mkdirs
public boolean mkdirs(Path f) throws IOException
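mkdirs creates the given directory together with any missing parent directories (similar to mkdir -p) and returns true on success. Before the project-specific examples below, here is a minimal stand-alone sketch of the call; the configuration and the /tmp path are illustrative assumptions and are not taken from any of the source files listed on this page.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MkdirsExample {
  public static void main(String[] args) throws IOException {
    // Resolve the FileSystem named by fs.defaultFS (HDFS on a cluster, the local FS otherwise).
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    // Hypothetical target directory; mkdirs creates missing parents as needed
    // and returns true on success.
    Path dir = new Path("/tmp/mkdirs-example/output");
    boolean created = fs.mkdirs(dir);
    System.out.println("mkdirs(" + dir + ") returned " + created);
  }
}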
From source file:org.apache.accumulo.server.test.randomwalk.security.TableOp.java
License:Apache License
@Override
public void visit(State state, Properties props) throws Exception {
  boolean userExists = SecurityHelper.getTabUserExists(state);
  Connector conn;
  try {
    conn = state.getInstance().getConnector(SecurityHelper.getTabUserName(state), SecurityHelper.getTabUserPass(state));
  } catch (AccumuloSecurityException ae) {
    if (ae.getErrorCode().equals(SecurityErrorCode.BAD_CREDENTIALS)) {
      if (userExists)
        throw new AccumuloException("User didn't exist when they should (or worse- password mismatch)", ae);
      else
        return;
    }
    throw new AccumuloException("Unexpected exception!", ae);
  }
  String action = props.getProperty("action", "_random");
  TablePermission tp;
  if ("_random".equalsIgnoreCase(action)) {
    Random r = new Random();
    tp = TablePermission.values()[r.nextInt(TablePermission.values().length)];
  } else {
    tp = TablePermission.valueOf(action);
  }
  boolean tableExists = SecurityHelper.getTableExists(state);
  boolean hasPerm = SecurityHelper.getTabPerm(state, SecurityHelper.getTabUserName(state), tp);
  String tableName = state.getString("secTableName");
  switch (tp) {
    case READ:
      Authorizations auths = SecurityHelper.getUserAuths(state, SecurityHelper.getTabUserName(state));
      boolean canRead = SecurityHelper.getTabPerm(state, SecurityHelper.getTabUserName(state), TablePermission.READ);
      try {
        Scanner scan = conn.createScanner(tableName,
            conn.securityOperations().getUserAuthorizations(SecurityHelper.getTabUserName(state)));
        int seen = 0;
        Iterator<Entry<Key,Value>> iter = scan.iterator();
        while (iter.hasNext()) {
          Entry<Key,Value> entry = iter.next();
          Key k = entry.getKey();
          seen++;
          if (!auths.contains(k.getColumnVisibilityData()))
            throw new AccumuloException("Got data I should not be capable of seeing: " + k + " table " + tableName);
        }
        if (!canRead)
          throw new AccumuloException("Was able to read when I shouldn't have had the perm with connection user "
              + conn.whoami() + " table " + tableName);
        for (Entry<String,Integer> entry : SecurityHelper.getAuthsMap(state).entrySet()) {
          if (auths.contains(entry.getKey().getBytes()))
            seen = seen - entry.getValue();
        }
        if (seen != 0)
          throw new AccumuloException("Got mismatched amounts of data");
      } catch (TableNotFoundException tnfe) {
        if (tableExists)
          throw new AccumuloException("Accumulo and test suite out of sync: table " + tableName, tnfe);
        return;
      } catch (AccumuloSecurityException ae) {
        if (ae.getErrorCode().equals(SecurityErrorCode.PERMISSION_DENIED)) {
          if (canRead)
            throw new AccumuloException("Table read permission out of sync with Accumulo: table " + tableName, ae);
          else
            return;
        }
        throw new AccumuloException("Unexpected exception!", ae);
      } catch (RuntimeException re) {
        if (re.getCause() instanceof AccumuloSecurityException
            && ((AccumuloSecurityException) re.getCause()).getErrorCode().equals(SecurityErrorCode.PERMISSION_DENIED)) {
          if (canRead)
            throw new AccumuloException("Table read permission out of sync with Accumulo: table " + tableName, re.getCause());
          else
            return;
        }
        throw new AccumuloException("Unexpected exception!", re);
      }
      break;
    case WRITE:
      String key = SecurityHelper.getLastKey(state) + "1";
      Mutation m = new Mutation(new Text(key));
      for (String s : SecurityHelper.getAuthsArray()) {
        m.put(new Text(), new Text(), new ColumnVisibility(s), new Value("value".getBytes()));
      }
      BatchWriter writer;
      try {
        writer = conn.createBatchWriter(tableName, 9000l, 0l, 1);
      } catch (TableNotFoundException tnfe) {
        if (tableExists)
          throw new AccumuloException("Table didn't exist when it should have: " + tableName);
        return;
      }
      boolean works = true;
      try {
        writer.addMutation(m);
      } catch (MutationsRejectedException mre) {
        throw new AccumuloException("Mutation exception!", mre);
      }
      if (works)
        for (String s : SecurityHelper.getAuthsArray())
          SecurityHelper.increaseAuthMap(state, s, 1);
      break;
    case BULK_IMPORT:
      key = SecurityHelper.getLastKey(state) + "1";
      SortedSet<Key> keys = new TreeSet<Key>();
      for (String s : SecurityHelper.getAuthsArray()) {
        Key k = new Key(key, "", "", s);
        keys.add(k);
      }
      Path dir = new Path("/tmp", "bulk_" + UUID.randomUUID().toString());
      Path fail = new Path(dir.toString() + "_fail");
      FileSystem fs = SecurityHelper.getFs(state);
      FileSKVWriter f = FileOperations.getInstance().openWriter(dir + "/securityBulk." + RFile.EXTENSION, fs,
          fs.getConf(), AccumuloConfiguration.getDefaultConfiguration());
      f.startDefaultLocalityGroup();
      fs.mkdirs(fail);
      for (Key k : keys)
        f.append(k, new Value("Value".getBytes()));
      f.close();
      try {
        conn.tableOperations().importDirectory(tableName, dir.toString(), fail.toString(), true);
      } catch (TableNotFoundException tnfe) {
        if (tableExists)
          throw new AccumuloException("Table didn't exist when it should have: " + tableName);
        return;
      } catch (AccumuloSecurityException ae) {
        if (ae.getErrorCode().equals(SecurityErrorCode.PERMISSION_DENIED)) {
          if (hasPerm)
            throw new AccumuloException("Bulk Import failed when it should have worked: " + tableName);
          return;
        }
        throw new AccumuloException("Unexpected exception!", ae);
      }
      for (String s : SecurityHelper.getAuthsArray())
        SecurityHelper.increaseAuthMap(state, s, 1);
      fs.delete(dir, true);
      fs.delete(fail, true);
      if (!hasPerm)
        throw new AccumuloException("Bulk Import succeeded when it should have failed: " + dir + " table " + tableName);
      break;
    case ALTER_TABLE:
      AlterTable.renameTable(conn, state, tableName, tableName + "plus", hasPerm, tableExists);
      break;
    case GRANT:
      props.setProperty("task", "grant");
      props.setProperty("perm", "random");
      props.setProperty("source", "table");
      props.setProperty("target", "system");
      AlterTablePerm.alter(state, props);
      break;
    case DROP_TABLE:
      props.setProperty("source", "table");
      DropTable.dropTable(state, props);
      break;
  }
}
From source file:org.apache.accumulo.server.test.randomwalk.shard.BulkInsert.java
License:Apache License
@Override
public void visit(State state, Properties props) throws Exception {
  String indexTableName = (String) state.get("indexTableName");
  String dataTableName = (String) state.get("docTableName");
  int numPartitions = (Integer) state.get("numPartitions");
  Random rand = (Random) state.get("rand");
  long nextDocID = (Long) state.get("nextDocID");
  int minInsert = Integer.parseInt(props.getProperty("minInsert"));
  int maxInsert = Integer.parseInt(props.getProperty("maxInsert"));
  int numToInsert = rand.nextInt((maxInsert - minInsert)) + minInsert;
  int maxSplits = Integer.parseInt(props.getProperty("maxSplits"));
  Configuration conf = CachedConfiguration.getInstance();
  FileSystem fs = FileSystem.get(conf);
  String rootDir = "/tmp/shard_bulk/" + dataTableName;
  fs.mkdirs(new Path(rootDir));
  BatchWriter dataWriter = new SeqfileBatchWriter(conf, fs, rootDir + "/data.seq");
  BatchWriter indexWriter = new SeqfileBatchWriter(conf, fs, rootDir + "/index.seq");
  for (int i = 0; i < numToInsert; i++) {
    String docID = Insert.insertRandomDocument(nextDocID++, dataWriter, indexWriter, indexTableName,
        dataTableName, numPartitions, rand);
    log.debug("Bulk inserting document " + docID);
  }
  state.set("nextDocID", new Long(nextDocID));
  dataWriter.close();
  indexWriter.close();
  sort(state, fs, dataTableName, rootDir + "/data.seq", rootDir + "/data_bulk", rootDir + "/data_work", maxSplits);
  sort(state, fs, indexTableName, rootDir + "/index.seq", rootDir + "/index_bulk", rootDir + "/index_work", maxSplits);
  bulkImport(fs, state, dataTableName, rootDir, "data");
  bulkImport(fs, state, indexTableName, rootDir, "index");
  fs.delete(new Path(rootDir), true);
}
From source file:org.apache.accumulo.server.test.randomwalk.shard.BulkInsert.java
License:Apache License
private void bulkImport(FileSystem fs, State state, String tableName, String rootDir, String prefix) throws Exception {
  while (true) {
    String bulkDir = rootDir + "/" + prefix + "_bulk";
    String failDir = rootDir + "/" + prefix + "_failure";
    Path failPath = new Path(failDir);
    fs.delete(failPath, true);
    fs.mkdirs(failPath);
    state.getConnector().tableOperations().importDirectory(tableName, bulkDir, failDir, true);
    FileStatus[] failures = fs.listStatus(failPath);
    if (failures != null && failures.length > 0) {
      log.warn("Failed to bulk import some files, retrying ");
      for (FileStatus failure : failures) {
        if (!failure.getPath().getName().endsWith(".seq"))
          fs.rename(failure.getPath(), new Path(new Path(bulkDir), failure.getPath().getName()));
        else
          log.debug("Ignoring " + failure.getPath());
      }
      UtilWaitThread.sleep(3000);
    } else
      break;
  }
}
From source file:org.apache.accumulo.spark.CopyPlus5K.java
License:Apache License
public static void main(String[] args) throws Exception {
  if ((!args[0].equals("batch") && !args[0].equals("bulk")) || args[1].isEmpty()) {
    System.out.println("Usage: ./run.sh [batch|bulk] /path/to/accumulo-client.properties");
    System.exit(1);
  }
  // Read client properties from file
  final Properties props = Accumulo.newClientProperties().from(args[1]).build();
  cleanupAndCreateTables(props);
  SparkConf conf = new SparkConf();
  conf.setAppName("CopyPlus5K");
  // KryoSerializer is needed for serializing Accumulo Key when partitioning data for bulk import
  conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
  conf.registerKryoClasses(new Class[] {Key.class, Value.class, Properties.class});
  JavaSparkContext sc = new JavaSparkContext(conf);
  Job job = Job.getInstance();
  // Read input from Accumulo
  AccumuloInputFormat.configure().clientProperties(props).table(inputTable).store(job);
  JavaPairRDD<Key,Value> data = sc.newAPIHadoopRDD(job.getConfiguration(), AccumuloInputFormat.class,
      Key.class, Value.class);
  // Add 5K to all values
  JavaPairRDD<Key,Value> dataPlus5K = data.mapValues(v -> new Value("" + (Integer.parseInt(v.toString()) + 5_000)));
  if (args[0].equals("batch")) {
    // Write output using batch writer
    dataPlus5K.foreachPartition(iter -> {
      // Intentionally created an Accumulo client for each partition to avoid attempting to
      // serialize it and send it to each remote process.
      try (AccumuloClient client = Accumulo.newClient().from(props).build();
          BatchWriter bw = client.createBatchWriter(outputTable)) {
        iter.forEachRemaining(kv -> {
          Key key = kv._1;
          Value val = kv._2;
          Mutation m = new Mutation(key.getRow());
          m.at().family(key.getColumnFamily()).qualifier(key.getColumnQualifier())
              .visibility(key.getColumnVisibility()).timestamp(key.getTimestamp()).put(val);
          try {
            bw.addMutation(m);
          } catch (MutationsRejectedException e) {
            e.printStackTrace();
          }
        });
      }
    });
  } else if (args[0].equals("bulk")) {
    // Write output using bulk import
    // Create HDFS directory for bulk import
    FileSystem hdfs = FileSystem.get(new Configuration());
    hdfs.mkdirs(rootPath);
    Path outputDir = new Path(rootPath.toString() + "/output");
    // Write Spark output to HDFS
    AccumuloFileOutputFormat.configure().outputPath(outputDir).store(job);
    Partitioner partitioner = new AccumuloRangePartitioner("3", "7");
    JavaPairRDD<Key,Value> partData = dataPlus5K.repartitionAndSortWithinPartitions(partitioner);
    partData.saveAsNewAPIHadoopFile(outputDir.toString(), Key.class, Value.class, AccumuloFileOutputFormat.class);
    // Bulk import into Accumulo
    try (AccumuloClient client = Accumulo.newClient().from(props).build()) {
      client.tableOperations().importDirectory(outputDir.toString()).to(outputTable).load();
    }
  } else {
    System.out.println("Unknown method to write output: " + args[0]);
    System.exit(1);
  }
}
From source file:org.apache.accumulo.start.classloader.vfs.providers.ReadOnlyHdfsFileProviderTest.java
License:Apache License
private FileObject createTestFile(FileSystem hdfs) throws IOException {
  // Create the directory
  hdfs.mkdirs(DIR1_PATH);
  FileObject dir = manager.resolveFile(TEST_DIR1);
  Assert.assertNotNull(dir);
  Assert.assertTrue(dir.exists());
  Assert.assertTrue(dir.getType().equals(FileType.FOLDER));
  // Create the file in the directory
  hdfs.create(FILE1_PATH).close();
  FileObject f = manager.resolveFile(TEST_FILE1);
  Assert.assertNotNull(f);
  Assert.assertTrue(f.exists());
  Assert.assertTrue(f.getType().equals(FileType.FILE));
  return f;
}
From source file:org.apache.accumulo.test.BulkImportDirectory.java
License:Apache License
public static void main(String[] args)
    throws IOException, AccumuloException, AccumuloSecurityException, TableNotFoundException {
  final FileSystem fs = FileSystem.get(CachedConfiguration.getInstance());
  Opts opts = new Opts();
  if (args.length == 5) {
    System.err.println("Deprecated syntax for BulkImportDirectory, please use the new style (see --help)");
    final String user = args[0];
    final byte[] pass = args[1].getBytes(UTF_8);
    final String tableName = args[2];
    final String dir = args[3];
    final String failureDir = args[4];
    final Path failureDirPath = new Path(failureDir);
    fs.delete(failureDirPath, true);
    fs.mkdirs(failureDirPath);
    HdfsZooInstance.getInstance().getConnector(user, new PasswordToken(pass)).tableOperations()
        .importDirectory(tableName, dir, failureDir, false);
  } else {
    opts.parseArgs(BulkImportDirectory.class.getName(), args);
    fs.delete(new Path(opts.failures), true);
    fs.mkdirs(new Path(opts.failures));
    opts.getConnector().tableOperations().importDirectory(opts.getTableName(), opts.source, opts.failures, false);
  }
}
From source file:org.apache.accumulo.test.BulkImportMonitoringIT.java
License:Apache License
@Test
public void test() throws Exception {
  getCluster().getClusterControl().start(ServerType.MONITOR);
  final Connector c = getConnector();
  final String tableName = getUniqueNames(1)[0];
  c.tableOperations().create(tableName);
  c.tableOperations().setProperty(tableName, Property.TABLE_MAJC_RATIO.getKey(), "1");
  // splits to slow down bulk import
  SortedSet<Text> splits = new TreeSet<>();
  for (int i = 1; i < 0xf; i++) {
    splits.add(new Text(Integer.toHexString(i)));
  }
  c.tableOperations().addSplits(tableName, splits);
  MasterMonitorInfo stats = getCluster().getMasterMonitorInfo();
  assertEquals(1, stats.tServerInfo.size());
  assertEquals(0, stats.bulkImports.size());
  assertEquals(0, stats.tServerInfo.get(0).bulkImports.size());
  log.info("Creating lots of bulk import files");
  final FileSystem fs = getCluster().getFileSystem();
  final Path basePath = getCluster().getTemporaryPath();
  CachedConfiguration.setInstance(fs.getConf());
  final Path base = new Path(basePath, "testBulkLoad" + tableName);
  fs.delete(base, true);
  fs.mkdirs(base);
  ExecutorService es = Executors.newFixedThreadPool(5);
  List<Future<Pair<String,String>>> futures = new ArrayList<>();
  for (int i = 0; i < 10; i++) {
    final int which = i;
    futures.add(es.submit(new Callable<Pair<String,String>>() {
      @Override
      public Pair<String,String> call() throws Exception {
        Path bulkFailures = new Path(base, "failures" + which);
        Path files = new Path(base, "files" + which);
        fs.mkdirs(bulkFailures);
        fs.mkdirs(files);
        for (int i = 0; i < 10; i++) {
          FileSKVWriter writer = FileOperations.getInstance().newWriterBuilder()
              .forFile(files.toString() + "/bulk_" + i + "." + RFile.EXTENSION, fs, fs.getConf())
              .withTableConfiguration(AccumuloConfiguration.getDefaultConfiguration()).build();
          writer.startDefaultLocalityGroup();
          for (int j = 0x100; j < 0xfff; j += 3) {
            writer.append(new Key(Integer.toHexString(j)), new Value(new byte[0]));
          }
          writer.close();
        }
        return new Pair<>(files.toString(), bulkFailures.toString());
      }
    }));
  }
  List<Pair<String,String>> dirs = new ArrayList<>();
  for (Future<Pair<String,String>> f : futures) {
    dirs.add(f.get());
  }
  log.info("Importing");
  long now = System.currentTimeMillis();
  List<Future<Object>> errs = new ArrayList<>();
  for (Pair<String,String> entry : dirs) {
    final String dir = entry.getFirst();
    final String err = entry.getSecond();
    errs.add(es.submit(new Callable<Object>() {
      @Override
      public Object call() throws Exception {
        c.tableOperations().importDirectory(tableName, dir, err, false);
        return null;
      }
    }));
  }
  es.shutdown();
  while (!es.isTerminated()
      && stats.bulkImports.size() + stats.tServerInfo.get(0).bulkImports.size() == 0) {
    es.awaitTermination(10, TimeUnit.MILLISECONDS);
    stats = getCluster().getMasterMonitorInfo();
  }
  log.info(stats.bulkImports.toString());
  assertTrue(stats.bulkImports.size() > 0);
  // look for exception
  for (Future<Object> err : errs) {
    err.get();
  }
  es.awaitTermination(2, TimeUnit.MINUTES);
  assertTrue(es.isTerminated());
  log.info(String.format("Completed in %.2f seconds", (System.currentTimeMillis() - now) / 1000.));
}
From source file:org.apache.accumulo.test.BulkImportSequentialRowsIT.java
License:Apache License
@Test
public void testBulkImportFailure() throws Exception {
  String tableName = getUniqueNames(1)[0];
  TableOperations to = getConnector().tableOperations();
  to.create(tableName);
  FileSystem fs = getFileSystem();
  Path rootPath = new Path(fs.makeQualified(getUsableDir()), getClass().getSimpleName());
  log.info("Writing to {}", rootPath);
  if (fs.exists(rootPath)) {
    assertTrue(fs.delete(rootPath, true));
  }
  assertTrue(fs.mkdirs(rootPath));
  Path bulk = new Path(rootPath, "bulk");
  log.info("bulk: {}", bulk);
  assertTrue(fs.mkdirs(bulk));
  Path err = new Path(rootPath, "err");
  log.info("err: {}", err);
  assertTrue(fs.mkdirs(bulk));
  assertTrue(fs.mkdirs(err));
  Path rfile = new Path(bulk, "file.rf");
  log.info("Generating RFile {}", rfile.toUri().toString());
  GenerateSequentialRFile.main(new String[] {"-f", rfile.toUri().toString(), "-nr", Long.toString(NR), "-nv", Long.toString(NV)});
  assertTrue("Expected that " + rfile + " exists, but it does not", fs.exists(rfile));
  FsShell fsShell = new FsShell(fs.getConf());
  assertEquals("Failed to chmod " + rootPath, 0,
      fsShell.run(new String[] {"-chmod", "-R", "777", rootPath.toString()}));
  // Add some splits
  to.addSplits(tableName, getSplits());
  // Then import a single rfile to all the tablets, hoping that we get a failure to import because of the
  // balancer moving tablets around and then we get to verify that the bug is actually fixed.
  to.importDirectory(tableName, bulk.toString(), err.toString(), false);
  // The bug is that some tablets don't get imported into.
  assertEquals(NR * NV, Iterables.size(getConnector().createScanner(tableName, Authorizations.EMPTY)));
}
From source file:org.apache.accumulo.test.BulkImportVolumeIT.java
License:Apache License
@Test
public void testBulkImportFailure() throws Exception {
  String tableName = getUniqueNames(1)[0];
  TableOperations to = getConnector().tableOperations();
  to.create(tableName);
  FileSystem fs = getFileSystem();
  Path rootPath = new Path(cluster.getTemporaryPath(), getClass().getName());
  Path bulk = new Path(rootPath, "bulk");
  log.info("bulk: {}", bulk);
  if (fs.exists(bulk)) {
    fs.delete(bulk, true);
  }
  assertTrue(fs.mkdirs(bulk));
  Path err = new Path(rootPath, "err");
  log.info("err: {}", err);
  if (fs.exists(err)) {
    fs.delete(err, true);
  }
  assertTrue(fs.mkdirs(err));
  Path bogus = new Path(bulk, "bogus.rf");
  fs.create(bogus).close();
  log.info("bogus: {}", bogus);
  assertTrue(fs.exists(bogus));
  log.info("Importing {} into {} with failures directory {}", bulk, tableName, err);
  to.importDirectory(tableName, bulk.toString(), err.toString(), false);
  assertEquals(1, fs.listStatus(err).length);
}
From source file:org.apache.accumulo.test.CountNameNodeOpsBulkIT.java
License:Apache License
@Test
public void compareOldNewBulkImportTest() throws Exception {
  try (AccumuloClient c = Accumulo.newClient().from(getClientProperties()).build()) {
    getCluster().getClusterControl().kill(ServerType.GARBAGE_COLLECTOR, "localhost");
    final String tableName = getUniqueNames(1)[0];
    c.tableOperations().create(tableName);
    // turn off compactions
    c.tableOperations().setProperty(tableName, Property.TABLE_MAJC_RATIO.getKey(), "2000");
    c.tableOperations().setProperty(tableName, Property.TABLE_FILE_MAX.getKey(), "2000");
    // splits to slow down bulk import
    SortedSet<Text> splits = new TreeSet<>();
    for (int i = 1; i < 0xf; i++) {
      splits.add(new Text(Integer.toHexString(i)));
    }
    c.tableOperations().addSplits(tableName, splits);
    MasterMonitorInfo stats = getCluster().getMasterMonitorInfo();
    assertEquals(1, stats.tServerInfo.size());
    log.info("Creating lots of bulk import files");
    final FileSystem fs = getCluster().getFileSystem();
    final Path basePath = getCluster().getTemporaryPath();
    final Path base = new Path(basePath, "testBulkLoad" + tableName);
    fs.delete(base, true);
    fs.mkdirs(base);
    ExecutorService es = Executors.newFixedThreadPool(5);
    List<Future<String>> futures = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
      final int which = i;
      futures.add(es.submit(() -> {
        Path files = new Path(base, "files" + which);
        fs.mkdirs(files);
        for (int i1 = 0; i1 < 100; i1++) {
          FileSKVWriter writer = FileOperations.getInstance().newWriterBuilder()
              .forFile(files + "/bulk_" + i1 + "." + RFile.EXTENSION, fs, fs.getConf(),
                  CryptoServiceFactory.newDefaultInstance())
              .withTableConfiguration(DefaultConfiguration.getInstance()).build();
          writer.startDefaultLocalityGroup();
          for (int j = 0x100; j < 0xfff; j += 3) {
            writer.append(new Key(Integer.toHexString(j)), new Value(new byte[0]));
          }
          writer.close();
        }
        return files.toString();
      }));
    }
    List<String> dirs = new ArrayList<>();
    for (Future<String> f : futures) {
      dirs.add(f.get());
    }
    log.info("Importing");
    long startOps = getStat(getStats(), "FileInfoOps");
    long now = System.currentTimeMillis();
    List<Future<Object>> errs = new ArrayList<>();
    for (String dir : dirs) {
      errs.add(es.submit(() -> {
        c.tableOperations().importDirectory(dir).to(tableName).load();
        return null;
      }));
    }
    for (Future<Object> err : errs) {
      err.get();
    }
    es.shutdown();
    es.awaitTermination(2, TimeUnit.MINUTES);
    log.info(String.format("Completed in %.2f seconds", (System.currentTimeMillis() - now) / 1000.));
    sleepUninterruptibly(30, TimeUnit.SECONDS);
    Map<?,?> map = getStats();
    map.forEach((k, v) -> {
      try {
        if (v != null && Double.parseDouble(v.toString()) > 0.0)
          log.debug("{}:{}", k, v);
      } catch (NumberFormatException e) {
        // only looking for numbers
      }
    });
    long getFileInfoOpts = getStat(map, "FileInfoOps") - startOps;
    log.info("New bulk import used {} opts, vs old using 2060", getFileInfoOpts);
    // counts for old bulk import:
    // Expected number of FileInfoOps was between 1000 and 2100
    // new bulk import is way better :)
    assertEquals("unexpected number of FileInfoOps", 20, getFileInfoOpts);
  }
}