List of usage examples for org.apache.hadoop.fs FileSystem getConf
@Override
public Configuration getConf()
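getConf() returns the Hadoop Configuration the FileSystem instance was initialized with (FileSystem extends Configured), so it can be handed to other APIs that must run with the same settings. Before the full examples, a minimal sketch of the call itself; this is an illustrative standalone snippet, not taken from the sources below, and it assumes a default local Configuration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class GetConfExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // getConf() hands back the Configuration this FileSystem was created with,
        // so downstream code (file writers, FileUtil.copy, etc.) reuses identical settings.
        Configuration fsConf = fs.getConf();
        System.out.println(fsConf.get("fs.defaultFS"));
        fs.close();
    }
}

The Accumulo tests below use the same pattern wherever an RFile writer or FileUtil.copy needs the Configuration of the cluster's filesystem.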
From source file:org.apache.accumulo.test.CountNameNodeOpsBulkIT.java
License:Apache License
@Test
public void compareOldNewBulkImportTest() throws Exception {
    try (AccumuloClient c = Accumulo.newClient().from(getClientProperties()).build()) {
        getCluster().getClusterControl().kill(ServerType.GARBAGE_COLLECTOR, "localhost");
        final String tableName = getUniqueNames(1)[0];
        c.tableOperations().create(tableName);
        // turn off compactions
        c.tableOperations().setProperty(tableName, Property.TABLE_MAJC_RATIO.getKey(), "2000");
        c.tableOperations().setProperty(tableName, Property.TABLE_FILE_MAX.getKey(), "2000");
        // splits to slow down bulk import
        SortedSet<Text> splits = new TreeSet<>();
        for (int i = 1; i < 0xf; i++) {
            splits.add(new Text(Integer.toHexString(i)));
        }
        c.tableOperations().addSplits(tableName, splits);

        MasterMonitorInfo stats = getCluster().getMasterMonitorInfo();
        assertEquals(1, stats.tServerInfo.size());

        log.info("Creating lots of bulk import files");
        final FileSystem fs = getCluster().getFileSystem();
        final Path basePath = getCluster().getTemporaryPath();
        final Path base = new Path(basePath, "testBulkLoad" + tableName);
        fs.delete(base, true);
        fs.mkdirs(base);

        ExecutorService es = Executors.newFixedThreadPool(5);
        List<Future<String>> futures = new ArrayList<>();
        for (int i = 0; i < 10; i++) {
            final int which = i;
            futures.add(es.submit(() -> {
                Path files = new Path(base, "files" + which);
                fs.mkdirs(files);
                for (int i1 = 0; i1 < 100; i1++) {
                    // fs.getConf() supplies the filesystem's Configuration to the RFile writer
                    FileSKVWriter writer = FileOperations.getInstance().newWriterBuilder()
                        .forFile(files + "/bulk_" + i1 + "." + RFile.EXTENSION, fs, fs.getConf(),
                            CryptoServiceFactory.newDefaultInstance())
                        .withTableConfiguration(DefaultConfiguration.getInstance()).build();
                    writer.startDefaultLocalityGroup();
                    for (int j = 0x100; j < 0xfff; j += 3) {
                        writer.append(new Key(Integer.toHexString(j)), new Value(new byte[0]));
                    }
                    writer.close();
                }
                return files.toString();
            }));
        }
        List<String> dirs = new ArrayList<>();
        for (Future<String> f : futures) {
            dirs.add(f.get());
        }

        log.info("Importing");
        long startOps = getStat(getStats(), "FileInfoOps");
        long now = System.currentTimeMillis();
        List<Future<Object>> errs = new ArrayList<>();
        for (String dir : dirs) {
            errs.add(es.submit(() -> {
                c.tableOperations().importDirectory(dir).to(tableName).load();
                return null;
            }));
        }
        for (Future<Object> err : errs) {
            err.get();
        }
        es.shutdown();
        es.awaitTermination(2, TimeUnit.MINUTES);
        log.info(String.format("Completed in %.2f seconds", (System.currentTimeMillis() - now) / 1000.));
        sleepUninterruptibly(30, TimeUnit.SECONDS);

        Map<?,?> map = getStats();
        map.forEach((k, v) -> {
            try {
                if (v != null && Double.parseDouble(v.toString()) > 0.0)
                    log.debug("{}:{}", k, v);
            } catch (NumberFormatException e) {
                // only looking for numbers
            }
        });
        long getFileInfoOpts = getStat(map, "FileInfoOps") - startOps;
        log.info("New bulk import used {} opts, vs old using 2060", getFileInfoOpts);
        // counts for old bulk import:
        // Expected number of FileInfoOps was between 1000 and 2100
        // new bulk import is way better :)
        assertEquals("unexpected number of FileInfoOps", 20, getFileInfoOpts);
    }
}
From source file:org.apache.accumulo.test.functional.BulkLoadIT.java
License:Apache License
private String writeData(String file, AccumuloConfiguration aconf, int s, int e) throws Exception {
    FileSystem fs = getCluster().getFileSystem();
    String filename = file + RFile.EXTENSION;
    // fs.getConf() passes the filesystem's Hadoop Configuration to the writer builder;
    // try-with-resources closes the writer even if append fails
    try (FileSKVWriter writer = FileOperations.getInstance().newWriterBuilder()
        .forFile(filename, fs, fs.getConf()).withTableConfiguration(aconf).build()) {
        writer.startDefaultLocalityGroup();
        for (int i = s; i <= e; i++) {
            writer.append(new Key(new Text(String.format("%04d", i))),
                new Value(Integer.toString(i).getBytes(UTF_8)));
        }
    }
    return hash(filename);
}
From source file:org.apache.accumulo.test.GetFileInfoBulkIT.java
License:Apache License
@Test
public void test() throws Exception {
    final Connector c = getConnector();
    getCluster().getClusterControl().kill(ServerType.GARBAGE_COLLECTOR, "localhost");
    final String tableName = getUniqueNames(1)[0];
    c.tableOperations().create(tableName);
    // turn off compactions
    c.tableOperations().setProperty(tableName, Property.TABLE_MAJC_RATIO.getKey(), "2000");
    c.tableOperations().setProperty(tableName, Property.TABLE_FILE_MAX.getKey(), "2000");
    // splits to slow down bulk import
    SortedSet<Text> splits = new TreeSet<>();
    for (int i = 1; i < 0xf; i++) {
        splits.add(new Text(Integer.toHexString(i)));
    }
    c.tableOperations().addSplits(tableName, splits);

    MasterMonitorInfo stats = getCluster().getMasterMonitorInfo();
    assertEquals(1, stats.tServerInfo.size());

    log.info("Creating lots of bulk import files");
    final FileSystem fs = getCluster().getFileSystem();
    final Path basePath = getCluster().getTemporaryPath();
    // seed the shared configuration cache with this filesystem's Configuration
    CachedConfiguration.setInstance(fs.getConf());
    final Path base = new Path(basePath, "testBulkLoad" + tableName);
    fs.delete(base, true);
    fs.mkdirs(base);

    ExecutorService es = Executors.newFixedThreadPool(5);
    List<Future<Pair<String,String>>> futures = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
        final int which = i;
        futures.add(es.submit(new Callable<Pair<String,String>>() {
            @Override
            public Pair<String,String> call() throws Exception {
                Path bulkFailures = new Path(base, "failures" + which);
                Path files = new Path(base, "files" + which);
                fs.mkdirs(bulkFailures);
                fs.mkdirs(files);
                for (int i = 0; i < 100; i++) {
                    FileSKVWriter writer = FileOperations.getInstance().newWriterBuilder()
                        .forFile(files.toString() + "/bulk_" + i + "." + RFile.EXTENSION, fs, fs.getConf())
                        .withTableConfiguration(AccumuloConfiguration.getDefaultConfiguration()).build();
                    writer.startDefaultLocalityGroup();
                    for (int j = 0x100; j < 0xfff; j += 3) {
                        writer.append(new Key(Integer.toHexString(j)), new Value(new byte[0]));
                    }
                    writer.close();
                }
                return new Pair<>(files.toString(), bulkFailures.toString());
            }
        }));
    }
    List<Pair<String,String>> dirs = new ArrayList<>();
    for (Future<Pair<String,String>> f : futures) {
        dirs.add(f.get());
    }

    log.info("Importing");
    long startOps = getOpts();
    long now = System.currentTimeMillis();
    List<Future<Object>> errs = new ArrayList<>();
    for (Pair<String,String> entry : dirs) {
        final String dir = entry.getFirst();
        final String err = entry.getSecond();
        errs.add(es.submit(new Callable<Object>() {
            @Override
            public Object call() throws Exception {
                c.tableOperations().importDirectory(tableName, dir, err, false);
                return null;
            }
        }));
    }
    for (Future<Object> err : errs) {
        err.get();
    }
    es.shutdown();
    es.awaitTermination(2, TimeUnit.MINUTES);
    log.info(String.format("Completed in %.2f seconds", (System.currentTimeMillis() - now) / 1000.));
    Uninterruptibles.sleepUninterruptibly(30, TimeUnit.SECONDS);

    long getFileInfoOpts = getOpts() - startOps;
    log.info("# opts: {}", getFileInfoOpts);
    assertTrue("unexpected number of getFileOps", getFileInfoOpts < 2100 && getFileInfoOpts > 1000);
}
From source file:org.apache.accumulo.test.ImportExportIT.java
License:Apache License
@Test
public void testExportImportThenScan() throws Exception {
    Connector conn = getConnector();
    String[] tableNames = getUniqueNames(2);
    String srcTable = tableNames[0], destTable = tableNames[1];
    conn.tableOperations().create(srcTable);

    BatchWriter bw = conn.createBatchWriter(srcTable, new BatchWriterConfig());
    for (int row = 0; row < 1000; row++) {
        Mutation m = new Mutation(Integer.toString(row));
        for (int col = 0; col < 100; col++) {
            m.put(Integer.toString(col), "", Integer.toString(col * 2));
        }
        bw.addMutation(m);
    }
    bw.close();

    conn.tableOperations().compact(srcTable, null, null, true, true);

    // Make a directory to hold the export and import directories.
    // It must exist on the filesystem the cluster is running on.
    FileSystem fs = cluster.getFileSystem();
    Path tmp = cluster.getTemporaryPath();
    log.info("Using FileSystem: " + fs);
    Path baseDir = new Path(tmp, getClass().getName());
    if (fs.exists(baseDir)) {
        log.info("{} exists on filesystem, deleting", baseDir);
        assertTrue("Failed to delete " + baseDir, fs.delete(baseDir, true));
    }
    log.info("Creating {}", baseDir);
    assertTrue("Failed to create " + baseDir, fs.mkdirs(baseDir));
    Path exportDir = new Path(baseDir, "export");
    Path importDir = new Path(baseDir, "import");
    for (Path p : new Path[] {exportDir, importDir}) {
        assertTrue("Failed to create " + p, fs.mkdirs(p));
    }
    log.info("Exporting table to {}", exportDir);
    log.info("Importing table from {}", importDir);

    // Offline the table
    conn.tableOperations().offline(srcTable, true);
    // Then export it
    conn.tableOperations().exportTable(srcTable, exportDir.toString());

    // Make sure the distcp.txt file that exporttable creates is available
    Path distcp = new Path(exportDir, "distcp.txt");
    Assert.assertTrue("Distcp file doesn't exist", fs.exists(distcp));
    FSDataInputStream is = fs.open(distcp);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));

    // Copy each file that was exported to the import directory,
    // reusing the filesystem's own Configuration via fs.getConf()
    String line;
    while (null != (line = reader.readLine())) {
        Path p = new Path(line.substring(5));
        Assert.assertTrue("File doesn't exist: " + p, fs.exists(p));
        Path dest = new Path(importDir, p.getName());
        Assert.assertFalse("Did not expect " + dest + " to exist", fs.exists(dest));
        FileUtil.copy(fs, p, fs, dest, false, fs.getConf());
    }
    reader.close();

    log.info("Import dir: {}", Arrays.toString(fs.listStatus(importDir)));

    // Import the exported data into a new table
    conn.tableOperations().importTable(destTable, importDir.toString());

    // Get the table ID for the table that the importtable command created
    final String tableId = conn.tableOperations().tableIdMap().get(destTable);
    Assert.assertNotNull(tableId);

    // Get all `file` colfams from the metadata table for the new table
    log.info("Imported into table with ID: {}", tableId);
    Scanner s = conn.createScanner(MetadataTable.NAME, Authorizations.EMPTY);
    s.setRange(MetadataSchema.TabletsSection.getRange(tableId));
    s.fetchColumnFamily(MetadataSchema.TabletsSection.DataFileColumnFamily.NAME);
    MetadataSchema.TabletsSection.ServerColumnFamily.DIRECTORY_COLUMN.fetch(s);

    // Should find a single entry
    for (Entry<Key,Value> fileEntry : s) {
        Key k = fileEntry.getKey();
        String value = fileEntry.getValue().toString();
        if (k.getColumnFamily().equals(MetadataSchema.TabletsSection.DataFileColumnFamily.NAME)) {
            // The file should be an absolute URI (file:///...), not a relative path (/b-000.../I000001.rf)
            String fileUri = k.getColumnQualifier().toString();
            Assert.assertFalse("Imported files should have absolute URIs, not relative: " + fileUri,
                looksLikeRelativePath(fileUri));
        } else if (k.getColumnFamily().equals(MetadataSchema.TabletsSection.ServerColumnFamily.NAME)) {
            Assert.assertFalse("Server directory should have absolute URI, not relative: " + value,
                looksLikeRelativePath(value));
        } else {
            Assert.fail("Got unexpected pair: " + k + "=" + fileEntry.getValue());
        }
    }

    // Online the original table before we verify equivalence
    conn.tableOperations().online(srcTable, true);
    verifyTableEquality(conn, srcTable, destTable);
}
From source file:org.apache.accumulo.test.performance.metadata.FastBulkImportIT.java
License:Apache License
@Test
public void test() throws Exception {
    log.info("Creating table");
    final String tableName = getUniqueNames(1)[0];
    final Connector c = getConnector();
    c.tableOperations().create(tableName);

    log.info("Adding splits");
    SortedSet<Text> splits = new TreeSet<>();
    for (int i = 1; i < 0xfff; i += 7) {
        splits.add(new Text(Integer.toHexString(i)));
    }
    c.tableOperations().addSplits(tableName, splits);

    log.info("Creating lots of bulk import files");
    FileSystem fs = getCluster().getFileSystem();
    Path basePath = getCluster().getTemporaryPath();
    // seed the shared configuration cache from the cluster filesystem
    CachedConfiguration.setInstance(fs.getConf());
    Path base = new Path(basePath, "testBulkFail_" + tableName);
    fs.delete(base, true);
    fs.mkdirs(base);
    Path bulkFailures = new Path(base, "failures");
    Path files = new Path(base, "files");
    fs.mkdirs(bulkFailures);
    fs.mkdirs(files);
    for (int i = 0; i < 100; i++) {
        FileSKVWriter writer = FileOperations.getInstance().newWriterBuilder()
            .forFile(files.toString() + "/bulk_" + i + "." + RFile.EXTENSION, fs, fs.getConf())
            .withTableConfiguration(AccumuloConfiguration.getDefaultConfiguration()).build();
        writer.startDefaultLocalityGroup();
        for (int j = 0x100; j < 0xfff; j += 3) {
            writer.append(new Key(Integer.toHexString(j)), new Value(new byte[0]));
        }
        writer.close();
    }

    log.info("Waiting for balance");
    c.instanceOperations().waitForBalance();

    log.info("Bulk importing files");
    long now = System.currentTimeMillis();
    c.tableOperations().importDirectory(tableName, files.toString(), bulkFailures.toString(), true);
    double diffSeconds = (System.currentTimeMillis() - now) / 1000.;
    log.info(String.format("Import took %.2f seconds", diffSeconds));
    assertTrue(diffSeconds < 30);
}
From source file:org.apache.accumulo.test.performance.scan.CollectTabletStats.java
License:Apache License
private static int readFiles(VolumeManager fs, AccumuloConfiguration aconf, List<FileRef> files,
    KeyExtent ke, String[] columns) throws Exception {
    int count = 0;
    HashSet<ByteSequence> columnSet = createColumnBSS(columns);
    for (FileRef file : files) {
        FileSystem ns = fs.getVolumeByPath(file.path()).getFileSystem();
        // open the RFile with the Configuration of the volume's own filesystem
        FileSKVIterator reader = FileOperations.getInstance().newReaderBuilder()
            .forFile(file.path().toString(), ns, ns.getConf()).withTableConfiguration(aconf).build();
        Range range = new Range(ke.getPrevEndRow(), false, ke.getEndRow(), true);
        reader.seek(range, columnSet, !columnSet.isEmpty());
        while (reader.hasTop() && !range.afterEndKey(reader.getTopKey())) {
            count++;
            reader.next();
        }
        reader.close();
    }
    return count;
}
From source file:org.apache.accumulo.test.performance.scan.CollectTabletStats.java
License:Apache License
private static int readFilesUsingIterStack(VolumeManager fs, ServerConfigurationFactory aconf,
    List<FileRef> files, Authorizations auths, KeyExtent ke, String[] columns,
    boolean useTableIterators) throws Exception {
    SortedKeyValueIterator<Key,Value> reader;
    List<SortedKeyValueIterator<Key,Value>> readers = new ArrayList<>(files.size());
    for (FileRef file : files) {
        FileSystem ns = fs.getVolumeByPath(file.path()).getFileSystem();
        // each reader is built with the Configuration of its volume's filesystem
        readers.add(FileOperations.getInstance().newReaderBuilder()
            .forFile(file.path().toString(), ns, ns.getConf())
            .withTableConfiguration(aconf.getConfiguration()).build());
    }
    List<IterInfo> emptyIterinfo = Collections.emptyList();
    Map<String,Map<String,String>> emptySsio = Collections.emptyMap();
    TableConfiguration tconf = aconf.getTableConfiguration(ke.getTableId());
    reader = createScanIterator(ke, readers, auths, new byte[] {}, new HashSet<Column>(),
        emptyIterinfo, emptySsio, useTableIterators, tconf);

    HashSet<ByteSequence> columnSet = createColumnBSS(columns);
    reader.seek(new Range(ke.getPrevEndRow(), false, ke.getEndRow(), true), columnSet,
        !columnSet.isEmpty());

    int count = 0;
    while (reader.hasTop()) {
        count++;
        reader.next();
    }
    return count;
}
From source file:org.apache.accumulo.test.proxy.SimpleProxyBase.java
License:Apache License
@Test
public void importExportTable() throws Exception {
    // Write some data
    String[][] expected = new String[10][];
    for (int i = 0; i < 10; i++) {
        client.updateAndFlush(creds, tableName, mutation("row" + i, "cf", "cq", "" + i));
        expected[i] = new String[] {"row" + i, "cf", "cq", "" + i};
        client.flushTable(creds, tableName, null, null, true);
    }
    assertScan(expected, tableName);

    // export/import
    MiniAccumuloClusterImpl cluster = SharedMiniClusterBase.getCluster();
    FileSystem fs = cluster.getFileSystem();
    Path base = cluster.getTemporaryPath();
    Path dir = new Path(base, "test");
    assertTrue(fs.mkdirs(dir));
    Path destDir = new Path(base, "test_dest");
    assertTrue(fs.mkdirs(destDir));
    client.offlineTable(creds, tableName, false);
    client.exportTable(creds, tableName, dir.toString());

    // copy files to a new location, reusing the filesystem's Configuration via fs.getConf()
    FSDataInputStream is = fs.open(new Path(dir, "distcp.txt"));
    try (BufferedReader r = new BufferedReader(new InputStreamReader(is, UTF_8))) {
        while (true) {
            String line = r.readLine();
            if (line == null)
                break;
            Path srcPath = new Path(line);
            FileUtil.copy(fs, srcPath, fs, destDir, false, fs.getConf());
        }
    }
    client.deleteTable(creds, tableName);
    client.importTable(creds, "testify", destDir.toString());
    assertScan(expected, "testify");
    client.deleteTable(creds, "testify");

    try {
        // ACCUMULO-1558: a second import from the same dir should fail; the first import moved the files
        client.importTable(creds, "testify2", destDir.toString());
        fail();
    } catch (Exception e) {}
    assertFalse(client.listTables(creds).contains("testify2"));
}
From source file:org.apache.accumulo.test.proxy.SimpleProxyBase.java
License:Apache License
@Test
public void bulkImport() throws Exception {
    MiniAccumuloClusterImpl cluster = SharedMiniClusterBase.getCluster();
    FileSystem fs = cluster.getFileSystem();
    Path base = cluster.getTemporaryPath();
    Path dir = new Path(base, "test");
    assertTrue(fs.mkdirs(dir));

    // Write an RFile, handing the writer the filesystem's Configuration via fs.getConf()
    String filename = dir + "/bulk/import/rfile.rf";
    FileSKVWriter writer = FileOperations.getInstance().newWriterBuilder().forFile(filename, fs, fs.getConf())
        .withTableConfiguration(DefaultConfiguration.getInstance()).build();
    writer.startDefaultLocalityGroup();
    writer.append(new org.apache.accumulo.core.data.Key(new Text("a"), new Text("b"), new Text("c")),
        new Value("value".getBytes(UTF_8)));
    writer.close();

    // Create failures directory
    fs.mkdirs(new Path(dir + "/bulk/fail"));

    // Run the bulk import
    client.importDirectory(creds, tableName, dir + "/bulk/import", dir + "/bulk/fail", true);

    // Make sure we find the data
    String scanner = client.createScanner(creds, tableName, null);
    ScanResult more = client.nextK(scanner, 100);
    client.closeScanner(scanner);
    assertEquals(1, more.results.size());
    ByteBuffer maxRow = client.getMaxRow(creds, tableName, null, null, false, null, false);
    assertEquals(s2bb("a"), maxRow);
}
From source file:org.apache.accumulo.test.randomwalk.bulk.BulkPlusOne.java
License:Apache License
static void bulkLoadLots(Logger log, State state, Environment env, Value value) throws Exception {
    final Path dir = new Path("/tmp", "bulk_" + UUID.randomUUID().toString());
    final Path fail = new Path(dir.toString() + "_fail");
    final DefaultConfiguration defaultConfiguration = AccumuloConfiguration.getDefaultConfiguration();
    final Random rand = (Random) state.get("rand");
    final FileSystem fs = (FileSystem) state.get("fs");
    fs.mkdirs(fail);
    final int parts = rand.nextInt(10) + 1;

    TreeSet<Integer> startRows = new TreeSet<>();
    startRows.add(0);
    while (startRows.size() < parts)
        startRows.add(rand.nextInt(LOTS));

    List<String> printRows = new ArrayList<>(startRows.size());
    for (Integer row : startRows)
        printRows.add(String.format(FMT, row));

    String markerColumnQualifier = String.format("%07d", counter.incrementAndGet());
    log.debug("preparing bulk files with start rows " + printRows + " last row "
        + String.format(FMT, LOTS - 1) + " marker " + markerColumnQualifier);

    List<Integer> rows = new ArrayList<>(startRows);
    rows.add(LOTS);

    for (int i = 0; i < parts; i++) {
        String fileName = dir + "/" + String.format("part_%d.", i) + RFile.EXTENSION;
        // fs.getConf() supplies the Configuration for the RFile writer
        FileSKVWriter f = FileOperations.getInstance().newWriterBuilder().forFile(fileName, fs, fs.getConf())
            .withTableConfiguration(defaultConfiguration).build();
        f.startDefaultLocalityGroup();
        int start = rows.get(i);
        int end = rows.get(i + 1);
        for (int j = start; j < end; j++) {
            Text row = new Text(String.format(FMT, j));
            for (Column col : COLNAMES) {
                f.append(new Key(row, col.getColumnFamily(), col.getColumnQualifier()), value);
            }
            f.append(new Key(row, MARKER_CF, new Text(markerColumnQualifier)), ONE);
        }
        f.close();
    }
    env.getConnector().tableOperations().importDirectory(Setup.getTableName(), dir.toString(),
        fail.toString(), true);
    fs.delete(dir, true);
    FileStatus[] failures = fs.listStatus(fail);
    if (failures != null && failures.length > 0) {
        state.set("bulkImportSuccess", "false");
        throw new Exception(failures.length + " failure files found importing files from " + dir);
    }
    fs.delete(fail, true);
    log.debug("Finished bulk import, start rows " + printRows + " last row "
        + String.format(FMT, LOTS - 1) + " marker " + markerColumnQualifier);
}