Example usage for org.apache.hadoop.fs FileSystem getConf

List of usage examples for org.apache.hadoop.fs FileSystem getConf

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem getConf.

Prototype

@Override
public Configuration getConf()
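
A minimal sketch of the pattern the examples below rely on, assuming a default Configuration is available on the classpath and using hypothetical paths: getConf() returns the Configuration the FileSystem was initialized with, which can then be passed on to utilities such as FileUtil.copy.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

public class FileSystemGetConfSketch {
    public static void main(String[] args) throws IOException {
        // Build a FileSystem from a fresh Configuration (assumes fs.defaultFS is configured)
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // getConf() hands back the Configuration this FileSystem was initialized with
        Configuration fsConf = fs.getConf();
        System.out.println("fs.defaultFS = " + fsConf.get("fs.defaultFS"));

        // Typical reuse, as in the examples below: pass the FileSystem's own conf to file utilities
        Path src = new Path("/tmp/getconf-src.txt"); // hypothetical paths, for illustration only
        Path dst = new Path("/tmp/getconf-dst.txt");
        if (fs.exists(src)) {
            FileUtil.copy(fs, src, fs, dst, false, fs.getConf());
        }
    }
}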

Source Link

Usage

From source file:org.apache.accumulo.test.CountNameNodeOpsBulkIT.java

License:Apache License

@Test
public void compareOldNewBulkImportTest() throws Exception {
    try (AccumuloClient c = Accumulo.newClient().from(getClientProperties()).build()) {
        getCluster().getClusterControl().kill(ServerType.GARBAGE_COLLECTOR, "localhost");
        final String tableName = getUniqueNames(1)[0];
        c.tableOperations().create(tableName);
        // turn off compactions
        c.tableOperations().setProperty(tableName, Property.TABLE_MAJC_RATIO.getKey(), "2000");
        c.tableOperations().setProperty(tableName, Property.TABLE_FILE_MAX.getKey(), "2000");
        // splits to slow down bulk import
        SortedSet<Text> splits = new TreeSet<>();
        for (int i = 1; i < 0xf; i++) {
            splits.add(new Text(Integer.toHexString(i)));
        }
        c.tableOperations().addSplits(tableName, splits);

        MasterMonitorInfo stats = getCluster().getMasterMonitorInfo();
        assertEquals(1, stats.tServerInfo.size());

        log.info("Creating lots of bulk import files");
        final FileSystem fs = getCluster().getFileSystem();
        final Path basePath = getCluster().getTemporaryPath();

        final Path base = new Path(basePath, "testBulkLoad" + tableName);
        fs.delete(base, true);
        fs.mkdirs(base);

        ExecutorService es = Executors.newFixedThreadPool(5);
        List<Future<String>> futures = new ArrayList<>();
        for (int i = 0; i < 10; i++) {
            final int which = i;
            futures.add(es.submit(() -> {
                Path files = new Path(base, "files" + which);
                fs.mkdirs(files);
                for (int i1 = 0; i1 < 100; i1++) {
                    FileSKVWriter writer = FileOperations.getInstance().newWriterBuilder()
                            .forFile(files + "/bulk_" + i1 + "." + RFile.EXTENSION, fs, fs.getConf(),
                                    CryptoServiceFactory.newDefaultInstance())
                            .withTableConfiguration(DefaultConfiguration.getInstance()).build();
                    writer.startDefaultLocalityGroup();
                    for (int j = 0x100; j < 0xfff; j += 3) {
                        writer.append(new Key(Integer.toHexString(j)), new Value(new byte[0]));
                    }
                    writer.close();
                }
                return files.toString();
            }));
        }
        List<String> dirs = new ArrayList<>();
        for (Future<String> f : futures) {
            dirs.add(f.get());
        }
        log.info("Importing");
        long startOps = getStat(getStats(), "FileInfoOps");
        long now = System.currentTimeMillis();
        List<Future<Object>> errs = new ArrayList<>();
        for (String dir : dirs) {
            errs.add(es.submit(() -> {
                c.tableOperations().importDirectory(dir).to(tableName).load();
                return null;
            }));
        }
        for (Future<Object> err : errs) {
            err.get();
        }
        es.shutdown();
        es.awaitTermination(2, TimeUnit.MINUTES);
        log.info(String.format("Completed in %.2f seconds", (System.currentTimeMillis() - now) / 1000.));
        sleepUninterruptibly(30, TimeUnit.SECONDS);
        Map<?, ?> map = getStats();
        map.forEach((k, v) -> {
            try {
                if (v != null && Double.parseDouble(v.toString()) > 0.0)
                    log.debug("{}:{}", k, v);
            } catch (NumberFormatException e) {
                // only looking for numbers
            }
        });
        long getFileInfoOpts = getStat(map, "FileInfoOps") - startOps;
        log.info("New bulk import used {} opts, vs old using 2060", getFileInfoOpts);
        // counts for old bulk import:
        // Expected number of FileInfoOps was between 1000 and 2100
        // new bulk import is way better :)
        assertEquals("unexpected number of FileInfoOps", 20, getFileInfoOpts);
    }
}

From source file:org.apache.accumulo.test.functional.BulkLoadIT.java

License:Apache License

private String writeData(String file, AccumuloConfiguration aconf, int s, int e) throws Exception {
    FileSystem fs = getCluster().getFileSystem();
    String filename = file + RFile.EXTENSION;
    try (FileSKVWriter writer = FileOperations.getInstance().newWriterBuilder()
            .forFile(filename, fs, fs.getConf()).withTableConfiguration(aconf).build()) {
        writer.startDefaultLocalityGroup();
        for (int i = s; i <= e; i++) {
            writer.append(new Key(new Text(String.format("%04d", i))),
                    new Value(Integer.toString(i).getBytes(UTF_8)));
        }
    }

    return hash(filename);
}

From source file:org.apache.accumulo.test.GetFileInfoBulkIT.java

License:Apache License

@Test
public void test() throws Exception {
    final Connector c = getConnector();
    getCluster().getClusterControl().kill(ServerType.GARBAGE_COLLECTOR, "localhost");
    final String tableName = getUniqueNames(1)[0];
    c.tableOperations().create(tableName);
    // turn off compactions
    c.tableOperations().setProperty(tableName, Property.TABLE_MAJC_RATIO.getKey(), "2000");
    c.tableOperations().setProperty(tableName, Property.TABLE_FILE_MAX.getKey(), "2000");
    // splits to slow down bulk import
    SortedSet<Text> splits = new TreeSet<>();
    for (int i = 1; i < 0xf; i++) {
        splits.add(new Text(Integer.toHexString(i)));
    }
    c.tableOperations().addSplits(tableName, splits);

    MasterMonitorInfo stats = getCluster().getMasterMonitorInfo();
    assertEquals(1, stats.tServerInfo.size());

    log.info("Creating lots of bulk import files");
    final FileSystem fs = getCluster().getFileSystem();
    final Path basePath = getCluster().getTemporaryPath();
    CachedConfiguration.setInstance(fs.getConf());

    final Path base = new Path(basePath, "testBulkLoad" + tableName);
    fs.delete(base, true);
    fs.mkdirs(base);

    ExecutorService es = Executors.newFixedThreadPool(5);
    List<Future<Pair<String, String>>> futures = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
        final int which = i;
        futures.add(es.submit(new Callable<Pair<String, String>>() {
            @Override
            public Pair<String, String> call() throws Exception {
                Path bulkFailures = new Path(base, "failures" + which);
                Path files = new Path(base, "files" + which);
                fs.mkdirs(bulkFailures);
                fs.mkdirs(files);
                for (int i = 0; i < 100; i++) {
                    FileSKVWriter writer = FileOperations.getInstance().newWriterBuilder()
                            .forFile(files.toString() + "/bulk_" + i + "." + RFile.EXTENSION, fs, fs.getConf())
                            .withTableConfiguration(AccumuloConfiguration.getDefaultConfiguration()).build();
                    writer.startDefaultLocalityGroup();
                    for (int j = 0x100; j < 0xfff; j += 3) {
                        writer.append(new Key(Integer.toHexString(j)), new Value(new byte[0]));
                    }
                    writer.close();
                }
                return new Pair<>(files.toString(), bulkFailures.toString());
            }
        }));
    }
    List<Pair<String, String>> dirs = new ArrayList<>();
    for (Future<Pair<String, String>> f : futures) {
        dirs.add(f.get());
    }
    log.info("Importing");
    long startOps = getOpts();
    long now = System.currentTimeMillis();
    List<Future<Object>> errs = new ArrayList<>();
    for (Pair<String, String> entry : dirs) {
        final String dir = entry.getFirst();
        final String err = entry.getSecond();
        errs.add(es.submit(new Callable<Object>() {
            @Override
            public Object call() throws Exception {
                c.tableOperations().importDirectory(tableName, dir, err, false);
                return null;
            }
        }));
    }
    for (Future<Object> err : errs) {
        err.get();
    }
    es.shutdown();
    es.awaitTermination(2, TimeUnit.MINUTES);
    log.info(String.format("Completed in %.2f seconds", (System.currentTimeMillis() - now) / 1000.));
    Uninterruptibles.sleepUninterruptibly(30, TimeUnit.SECONDS);
    long getFileInfoOpts = getOpts() - startOps;
    log.info("# opts: {}", getFileInfoOpts);
    assertTrue("unexpected number of getFileOps", getFileInfoOpts < 2100 && getFileInfoOpts > 1000);
}

From source file:org.apache.accumulo.test.ImportExportIT.java

License:Apache License

@Test
public void testExportImportThenScan() throws Exception {
    Connector conn = getConnector();

    String[] tableNames = getUniqueNames(2);
    String srcTable = tableNames[0], destTable = tableNames[1];
    conn.tableOperations().create(srcTable);

    BatchWriter bw = conn.createBatchWriter(srcTable, new BatchWriterConfig());
    for (int row = 0; row < 1000; row++) {
        Mutation m = new Mutation(Integer.toString(row));
        for (int col = 0; col < 100; col++) {
            m.put(Integer.toString(col), "", Integer.toString(col * 2));
        }
        bw.addMutation(m);
    }

    bw.close();

    conn.tableOperations().compact(srcTable, null, null, true, true);

    // Make a directory we can use to hold the export and import directories.
    // It must exist on the filesystem the cluster is running on.
    FileSystem fs = cluster.getFileSystem();
    Path tmp = cluster.getTemporaryPath();
    log.info("Using FileSystem: " + fs);
    Path baseDir = new Path(tmp, getClass().getName());
    if (fs.exists(baseDir)) {
        log.info("{} exists on filesystem, deleting", baseDir);
        assertTrue("Failed to deleted " + baseDir, fs.delete(baseDir, true));
    }
    log.info("Creating {}", baseDir);
    assertTrue("Failed to create " + baseDir, fs.mkdirs(baseDir));
    Path exportDir = new Path(baseDir, "export");
    Path importDir = new Path(baseDir, "import");
    for (Path p : new Path[] { exportDir, importDir }) {
        assertTrue("Failed to create " + baseDir, fs.mkdirs(p));
    }

    log.info("Exporting table to {}", exportDir);
    log.info("Importing table from {}", importDir);

    // Offline the table
    conn.tableOperations().offline(srcTable, true);
    // Then export it
    conn.tableOperations().exportTable(srcTable, exportDir.toString());

    // Make sure the distcp.txt file that exportTable creates is available
    Path distcp = new Path(exportDir, "distcp.txt");
    Assert.assertTrue("Distcp file doesn't exist", fs.exists(distcp));
    FSDataInputStream is = fs.open(distcp);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));

    // Copy each file that was exported to the import directory
    String line;
    while (null != (line = reader.readLine())) {
        Path p = new Path(line.substring(5));
        Assert.assertTrue("File doesn't exist: " + p, fs.exists(p));

        Path dest = new Path(importDir, p.getName());
        Assert.assertFalse("Did not expect " + dest + " to exist", fs.exists(dest));
        FileUtil.copy(fs, p, fs, dest, false, fs.getConf());
    }

    reader.close();

    log.info("Import dir: {}", Arrays.toString(fs.listStatus(importDir)));

    // Import the exported data into a new table
    conn.tableOperations().importTable(destTable, importDir.toString());

    // Get the table ID for the table that the importtable command created
    final String tableId = conn.tableOperations().tableIdMap().get(destTable);
    Assert.assertNotNull(tableId);

    // Get all `file` colfams from the metadata table for the new table
    log.info("Imported into table with ID: {}", tableId);
    Scanner s = conn.createScanner(MetadataTable.NAME, Authorizations.EMPTY);
    s.setRange(MetadataSchema.TabletsSection.getRange(tableId));
    s.fetchColumnFamily(MetadataSchema.TabletsSection.DataFileColumnFamily.NAME);
    MetadataSchema.TabletsSection.ServerColumnFamily.DIRECTORY_COLUMN.fetch(s);

    // Should find a single entry
    for (Entry<Key, Value> fileEntry : s) {
        Key k = fileEntry.getKey();
        String value = fileEntry.getValue().toString();
        if (k.getColumnFamily().equals(MetadataSchema.TabletsSection.DataFileColumnFamily.NAME)) {
            // The file should be an absolute URI (file:///...), not a relative path (/b-000.../I000001.rf)
            String fileUri = k.getColumnQualifier().toString();
            Assert.assertFalse("Imported files should have absolute URIs, not relative: " + fileUri,
                    looksLikeRelativePath(fileUri));
        } else if (k.getColumnFamily().equals(MetadataSchema.TabletsSection.ServerColumnFamily.NAME)) {
            Assert.assertFalse("Server directory should have absolute URI, not relative: " + value,
                    looksLikeRelativePath(value));
        } else {
            Assert.fail("Got expected pair: " + k + "=" + fileEntry.getValue());
        }
    }

    // Online the original table before we verify equivalence
    conn.tableOperations().online(srcTable, true);

    verifyTableEquality(conn, srcTable, destTable);
}

From source file:org.apache.accumulo.test.performance.metadata.FastBulkImportIT.java

License:Apache License

@Test
public void test() throws Exception {
    log.info("Creating table");
    final String tableName = getUniqueNames(1)[0];
    final Connector c = getConnector();
    c.tableOperations().create(tableName);
    log.info("Adding splits");
    SortedSet<Text> splits = new TreeSet<>();
    for (int i = 1; i < 0xfff; i += 7) {
        splits.add(new Text(Integer.toHexString(i)));
    }
    c.tableOperations().addSplits(tableName, splits);

    log.info("Creating lots of bulk import files");
    FileSystem fs = getCluster().getFileSystem();
    Path basePath = getCluster().getTemporaryPath();
    CachedConfiguration.setInstance(fs.getConf());

    Path base = new Path(basePath, "testBulkFail_" + tableName);
    fs.delete(base, true);
    fs.mkdirs(base);
    Path bulkFailures = new Path(base, "failures");
    Path files = new Path(base, "files");
    fs.mkdirs(bulkFailures);
    fs.mkdirs(files);
    for (int i = 0; i < 100; i++) {
        FileSKVWriter writer = FileOperations.getInstance().newWriterBuilder()
                .forFile(files.toString() + "/bulk_" + i + "." + RFile.EXTENSION, fs, fs.getConf())
                .withTableConfiguration(AccumuloConfiguration.getDefaultConfiguration()).build();
        writer.startDefaultLocalityGroup();
        for (int j = 0x100; j < 0xfff; j += 3) {
            writer.append(new Key(Integer.toHexString(j)), new Value(new byte[0]));
        }
        writer.close();
    }
    log.info("Waiting for balance");
    c.instanceOperations().waitForBalance();

    log.info("Bulk importing files");
    long now = System.currentTimeMillis();
    c.tableOperations().importDirectory(tableName, files.toString(), bulkFailures.toString(), true);
    double diffSeconds = (System.currentTimeMillis() - now) / 1000.;
    log.info(String.format("Import took %.2f seconds", diffSeconds));
    assertTrue(diffSeconds < 30);
}

From source file:org.apache.accumulo.test.performance.scan.CollectTabletStats.java

License:Apache License

private static int readFiles(VolumeManager fs, AccumuloConfiguration aconf, List<FileRef> files, KeyExtent ke,
        String[] columns) throws Exception {

    int count = 0;

    HashSet<ByteSequence> columnSet = createColumnBSS(columns);

    for (FileRef file : files) {
        FileSystem ns = fs.getVolumeByPath(file.path()).getFileSystem();
        FileSKVIterator reader = FileOperations.getInstance().newReaderBuilder()
                .forFile(file.path().toString(), ns, ns.getConf()).withTableConfiguration(aconf).build();
        Range range = new Range(ke.getPrevEndRow(), false, ke.getEndRow(), true);
        reader.seek(range, columnSet, !columnSet.isEmpty());
        while (reader.hasTop() && !range.afterEndKey(reader.getTopKey())) {
            count++;
            reader.next();
        }
        reader.close();
    }

    return count;
}

From source file:org.apache.accumulo.test.performance.scan.CollectTabletStats.java

License:Apache License

private static int readFilesUsingIterStack(VolumeManager fs, ServerConfigurationFactory aconf,
        List<FileRef> files, Authorizations auths, KeyExtent ke, String[] columns, boolean useTableIterators)
        throws Exception {

    SortedKeyValueIterator<Key, Value> reader;

    List<SortedKeyValueIterator<Key, Value>> readers = new ArrayList<>(files.size());

    for (FileRef file : files) {
        FileSystem ns = fs.getVolumeByPath(file.path()).getFileSystem();
        readers.add(FileOperations.getInstance().newReaderBuilder()
                .forFile(file.path().toString(), ns, ns.getConf())
                .withTableConfiguration(aconf.getConfiguration()).build());
    }

    List<IterInfo> emptyIterinfo = Collections.emptyList();
    Map<String, Map<String, String>> emptySsio = Collections.emptyMap();
    TableConfiguration tconf = aconf.getTableConfiguration(ke.getTableId());
    reader = createScanIterator(ke, readers, auths, new byte[] {}, new HashSet<Column>(), emptyIterinfo,
            emptySsio, useTableIterators, tconf);

    HashSet<ByteSequence> columnSet = createColumnBSS(columns);

    reader.seek(new Range(ke.getPrevEndRow(), false, ke.getEndRow(), true), columnSet, !columnSet.isEmpty());

    int count = 0;

    while (reader.hasTop()) {
        count++;
        reader.next();
    }

    return count;

}

From source file:org.apache.accumulo.test.proxy.SimpleProxyBase.java

License:Apache License

@Test
public void importExportTable() throws Exception {
    // Write some data
    String expected[][] = new String[10][];
    for (int i = 0; i < 10; i++) {
        client.updateAndFlush(creds, tableName, mutation("row" + i, "cf", "cq", "" + i));
        expected[i] = new String[] { "row" + i, "cf", "cq", "" + i };
        client.flushTable(creds, tableName, null, null, true);
    }
    assertScan(expected, tableName);

    // export/import
    MiniAccumuloClusterImpl cluster = SharedMiniClusterBase.getCluster();
    FileSystem fs = cluster.getFileSystem();
    Path base = cluster.getTemporaryPath();
    Path dir = new Path(base, "test");
    assertTrue(fs.mkdirs(dir));
    Path destDir = new Path(base, "test_dest");
    assertTrue(fs.mkdirs(destDir));
    client.offlineTable(creds, tableName, false);
    client.exportTable(creds, tableName, dir.toString());
    // copy files to a new location
    FSDataInputStream is = fs.open(new Path(dir, "distcp.txt"));
    try (BufferedReader r = new BufferedReader(new InputStreamReader(is, UTF_8))) {
        while (true) {
            String line = r.readLine();
            if (line == null)
                break;
            Path srcPath = new Path(line);
            FileUtil.copy(fs, srcPath, fs, destDir, false, fs.getConf());
        }
    }
    client.deleteTable(creds, tableName);
    client.importTable(creds, "testify", destDir.toString());
    assertScan(expected, "testify");
    client.deleteTable(creds, "testify");

    try {
        // ACCUMULO-1558: a second import from the same dir should fail because the first import moved the files
        client.importTable(creds, "testify2", destDir.toString());
        fail();
    } catch (Exception e) {
    }

    assertFalse(client.listTables(creds).contains("testify2"));
}

From source file:org.apache.accumulo.test.proxy.SimpleProxyBase.java

License:Apache License

@Test
public void bulkImport() throws Exception {
    MiniAccumuloClusterImpl cluster = SharedMiniClusterBase.getCluster();
    FileSystem fs = cluster.getFileSystem();
    Path base = cluster.getTemporaryPath();
    Path dir = new Path(base, "test");
    assertTrue(fs.mkdirs(dir));

    // Write an RFile
    String filename = dir + "/bulk/import/rfile.rf";
    FileSKVWriter writer = FileOperations.getInstance().newWriterBuilder().forFile(filename, fs, fs.getConf())
            .withTableConfiguration(DefaultConfiguration.getInstance()).build();
    writer.startDefaultLocalityGroup();
    writer.append(new org.apache.accumulo.core.data.Key(new Text("a"), new Text("b"), new Text("c")),
            new Value("value".getBytes(UTF_8)));
    writer.close();

    // Create failures directory
    fs.mkdirs(new Path(dir + "/bulk/fail"));

    // Run the bulk import
    client.importDirectory(creds, tableName, dir + "/bulk/import", dir + "/bulk/fail", true);

    // Make sure we find the data
    String scanner = client.createScanner(creds, tableName, null);
    ScanResult more = client.nextK(scanner, 100);
    client.closeScanner(scanner);
    assertEquals(1, more.results.size());
    ByteBuffer maxRow = client.getMaxRow(creds, tableName, null, null, false, null, false);
    assertEquals(s2bb("a"), maxRow);
}

From source file:org.apache.accumulo.test.randomwalk.bulk.BulkPlusOne.java

License:Apache License

static void bulkLoadLots(Logger log, State state, Environment env, Value value) throws Exception {
    final Path dir = new Path("/tmp", "bulk_" + UUID.randomUUID().toString());
    final Path fail = new Path(dir.toString() + "_fail");
    final DefaultConfiguration defaultConfiguration = AccumuloConfiguration.getDefaultConfiguration();
    final Random rand = (Random) state.get("rand");
    final FileSystem fs = (FileSystem) state.get("fs");
    fs.mkdirs(fail);
    final int parts = rand.nextInt(10) + 1;

    TreeSet<Integer> startRows = new TreeSet<>();
    startRows.add(0);
    while (startRows.size() < parts)
        startRows.add(rand.nextInt(LOTS));

    List<String> printRows = new ArrayList<>(startRows.size());
    for (Integer row : startRows)
        printRows.add(String.format(FMT, row));

    String markerColumnQualifier = String.format("%07d", counter.incrementAndGet());
    log.debug("preparing bulk files with start rows " + printRows + " last row " + String.format(FMT, LOTS - 1)
            + " marker " + markerColumnQualifier);

    List<Integer> rows = new ArrayList<>(startRows);
    rows.add(LOTS);

    for (int i = 0; i < parts; i++) {
        String fileName = dir + "/" + String.format("part_%d.", i) + RFile.EXTENSION;
        FileSKVWriter f = FileOperations.getInstance().newWriterBuilder().forFile(fileName, fs, fs.getConf())
                .withTableConfiguration(defaultConfiguration).build();
        f.startDefaultLocalityGroup();
        int start = rows.get(i);
        int end = rows.get(i + 1);
        for (int j = start; j < end; j++) {
            Text row = new Text(String.format(FMT, j));
            for (Column col : COLNAMES) {
                f.append(new Key(row, col.getColumnFamily(), col.getColumnQualifier()), value);
            }
            f.append(new Key(row, MARKER_CF, new Text(markerColumnQualifier)), ONE);
        }
        f.close();
    }
    env.getConnector().tableOperations().importDirectory(Setup.getTableName(), dir.toString(), fail.toString(),
            true);
    fs.delete(dir, true);
    FileStatus[] failures = fs.listStatus(fail);
    if (failures != null && failures.length > 0) {
        state.set("bulkImportSuccess", "false");
        throw new Exception(failures.length + " failure files found importing files from " + dir);
    }
    fs.delete(fail, true);
    log.debug("Finished bulk import, start rows " + printRows + " last row " + String.format(FMT, LOTS - 1)
            + " marker " + markerColumnQualifier);
}