Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.accumulo.server.util; import static com.google.common.util.concurrent.Uninterruptibles.sleepUninterruptibly; import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily.DIRECTORY_COLUMN; import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily.TIME_COLUMN; import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.TabletColumnFamily.PREV_ROW_COLUMN; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Optional; import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; import java.util.concurrent.TimeUnit; import org.apache.accumulo.core.Constants; import org.apache.accumulo.core.client.AccumuloException; import org.apache.accumulo.core.client.AccumuloSecurityException; import org.apache.accumulo.core.client.BatchWriter; import org.apache.accumulo.core.client.BatchWriterConfig; import org.apache.accumulo.core.client.Connector; import org.apache.accumulo.core.client.IsolatedScanner; import org.apache.accumulo.core.client.MutationsRejectedException; import org.apache.accumulo.core.client.Scanner; import org.apache.accumulo.core.client.TableNotFoundException; import org.apache.accumulo.core.client.impl.BatchWriterImpl; import org.apache.accumulo.core.client.impl.ClientContext; import org.apache.accumulo.core.client.impl.Credentials; import org.apache.accumulo.core.client.impl.ScannerImpl; import org.apache.accumulo.core.client.impl.Writer; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Mutation; import org.apache.accumulo.core.data.PartialKey; import org.apache.accumulo.core.data.Range; import org.apache.accumulo.core.data.Value; import org.apache.accumulo.core.data.impl.KeyExtent; import org.apache.accumulo.core.metadata.MetadataTable; import org.apache.accumulo.core.metadata.RootTable; import org.apache.accumulo.core.metadata.schema.DataFileValue; import org.apache.accumulo.core.metadata.schema.MetadataSchema; import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection; import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ChoppedColumnFamily; import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ClonedColumnFamily; import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.DataFileColumnFamily; import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.LogColumnFamily; import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ScanFileColumnFamily; import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily; import org.apache.accumulo.core.replication.ReplicationTable; import org.apache.accumulo.core.security.Authorizations; import org.apache.accumulo.core.tabletserver.log.LogEntry; import org.apache.accumulo.core.tabletserver.thrift.ConstraintViolationException; import org.apache.accumulo.core.util.ColumnFQ; import org.apache.accumulo.core.util.FastFormat; import org.apache.accumulo.core.util.Pair; import org.apache.accumulo.core.zookeeper.ZooUtil; import org.apache.accumulo.fate.zookeeper.IZooReaderWriter; import org.apache.accumulo.fate.zookeeper.ZooUtil.NodeExistsPolicy; import org.apache.accumulo.fate.zookeeper.ZooUtil.NodeMissingPolicy; import org.apache.accumulo.server.AccumuloServerContext; import org.apache.accumulo.server.ServerConstants; import org.apache.accumulo.server.client.HdfsZooInstance; import org.apache.accumulo.server.fs.FileRef; import org.apache.accumulo.server.fs.VolumeManager; import org.apache.accumulo.server.fs.VolumeManagerImpl; import org.apache.accumulo.server.tablets.TabletTime; import org.apache.accumulo.server.zookeeper.ZooLock; import org.apache.accumulo.server.zookeeper.ZooReaderWriter; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.zookeeper.KeeperException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.annotations.VisibleForTesting; /** * provides a reference to the metadata table for updates by tablet servers */ public class MetadataTableUtil { private static final Text EMPTY_TEXT = new Text(); private static final byte[] EMPTY_BYTES = new byte[0]; private static Map<Credentials, Writer> root_tables = new HashMap<>(); private static Map<Credentials, Writer> metadata_tables = new HashMap<>(); private static final Logger log = LoggerFactory.getLogger(MetadataTableUtil.class); private MetadataTableUtil() { } public synchronized static Writer getMetadataTable(ClientContext context) { Credentials credentials = context.getCredentials(); Writer metadataTable = metadata_tables.get(credentials); if (metadataTable == null) { metadataTable = new Writer(context, MetadataTable.ID); metadata_tables.put(credentials, metadataTable); } return metadataTable; } public synchronized static Writer getRootTable(ClientContext context) { Credentials credentials = context.getCredentials(); Writer rootTable = root_tables.get(credentials); if (rootTable == null) { rootTable = new Writer(context, RootTable.ID); root_tables.put(credentials, rootTable); } return rootTable; } public static void putLockID(ZooLock zooLock, Mutation m) { TabletsSection.ServerColumnFamily.LOCK_COLUMN.put(m, new Value(zooLock.getLockID() .serialize(ZooUtil.getRoot(HdfsZooInstance.getInstance()) + "/").getBytes(UTF_8))); } private static void update(ClientContext context, Mutation m, KeyExtent extent) { update(context, null, m, extent); } public static void update(ClientContext context, ZooLock zooLock, Mutation m, KeyExtent extent) { Writer t = extent.isMeta() ? getRootTable(context) : getMetadataTable(context); update(t, zooLock, m); } public static void update(Writer t, ZooLock zooLock, Mutation m) { if (zooLock != null) putLockID(zooLock, m); while (true) { try { t.update(m); return; } catch (AccumuloException e) { log.error("{}", e.getMessage(), e); } catch (AccumuloSecurityException e) { log.error("{}", e.getMessage(), e); } catch (ConstraintViolationException e) { log.error("{}", e.getMessage(), e); // retrying when a CVE occurs is probably futile and can cause problems, see ACCUMULO-3096 throw new RuntimeException(e); } catch (TableNotFoundException e) { log.error("{}", e.getMessage(), e); } sleepUninterruptibly(1, TimeUnit.SECONDS); } } public static void updateTabletFlushID(KeyExtent extent, long flushID, ClientContext context, ZooLock zooLock) { if (!extent.isRootTablet()) { Mutation m = new Mutation(extent.getMetadataEntry()); TabletsSection.ServerColumnFamily.FLUSH_COLUMN.put(m, new Value((flushID + "").getBytes(UTF_8))); update(context, zooLock, m, extent); } } public static void updateTabletCompactID(KeyExtent extent, long compactID, ClientContext context, ZooLock zooLock) { if (!extent.isRootTablet()) { Mutation m = new Mutation(extent.getMetadataEntry()); TabletsSection.ServerColumnFamily.COMPACT_COLUMN.put(m, new Value((compactID + "").getBytes(UTF_8))); update(context, zooLock, m, extent); } } public static void updateTabletDataFile(long tid, KeyExtent extent, Map<FileRef, DataFileValue> estSizes, String time, ClientContext context, ZooLock zooLock) { Mutation m = new Mutation(extent.getMetadataEntry()); byte[] tidBytes = Long.toString(tid).getBytes(UTF_8); for (Entry<FileRef, DataFileValue> entry : estSizes.entrySet()) { Text file = entry.getKey().meta(); m.put(DataFileColumnFamily.NAME, file, new Value(entry.getValue().encode())); m.put(TabletsSection.BulkFileColumnFamily.NAME, file, new Value(tidBytes)); } TabletsSection.ServerColumnFamily.TIME_COLUMN.put(m, new Value(time.getBytes(UTF_8))); update(context, zooLock, m, extent); } public static void updateTabletDir(KeyExtent extent, String newDir, ClientContext context, ZooLock lock) { Mutation m = new Mutation(extent.getMetadataEntry()); TabletsSection.ServerColumnFamily.DIRECTORY_COLUMN.put(m, new Value(newDir.getBytes(UTF_8))); update(context, lock, m, extent); } public static void addTablet(KeyExtent extent, String path, ClientContext context, char timeType, ZooLock lock) { Mutation m = extent.getPrevRowUpdateMutation(); TabletsSection.ServerColumnFamily.DIRECTORY_COLUMN.put(m, new Value(path.getBytes(UTF_8))); TabletsSection.ServerColumnFamily.TIME_COLUMN.put(m, new Value((timeType + "0").getBytes(UTF_8))); update(context, lock, m, extent); } public static void updateTabletVolumes(KeyExtent extent, List<LogEntry> logsToRemove, List<LogEntry> logsToAdd, List<FileRef> filesToRemove, SortedMap<FileRef, DataFileValue> filesToAdd, String newDir, ZooLock zooLock, AccumuloServerContext context) { if (extent.isRootTablet()) { if (newDir != null) throw new IllegalArgumentException("newDir not expected for " + extent); if (filesToRemove.size() != 0 || filesToAdd.size() != 0) throw new IllegalArgumentException("files not expected for " + extent); // add before removing in case of process death for (LogEntry logEntry : logsToAdd) addRootLogEntry(context, zooLock, logEntry); removeUnusedWALEntries(context, extent, logsToRemove, zooLock); } else { Mutation m = new Mutation(extent.getMetadataEntry()); for (LogEntry logEntry : logsToRemove) m.putDelete(logEntry.getColumnFamily(), logEntry.getColumnQualifier()); for (LogEntry logEntry : logsToAdd) m.put(logEntry.getColumnFamily(), logEntry.getColumnQualifier(), logEntry.getValue()); for (FileRef fileRef : filesToRemove) m.putDelete(DataFileColumnFamily.NAME, fileRef.meta()); for (Entry<FileRef, DataFileValue> entry : filesToAdd.entrySet()) m.put(DataFileColumnFamily.NAME, entry.getKey().meta(), new Value(entry.getValue().encode())); if (newDir != null) ServerColumnFamily.DIRECTORY_COLUMN.put(m, new Value(newDir.getBytes(UTF_8))); update(context, m, extent); } } private static interface ZooOperation { void run(IZooReaderWriter rw) throws KeeperException, InterruptedException, IOException; } private static void retryZooKeeperUpdate(ClientContext context, ZooLock zooLock, ZooOperation op) { while (true) { try { IZooReaderWriter zoo = ZooReaderWriter.getInstance(); if (zoo.isLockHeld(zooLock.getLockID())) { op.run(zoo); } break; } catch (Exception e) { log.error("Unexpected exception {}", e.getMessage(), e); } sleepUninterruptibly(1, TimeUnit.SECONDS); } } private static void addRootLogEntry(AccumuloServerContext context, ZooLock zooLock, final LogEntry entry) { retryZooKeeperUpdate(context, zooLock, new ZooOperation() { @Override public void run(IZooReaderWriter rw) throws KeeperException, InterruptedException, IOException { String root = getZookeeperLogLocation(); rw.putPersistentData(root + "/" + entry.getUniqueID(), entry.toBytes(), NodeExistsPolicy.OVERWRITE); } }); } public static SortedMap<FileRef, DataFileValue> getDataFileSizes(KeyExtent extent, ClientContext context) throws IOException { TreeMap<FileRef, DataFileValue> sizes = new TreeMap<>(); try (Scanner mdScanner = new ScannerImpl(context, MetadataTable.ID, Authorizations.EMPTY)) { mdScanner.fetchColumnFamily(DataFileColumnFamily.NAME); Text row = extent.getMetadataEntry(); VolumeManager fs = VolumeManagerImpl.get(); Key endKey = new Key(row, DataFileColumnFamily.NAME, new Text("")); endKey = endKey.followingKey(PartialKey.ROW_COLFAM); mdScanner.setRange(new Range(new Key(row), endKey)); for (Entry<Key, Value> entry : mdScanner) { if (!entry.getKey().getRow().equals(row)) break; DataFileValue dfv = new DataFileValue(entry.getValue().get()); sizes.put(new FileRef(fs, entry.getKey()), dfv); } return sizes; } } public static void rollBackSplit(Text metadataEntry, Text oldPrevEndRow, ClientContext context, ZooLock zooLock) { KeyExtent ke = new KeyExtent(metadataEntry, oldPrevEndRow); Mutation m = ke.getPrevRowUpdateMutation(); TabletsSection.TabletColumnFamily.SPLIT_RATIO_COLUMN.putDelete(m); TabletsSection.TabletColumnFamily.OLD_PREV_ROW_COLUMN.putDelete(m); update(context, zooLock, m, new KeyExtent(metadataEntry, (Text) null)); } public static void splitTablet(KeyExtent extent, Text oldPrevEndRow, double splitRatio, ClientContext context, ZooLock zooLock) { Mutation m = extent.getPrevRowUpdateMutation(); // TabletsSection.TabletColumnFamily.SPLIT_RATIO_COLUMN.put(m, new Value(Double.toString(splitRatio).getBytes(UTF_8))); TabletsSection.TabletColumnFamily.OLD_PREV_ROW_COLUMN.put(m, KeyExtent.encodePrevEndRow(oldPrevEndRow)); ChoppedColumnFamily.CHOPPED_COLUMN.putDelete(m); update(context, zooLock, m, extent); } public static void finishSplit(Text metadataEntry, Map<FileRef, DataFileValue> datafileSizes, List<FileRef> highDatafilesToRemove, final ClientContext context, ZooLock zooLock) { Mutation m = new Mutation(metadataEntry); TabletsSection.TabletColumnFamily.SPLIT_RATIO_COLUMN.putDelete(m); TabletsSection.TabletColumnFamily.OLD_PREV_ROW_COLUMN.putDelete(m); ChoppedColumnFamily.CHOPPED_COLUMN.putDelete(m); for (Entry<FileRef, DataFileValue> entry : datafileSizes.entrySet()) { m.put(DataFileColumnFamily.NAME, entry.getKey().meta(), new Value(entry.getValue().encode())); } for (FileRef pathToRemove : highDatafilesToRemove) { m.putDelete(DataFileColumnFamily.NAME, pathToRemove.meta()); } update(context, zooLock, m, new KeyExtent(metadataEntry, (Text) null)); } public static void finishSplit(KeyExtent extent, Map<FileRef, DataFileValue> datafileSizes, List<FileRef> highDatafilesToRemove, ClientContext context, ZooLock zooLock) { finishSplit(extent.getMetadataEntry(), datafileSizes, highDatafilesToRemove, context, zooLock); } public static void addDeleteEntries(KeyExtent extent, Set<FileRef> datafilesToDelete, ClientContext context) throws IOException { String tableId = extent.getTableId(); // TODO could use batch writer,would need to handle failure and retry like update does - ACCUMULO-1294 for (FileRef pathToRemove : datafilesToDelete) { update(context, createDeleteMutation(tableId, pathToRemove.path().toString()), extent); } } public static void addDeleteEntry(AccumuloServerContext context, String tableId, String path) throws IOException { update(context, createDeleteMutation(tableId, path), new KeyExtent(tableId, null, null)); } public static Mutation createDeleteMutation(String tableId, String pathToRemove) throws IOException { Path path = VolumeManagerImpl.get().getFullPath(tableId, pathToRemove); Mutation delFlag = new Mutation(new Text(MetadataSchema.DeletesSection.getRowPrefix() + path.toString())); delFlag.put(EMPTY_TEXT, EMPTY_TEXT, new Value(new byte[] {})); return delFlag; } public static void removeScanFiles(KeyExtent extent, Set<FileRef> scanFiles, ClientContext context, ZooLock zooLock) { Mutation m = new Mutation(extent.getMetadataEntry()); for (FileRef pathToRemove : scanFiles) m.putDelete(ScanFileColumnFamily.NAME, pathToRemove.meta()); update(context, zooLock, m, extent); } public static void splitDatafiles(String tableId, Text midRow, double splitRatio, Map<FileRef, FileUtil.FileInfo> firstAndLastRows, SortedMap<FileRef, DataFileValue> datafiles, SortedMap<FileRef, DataFileValue> lowDatafileSizes, SortedMap<FileRef, DataFileValue> highDatafileSizes, List<FileRef> highDatafilesToRemove) { for (Entry<FileRef, DataFileValue> entry : datafiles.entrySet()) { Text firstRow = null; Text lastRow = null; boolean rowsKnown = false; FileUtil.FileInfo mfi = firstAndLastRows.get(entry.getKey()); if (mfi != null) { firstRow = mfi.getFirstRow(); lastRow = mfi.getLastRow(); rowsKnown = true; } if (rowsKnown && firstRow.compareTo(midRow) > 0) { // only in high long highSize = entry.getValue().getSize(); long highEntries = entry.getValue().getNumEntries(); highDatafileSizes.put(entry.getKey(), new DataFileValue(highSize, highEntries, entry.getValue().getTime())); } else if (rowsKnown && lastRow.compareTo(midRow) <= 0) { // only in low long lowSize = entry.getValue().getSize(); long lowEntries = entry.getValue().getNumEntries(); lowDatafileSizes.put(entry.getKey(), new DataFileValue(lowSize, lowEntries, entry.getValue().getTime())); highDatafilesToRemove.add(entry.getKey()); } else { long lowSize = (long) Math.floor((entry.getValue().getSize() * splitRatio)); long lowEntries = (long) Math.floor((entry.getValue().getNumEntries() * splitRatio)); lowDatafileSizes.put(entry.getKey(), new DataFileValue(lowSize, lowEntries, entry.getValue().getTime())); long highSize = (long) Math.ceil((entry.getValue().getSize() * (1.0 - splitRatio))); long highEntries = (long) Math.ceil((entry.getValue().getNumEntries() * (1.0 - splitRatio))); highDatafileSizes.put(entry.getKey(), new DataFileValue(highSize, highEntries, entry.getValue().getTime())); } } } public static void deleteTable(String tableId, boolean insertDeletes, ClientContext context, ZooLock lock) throws AccumuloException, IOException { try (Scanner ms = new ScannerImpl(context, MetadataTable.ID, Authorizations.EMPTY); BatchWriter bw = new BatchWriterImpl(context, MetadataTable.ID, new BatchWriterConfig().setMaxMemory(1000000).setMaxLatency(120000l, TimeUnit.MILLISECONDS) .setMaxWriteThreads(2))) { // scan metadata for our table and delete everything we find Mutation m = null; ms.setRange(new KeyExtent(tableId, null, null).toMetadataRange()); // insert deletes before deleting data from metadata... this makes the code fault tolerant if (insertDeletes) { ms.fetchColumnFamily(DataFileColumnFamily.NAME); TabletsSection.ServerColumnFamily.DIRECTORY_COLUMN.fetch(ms); for (Entry<Key, Value> cell : ms) { Key key = cell.getKey(); if (key.getColumnFamily().equals(DataFileColumnFamily.NAME)) { FileRef ref = new FileRef(VolumeManagerImpl.get(), key); bw.addMutation(createDeleteMutation(tableId, ref.meta().toString())); } if (TabletsSection.ServerColumnFamily.DIRECTORY_COLUMN.hasColumns(key)) { bw.addMutation(createDeleteMutation(tableId, cell.getValue().toString())); } } bw.flush(); ms.clearColumns(); } for (Entry<Key, Value> cell : ms) { Key key = cell.getKey(); if (m == null) { m = new Mutation(key.getRow()); if (lock != null) putLockID(lock, m); } if (key.getRow().compareTo(m.getRow(), 0, m.getRow().length) != 0) { bw.addMutation(m); m = new Mutation(key.getRow()); if (lock != null) putLockID(lock, m); } m.putDelete(key.getColumnFamily(), key.getColumnQualifier()); } if (m != null) bw.addMutation(m); } } static String getZookeeperLogLocation() { return ZooUtil.getRoot(HdfsZooInstance.getInstance()) + RootTable.ZROOT_TABLET_WALOGS; } public static void setRootTabletDir(String dir) throws IOException { IZooReaderWriter zoo = ZooReaderWriter.getInstance(); String zpath = ZooUtil.getRoot(HdfsZooInstance.getInstance()) + RootTable.ZROOT_TABLET_PATH; try { zoo.putPersistentData(zpath, dir.getBytes(UTF_8), -1, NodeExistsPolicy.OVERWRITE); } catch (KeeperException e) { throw new IOException(e); } catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new IOException(e); } } public static String getRootTabletDir() throws IOException { IZooReaderWriter zoo = ZooReaderWriter.getInstance(); String zpath = ZooUtil.getRoot(HdfsZooInstance.getInstance()) + RootTable.ZROOT_TABLET_PATH; try { return new String(zoo.getData(zpath, null), UTF_8); } catch (KeeperException e) { throw new IOException(e); } catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new IOException(e); } } public static Pair<List<LogEntry>, SortedMap<FileRef, DataFileValue>> getFileAndLogEntries( ClientContext context, KeyExtent extent) throws KeeperException, InterruptedException, IOException { ArrayList<LogEntry> result = new ArrayList<>(); TreeMap<FileRef, DataFileValue> sizes = new TreeMap<>(); VolumeManager fs = VolumeManagerImpl.get(); if (extent.isRootTablet()) { getRootLogEntries(result); Path rootDir = new Path(getRootTabletDir()); FileStatus[] files = fs.listStatus(rootDir); for (FileStatus fileStatus : files) { if (fileStatus.getPath().toString().endsWith("_tmp")) { continue; } DataFileValue dfv = new DataFileValue(0, 0); sizes.put(new FileRef(fileStatus.getPath().toString(), fileStatus.getPath()), dfv); } } else { String systemTableToCheck = extent.isMeta() ? RootTable.ID : MetadataTable.ID; try (Scanner scanner = new ScannerImpl(context, systemTableToCheck, Authorizations.EMPTY)) { scanner.fetchColumnFamily(LogColumnFamily.NAME); scanner.fetchColumnFamily(DataFileColumnFamily.NAME); scanner.setRange(extent.toMetadataRange()); for (Entry<Key, Value> entry : scanner) { if (!entry.getKey().getRow().equals(extent.getMetadataEntry())) { throw new RuntimeException("Unexpected row " + entry.getKey().getRow() + " expected " + extent.getMetadataEntry()); } if (entry.getKey().getColumnFamily().equals(LogColumnFamily.NAME)) { result.add(LogEntry.fromKeyValue(entry.getKey(), entry.getValue())); } else if (entry.getKey().getColumnFamily().equals(DataFileColumnFamily.NAME)) { DataFileValue dfv = new DataFileValue(entry.getValue().get()); sizes.put(new FileRef(fs, entry.getKey()), dfv); } else { throw new RuntimeException("Unexpected col fam " + entry.getKey().getColumnFamily()); } } } } return new Pair<>(result, sizes); } public static List<LogEntry> getLogEntries(ClientContext context, KeyExtent extent) throws IOException, KeeperException, InterruptedException { log.info("Scanning logging entries for " + extent); ArrayList<LogEntry> result = new ArrayList<>(); if (extent.equals(RootTable.EXTENT)) { log.info("Getting logs for root tablet from zookeeper"); getRootLogEntries(result); } else { log.info("Scanning metadata for logs used for tablet " + extent); Scanner scanner = getTabletLogScanner(context, extent); Text pattern = extent.getMetadataEntry(); for (Entry<Key, Value> entry : scanner) { Text row = entry.getKey().getRow(); if (entry.getKey().getColumnFamily().equals(LogColumnFamily.NAME)) { if (row.equals(pattern)) { result.add(LogEntry.fromKeyValue(entry.getKey(), entry.getValue())); } } } } log.info("Returning logs " + result + " for extent " + extent); return result; } static void getRootLogEntries(final ArrayList<LogEntry> result) throws KeeperException, InterruptedException, IOException { IZooReaderWriter zoo = ZooReaderWriter.getInstance(); String root = getZookeeperLogLocation(); // there's a little race between getting the children and fetching // the data. The log can be removed in between. while (true) { result.clear(); for (String child : zoo.getChildren(root)) { try { LogEntry e = LogEntry.fromBytes(zoo.getData(root + "/" + child, null)); // upgrade from !0;!0<< -> +r<< e = new LogEntry(RootTable.EXTENT, 0, e.server, e.filename); result.add(e); } catch (KeeperException.NoNodeException ex) { continue; } } break; } } private static Scanner getTabletLogScanner(ClientContext context, KeyExtent extent) { String tableId = MetadataTable.ID; if (extent.isMeta()) tableId = RootTable.ID; Scanner scanner = new ScannerImpl(context, tableId, Authorizations.EMPTY); scanner.fetchColumnFamily(LogColumnFamily.NAME); Text start = extent.getMetadataEntry(); Key endKey = new Key(start, LogColumnFamily.NAME); endKey = endKey.followingKey(PartialKey.ROW_COLFAM); scanner.setRange(new Range(new Key(start), endKey)); return scanner; } private static class LogEntryIterator implements Iterator<LogEntry> { Iterator<LogEntry> zookeeperEntries = null; Iterator<LogEntry> rootTableEntries = null; Iterator<Entry<Key, Value>> metadataEntries = null; LogEntryIterator(ClientContext context) throws IOException, KeeperException, InterruptedException { zookeeperEntries = getLogEntries(context, RootTable.EXTENT).iterator(); rootTableEntries = getLogEntries(context, new KeyExtent(MetadataTable.ID, null, null)).iterator(); try { Scanner scanner = context.getConnector().createScanner(MetadataTable.NAME, Authorizations.EMPTY); log.info("Setting range to " + MetadataSchema.TabletsSection.getRange()); scanner.setRange(MetadataSchema.TabletsSection.getRange()); scanner.fetchColumnFamily(LogColumnFamily.NAME); metadataEntries = scanner.iterator(); } catch (Exception ex) { throw new IOException(ex); } } @Override public boolean hasNext() { return zookeeperEntries.hasNext() || rootTableEntries.hasNext() || metadataEntries.hasNext(); } @Override public LogEntry next() { if (zookeeperEntries.hasNext()) { return zookeeperEntries.next(); } if (rootTableEntries.hasNext()) { return rootTableEntries.next(); } Entry<Key, Value> entry = metadataEntries.next(); return LogEntry.fromKeyValue(entry.getKey(), entry.getValue()); } @Override public void remove() { throw new UnsupportedOperationException(); } } public static Iterator<LogEntry> getLogEntries(ClientContext context) throws IOException, KeeperException, InterruptedException { return new LogEntryIterator(context); } public static void removeUnusedWALEntries(AccumuloServerContext context, KeyExtent extent, final List<LogEntry> entries, ZooLock zooLock) { if (extent.isRootTablet()) { retryZooKeeperUpdate(context, zooLock, new ZooOperation() { @Override public void run(IZooReaderWriter rw) throws KeeperException, InterruptedException, IOException { String root = getZookeeperLogLocation(); for (LogEntry entry : entries) { String path = root + "/" + entry.getUniqueID(); log.debug("Removing " + path + " from zookeeper"); rw.recursiveDelete(path, NodeMissingPolicy.SKIP); } } }); } else { Mutation m = new Mutation(extent.getMetadataEntry()); for (LogEntry entry : entries) { m.putDelete(entry.getColumnFamily(), entry.getColumnQualifier()); } update(context, zooLock, m, extent); } } private static void getFiles(Set<String> files, Map<Key, Value> tablet, String srcTableId) { for (Entry<Key, Value> entry : tablet.entrySet()) { if (entry.getKey().getColumnFamily().equals(DataFileColumnFamily.NAME)) { String cf = entry.getKey().getColumnQualifier().toString(); if (srcTableId != null && !cf.startsWith("../") && !cf.contains(":")) { cf = "../" + srcTableId + entry.getKey().getColumnQualifier(); } files.add(cf); } } } private static Mutation createCloneMutation(String srcTableId, String tableId, Map<Key, Value> tablet) { KeyExtent ke = new KeyExtent(tablet.keySet().iterator().next().getRow(), (Text) null); Mutation m = new Mutation(KeyExtent.getMetadataEntry(tableId, ke.getEndRow())); for (Entry<Key, Value> entry : tablet.entrySet()) { if (entry.getKey().getColumnFamily().equals(DataFileColumnFamily.NAME)) { String cf = entry.getKey().getColumnQualifier().toString(); if (!cf.startsWith("../") && !cf.contains(":")) cf = "../" + srcTableId + entry.getKey().getColumnQualifier(); m.put(entry.getKey().getColumnFamily(), new Text(cf), entry.getValue()); } else if (entry.getKey().getColumnFamily().equals(TabletsSection.CurrentLocationColumnFamily.NAME)) { m.put(TabletsSection.LastLocationColumnFamily.NAME, entry.getKey().getColumnQualifier(), entry.getValue()); } else if (entry.getKey().getColumnFamily().equals(TabletsSection.LastLocationColumnFamily.NAME)) { // skip } else { m.put(entry.getKey().getColumnFamily(), entry.getKey().getColumnQualifier(), entry.getValue()); } } return m; } private static Scanner createCloneScanner(String tableName, String tableId, Connector conn) throws TableNotFoundException { if (tableId.equals(MetadataTable.ID)) tableName = RootTable.NAME; Scanner mscanner = new IsolatedScanner(conn.createScanner(tableName, Authorizations.EMPTY)); mscanner.setRange(new KeyExtent(tableId, null, null).toMetadataRange()); mscanner.fetchColumnFamily(DataFileColumnFamily.NAME); mscanner.fetchColumnFamily(TabletsSection.CurrentLocationColumnFamily.NAME); mscanner.fetchColumnFamily(TabletsSection.LastLocationColumnFamily.NAME); mscanner.fetchColumnFamily(ClonedColumnFamily.NAME); TabletsSection.TabletColumnFamily.PREV_ROW_COLUMN.fetch(mscanner); TabletsSection.ServerColumnFamily.TIME_COLUMN.fetch(mscanner); return mscanner; } @VisibleForTesting public static void initializeClone(String tableName, String srcTableId, String tableId, Connector conn, BatchWriter bw) throws TableNotFoundException, MutationsRejectedException { TabletIterator ti; if (srcTableId.equals(MetadataTable.ID)) ti = new TabletIterator(createCloneScanner(tableName, srcTableId, conn), new Range(), true, true); else ti = new TabletIterator(createCloneScanner(tableName, srcTableId, conn), new KeyExtent(srcTableId, null, null).toMetadataRange(), true, true); if (!ti.hasNext()) throw new RuntimeException(" table deleted during clone? srcTableId = " + srcTableId); while (ti.hasNext()) bw.addMutation(createCloneMutation(srcTableId, tableId, ti.next())); bw.flush(); } private static int compareEndRows(Text endRow1, Text endRow2) { return new KeyExtent("0", endRow1, null).compareTo(new KeyExtent("0", endRow2, null)); } @VisibleForTesting public static int checkClone(String tableName, String srcTableId, String tableId, Connector conn, BatchWriter bw) throws TableNotFoundException, MutationsRejectedException { TabletIterator srcIter = new TabletIterator(createCloneScanner(tableName, srcTableId, conn), new KeyExtent(srcTableId, null, null).toMetadataRange(), true, true); TabletIterator cloneIter = new TabletIterator(createCloneScanner(tableName, tableId, conn), new KeyExtent(tableId, null, null).toMetadataRange(), true, true); if (!cloneIter.hasNext() || !srcIter.hasNext()) throw new RuntimeException( " table deleted during clone? srcTableId = " + srcTableId + " tableId=" + tableId); int rewrites = 0; while (cloneIter.hasNext()) { Map<Key, Value> cloneTablet = cloneIter.next(); Text cloneEndRow = new KeyExtent(cloneTablet.keySet().iterator().next().getRow(), (Text) null) .getEndRow(); HashSet<String> cloneFiles = new HashSet<>(); boolean cloneSuccessful = false; for (Entry<Key, Value> entry : cloneTablet.entrySet()) { if (entry.getKey().getColumnFamily().equals(ClonedColumnFamily.NAME)) { cloneSuccessful = true; break; } } if (!cloneSuccessful) getFiles(cloneFiles, cloneTablet, null); List<Map<Key, Value>> srcTablets = new ArrayList<>(); Map<Key, Value> srcTablet = srcIter.next(); srcTablets.add(srcTablet); Text srcEndRow = new KeyExtent(srcTablet.keySet().iterator().next().getRow(), (Text) null).getEndRow(); int cmp = compareEndRows(cloneEndRow, srcEndRow); if (cmp < 0) throw new TabletIterator.TabletDeletedException( "Tablets deleted from src during clone : " + cloneEndRow + " " + srcEndRow); HashSet<String> srcFiles = new HashSet<>(); if (!cloneSuccessful) getFiles(srcFiles, srcTablet, srcTableId); while (cmp > 0) { srcTablet = srcIter.next(); srcTablets.add(srcTablet); srcEndRow = new KeyExtent(srcTablet.keySet().iterator().next().getRow(), (Text) null).getEndRow(); cmp = compareEndRows(cloneEndRow, srcEndRow); if (cmp < 0) throw new TabletIterator.TabletDeletedException( "Tablets deleted from src during clone : " + cloneEndRow + " " + srcEndRow); if (!cloneSuccessful) getFiles(srcFiles, srcTablet, srcTableId); } if (cloneSuccessful) continue; if (!srcFiles.containsAll(cloneFiles)) { // delete existing cloned tablet entry Mutation m = new Mutation(cloneTablet.keySet().iterator().next().getRow()); for (Entry<Key, Value> entry : cloneTablet.entrySet()) { Key k = entry.getKey(); m.putDelete(k.getColumnFamily(), k.getColumnQualifier(), k.getTimestamp()); } bw.addMutation(m); for (Map<Key, Value> st : srcTablets) bw.addMutation(createCloneMutation(srcTableId, tableId, st)); rewrites++; } else { // write out marker that this tablet was successfully cloned Mutation m = new Mutation(cloneTablet.keySet().iterator().next().getRow()); m.put(ClonedColumnFamily.NAME, new Text(""), new Value("OK".getBytes(UTF_8))); bw.addMutation(m); } } bw.flush(); return rewrites; } public static void cloneTable(ClientContext context, String srcTableId, String tableId, VolumeManager volumeManager) throws Exception { Connector conn = context.getConnector(); try (BatchWriter bw = conn.createBatchWriter(MetadataTable.NAME, new BatchWriterConfig())) { while (true) { try { initializeClone(MetadataTable.NAME, srcTableId, tableId, conn, bw); // the following loop looks changes in the file that occurred during the copy.. if files were dereferenced then they could have been GCed while (true) { int rewrites = checkClone(MetadataTable.NAME, srcTableId, tableId, conn, bw); if (rewrites == 0) break; } bw.flush(); break; } catch (TabletIterator.TabletDeletedException tde) { // tablets were merged in the src table bw.flush(); // delete what we have cloned and try again deleteTable(tableId, false, context, null); log.debug("Tablets merged in table " + srcTableId + " while attempting to clone, trying again"); sleepUninterruptibly(100, TimeUnit.MILLISECONDS); } } // delete the clone markers and create directory entries Scanner mscanner = conn.createScanner(MetadataTable.NAME, Authorizations.EMPTY); mscanner.setRange(new KeyExtent(tableId, null, null).toMetadataRange()); mscanner.fetchColumnFamily(ClonedColumnFamily.NAME); int dirCount = 0; for (Entry<Key, Value> entry : mscanner) { Key k = entry.getKey(); Mutation m = new Mutation(k.getRow()); m.putDelete(k.getColumnFamily(), k.getColumnQualifier()); String dir = volumeManager.choose(Optional.of(tableId), ServerConstants.getBaseUris()) + Constants.HDFS_TABLES_DIR + Path.SEPARATOR + tableId + Path.SEPARATOR + new String( FastFormat.toZeroPaddedString(dirCount++, 8, 16, Constants.CLONE_PREFIX_BYTES)); TabletsSection.ServerColumnFamily.DIRECTORY_COLUMN.put(m, new Value(dir.getBytes(UTF_8))); bw.addMutation(m); } } } public static void chopped(AccumuloServerContext context, KeyExtent extent, ZooLock zooLock) { Mutation m = new Mutation(extent.getMetadataEntry()); ChoppedColumnFamily.CHOPPED_COLUMN.put(m, new Value("chopped".getBytes(UTF_8))); update(context, zooLock, m, extent); } public static void removeBulkLoadEntries(Connector conn, String tableId, long tid) throws Exception { try (Scanner mscanner = new IsolatedScanner(conn.createScanner(MetadataTable.NAME, Authorizations.EMPTY)); BatchWriter bw = conn.createBatchWriter(MetadataTable.NAME, new BatchWriterConfig())) { mscanner.setRange(new KeyExtent(tableId, null, null).toMetadataRange()); mscanner.fetchColumnFamily(TabletsSection.BulkFileColumnFamily.NAME); for (Entry<Key, Value> entry : mscanner) { log.debug("Looking at entry " + entry + " with tid " + tid); if (Long.parseLong(entry.getValue().toString()) == tid) { log.debug("deleting entry " + entry); Mutation m = new Mutation(entry.getKey().getRow()); m.putDelete(entry.getKey().getColumnFamily(), entry.getKey().getColumnQualifier()); bw.addMutation(m); } } } } public static List<FileRef> getBulkFilesLoaded(Connector conn, KeyExtent extent, long tid) throws IOException { List<FileRef> result = new ArrayList<>(); try (Scanner mscanner = new IsolatedScanner( conn.createScanner(extent.isMeta() ? RootTable.NAME : MetadataTable.NAME, Authorizations.EMPTY))) { VolumeManager fs = VolumeManagerImpl.get(); mscanner.setRange(extent.toMetadataRange()); mscanner.fetchColumnFamily(TabletsSection.BulkFileColumnFamily.NAME); for (Entry<Key, Value> entry : mscanner) { if (Long.parseLong(entry.getValue().toString()) == tid) { result.add(new FileRef(fs, entry.getKey())); } } return result; } catch (TableNotFoundException ex) { // unlikely throw new RuntimeException("Onos! teh metadata table has vanished!!"); } } public static Map<Long, ? extends Collection<FileRef>> getBulkFilesLoaded(ClientContext context, KeyExtent extent) throws IOException { Text metadataRow = extent.getMetadataEntry(); Map<Long, List<FileRef>> result = new HashMap<>(); VolumeManager fs = VolumeManagerImpl.get(); try (Scanner scanner = new ScannerImpl(context, extent.isMeta() ? RootTable.ID : MetadataTable.ID, Authorizations.EMPTY)) { scanner.setRange(new Range(metadataRow)); scanner.fetchColumnFamily(TabletsSection.BulkFileColumnFamily.NAME); for (Entry<Key, Value> entry : scanner) { Long tid = Long.parseLong(entry.getValue().toString()); List<FileRef> lst = result.get(tid); if (lst == null) { result.put(tid, lst = new ArrayList<>()); } lst.add(new FileRef(fs, entry.getKey())); } } return result; } public static void addBulkLoadInProgressFlag(AccumuloServerContext context, String path) { Mutation m = new Mutation(MetadataSchema.BlipSection.getRowPrefix() + path); m.put(EMPTY_TEXT, EMPTY_TEXT, new Value(new byte[] {})); // new KeyExtent is only added to force update to write to the metadata table, not the root table // because bulk loads aren't supported to the metadata table update(context, m, new KeyExtent("anythingNotMetadata", null, null)); } public static void removeBulkLoadInProgressFlag(AccumuloServerContext context, String path) { Mutation m = new Mutation(MetadataSchema.BlipSection.getRowPrefix() + path); m.putDelete(EMPTY_TEXT, EMPTY_TEXT); // new KeyExtent is only added to force update to write to the metadata table, not the root table // because bulk loads aren't supported to the metadata table update(context, m, new KeyExtent("anythingNotMetadata", null, null)); } /** * During an upgrade from 1.6 to 1.7, we need to add the replication table */ public static void createReplicationTable(ClientContext context) throws IOException { String dir = VolumeManagerImpl.get().choose(Optional.of(ReplicationTable.ID), ServerConstants.getBaseUris()) + Constants.HDFS_TABLES_DIR + Path.SEPARATOR + ReplicationTable.ID + Constants.DEFAULT_TABLET_LOCATION; Mutation m = new Mutation(new Text(KeyExtent.getMetadataEntry(ReplicationTable.ID, null))); m.put(DIRECTORY_COLUMN.getColumnFamily(), DIRECTORY_COLUMN.getColumnQualifier(), 0, new Value(dir.getBytes(UTF_8))); m.put(TIME_COLUMN.getColumnFamily(), TIME_COLUMN.getColumnQualifier(), 0, new Value((TabletTime.LOGICAL_TIME_ID + "0").getBytes(UTF_8))); m.put(PREV_ROW_COLUMN.getColumnFamily(), PREV_ROW_COLUMN.getColumnQualifier(), 0, KeyExtent.encodePrevEndRow(null)); update(getMetadataTable(context), null, m); } /** * During an upgrade we need to move deletion requests for files under the !METADATA table to the root tablet. */ public static void moveMetaDeleteMarkers(ClientContext context) { String oldDeletesPrefix = "!!~del"; Range oldDeletesRange = new Range(oldDeletesPrefix, true, "!!~dem", false); // move old delete markers to new location, to standardize table schema between all metadata tables try (Scanner scanner = new ScannerImpl(context, RootTable.ID, Authorizations.EMPTY)) { scanner.setRange(oldDeletesRange); for (Entry<Key, Value> entry : scanner) { String row = entry.getKey().getRow().toString(); if (row.startsWith(oldDeletesPrefix)) { moveDeleteEntry(context, RootTable.OLD_EXTENT, entry, row, oldDeletesPrefix); } else { break; } } } } public static void moveMetaDeleteMarkersFrom14(ClientContext context) { // new KeyExtent is only added to force update to write to the metadata table, not the root table KeyExtent notMetadata = new KeyExtent("anythingNotMetadata", null, null); // move delete markers from the normal delete keyspace to the root tablet delete keyspace if the files are for the !METADATA table try (Scanner scanner = new ScannerImpl(context, MetadataTable.ID, Authorizations.EMPTY)) { scanner.setRange(MetadataSchema.DeletesSection.getRange()); for (Entry<Key, Value> entry : scanner) { String row = entry.getKey().getRow().toString(); if (row.startsWith(MetadataSchema.DeletesSection.getRowPrefix() + "/" + MetadataTable.ID)) { moveDeleteEntry(context, notMetadata, entry, row, MetadataSchema.DeletesSection.getRowPrefix()); } else { break; } } } } private static void moveDeleteEntry(ClientContext context, KeyExtent oldExtent, Entry<Key, Value> entry, String rowID, String prefix) { String filename = rowID.substring(prefix.length()); // add the new entry first log.info("Moving " + filename + " marker in " + RootTable.NAME); Mutation m = new Mutation(MetadataSchema.DeletesSection.getRowPrefix() + filename); m.put(EMPTY_BYTES, EMPTY_BYTES, EMPTY_BYTES); update(context, m, RootTable.EXTENT); // then remove the old entry m = new Mutation(entry.getKey().getRow()); m.putDelete(EMPTY_BYTES, EMPTY_BYTES); update(context, m, oldExtent); } public static SortedMap<Text, SortedMap<ColumnFQ, Value>> getTabletEntries( SortedMap<Key, Value> tabletKeyValues, List<ColumnFQ> columns) { TreeMap<Text, SortedMap<ColumnFQ, Value>> tabletEntries = new TreeMap<>(); HashSet<ColumnFQ> colSet = null; if (columns != null) { colSet = new HashSet<>(columns); } for (Entry<Key, Value> entry : tabletKeyValues.entrySet()) { if (columns != null && !colSet.contains(new ColumnFQ(entry.getKey()))) { continue; } Text row = entry.getKey().getRow(); SortedMap<ColumnFQ, Value> colVals = tabletEntries.get(row); if (colVals == null) { colVals = new TreeMap<>(); tabletEntries.put(row, colVals); } colVals.put(new ColumnFQ(entry.getKey()), entry.getValue()); } return tabletEntries; } }