// Java tutorial
/** * Copyright (C) 2013 EURECOM (www.eurecom.fr) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package fr.eurecom.hybris.mds; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import org.apache.curator.RetryPolicy; import org.apache.curator.framework.CuratorFramework; import org.apache.curator.framework.CuratorFrameworkFactory; import org.apache.curator.framework.api.CuratorWatcher; import org.apache.curator.framework.state.ConnectionState; import org.apache.curator.framework.state.ConnectionStateListener; import org.apache.curator.retry.ExponentialBackoffRetry; import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.data.Stat; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import fr.eurecom.hybris.Config; import fr.eurecom.hybris.HybrisException; import fr.eurecom.hybris.Utils; import fr.eurecom.hybris.kvs.drivers.Kvs; import fr.eurecom.hybris.mds.Metadata.Timestamp; /** * Wraps the Zookeeper client and * Provides read&write access to the metadata storage. * @author P. Viotti */ public class MdsManager implements ConnectionStateListener { private static final Logger logger = LoggerFactory.getLogger(Config.LOGGER_NAME); private final CuratorFramework zkCli; private final String storageRoot; /* Integer marker to tell whether a znode has to be created. 
* As Zookeeper setData API parameter it implies overwriting no matter which znode version. */ public static int NONODE = -1; private final String gcRoot; private final String gcStaleDir; private final String gcOrphansDir; enum GcType { STALE, ORPHAN }; /** * Constructs a new MdsManager. * @param zkConnectionStr Zookeeper cluster connection string (e.g. "zksrv1.net:2181,zksrv2.net:2181") * @param zkRoot the Hybris metadata root folder * @throws IOException thrown in case of error while initializing the Zookeeper client */ public MdsManager(String zkConnectionStr, String zkRoot) throws IOException { this.storageRoot = "/" + zkRoot; this.gcRoot = this.storageRoot + "-gc"; this.gcStaleDir = this.gcRoot + "/stale"; this.gcOrphansDir = this.gcRoot + "/orphans"; try { RetryPolicy retryPolicy = new ExponentialBackoffRetry(1000, 3); this.zkCli = CuratorFrameworkFactory.newClient(zkConnectionStr, retryPolicy); this.zkCli.getConnectionStateListenable().addListener(this); this.zkCli.start(); for (String dir : new String[] { this.storageRoot, this.gcRoot, this.gcStaleDir, this.gcOrphansDir }) try { this.zkCli.create().forPath(dir); logger.debug("Created {}.", dir); } catch (KeeperException e) { if (e.code() != KeeperException.Code.NODEEXISTS) throw e; } } catch (Exception e) { logger.error("Could not initialize the Zookeeper client. " + e.getMessage(), e); throw new IOException(e); } } public void stateChanged(CuratorFramework client, ConnectionState newState) { if (!ConnectionState.CONNECTED.equals(newState)) logger.warn("Zookeeper connection state changed to: " + newState); } /** * Worker thread class in charge of marking stale and orphan keys. 
*/ public class GcMarker extends Thread { private final String key; private ArrayList<String> keylist; private Timestamp ts; private List<Kvs> replicas; private final GcType type; public GcMarker(String key, Timestamp ts, ArrayList<String> keylist, List<Kvs> savedReplicas) { this.key = key; this.ts = ts; this.keylist = keylist; this.replicas = savedReplicas; this.type = GcType.ORPHAN; } public GcMarker(String key) { this.key = key; this.type = GcType.STALE; } public void run() { String path; switch (this.type) { case STALE: // create ZNode <root>-gc/stale/<key> path = MdsManager.this.gcStaleDir + "/" + this.key; try { MdsManager.this.zkCli.create().forPath(path); logger.debug("GcMarker: marked {} as stale", path); } catch (KeeperException e) { if (e.code() != KeeperException.Code.NODEEXISTS) logger.warn("GcMarker: could not create stale node " + path, e); } catch (Exception e) { logger.warn("GcMarker: could not create stale node " + path, e); } break; case ORPHAN: // create ZNode <root>-gc/orphans/<KvsKey> path = MdsManager.this.gcOrphansDir + "/" + Utils.getKvsKey(this.key, this.ts); byte[] value = new Metadata(this.ts, null, null, 0, this.keylist, this.replicas, null).serialize(); try { MdsManager.this.zkCli.create().forPath(path, value); logger.debug("GcMarker: marked {} as orphan", path); } catch (KeeperException e) { if (e.code() != KeeperException.Code.NODEEXISTS) logger.warn("GcMarker: could not create orphan node " + path, e); } catch (Exception e) { logger.warn("GcMarker: could not create orphan node " + path, e); } break; } } } /* --------------------------------------------------------------------------------------- Public APIs --------------------------------------------------------------------------------------- */ /** * Timestamped write on metadata storage. 
* @param key - the key * @param md - the metadata to be written * @param zkVersion - the znode version expected to be overwritten; -1 when the znode does not exist * @return boolean: true if a znode has been modified and stale old values need to be garbage-collected * false otherwise: a new znode has been created * @throws HybrisException */ public boolean tsWrite(String key, Metadata md, int zkVersion) throws HybrisException { String path = this.storageRoot + "/" + key; try { if (zkVersion == NONODE) { this.zkCli.create().forPath(path, md.serialize()); logger.debug("ZNode {} created.", path); return false; } else { this.zkCli.setData().withVersion(zkVersion).forPath(path, md.serialize()); logger.debug("ZNode {} modified.", path); return true; } } catch (KeeperException e) { // NONODE exception should not happen since we set a tombstone value upon deletion if (e.code() == KeeperException.Code.NODEEXISTS || // multiple clients tried to create e.code() == KeeperException.Code.BADVERSION) { // or modify the same znode concurrently Stat stat = new Stat(); byte[] newValue = null; try { newValue = this.zkCli.getData().storingStatIn(stat).forPath(path); } catch (Exception e1) { throw new HybrisException(e1); } Metadata newmd = new Metadata(newValue); if (md.getTs().isGreater(newmd.getTs())) { logger.debug("Found smaller version ({}) writing {}: retrying.", newmd.getTs(), key); return this.tsWrite(key, md, stat.getVersion()); } else { logger.warn("Found greater version ({}) writing {}: failing.", newmd.getTs(), key); return false; // XXX // throw new HybrisException("KeeperException, could not write the key.", e); } } else { logger.error("Could not write ZNode " + key); throw new HybrisException("Could not write the ZNode " + key, e); } } catch (Exception e) { logger.error("Could not write ZNode " + key, e); throw new HybrisException("Could not write ZNode " + key + ": " + e.getMessage(), e); } } /** * Timestamped read ("slow read" in ZooKeeper parlance) from metadata 
storage. * @param key the key to read * @param stat the Stat Zookeeper object to be written with znode details (can be null) * @return Metadata object * or null in case the znode does not exist or there is a tombstone Metadata object * (to distinguish these two cases one must use the Stat object) * @throws HybrisException */ public Metadata tsRead(String key, Stat stat) throws HybrisException { String path = this.storageRoot + "/" + key; try { this.zkCli.sync().forPath(path); byte[] rawMd = this.zkCli.getData().storingStatIn(stat).forPath(path); return new Metadata(rawMd); } catch (KeeperException e) { if (e.code() == KeeperException.Code.NONODE) return null; else { logger.error("Could not read ZNode " + path, e); throw new HybrisException("Could not read the ZNode " + path, e); } } catch (Exception e) { logger.error("Could not read ZNode " + path, e); throw new HybrisException("Could not read the ZNode " + path + e.getMessage(), e); } } /** * Timestamped read ("slow read" in ZooKeeper parlance) from metadata storage. 
* @param key the key to read * @param stat the Stat Zookeeper object to be written with znode details (can be null) * @param watcher to set upon executing the getData operation * @return Metadata object * or null in case the znode does not exist or there is a tombstone Metadata object * (to distinguish these two cases one must use the Stat object) * @throws HybrisException */ public Metadata tsRead(String key, Stat stat, CuratorWatcher watcher) throws HybrisException { String path = this.storageRoot + "/" + key; try { this.zkCli.sync().forPath(path); byte[] rawMd = this.zkCli.getData().storingStatIn(stat).usingWatcher(watcher).forPath(path); return new Metadata(rawMd); } catch (KeeperException e) { if (e.code() == KeeperException.Code.NONODE) return null; else { logger.error("Could not read ZNode " + path, e); throw new HybrisException("Could not read the ZNode " + path, e); } } catch (Exception e) { logger.error("Could not read ZNode " + path, e); throw new HybrisException("Could not read the ZNode " + path + e.getMessage(), e); } } /** * Get the list of metadata keys stored (filtering out tombstone values). * @return the list of metadata keys stored in the * @throws HybrisException */ public List<String> list() throws HybrisException { List<String> znodes; try { znodes = this.zkCli.getChildren().forPath(this.storageRoot); } catch (Exception e) { logger.error("Could not list the children of ZNode " + this.storageRoot, e); throw new HybrisException(e); } for (Iterator<String> it = znodes.iterator(); it.hasNext();) { String znode = it.next(); String znodePath = this.storageRoot + "/" + znode; byte[] rawMd = null; try { rawMd = this.zkCli.getData().forPath(znodePath); } catch (Exception e) { logger.warn("Could not read metadata for ZNode " + znodePath, e); } Metadata md = new Metadata(rawMd); if (md.isTombstone()) it.remove(); } return znodes; } /** * Mark a key as deleted writing a tombstone value. 
* @param key * @param tombstone the tombstone metadata to be written * @param zkVersion * @throws HybrisException */ public void delete(String key, Metadata tombstone, int zkVersion) throws HybrisException { this.tsWrite(key, tombstone, zkVersion); } public synchronized byte[] getOrCreateIv() { String ivKey = this.storageRoot + "-iv"; byte[] iv = null; try { this.zkCli.sync().forPath(ivKey); iv = this.zkCli.getData().forPath(ivKey); } catch (Exception e) { } if (iv == null) { iv = new byte[16]; Utils.generateRandomBytes(iv); try { iv = new byte[16]; this.zkCli.create().forPath(ivKey, iv); } catch (Exception e) { logger.warn("Could not store the generated IV on ZooKeeper"); } } return iv; } /** * Get all the stored metadata (filtering out tombstone values). * XXX not scalable - for debugging purposes * @return a map of keys (String) and Metadata objects * @throws HybrisException */ public Map<String, Metadata> getAll() throws HybrisException { try { return this.getAllChildrenMetadata(this.storageRoot); } catch (Exception e) { logger.error("Could not get all the metadata from children ZNodes of " + this.gcRoot, e); throw new HybrisException(e); } } /** * Empty the metadata storage root container. * ATTENTION: it erases all metadata stored in the root container! * @throws HybrisException */ public void emptyMetadataContainer() throws HybrisException { try { List<String> znodes = this.zkCli.getChildren().forPath(this.storageRoot); for (String key : znodes) { String path = this.storageRoot + "/" + key; this.recursiveDelete(path); } } catch (Exception e) { logger.warn("Could not empty the root container", e); throw new HybrisException("Could not empty the root container", e); } } public void shutdown() { this.zkCli.close(); } /* -------------------------------------- GC functions -------------------------------------- */ /** * Get orphan keys and their metadata. 
* @return * @throws HybrisException */ public Map<String, Metadata> getOrphans() throws HybrisException { try { return this.getAllChildrenMetadata(this.gcOrphansDir); } catch (Exception e) { logger.error("Could not get all the metadata from children ZNodes of " + this.gcOrphansDir, e); throw new HybrisException(e); } } /** * Get stale keys. * @return * @throws HybrisException */ public List<String> getStaleKeys() throws HybrisException { try { return this.zkCli.getChildren().forPath(this.gcStaleDir); } catch (Exception e) { logger.error("Could not list the children of ZNode " + this.storageRoot, e); throw new HybrisException(e); } } /** * Delete the set of orphan keys passed as argument. * @param orphanKeys */ public void removeOrphanKeys(Set<String> orphanKeys) { for (String key : orphanKeys) { String znodePath = this.gcOrphansDir + "/" + key; try { this.zkCli.delete().forPath(znodePath); } catch (KeeperException e) { if (e.code() != KeeperException.Code.NONODE) logger.warn("Could not delete orphan ZNode " + znodePath, e); } catch (Exception e) { logger.warn("Could not delete orphan ZNode " + znodePath, e); } } } /** * Delete the key passed as argument from the list of keys * to be checked as outdated for gc. * @param staleKeys */ public void removeStaleKey(String staleKey) { String znodePath = this.gcStaleDir + "/" + staleKey; try { this.zkCli.delete().forPath(znodePath); } catch (KeeperException e) { if (e.code() != KeeperException.Code.NONODE) logger.warn("Could not delete orphan ZNode " + znodePath, e); } catch (Exception e) { logger.warn("Could not delete orphan ZNode " + znodePath, e); } } /** * Empty stale and orphan keys containers. 
* @throws HybrisException */ public void emptyStaleAndOrphansContainers() throws HybrisException { try { for (String path : new String[] { this.gcOrphansDir, this.gcStaleDir }) { List<String> znodes = this.zkCli.getChildren().forPath(path); for (String key : znodes) { String znode = path + "/" + key; this.recursiveDelete(znode); } } } catch (Exception e) { logger.warn("Could not empty the GC containers", e); throw new HybrisException("Could not empty the GC containers", e); } } /* --------------------------------------------------------------------------------------- Private methods --------------------------------------------------------------------------------------- */ /** * Retrieve all Metadata objects associated to the children of a certain key. * @param key * @return a Map<String, Metadata> of all metadata associated with the ZNode children of key * @throws Exception */ public Map<String, Metadata> getAllChildrenMetadata(String key) throws Exception { List<String> znodes; try { znodes = this.zkCli.getChildren().forPath(key); } catch (Exception e) { throw e; } HashMap<String, Metadata> retMap = new HashMap<String, Metadata>(); for (String znode : znodes) { String znodePath = key + "/" + znode; byte[] rawMd = null; try { rawMd = this.zkCli.getData().forPath(znodePath); } catch (Exception e) { logger.warn("Could not read metadata for ZNode " + znodePath, e); } Metadata md = new Metadata(rawMd); if (!md.isTombstone()) retMap.put(znode, md); } return retMap; } /** * Recursively delete a key and all its children. * @param key * @throws Exception */ private void recursiveDelete(String key) throws Exception { Stat s = this.zkCli.checkExists().forPath(key); if (s != null) { List<String> children = this.zkCli.getChildren().forPath(key); for (String child : children) { String node = key + "/" + child; this.recursiveDelete(node); } this.zkCli.delete().forPath(key); } } }