Java tutorial: HadoopPinotFS, an implementation of Pinot's PinotFS abstraction on the Hadoop FileSystem
/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.filesystem;

import com.google.common.base.Strings;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import org.apache.commons.configuration.Configuration;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.security.UserGroupInformation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static com.linkedin.pinot.common.utils.CommonConstants.SegmentOperations.HadoopSegmentOperations.*;
import static com.linkedin.pinot.common.utils.CommonConstants.SegmentOperations.*;


/**
 * Implementation of PinotFS for the Hadoop Filesystem
 */
public class HadoopPinotFS extends PinotFS {
  private static final Logger LOGGER = LoggerFactory.getLogger(HadoopPinotFS.class);

  private org.apache.hadoop.fs.FileSystem hadoopFS = null;
  private int retryCount = RETRY_DEFAULT;
  private int retryWaitMs = RETRY_WAITIME_MS_DEFAULT;
  private org.apache.hadoop.conf.Configuration hadoopConf;

  public HadoopPinotFS() {
  }

  @Override
  public void init(Configuration config) {
    try {
      retryCount = config.getInt(RETRY, retryCount);
      retryWaitMs = config.getInt(RETRY_WAITIME_MS, retryWaitMs);
      hadoopConf = getConf(config.getString(HADOOP_CONF_PATH));
      authenticate(hadoopConf, config);
      hadoopFS = org.apache.hadoop.fs.FileSystem.get(hadoopConf);
      LOGGER.info("successfully initialized HadoopPinotFS");
    } catch (IOException e) {
      throw new RuntimeException("Could not initialize HadoopPinotFS", e);
    }
  }

  @Override
  public boolean delete(URI segmentUri) throws IOException {
    return hadoopFS.delete(new Path(segmentUri), true);
  }

  @Override
  public boolean move(URI srcUri, URI dstUri) throws IOException {
    return hadoopFS.rename(new Path(srcUri), new Path(dstUri));
  }

  /**
   * Note that this method copies within a cluster. If you want to copy outside the cluster, you will
   * need to create a new configuration and filesystem. Keeps files if copy/move is partial.
   */
  @Override
  public boolean copy(URI srcUri, URI dstUri) throws IOException {
    Path source = new Path(srcUri);
    Path target = new Path(dstUri);
    RemoteIterator<LocatedFileStatus> sourceFiles = hadoopFS.listFiles(source, true);
    if (sourceFiles != null) {
      while (sourceFiles.hasNext()) {
        // Note: deleteSource is true, so each source file is removed once it has been copied successfully.
        boolean succeeded = FileUtil.copy(hadoopFS, sourceFiles.next().getPath(), hadoopFS, target, true, hadoopConf);
        if (!succeeded) {
          return false;
        }
      }
    }
    return true;
  }

  @Override
  public boolean exists(URI fileUri) throws IOException {
    return hadoopFS.exists(new Path(fileUri));
  }

  @Override
  public long length(URI fileUri) throws IOException {
    return hadoopFS.getLength(new Path(fileUri));
  }

  @Override
  public String[] listFiles(URI fileUri) throws IOException {
    ArrayList<String> filePathStrings = new ArrayList<>();
    Path path = new Path(fileUri);
    if (hadoopFS.exists(path)) {
      // Recursively list all files under the given path and return their full URIs.
      RemoteIterator<LocatedFileStatus> fileListItr = hadoopFS.listFiles(path, true);
      while (fileListItr != null && fileListItr.hasNext()) {
        LocatedFileStatus file = fileListItr.next();
        filePathStrings.add(file.getPath().toUri().toString());
      }
    } else {
      throw new IllegalArgumentException("fileUri does not exist: " + fileUri);
    }
    String[] retArray = new String[filePathStrings.size()];
    filePathStrings.toArray(retArray);
    return retArray;
  }

  @Override
  public void copyToLocalFile(URI srcUri, URI dstUri) throws IOException {
    hadoopFS.copyToLocalFile(new Path(srcUri), new Path(dstUri));
  }

  @Override
  public void copyFromLocalFile(URI srcUri, URI dstUri) throws IOException {
    hadoopFS.copyFromLocalFile(new Path(srcUri), new Path(dstUri));
  }

  private void authenticate(org.apache.hadoop.conf.Configuration hadoopConf,
      org.apache.commons.configuration.Configuration configs) {
    String principal = configs.getString(PRINCIPAL);
    String keytab = configs.getString(KEYTAB);
    if (!Strings.isNullOrEmpty(principal) && !Strings.isNullOrEmpty(keytab)) {
      UserGroupInformation.setConfiguration(hadoopConf);
      if (UserGroupInformation.isSecurityEnabled()) {
        try {
          // Re-login only if the current user has no Kerberos credentials or is a different principal.
          if (!UserGroupInformation.getCurrentUser().hasKerberosCredentials()
              || !UserGroupInformation.getCurrentUser().getUserName().equals(principal)) {
            LOGGER.info("Trying to authenticate user [{}] with keytab [{}]..", principal, keytab);
            UserGroupInformation.loginUserFromKeytab(principal, keytab);
          }
        } catch (IOException e) {
          throw new RuntimeException(
              String.format("Failed to authenticate user principal [%s] with keytab [%s]", principal, keytab), e);
        }
      }
    }
  }

  private org.apache.hadoop.conf.Configuration getConf(String hadoopConfPath) {
    org.apache.hadoop.conf.Configuration hadoopConf = new org.apache.hadoop.conf.Configuration();
    if (Strings.isNullOrEmpty(hadoopConfPath)) {
      LOGGER.warn("no hadoop conf path is provided, will rely on default config");
    } else {
      hadoopConf.addResource(new Path(hadoopConfPath, "core-site.xml"));
      hadoopConf.addResource(new Path(hadoopConfPath, "hdfs-site.xml"));
    }
    return hadoopConf;
  }
}
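
For readers following along, here is a minimal usage sketch of how this class might be wired up and exercised. It is not part of the original source: the example class, the /etc/hadoop/conf path, and the hdfs:// URIs are placeholders you would replace with your own values. The configuration keys reuse the same constants init() reads (HADOOP_CONF_PATH, and optionally PRINCIPAL and KEYTAB when Kerberos is enabled), via the same static imports the class uses.

// Hypothetical example, kept in its own file; names and paths below are assumptions, not part of the library.
package com.linkedin.pinot.filesystem.example;

import java.net.URI;
import org.apache.commons.configuration.PropertiesConfiguration;

import static com.linkedin.pinot.common.utils.CommonConstants.SegmentOperations.HadoopSegmentOperations.*;
import static com.linkedin.pinot.common.utils.CommonConstants.SegmentOperations.*;

public class HadoopPinotFSExample {
  public static void main(String[] args) throws Exception {
    // Build the commons-configuration object that HadoopPinotFS.init() reads its settings from.
    PropertiesConfiguration config = new PropertiesConfiguration();
    config.addProperty(HADOOP_CONF_PATH, "/etc/hadoop/conf"); // directory holding core-site.xml / hdfs-site.xml
    // Optional Kerberos settings, only consulted when security is enabled in the Hadoop configuration:
    // config.addProperty(PRINCIPAL, "pinot@EXAMPLE.COM");
    // config.addProperty(KEYTAB, "/etc/security/keytabs/pinot.keytab");

    com.linkedin.pinot.filesystem.HadoopPinotFS fs = new com.linkedin.pinot.filesystem.HadoopPinotFS();
    fs.init(config);

    // List every file under a segment directory and print its size.
    URI segmentDir = new URI("hdfs://namenode:8020/pinot/segments/myTable");
    if (fs.exists(segmentDir)) {
      for (String file : fs.listFiles(segmentDir)) {
        System.out.println(file + " (" + fs.length(new URI(file)) + " bytes)");
      }
    }

    // Pull one segment down to local disk, e.g. before loading it on a server.
    fs.copyToLocalFile(new URI("hdfs://namenode:8020/pinot/segments/myTable/segment_0"),
        new URI("file:///tmp/segment_0"));
  }
}

The sketch only touches the read-oriented calls (exists, listFiles, length, copyToLocalFile); delete, move, copy and copyFromLocalFile follow the same URI-based pattern shown in the class above.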