Java tutorial
/* * Copyright 2011 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.springframework.data.hadoop.pig; import java.security.PrivilegedExceptionAction; import java.util.Collection; import org.apache.hadoop.security.UserGroupInformation; import org.apache.pig.PigServer; import org.apache.pig.backend.executionengine.ExecException; import org.apache.pig.impl.PigContext; import org.springframework.beans.factory.BeanCreationException; import org.springframework.beans.factory.BeanNameAware; import org.springframework.beans.factory.FactoryBean; import org.springframework.beans.factory.ObjectFactory; import org.springframework.util.CollectionUtils; import org.springframework.util.StringUtils; /** * Factory for creating a {@link PigServer} instance. Note that since PigServer is not thread-safe and the Pig API does not * provide some type of factory, the factory bean returns an instance of {@link ObjectFactory} (which handles the creation of {@link PigServer} instances) * instead of the raw {@link PigServer} object which cannot be reused. * * Note that the caller needs to handle the object clean-up, specifically calling {@link PigServer#shutdown()}. * * In general, to avoid leaks it is recommended to use the {@link PigTemplate}. * * @author Costin Leau */ public class PigServerFactoryBean implements FactoryBean<PigServerFactory>, BeanNameAware { private PigContext pigContext; private Collection<String> pathToSkip; private Collection<PigScript> scripts; private Integer parallelism; private String jobName; private String jobPriority; private Boolean validateEachStatement; private String beanName; private String user; private class DefaultPigServerFactory implements PigServerFactory { @Override public PigServer getPigServer() { try { return createPigInstance(); } catch (Exception ex) { throw new BeanCreationException("Cannot create PigServer instance", ex); } } }; public PigServerFactory getObject() throws Exception { return new DefaultPigServerFactory(); } public Class<?> getObjectType() { return PigServerFactory.class; } public boolean isSingleton() { return true; } protected PigServer createPigInstance() throws Exception { final PigContext ctx = (pigContext != null ? pigContext : new PigContext()); // apparently if not connected, pig can cause all kind of errors PigServer pigServer = null; try { if (StringUtils.hasText(user)) { UserGroupInformation ugi = UserGroupInformation.createProxyUser(user, UserGroupInformation.getLoginUser()); pigServer = ugi.doAs(new PrivilegedExceptionAction<PigServer>() { @Override public PigServer run() throws Exception { return new PigServer(ctx, true); } }); } else { pigServer = new PigServer(ctx, true); } } catch (ExecException ex) { throw PigUtils.convert(ex); } if (!CollectionUtils.isEmpty(pathToSkip)) { for (String path : pathToSkip) { pigServer.addPathToSkip(path); } } if (parallelism != null) { pigServer.setDefaultParallel(parallelism); } if (StringUtils.hasText(jobName)) { pigServer.setJobName(jobName); } else { if (StringUtils.hasText(beanName)) { pigServer.setJobName(beanName); } } if (StringUtils.hasText(jobPriority)) { pigServer.setJobPriority(jobPriority); } if (validateEachStatement != null) { PigUtils.validateEachStatement(pigServer, validateEachStatement); } if (!CollectionUtils.isEmpty(scripts)) { PigUtils.runWithConversion(pigServer, scripts, false); } return pigServer; } public void setBeanName(String name) { this.beanName = name; } /** * Sets the {@link PigContext} to use. * * @param pigContext The pigContext to set. */ public void setPigContext(PigContext pigContext) { this.pigContext = pigContext; } /** * Sets the paths to skip. * * @param pathToSkip The pathToSkip to set. */ public void setPathsToSkip(Collection<String> pathToSkip) { this.pathToSkip = pathToSkip; } /** * Sets the scripts to execute at startup. * * @param scripts The scripts to set. */ public void setScripts(Collection<PigScript> scripts) { this.scripts = scripts; } /** * Sets the parallelism. * * @param parallelism The parallelism to set. */ public void setParallelism(Integer parallelism) { this.parallelism = parallelism; } /** * Sets the job name. * * @param jobName The jobName to set. */ public void setJobName(String jobName) { this.jobName = jobName; } /** * Sets the job priority. * * @param jobPriority The jobPriority to set. */ public void setJobPriority(String jobPriority) { this.jobPriority = jobPriority; } /** * Indicates whether each statement should be validated or not. By default it is unset, * relying on the Pig defaults. * * @param validateEachStatement whether to validate each statement or not. */ public void setValidateEachStatement(Boolean validateEachStatement) { this.validateEachStatement = validateEachStatement; } /** * Sets the user impersonation (optional) for executing Pig jobs. * Should be used when running against a Hadoop Kerberos cluster. * * @param user user/group information */ public void setUser(String user) { this.user = user; } }