List of usage examples for org.apache.hadoop.io.Text
public Text(byte[] utf8)
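Text stores its contents as UTF-8 bytes, so this constructor takes a byte array that is copied into the instance's internal buffer and decoded as UTF-8 when read back. A minimal sketch (standalone, not drawn from the sources below) of the round trip:

    // A minimal sketch (standalone; not from the sources below) of Text(byte[] utf8):
    // the given bytes are copied and decoded as UTF-8 on toString().
    byte[] utf8 = "hello".getBytes(StandardCharsets.UTF_8);
    Text text = new Text(utf8);                 // copies the bytes into Text's buffer
    assert text.getLength() == utf8.length;     // length in bytes, not characters
    assert "hello".equals(text.toString());     // decoded back to a String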
From source file: cn.uc.hadoop.mapreduce.lib.input.FileNameLineRecordReader.java
License: Apache License

    public boolean nextKeyValue() throws IOException {
        if (key == null) {
            // Unreachable in practice: the key was already set in initialize().
            key = new Text("NO_FILE_NAME");
        }
        if (value == null) {
            value = new Text();
        }
        int newSize = 0;
        // We always read one extra line, which lies outside the upper
        // split limit, i.e. (end - 1).
        while (getFilePosition() <= end) {
            newSize = in.readLine(value, maxLineLength, Math.max(maxBytesToConsume(pos), maxLineLength));
            if (newSize == 0) {
                break;
            }
            pos += newSize;
            if (newSize < maxLineLength) {
                break;
            }
            // Line too long; try again.
            LOG.info("Skipped line of size " + newSize + " at pos " + (pos - newSize));
        }
        if (newSize == 0) {
            key = null;
            value = null;
            return false;
        } else {
            return true;
        }
    }
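Note how the reader above reuses one Text value across readLine() calls rather than allocating a new object per record. A minimal standalone sketch of that pattern (the in-memory input stream is a placeholder):

    // A minimal sketch (standalone; the input data is a placeholder) of reusing a
    // single Text instance as the output buffer for LineReader.readLine().
    Configuration conf = new Configuration();
    InputStream data = new ByteArrayInputStream("one\ntwo\n".getBytes(StandardCharsets.UTF_8));
    LineReader reader = new LineReader(data, conf);
    Text line = new Text();                      // reused for every record
    while (reader.readLine(line) > 0) {          // returns bytes consumed, 0 at EOF
        System.out.println(line);                // prints "one", then "two"
    }
    reader.close();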
From source file: cn.uc.hadoop.mapreduce.lib.input.FilePathLineRecordReader.java
License: Apache License

    public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
        FileSplit split = (FileSplit) genericSplit;
        Configuration job = context.getConfiguration();
        this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
        start = split.getStart();
        end = start + split.getLength();
        final Path file = split.getPath();
        // The key is the full path of the file backing this split.
        key = new Text(file.toString());
        compressionCodecs = new CompressionCodecFactory(job);
        codec = compressionCodecs.getCodec(file);

        // Open the file and seek to the start of the split.
        final FileSystem fs = file.getFileSystem(job);
        fileIn = fs.open(file);
        if (isCompressedInput()) {
            decompressor = CodecPool.getDecompressor(codec);
            if (codec instanceof SplittableCompressionCodec) {
                final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                        fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
                if (null == this.recordDelimiterBytes) {
                    in = new LineReader(cIn, job);
                } else {
                    in = new LineReader(cIn, job, this.recordDelimiterBytes);
                }
                start = cIn.getAdjustedStart();
                end = cIn.getAdjustedEnd();
                filePosition = cIn;
            } else {
                if (null == this.recordDelimiterBytes) {
                    in = new LineReader(codec.createInputStream(fileIn, decompressor), job);
                } else {
                    in = new LineReader(codec.createInputStream(fileIn, decompressor), job,
                            this.recordDelimiterBytes);
                }
                filePosition = fileIn;
            }
        } else {
            fileIn.seek(start);
            if (null == this.recordDelimiterBytes) {
                in = new LineReader(fileIn, job);
            } else {
                in = new LineReader(fileIn, job, this.recordDelimiterBytes);
            }
            filePosition = fileIn;
        }
        // If this is not the first split, we always throw away the first record
        // because we always (except for the last split) read one extra line in
        // next().
        if (start != 0) {
            start += in.readLine(new Text(), 0, maxBytesToConsume(start));
        }
        this.pos = start;
    }
From source file: co.cask.cdap.app.runtime.spark.SparkCredentialsUpdaterTest.java
License: Apache License

    @Test
    public void testUpdater() throws Exception {
        Location credentialsDir = Locations.toLocation(TEMPORARY_FOLDER.newFolder());
        // Create an updater that performs no auto-update within the test window and no cleanup.
        SparkCredentialsUpdater updater = new SparkCredentialsUpdater(createCredentialsSupplier(),
                credentialsDir, "credentials", TimeUnit.DAYS.toMillis(1), TimeUnit.DAYS.toMillis(1),
                Integer.MAX_VALUE) {
            @Override
            long getNextUpdateDelay(Credentials credentials) throws IOException {
                return TimeUnit.DAYS.toMillis(1);
            }
        };

        // Before the updater starts, the directory is empty.
        Assert.assertTrue(credentialsDir.list().isEmpty());

        UserGroupInformation.getCurrentUser().addToken(
                new Token<>(Bytes.toBytes("id"), Bytes.toBytes("pass"), new Text("kind"), new Text("service")));

        updater.startAndWait();
        try {
            List<Location> expectedFiles = new ArrayList<>();
            expectedFiles.add(credentialsDir.append("credentials-1"));

            for (int i = 1; i <= 10; i++) {
                Assert.assertEquals(expectedFiles, listAndSort(credentialsDir));

                // Read the credentials from the last file.
                Credentials newCredentials = new Credentials();
                try (DataInputStream is = new DataInputStream(
                        expectedFiles.get(expectedFiles.size() - 1).getInputStream())) {
                    newCredentials.readTokenStorageStream(is);
                }

                // It should contain all tokens of the current user.
                Credentials userCredentials = UserGroupInformation.getCurrentUser().getCredentials();
                for (Token<? extends TokenIdentifier> token : userCredentials.getAllTokens()) {
                    Assert.assertEquals(token, newCredentials.getToken(token.getService()));
                }

                UserGroupInformation.getCurrentUser().addToken(new Token<>(Bytes.toBytes("id" + i),
                        Bytes.toBytes("pass" + i), new Text("kind" + i), new Text("service" + i)));
                updater.run();
                expectedFiles.add(credentialsDir.append("credentials-" + (i + 1)));
            }
        } finally {
            updater.stopAndWait();
        }
    }
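In the token APIs exercised by this test, the token kind, the service, and the alias under which a token is stored in Credentials are all Text values. A minimal sketch with hypothetical identifier and password bytes:

    // A minimal sketch (hypothetical values) showing that token kind, service, and the
    // Credentials alias are all Text instances.
    Token<TokenIdentifier> token = new Token<>(
            "id".getBytes(StandardCharsets.UTF_8),     // token identifier
            "pass".getBytes(StandardCharsets.UTF_8),   // password
            new Text("kind"),                          // token kind
            new Text("service"));                      // service name
    Credentials credentials = new Credentials();
    credentials.addToken(token.getService(), token);   // the Text service doubles as the alias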
From source file: co.cask.cdap.common.security.YarnTokenUtils.java
License: Apache License

    /**
     * Gets a YARN delegation token and stores it in the given Credentials.
     *
     * @return the same Credentials instance as the one given as a parameter.
     */
    public static Credentials obtainToken(YarnConfiguration configuration, Credentials credentials) {
        if (!UserGroupInformation.isSecurityEnabled()) {
            return credentials;
        }
        try {
            YarnClient yarnClient = YarnClient.createYarnClient();
            yarnClient.init(configuration);
            yarnClient.start();

            try {
                Text renewer = new Text(UserGroupInformation.getCurrentUser().getShortUserName());
                org.apache.hadoop.yarn.api.records.Token rmDelegationToken = yarnClient
                        .getRMDelegationToken(renewer);

                // TODO: The following logic should be replaced with a call to
                // ClientRMProxy.getRMDelegationTokenService after CDAP-4825 is resolved.
                List<String> services = new ArrayList<>();
                if (HAUtil.isHAEnabled(configuration)) {
                    // If HA is enabled, we need to enumerate all RM hosts
                    // and add the corresponding service name to the token service.
                    // Copy the yarn conf since we need to modify it to get the RM addresses.
                    YarnConfiguration yarnConf = new YarnConfiguration(configuration);
                    for (String rmId : HAUtil.getRMHAIds(configuration)) {
                        yarnConf.set(YarnConfiguration.RM_HA_ID, rmId);
                        InetSocketAddress address = yarnConf.getSocketAddr(YarnConfiguration.RM_ADDRESS,
                                YarnConfiguration.DEFAULT_RM_ADDRESS, YarnConfiguration.DEFAULT_RM_PORT);
                        services.add(SecurityUtil.buildTokenService(address).toString());
                    }
                } else {
                    services.add(SecurityUtil.buildTokenService(YarnUtils.getRMAddress(configuration)).toString());
                }

                Token<TokenIdentifier> token = ConverterUtils.convertFromYarn(rmDelegationToken,
                        (InetSocketAddress) null);
                token.setService(new Text(Joiner.on(',').join(services)));
                credentials.addToken(new Text(token.getService()), token);

                // OK to log; it won't log the credential, only information about the token.
                LOG.info("Added RM delegation token: {}", token);
            } finally {
                yarnClient.stop();
            }

            return credentials;
        } catch (Exception e) {
            LOG.error("Failed to get secure token for YARN.", e);
            throw Throwables.propagate(e);
        }
    }
From source file: co.cask.cdap.dq.test.AggregationFunctionsTest.java
License: Apache License

    @Test
    public void uniqueValuesGenerateAggregationTest() throws Exception {
        DataQualityWritable val1 = new DataQualityWritable();
        DataQualityWritable val2 = new DataQualityWritable();
        DataQualityWritable val3 = new DataQualityWritable();
        val1.set(new Text("a"));
        val2.set(new Text("a"));
        val3.set(new Text("a"));
        UniqueValues uniqueValues = new UniqueValues();
        uniqueValues.add(val1);
        uniqueValues.add(val2);
        uniqueValues.add(val3);
        byte[] output = uniqueValues.aggregate();
        Assert.assertEquals("[a]", Bytes.toString(output));
    }
From source file: co.cask.cdap.dq.test.AggregationFunctionsTest.java
License: Apache License

    @Test
    public void discreteValuesGenerateAggregationTest() throws Exception {
        DataQualityWritable val1 = new DataQualityWritable();
        DataQualityWritable val2 = new DataQualityWritable();
        DataQualityWritable val3 = new DataQualityWritable();
        val1.set(new Text("a"));
        val2.set(new Text("a"));
        val3.set(new Text("b"));
        DiscreteValuesHistogram discreteValuesHistogram = new DiscreteValuesHistogram();
        discreteValuesHistogram.add(val1);
        discreteValuesHistogram.add(val2);
        discreteValuesHistogram.add(val3);
        Map<String, Integer> expectedMap = Maps.newHashMap();
        expectedMap.put("a", 2);
        expectedMap.put("b", 1);
        byte[] outputVal = discreteValuesHistogram.aggregate();
        Map<String, Integer> outputMap = GSON.fromJson(Bytes.toString(outputVal), TOKEN_TYPE_MAP_STRING_INTEGER);
        Assert.assertEquals(expectedMap, outputMap);
    }
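Both tests depend on Text's value semantics: equals(), hashCode(), and compareTo() operate on the underlying UTF-8 bytes, so separate instances holding "a" land in the same histogram bucket. A minimal sketch:

    // A minimal sketch of Text's value semantics, which the histograms above rely on.
    Text a1 = new Text("a");
    Text a2 = new Text("a");
    assert a1.equals(a2);                        // byte-wise content equality
    assert a1.hashCode() == a2.hashCode();       // safe to use as a hash key
    assert a1.compareTo(new Text("b")) < 0;      // byte-order comparison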
From source file: co.cask.cdap.etl.batch.connector.ConnectorSink.java
License: Apache License

    @Override
    public void transform(StructuredRecord input, Emitter<KeyValue<NullWritable, Text>> emitter)
            throws Exception {
        if (writeSchema) {
            input = modifyRecord(input);
        }
        emitter.emit(
                new KeyValue<>(NullWritable.get(), new Text(StructuredRecordStringConverter.toJsonString(input))));
    }
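Emitting the JSON as a Text works because Text implements Writable, so the framework can serialize it. A minimal standalone sketch of the write/readFields round trip behind that:

    // A minimal sketch (standalone) of the Writable round trip that lets Text serve
    // as a MapReduce output value.
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    Text original = new Text("{\"name\":\"value\"}");
    original.write(new DataOutputStream(buffer));             // length-prefixed UTF-8
    Text copy = new Text();
    copy.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));
    assert original.equals(copy);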
From source file: co.cask.cdap.explore.security.HiveTokenUtils.java
License: Apache License

    public static Credentials obtainToken(Credentials credentials) {
        ClassLoader hiveClassloader = ExploreServiceUtils.getExploreClassLoader();
        ClassLoader contextClassloader = Thread.currentThread().getContextClassLoader();
        Thread.currentThread().setContextClassLoader(hiveClassloader);
        try {
            LOG.info("Obtaining delegation token for Hive");
            Class hiveConfClass = hiveClassloader.loadClass("org.apache.hadoop.hive.conf.HiveConf");
            Object hiveConf = hiveConfClass.newInstance();
            Class hiveClass = hiveClassloader.loadClass("org.apache.hadoop.hive.ql.metadata.Hive");
            @SuppressWarnings("unchecked")
            Method hiveGet = hiveClass.getMethod("get", hiveConfClass);
            Object hiveObject = hiveGet.invoke(null, hiveConf);

            String user = UserGroupInformation.getCurrentUser().getShortUserName();
            @SuppressWarnings("unchecked")
            Method getDelegationToken = hiveClass.getMethod("getDelegationToken", String.class, String.class);
            String tokenStr = (String) getDelegationToken.invoke(hiveObject, user, user);

            Token<DelegationTokenIdentifier> delegationToken = new Token<>();
            delegationToken.decodeFromUrlString(tokenStr);
            delegationToken.setService(new Text(HiveAuthFactory.HS2_CLIENT_TOKEN));
            LOG.info("Adding delegation token {} from MetaStore for service {} for user {}", delegationToken,
                    delegationToken.getService(), user);
            credentials.addToken(delegationToken.getService(), delegationToken);
            return credentials;
        } catch (Exception e) {
            LOG.error("Exception when fetching delegation token from Hive MetaStore", e);
            throw Throwables.propagate(e);
        } finally {
            Thread.currentThread().setContextClassLoader(contextClassloader);
        }
    }
From source file: co.cask.cdap.explore.security.JobHistoryServerTokenUtils.java
License: Apache License

    /**
     * Gets a JHS delegation token and stores it in the given Credentials.
     *
     * @return the same Credentials instance as the one given as a parameter.
     */
    public static Credentials obtainToken(Configuration configuration, Credentials credentials) {
        if (!UserGroupInformation.isSecurityEnabled()) {
            return credentials;
        }
        String historyServerAddress = configuration.get("mapreduce.jobhistory.address");
        HostAndPort hostAndPort = HostAndPort.fromString(historyServerAddress);
        try {
            LOG.info("Obtaining delegation token for JHS");
            ResourceMgrDelegate resourceMgrDelegate = new ResourceMgrDelegate(new YarnConfiguration(configuration));
            MRClientCache clientCache = new MRClientCache(configuration, resourceMgrDelegate);
            MRClientProtocol hsProxy = clientCache.getInitializedHSProxy();

            GetDelegationTokenRequest request = new GetDelegationTokenRequestPBImpl();
            request.setRenewer(YarnUtils.getYarnTokenRenewer(configuration));

            InetSocketAddress address = new InetSocketAddress(hostAndPort.getHostText(), hostAndPort.getPort());
            Token<TokenIdentifier> token = ConverterUtils
                    .convertFromYarn(hsProxy.getDelegationToken(request).getDelegationToken(), address);

            credentials.addToken(new Text(token.getService()), token);
            return credentials;
        } catch (Exception e) {
            LOG.error("Failed to get secure token for JHS at {}.", hostAndPort, e);
            throw Throwables.propagate(e);
        }
    }
From source file: co.cask.cdap.hive.objectinspector.SimpleMapEqualComparerTest.java
License: Apache License

    Object serializeAndDeserialize(TextStringMapHolder o1, StructObjectInspector oi1, LazySimpleSerDe serde,
            SerDeParameters serdeParams) throws IOException, SerDeException {
        ByteStream.Output serializeStream = new ByteStream.Output();
        LazySimpleSerDe.serialize(serializeStream, o1, oi1, serdeParams.getSeparators(), 0,
                serdeParams.getNullSequence(), serdeParams.isEscaped(), serdeParams.getEscapeChar(),
                serdeParams.getNeedsEscape());
        // Wrap the serialized bytes in a Text and hand it to the deserializer.
        Text t = new Text(serializeStream.toByteArray());
        return serde.deserialize(t);
    }
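Here the Text(byte[]) constructor wraps bytes produced by a serializer rather than by String.getBytes(). When such bytes need to come back as a String, Text also offers static codec helpers; a minimal standalone sketch (both helpers throw CharacterCodingException on invalid UTF-8):

    // A minimal sketch (standalone) of Text's static UTF-8 helpers, the same codec
    // path used when a Text built from raw bytes is converted via toString().
    ByteBuffer encoded = Text.encode("row1\tcol1");   // String -> UTF-8 ByteBuffer
    byte[] utf8 = new byte[encoded.limit()];
    encoded.get(utf8);
    Text wrapped = new Text(utf8);                    // wrap the raw bytes
    String decoded = Text.decode(utf8);               // validates and decodes UTF-8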