Example usage for org.apache.hadoop.io Text Text

Introduction

On this page you can find example usage for the org.apache.hadoop.io.Text constructor Text(byte[] utf8).

Prototype

public Text(byte[] utf8) 

Document

Construct from a byte array.
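
A minimal, self-contained sketch of this constructor in use (the class name and sample string are illustrative; the byte array is assumed to hold valid UTF-8, which Text copies into its own internal buffer):

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.io.Text;

public class TextFromBytesExample {
    public static void main(String[] args) {
        // Encode a String to UTF-8 bytes, then construct a Text from them.
        byte[] utf8 = "hello, hadoop".getBytes(StandardCharsets.UTF_8);
        Text text = new Text(utf8);

        System.out.println(text);              // hello, hadoop
        System.out.println(text.getLength());  // 13, the number of bytes in the UTF-8 encoding
    }
}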

Usage

From source file:cn.uc.hadoop.mapreduce.lib.input.FileNameLineRecordReader.java

License:Apache License
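
Here nextKeyValue() falls back to new Text("NO_FILE_NAME") for the key and allocates an empty Text to receive each line read from the split.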

public boolean nextKeyValue() throws IOException {
    if (key == null) {
        // Unreachable in practice: key was already set in the initialize() call.
        key = new Text("NO_FILE_NAME");
    }
    if (value == null) {
        value = new Text();
    }
    int newSize = 0;
    // We always read one extra line, which lies outside the upper
    // split limit i.e. (end - 1)
    while (getFilePosition() <= end) {
        newSize = in.readLine(value, maxLineLength, Math.max(maxBytesToConsume(pos), maxLineLength));
        if (newSize == 0) {
            break;
        }
        pos += newSize;
        if (newSize < maxLineLength) {
            break;
        }

        // line too long. try again
        LOG.info("Skipped line of size " + newSize + " at pos " + (pos - newSize));
    }
    if (newSize == 0) {
        key = null;
        value = null;
        return false;
    } else {
        return true;
    }
}

From source file:cn.uc.hadoop.mapreduce.lib.input.FilePathLineRecordReader.java

License:Apache License
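
Here initialize() sets the key to a Text built from the split's file path, and later reads the discarded first line of a non-initial split into a throwaway Text.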

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    // Added by qiujw: use the file path as the key.
    key = new Text(file.toString());

    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);
    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            if (null == this.recordDelimiterBytes) {
                in = new LineReader(cIn, job);
            } else {
                in = new LineReader(cIn, job, this.recordDelimiterBytes);
            }

            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            if (null == this.recordDelimiterBytes) {
                in = new LineReader(codec.createInputStream(fileIn, decompressor), job);
            } else {
                in = new LineReader(codec.createInputStream(fileIn, decompressor), job,
                        this.recordDelimiterBytes);
            }
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        if (null == this.recordDelimiterBytes) {
            in = new LineReader(fileIn, job);
        } else {
            in = new LineReader(fileIn, job, this.recordDelimiterBytes);
        }

        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away the first record
    // because we always (except for the last split) read one extra line in
    // the next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}

From source file:co.cask.cdap.app.runtime.spark.SparkCredentialsUpdaterTest.java

License:Apache License
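
This test wraps token kind and service names in Text instances when adding delegation tokens to the current user's credentials.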

@Test
public void testUpdater() throws Exception {
    Location credentialsDir = Locations.toLocation(TEMPORARY_FOLDER.newFolder());

    // Create an updater that performs no auto-update within the test window and no cleanup
    SparkCredentialsUpdater updater = new SparkCredentialsUpdater(createCredentialsSupplier(), credentialsDir,
            "credentials", TimeUnit.DAYS.toMillis(1), TimeUnit.DAYS.toMillis(1), Integer.MAX_VALUE) {
        @Override
        long getNextUpdateDelay(Credentials credentials) throws IOException {
            return TimeUnit.DAYS.toMillis(1);
        }
    };

    // Before the updater starts, the directory is empty
    Assert.assertTrue(credentialsDir.list().isEmpty());

    UserGroupInformation.getCurrentUser().addToken(
            new Token<>(Bytes.toBytes("id"), Bytes.toBytes("pass"), new Text("kind"), new Text("service")));

    updater.startAndWait();
    try {
        List<Location> expectedFiles = new ArrayList<>();
        expectedFiles.add(credentialsDir.append("credentials-1"));

        for (int i = 1; i <= 10; i++) {
            Assert.assertEquals(expectedFiles, listAndSort(credentialsDir));

            // Read the credentials from the last file
            Credentials newCredentials = new Credentials();
            try (DataInputStream is = new DataInputStream(
                    expectedFiles.get(expectedFiles.size() - 1).getInputStream())) {
                newCredentials.readTokenStorageStream(is);
            }

            // Should contain all tokens of the current user
            Credentials userCredentials = UserGroupInformation.getCurrentUser().getCredentials();
            for (Token<? extends TokenIdentifier> token : userCredentials.getAllTokens()) {
                Assert.assertEquals(token, newCredentials.getToken(token.getService()));
            }

            UserGroupInformation.getCurrentUser().addToken(new Token<>(Bytes.toBytes("id" + i),
                    Bytes.toBytes("pass" + i), new Text("kind" + i), new Text("service" + i)));
            updater.run();
            expectedFiles.add(credentialsDir.append("credentials-" + (i + 1)));
        }
    } finally {
        updater.stopAndWait();
    }
}

From source file:co.cask.cdap.common.security.YarnTokenUtils.java

License:Apache License
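
Text appears here three times: for the renewer name, for the comma-joined list of token services, and for the key under which the token is added to the credentials.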

/**
 * Gets a Yarn delegation token and stores it in the given Credentials.
 *
 * @return the same Credentials instance as the one passed in.
 */
public static Credentials obtainToken(YarnConfiguration configuration, Credentials credentials) {
    if (!UserGroupInformation.isSecurityEnabled()) {
        return credentials;
    }

    try {
        YarnClient yarnClient = YarnClient.createYarnClient();
        yarnClient.init(configuration);
        yarnClient.start();

        try {
            Text renewer = new Text(UserGroupInformation.getCurrentUser().getShortUserName());
            org.apache.hadoop.yarn.api.records.Token rmDelegationToken = yarnClient
                    .getRMDelegationToken(renewer);

            // TODO: The following logic should be replaced with call to ClientRMProxy.getRMDelegationTokenService after
            // CDAP-4825 is resolved
            List<String> services = new ArrayList<>();
            if (HAUtil.isHAEnabled(configuration)) {
                // If HA is enabled, we need to enumerate all RM hosts
                // and add the corresponding service name to the token service
                // Copy the yarn conf since we need to modify it to get the RM addresses
                YarnConfiguration yarnConf = new YarnConfiguration(configuration);
                for (String rmId : HAUtil.getRMHAIds(configuration)) {
                    yarnConf.set(YarnConfiguration.RM_HA_ID, rmId);
                    InetSocketAddress address = yarnConf.getSocketAddr(YarnConfiguration.RM_ADDRESS,
                            YarnConfiguration.DEFAULT_RM_ADDRESS, YarnConfiguration.DEFAULT_RM_PORT);
                    services.add(SecurityUtil.buildTokenService(address).toString());
                }
            } else {
                services.add(SecurityUtil.buildTokenService(YarnUtils.getRMAddress(configuration)).toString());
            }

            Token<TokenIdentifier> token = ConverterUtils.convertFromYarn(rmDelegationToken,
                    (InetSocketAddress) null);
            token.setService(new Text(Joiner.on(',').join(services)));
            credentials.addToken(new Text(token.getService()), token);

            // OK to log, it won't log the credential, only information about the token.
            LOG.info("Added RM delegation token: {}", token);

        } finally {
            yarnClient.stop();
        }

        return credentials;
    } catch (Exception e) {
        LOG.error("Failed to get secure token for Yarn.", e);
        throw Throwables.propagate(e);
    }
}

From source file:co.cask.cdap.dq.test.AggregationFunctionsTest.java

License:Apache License
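
Each DataQualityWritable in this test is populated with a small Text value before being aggregated into a set of unique values.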

@Test
public void uniqueValuesGenerateAggregationTest() throws Exception {
    DataQualityWritable val1 = new DataQualityWritable();
    DataQualityWritable val2 = new DataQualityWritable();
    DataQualityWritable val3 = new DataQualityWritable();

    val1.set(new Text("a"));
    val2.set(new Text("a"));
    val3.set(new Text("a"));

    UniqueValues uniqueValues = new UniqueValues();
    uniqueValues.add(val1);
    uniqueValues.add(val2);
    uniqueValues.add(val3);
    byte[] output = uniqueValues.aggregate();

    Assert.assertEquals("[a]", Bytes.toString(output));
}

From source file:co.cask.cdap.dq.test.AggregationFunctionsTest.java

License:Apache License
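
As in the previous test, Text values populate the writables, this time feeding a histogram over discrete values.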

@Test
public void discreteValuesGenerateAggregationTest() throws Exception {
    DataQualityWritable val1 = new DataQualityWritable();
    DataQualityWritable val2 = new DataQualityWritable();
    DataQualityWritable val3 = new DataQualityWritable();

    val1.set(new Text("a"));
    val2.set(new Text("a"));
    val3.set(new Text("b"));

    DiscreteValuesHistogram discreteValuesHistogram = new DiscreteValuesHistogram();
    discreteValuesHistogram.add(val1);
    discreteValuesHistogram.add(val2);
    discreteValuesHistogram.add(val3);

    Map<String, Integer> expectedMap = Maps.newHashMap();
    expectedMap.put("a", 2);
    expectedMap.put("b", 1);
    byte[] outputVal = discreteValuesHistogram.aggregate();
    Map<String, Integer> outputMap = GSON.fromJson(Bytes.toString(outputVal), TOKEN_TYPE_MAP_STRING_INTEGER);
    Assert.assertEquals(expectedMap, outputMap);
}

From source file:co.cask.cdap.etl.batch.connector.ConnectorSink.java

License:Apache License
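
The sink emits each structured record as a Text wrapping its JSON representation.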

@Override
public void transform(StructuredRecord input, Emitter<KeyValue<NullWritable, Text>> emitter) throws Exception {
    if (writeSchema) {
        input = modifyRecord(input);
    }
    emitter.emit(
            new KeyValue<>(NullWritable.get(), new Text(StructuredRecordStringConverter.toJsonString(input))));
}

From source file:co.cask.cdap.explore.security.HiveTokenUtils.java

License:Apache License
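
A Text built from the HiveServer2 client token constant becomes the service name of the fetched Hive delegation token.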

public static Credentials obtainToken(Credentials credentials) {
    ClassLoader hiveClassloader = ExploreServiceUtils.getExploreClassLoader();
    ClassLoader contextClassloader = Thread.currentThread().getContextClassLoader();
    Thread.currentThread().setContextClassLoader(hiveClassloader);

    try {
        LOG.info("Obtaining delegation token for Hive");
        Class hiveConfClass = hiveClassloader.loadClass("org.apache.hadoop.hive.conf.HiveConf");
        Object hiveConf = hiveConfClass.newInstance();

        Class hiveClass = hiveClassloader.loadClass("org.apache.hadoop.hive.ql.metadata.Hive");
        @SuppressWarnings("unchecked")
        Method hiveGet = hiveClass.getMethod("get", hiveConfClass);
        Object hiveObject = hiveGet.invoke(null, hiveConf);

        String user = UserGroupInformation.getCurrentUser().getShortUserName();
        @SuppressWarnings("unchecked")
        Method getDelegationToken = hiveClass.getMethod("getDelegationToken", String.class, String.class);
        String tokenStr = (String) getDelegationToken.invoke(hiveObject, user, user);

        Token<DelegationTokenIdentifier> delegationToken = new Token<>();
        delegationToken.decodeFromUrlString(tokenStr);
        delegationToken.setService(new Text(HiveAuthFactory.HS2_CLIENT_TOKEN));
        LOG.info("Adding delegation token {} from MetaStore for service {} for user {}", delegationToken,
                delegationToken.getService(), user);
        credentials.addToken(delegationToken.getService(), delegationToken);
        return credentials;
    } catch (Exception e) {
        LOG.error("Exception when fetching delegation token from Hive MetaStore", e);
        throw Throwables.propagate(e);
    } finally {
        Thread.currentThread().setContextClassLoader(contextClassloader);
    }
}

From source file:co.cask.cdap.explore.security.JobHistoryServerTokenUtils.java

License:Apache License
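
The JHS delegation token is stored in the credentials under a Text key copied from the token's service.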

/**
 * Gets a JHS delegation token and stores it in the given Credentials.
 *
 * @return the same Credentials instance as the one passed in.
 */
public static Credentials obtainToken(Configuration configuration, Credentials credentials) {
    if (!UserGroupInformation.isSecurityEnabled()) {
        return credentials;
    }

    String historyServerAddress = configuration.get("mapreduce.jobhistory.address");
    HostAndPort hostAndPort = HostAndPort.fromString(historyServerAddress);
    try {
        LOG.info("Obtaining delegation token for JHS");

        ResourceMgrDelegate resourceMgrDelegate = new ResourceMgrDelegate(new YarnConfiguration(configuration));
        MRClientCache clientCache = new MRClientCache(configuration, resourceMgrDelegate);
        MRClientProtocol hsProxy = clientCache.getInitializedHSProxy();
        GetDelegationTokenRequest request = new GetDelegationTokenRequestPBImpl();
        request.setRenewer(YarnUtils.getYarnTokenRenewer(configuration));

        InetSocketAddress address = new InetSocketAddress(hostAndPort.getHostText(), hostAndPort.getPort());
        Token<TokenIdentifier> token = ConverterUtils
                .convertFromYarn(hsProxy.getDelegationToken(request).getDelegationToken(), address);

        credentials.addToken(new Text(token.getService()), token);
        return credentials;
    } catch (Exception e) {
        LOG.error("Failed to get secure token for JHS at {}.", hostAndPort, e);
        throw Throwables.propagate(e);
    }
}

From source file:co.cask.cdap.hive.objectinspector.SimpleMapEqualComparerTest.java

License:Apache License
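
This is the only snippet on the page that invokes the Text(byte[] utf8) constructor directly, wrapping the serialized byte stream before handing it to the SerDe.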

Object serializeAndDeserialize(TextStringMapHolder o1, StructObjectInspector oi1, LazySimpleSerDe serde,
        SerDeParameters serdeParams) throws IOException, SerDeException {
    ByteStream.Output serializeStream = new ByteStream.Output();
    LazySimpleSerDe.serialize(serializeStream, o1, oi1, serdeParams.getSeparators(), 0,
            serdeParams.getNullSequence(), serdeParams.isEscaped(), serdeParams.getEscapeChar(),
            serdeParams.getNeedsEscape());
    Text t = new Text(serializeStream.toByteArray());
    return serde.deserialize(t);
}