com.thinkbiganalytics.nifi.v2.ingest.StripHeader.java Source code

Java tutorial

Introduction

Here is the source code for com.thinkbiganalytics.nifi.v2.ingest.StripHeader.java

Source

package com.thinkbiganalytics.nifi.v2.ingest;

/*-
 * #%L
 * thinkbig-nifi-core-processors
 * %%
 * Copyright (C) 2017 ThinkBig Analytics
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import com.thinkbiganalytics.ingest.StripHeaderSupport;
import com.thinkbiganalytics.nifi.processor.AbstractNiFiProcessor;

import org.apache.commons.lang3.mutable.MutableLong;
import org.apache.nifi.annotation.behavior.EventDriven;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.InputRequirement.Requirement;
import org.apache.nifi.annotation.behavior.SideEffectFree;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.ProcessorInitializationContext;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.util.StandardValidators;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * NiFi processor that separates the leading header lines of a flow file from the remaining content.
 *
 * <p>When stripping takes place the header bytes are cloned to {@link #REL_HEADER}, the remaining bytes are
 * cloned to {@link #REL_CONTENT} and the untouched input goes to {@link #REL_ORIGINAL}. When there is nothing
 * to strip (processing disabled, a header line count of zero, or an empty file) a full clone goes to
 * {@link #REL_CONTENT} and the input goes to {@link #REL_ORIGINAL}. When the header cannot be located, or the
 * header line count cannot be evaluated, only the input is transferred, to {@link #REL_FAILURE}.
 */
@EventDriven
@SideEffectFree
@Tags({ "header", "text" })
@InputRequirement(Requirement.INPUT_REQUIRED)
@CapabilityDescription("Splits a text file(s) content from its header. The content of the header is passed through a separate relationship for validation")
public class StripHeader extends AbstractNiFiProcessor {

    /** Switch that turns header stripping on or off; evaluated against each flow file's attributes. */
    public static final PropertyDescriptor ENABLED = new PropertyDescriptor.Builder().name("Enable processing")
            .description("Whether to strip the header").required(true)
            .addValidator(StandardValidators.BOOLEAN_VALIDATOR).defaultValue("false")
            .expressionLanguageSupported(true).build();

    /** Number of leading lines that make up the header; evaluated against each flow file's attributes. */
    public static final PropertyDescriptor HEADER_LINE_COUNT = new PropertyDescriptor.Builder()
            .name("Header Line Count")
            .description("The number of lines that should be considered part of the header").required(true)
            .addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR).defaultValue("1")
            .expressionLanguageSupported(true).build();

    /** Receives the unmodified input flow file whenever processing did not fail. */
    public static final Relationship REL_ORIGINAL = new Relationship.Builder().name("original")
            .description("The original input file will be routed to this destination").build();
    /** Receives the content, with the header removed when stripping was performed. */
    public static final Relationship REL_CONTENT = new Relationship.Builder().name("content")
            .description("The content (stripped of header if enabled) will be routed to this destination").build();
    /** Receives the header bytes; only used when a header was actually stripped. */
    public static final Relationship REL_HEADER = new Relationship.Builder().name("header")
            .description("The header will be routed to this destination when header is stripped").build();
    /** Receives the unmodified input flow file when the header could not be split off. */
    public static final Relationship REL_FAILURE = new Relationship.Builder().name("failure").description(
            "If a file cannot be split for some reason, the original file will be routed to this destination and nothing will be routed elsewhere")
            .build();

    private List<PropertyDescriptor> properties;
    private Set<Relationship> relationships;

    /**
     * Builds the immutable property and relationship collections exposed by this processor.
     *
     * @param context the initialization context, forwarded to the superclass
     */
    @Override
    protected void init(final ProcessorInitializationContext context) {
        super.init(context);

        final List<PropertyDescriptor> properties = new ArrayList<>();
        properties.add(ENABLED);
        properties.add(HEADER_LINE_COUNT);

        this.properties = Collections.unmodifiableList(properties);

        final Set<Relationship> relationships = new HashSet<>();
        relationships.add(REL_ORIGINAL);
        relationships.add(REL_CONTENT);
        relationships.add(REL_HEADER);
        relationships.add(REL_FAILURE);
        this.relationships = Collections.unmodifiableSet(relationships);
    }

    /**
     * @return the unmodifiable set of relationships built in {@link #init(ProcessorInitializationContext)}
     */
    @Override
    public Set<Relationship> getRelationships() {
        return relationships;
    }

    /**
     * @return the unmodifiable list of properties built in {@link #init(ProcessorInitializationContext)}
     */
    @Override
    protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
        return properties;
    }

    /**
     * Takes one flow file from the session and routes it, plus any header/content clones, to the
     * appropriate relationships. Returns without doing anything when the session has no flow file.
     *
     * @param context provides the property values, evaluated against the flow file's attributes
     * @param session the session used to read, clone and transfer the flow file
     */
    @Override
    public void onTrigger(final ProcessContext context, final ProcessSession session) {
        final FlowFile flowFile = session.get();
        if (flowFile == null) {
            return;
        }

        final boolean isEnabled = context.getProperty(ENABLED).evaluateAttributeExpressions(flowFile).asBoolean();

        // Disabled processing and empty files never need the header line count, so it is not evaluated
        // for them; a bad count therefore cannot break a plain pass-through.
        if (!isEnabled || flowFile.getSize() == 0L) {
            transferUnmodified(session, flowFile);
            return;
        }

        final Integer evaluatedCount = evaluateHeaderLineCount(context, flowFile);
        if (evaluatedCount == null) {
            // The problem has already been logged by evaluateHeaderLineCount
            session.transfer(flowFile, REL_FAILURE);
            return;
        }

        // NOTE(review): a negative count can presumably reach this point when supplied through expression
        // language; how StripHeaderSupport treats it is not visible here, so it is forwarded as before.
        final int headerCount = evaluatedCount.intValue();
        if (headerCount == 0) {
            transferUnmodified(session, flowFile);
            return;
        }

        final long headerBytes = locateHeaderBoundary(session, flowFile, headerCount);
        if (headerBytes < 0) {
            session.transfer(flowFile, REL_FAILURE);
            return;
        }

        // Transfer header: the first headerBytes bytes of the input
        final FlowFile headerFlowFile = session.clone(flowFile, 0, headerBytes);
        session.transfer(headerFlowFile, REL_HEADER);

        // Transfer content: everything after the header
        final long contentBytes = flowFile.getSize() - headerBytes;
        final FlowFile contentFlowFile = session.clone(flowFile, headerBytes, contentBytes);
        session.transfer(contentFlowFile, REL_CONTENT);

        session.transfer(flowFile, REL_ORIGINAL);
    }

    /**
     * Routes a full clone of the flow file to {@link #REL_CONTENT} and the flow file itself to
     * {@link #REL_ORIGINAL}; used when there is no header to strip.
     */
    private void transferUnmodified(final ProcessSession session, final FlowFile flowFile) {
        final FlowFile contentFlowFile = session.clone(flowFile);
        session.transfer(contentFlowFile, REL_CONTENT);
        session.transfer(flowFile, REL_ORIGINAL);
    }

    /**
     * Evaluates {@link #HEADER_LINE_COUNT} against the flow file's attributes.
     *
     * <p>The value is resolved per flow file, so it may turn out to be missing or non-numeric at run time.
     * Those cases are logged and reported as {@code null} so the caller can route the file to failure
     * instead of letting a {@link NumberFormatException} or an unboxing {@link NullPointerException}
     * escape from {@code onTrigger}.
     *
     * @return the evaluated line count, or {@code null} when it has no value or is not a valid integer
     */
    private Integer evaluateHeaderLineCount(final ProcessContext context, final FlowFile flowFile) {
        try {
            final Integer count = context.getProperty(HEADER_LINE_COUNT).evaluateAttributeExpressions(flowFile)
                    .asInteger();
            if (count == null) {
                getLog().error("Unable to strip header {} because property '{}' has no value; routing to failure",
                        new Object[] { flowFile, HEADER_LINE_COUNT.getName() });
            }
            return count;
        } catch (NumberFormatException e) {
            getLog().error("Unable to strip header {} because property '{}' is not a valid integer ({}); routing to failure",
                    new Object[] { flowFile, HEADER_LINE_COUNT.getName(), e.getLocalizedMessage() }, e);
            return null;
        }
    }

    /**
     * Reads the flow file content and asks {@link StripHeaderSupport} for the byte offset at which the
     * header ends.
     *
     * @return the value reported by {@code findHeaderBoundary}, or {@code -1} when reading failed with an
     *         {@link IOException}; any negative result means the header could not be stripped
     */
    private long locateHeaderBoundary(final ProcessSession session, final FlowFile flowFile, final int headerCount) {
        final StripHeaderSupport headerSupport = new StripHeaderSupport();
        // Stays at -1 unless the callback manages to compute a boundary
        final MutableLong headerBoundaryInBytes = new MutableLong(-1);

        session.read(flowFile, false, rawIn -> {
            try {
                // Identify the byte boundary of the header
                long bytes = headerSupport.findHeaderBoundary(headerCount, rawIn);
                headerBoundaryInBytes.setValue(bytes);

                if (bytes < 0) {
                    getLog().error("Unable to strip header {} expecting at least {} lines in file",
                            new Object[] { flowFile, headerCount });
                }

            } catch (IOException e) {
                getLog().error("Unable to strip header {} due to {}; routing to failure",
                        new Object[] { flowFile, e.getLocalizedMessage() }, e);
            }

        });

        return headerBoundaryInBytes.longValue();
    }
}