org.apache.pdfbox.pdmodel.common.PDPageLabels.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.pdfbox.pdmodel.common.PDPageLabels.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pdfbox.pdmodel.common;

import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.NavigableSet;
import java.util.NoSuchElementException;
import java.util.TreeMap;
import java.util.TreeSet;

import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSInteger;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;

/**
 * Represents the page label dictionary of a document.
 *
 * <p>
 * Internally the label ranges are kept in a map sorted by their 0-based start
 * page index. An entry for page index 0 is always present: it is created by
 * the constructors and this class offers no way to remove it.
 * </p>
 *
 * @author Igor Podolskiy
 */
public class PDPageLabels implements COSObjectable {

    /** Label ranges keyed by the 0-based index of the first page of the range, in ascending order. */
    private final Map<Integer, PDPageLabelRange> labels;

    /** The document whose page count bounds the generated labels. */
    private final PDDocument doc;

    /**
     * Creates an empty page label dictionary for the given document.
     *
     * <p>
     * Note that the page label dictionary won't be automatically added to the
     * document; you will still need to do it manually (see
     * {@link org.apache.pdfbox.pdmodel.PDDocumentCatalog#setPageLabels(PDPageLabels)}).
     * </p>
     *
     * @param document
     *            The document the page label dictionary is created for.
     * @see org.apache.pdfbox.pdmodel.PDDocumentCatalog#setPageLabels(PDPageLabels)
     */
    public PDPageLabels(PDDocument document) {
        labels = new TreeMap<>();
        this.doc = document;
        // The range starting at the first page is mandatory, so install a
        // decimal default that a parsed dictionary may later overwrite.
        PDPageLabelRange defaultRange = new PDPageLabelRange();
        defaultRange.setStyle(PDPageLabelRange.STYLE_DECIMAL);
        labels.put(0, defaultRange);
    }

    /**
     * Creates a page label dictionary for a document using the information in
     * the given COS dictionary.
     *
     * <p>
     * Note that the page label dictionary won't be automatically added to the
     * document; you will still need to do it manually (see
     * {@link org.apache.pdfbox.pdmodel.PDDocumentCatalog#setPageLabels(PDPageLabels)}).
     * </p>
     *
     * @param document
     *            The document the page label dictionary is created for.
     * @param dict
     *            an existing page label dictionary; if {@code null}, only the
     *            default range is present.
     * @see org.apache.pdfbox.pdmodel.PDDocumentCatalog#setPageLabels(PDPageLabels)
     * @throws IOException
     *             If something goes wrong during the number tree conversion.
     */
    public PDPageLabels(PDDocument document, COSDictionary dict) throws IOException {
        this(document);
        if (dict == null) {
            return;
        }
        PDNumberTreeNode root = new PDNumberTreeNode(dict, PDPageLabelRange.class);
        findLabels(root);
    }

    /**
     * Recursively collects the label ranges of a number tree node. A node with
     * kids is descended into; otherwise its own numbers (if any) are stored.
     * Entries with a negative page index are ignored.
     *
     * @param node the number tree node to read.
     * @throws IOException if the node's kids or numbers cannot be read.
     */
    private void findLabels(PDNumberTreeNode node) throws IOException {
        List<PDNumberTreeNode> kids = node.getKids();
        if (kids != null) {
            for (PDNumberTreeNode kid : kids) {
                findLabels(kid);
            }
            return;
        }
        Map<Integer, COSObjectable> numbers = node.getNumbers();
        if (numbers == null) {
            return;
        }
        for (Entry<Integer, COSObjectable> number : numbers.entrySet()) {
            if (number.getKey() >= 0) {
                labels.put(number.getKey(), (PDPageLabelRange) number.getValue());
            }
        }
    }

    /**
     * Returns the number of page label ranges.
     *
     * <p>
     * This will be always &gt;= 1, as the required default entry for the page
     * range starting at the first page is added automatically by this
     * implementation (see PDF32000-1:2008, p. 375).
     * </p>
     *
     * @return the number of page label ranges.
     */
    public int getPageRangeCount() {
        return labels.size();
    }

    /**
     * Returns the page label range starting at the given page, or {@code null}
     * if no such range is defined.
     *
     * @param startPage
     *            the 0-based page index representing the start page of the page
     *            range the item is defined for.
     * @return the page label range or {@code null} if no label range is defined
     *         for the given start page.
     */
    public PDPageLabelRange getPageLabelRange(int startPage) {
        return labels.get(startPage);
    }

    /**
     * Sets the page label range beginning at the specified start page.
     *
     * @param startPage
     *            the 0-based index of the page representing the start of the
     *            page label range.
     * @param item
     *            the page label item to set.
     * @throws IllegalArgumentException if the startPage parameter is &lt; 0.
     */
    public void setLabelItem(int startPage, PDPageLabelRange item) {
        if (startPage < 0) {
            throw new IllegalArgumentException("startPage parameter of setLabelItem may not be < 0");
        }
        labels.put(startPage, item);
    }

    /**
     * Builds a new dictionary whose {@code Nums} array lists every range as a
     * (start page index, range) pair in ascending order of start page.
     *
     * @return a freshly created dictionary describing all label ranges.
     */
    @Override
    public COSBase getCOSObject() {
        COSDictionary dict = new COSDictionary();
        COSArray nums = new COSArray();
        for (Entry<Integer, PDPageLabelRange> range : labels.entrySet()) {
            nums.add(COSInteger.get(range.getKey()));
            nums.add(range.getValue());
        }
        dict.setItem(COSName.NUMS, nums);
        return dict;
    }

    /**
     * Returns a mapping with computed page labels as keys and corresponding
     * 0-based page indices as values. The returned map will contain at most as
     * many entries as the document has pages; label ranges that start beyond
     * the last page of the document do not contribute any entry.
     *
     * <p>
     * <strong>NOTE:</strong> If the document contains duplicate page labels,
     * the returned map will contain <em>fewer</em> entries than the document has
     * pages. The page index returned in this case is the <em>highest</em> index
     * among all pages sharing the same label.
     * </p>
     *
     * @return a mapping from labels to 0-based page indices.
     */
    public Map<String, Integer> getPageIndicesByLabels() {
        final Map<String, Integer> labelMap = new HashMap<>(doc.getNumberOfPages());
        computeLabels((pageIndex, label) -> labelMap.put(label, pageIndex));
        return labelMap;
    }

    /**
     * Returns a mapping with 0-based page indices as keys and corresponding
     * page labels as values as an array. The array will have exactly as many
     * entries as the document has pages.
     *
     * @return an array mapping from 0-based page indices to labels.
     */
    public String[] getLabelsByPageIndices() {
        final String[] map = new String[doc.getNumberOfPages()];
        computeLabels((pageIndex, label) -> {
            // Defensive bound check: never write outside the array.
            if (pageIndex < map.length) {
                map[pageIndex] = label;
            }
        });
        return map;
    }

    /**
     * Get an ordered set of page indices having a page label range.
     *
     * @return set of page indices.
     */
    public NavigableSet<Integer> getPageIndices() {
        return new TreeSet<>(labels.keySet());
    }

    /**
     * Internal interface for the control flow support.
     *
     * @author Igor Podolskiy
     */
    @FunctionalInterface
    private interface LabelHandler {
        void newLabel(int pageIndex, String label);
    }

    /**
     * Generates the label of every page of the document, in page order, and
     * passes each (page index, label) pair to the handler.
     *
     * <p>
     * Each range covers the pages from its start index up to (but excluding)
     * the start index of the following range, or up to the end of the document
     * for the last range. Every range is additionally capped at the document's
     * page count, so no label is ever produced for a page that does not exist,
     * even if a range is declared to start beyond the last page.
     * </p>
     *
     * @param handler receives each generated label.
     */
    private void computeLabels(LabelHandler handler) {
        final int pageCount = doc.getNumberOfPages();
        Iterator<Entry<Integer, PDPageLabelRange>> iterator = labels.entrySet().iterator();
        Entry<Integer, PDPageLabelRange> current = iterator.hasNext() ? iterator.next() : null;
        // Keys are ascending, so once a range starts at or past the end of the
        // document all remaining ranges do as well.
        while (current != null && current.getKey() < pageCount) {
            Entry<Integer, PDPageLabelRange> next = iterator.hasNext() ? iterator.next() : null;
            int rangeStart = current.getKey();
            int rangeEnd = next == null ? pageCount : Math.min(next.getKey(), pageCount);
            LabelGenerator gen = new LabelGenerator(current.getValue(), rangeEnd - rangeStart);
            int pageIndex = rangeStart;
            while (gen.hasNext()) {
                handler.newLabel(pageIndex, gen.next());
                pageIndex++;
            }
            current = next;
        }
    }

    /**
     * Generates the labels in a page range.
     *
     * @author Igor Podolskiy
     *
     */
    private static class LabelGenerator implements Iterator<String> {
        private final PDPageLabelRange labelInfo;
        private final int numPages;
        private int currentPage;

        /**
         * @param label the range describing prefix, style and start number.
         * @param pages how many labels to generate; values &lt;= 0 yield none.
         */
        LabelGenerator(PDPageLabelRange label, int pages) {
            this.labelInfo = label;
            this.numPages = pages;
            this.currentPage = 0;
        }

        @Override
        public boolean hasNext() {
            return currentPage < numPages;
        }

        /**
         * Builds the next label: the range prefix (if any) followed by the
         * formatted page number (only if the range defines a style).
         *
         * @return the label of the next page in the range.
         * @throws NoSuchElementException if all labels were already generated.
         */
        @Override
        public String next() {
            if (!hasNext()) {
                throw new NoSuchElementException();
            }
            StringBuilder buf = new StringBuilder();
            String prefix = labelInfo.getPrefix();
            if (prefix != null) {
                // there may be some labels with some null bytes at the end
                // which will lead to an incomplete output, see PDFBOX-1047.
                // Keep only the part before the first null character.
                int nul = prefix.indexOf(0);
                if (nul == -1) {
                    buf.append(prefix);
                } else {
                    buf.append(prefix, 0, nul);
                }
            }
            String style = labelInfo.getStyle();
            if (style != null) {
                buf.append(getNumber(labelInfo.getStart() + currentPage, style));
            }
            currentPage++;
            return buf.toString();
        }

        /**
         * Formats a page number according to the given numbering style.
         * Upper-casing is done with {@link Locale#ROOT} so that the result
         * does not depend on the default locale (e.g. the Turkish dotted I).
         *
         * @param pageIndex the number to format.
         * @param style one of the {@code PDPageLabelRange.STYLE_*} constants;
         *            {@code null} or an unknown style falls back to decimal.
         * @return the formatted number.
         */
        private static String getNumber(int pageIndex, String style) {
            if (style != null) {
                switch (style) {
                case PDPageLabelRange.STYLE_DECIMAL:
                    return Integer.toString(pageIndex);
                case PDPageLabelRange.STYLE_LETTERS_LOWER:
                    return makeLetterLabel(pageIndex);
                case PDPageLabelRange.STYLE_LETTERS_UPPER:
                    return makeLetterLabel(pageIndex).toUpperCase(Locale.ROOT);
                case PDPageLabelRange.STYLE_ROMAN_LOWER:
                    return makeRomanLabel(pageIndex);
                case PDPageLabelRange.STYLE_ROMAN_UPPER:
                    return makeRomanLabel(pageIndex).toUpperCase(Locale.ROOT);
                default:
                    break;
                }
            }
            // Fall back to decimals.
            return Integer.toString(pageIndex);
        }

        /**
         * Lookup table used by the {@link #makeRomanLabel(int)} method.
         * Row = decimal position (ones, tens, hundreds), column = digit.
         */
        private static final String[][] ROMANS = new String[][] {
                { "", "i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix" },
                { "", "x", "xx", "xxx", "xl", "l", "lx", "lxx", "lxxx", "xc" },
                { "", "c", "cc", "ccc", "cd", "d", "dc", "dcc", "dccc", "cm" } };

        /**
         * Converts a number to a lower-case roman numeral. Numbers &lt;= 0
         * yield an empty string.
         *
         * @param pageIndex the number to convert.
         * @return the lower-case roman numeral.
         */
        private static String makeRomanLabel(int pageIndex) {
            StringBuilder lowDigits = new StringBuilder();
            int remaining = pageIndex;
            for (int power = 0; power < 3 && remaining > 0; power++) {
                lowDigits.insert(0, ROMANS[power][remaining % 10]);
                remaining /= 10;
            }
            if (remaining <= 0) {
                return lowDigits.toString();
            }
            // Prepend as many m as there are thousands (which is
            // incorrect by the roman numeral rules for numbers > 3999,
            // but is unbounded and Adobe Acrobat does it this way).
            // The thousands are appended first and the lower digits last so
            // that building the string stays linear in its length.
            StringBuilder buf = new StringBuilder(remaining + lowDigits.length());
            for (int i = 0; i < remaining; i++) {
                buf.append('m');
            }
            return buf.append(lowDigits).toString();
        }

        /**
         * a..z, aa..zz, aaa..zzz ... labeling as described in PDF32000-1:2008,
         * Table 159, Page 375. Numbers &lt;= 0 yield an empty string.
         *
         * @param num the number to convert.
         * @return the lower-case letter label.
         */
        private static String makeLetterLabel(int num) {
            int remainder = num % 26;
            int sign = Integer.signum(remainder);
            // A multiple of 26 is the last letter ('z') of its group, so it
            // must not open a new, longer group.
            int numLetters = num / 26 + sign;
            int letter = remainder + 26 * (1 - sign) + 'a' - 1;
            StringBuilder buf = new StringBuilder();
            for (int i = 0; i < numLetters; i++) {
                buf.appendCodePoint(letter);
            }
            return buf.toString();
        }

        @Override
        public void remove() {
            // This is a generator, no removing allowed.
            throw new UnsupportedOperationException();
        }
    }
}