Android Open Source - apps-android-commons Media Data Extractor






From Project

Back to project page apps-android-commons.

License

The source code is released under:

Apache License

If you think the Android project apps-android-commons listed on this page is inappropriate (for example, because it contains malicious code or tools, or violates copyright), please email info at java2s dot com. Thanks.

Java Source Code

package org.wikimedia.commons;
// from www.java2s.com
import android.util.Log;
import org.mediawiki.api.ApiResult;
import org.mediawiki.api.MWApi;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Fetch additional media data from the network that we don't store locally.
 *
 * This includes things like category lists and multilingual descriptions,
 * which are not intrinsic to the media and may change due to editing.
 *
 * Intended usage: construct, call {@link #fetch()} once (on a background
 * thread), then call {@link #fill(Media)} to copy the results into a Media object.
 */
public class MediaDataExtractor {
    private static final String TAG = "Commons";

    /**
     * Matches in-page category links such as [[Category:Foo]]; group 1 is
     * everything between the colon and the closing brackets.
     * Compiled once because Pattern compilation is comparatively expensive.
     */
    private static final Pattern CATEGORY_LINK = Pattern.compile(
            "\\[\\[\\s*Category\\s*:([^]]*)\\s*\\]\\]", Pattern.CASE_INSENSITIVE);

    /** True once fetch() has completed successfully. */
    private boolean fetched;

    private final String filename;
    private final ArrayList<String> categories;
    private Map<String, String> descriptions;
    // author is extracted but, like date, is not yet pushed into Media by fill().
    private String author;
    private Date date;
    private String license;
    private final LicenseList licenseList;

    /**
     * @param filename of the target media object, should include 'File:' prefix
     * @param licenseList known licenses, used to map license templates to license keys
     */
    public MediaDataExtractor(String filename, LicenseList licenseList) {
        this.filename = filename;
        this.licenseList = licenseList;
        categories = new ArrayList<String>();
        descriptions = new HashMap<String, String>();
        fetched = false;
    }

    /**
     * Actually fetch the data over the network.
     * todo: use local caching?
     *
     * Warning: synchronous i/o, call on a background thread
     *
     * @throws IOException if the request fails or the response cannot be processed;
     *                     in that case fetch() may be called again
     * @throws IllegalStateException if a previous call already succeeded
     */
    public void fetch() throws IOException {
        if (fetched) {
            throw new IllegalStateException("Tried to call MediaDataExtractor.fetch() again.");
        }

        MWApi api = CommonsApplication.createMWApi();
        ApiResult result = api.action("query")
                .param("prop", "revisions")
                .param("titles", filename)
                .param("rvprop", "content")
                .param("rvlimit", 1)
                .param("rvgeneratexml", 1)
                .get();

        processResult(result);
        // Only mark as fetched once everything was processed without error.
        fetched = true;
    }

    /**
     * Pull the wikitext and the preprocessor parse tree out of the API result
     * and extract categories, description, author and license from them.
     */
    private void processResult(ApiResult result) throws IOException {
        String wikiSource = result.getString("/api/query/pages/page/revisions/rev");
        String parseTreeXmlSource = result.getString("/api/query/pages/page/revisions/rev/@parsetree");

        // In-page category links are extracted from source, as XML doesn't cover [[links]]
        extractCategories(wikiSource);

        // Description template info is extracted from preprocessor XML
        processWikiParseTree(parseTreeXmlSource);
    }

    /**
     * We could fetch all category links from API, but we actually only want the ones
     * directly in the page source so they're editable. In the future this may change.
     *
     * Replaces the current contents of the category list.
     *
     * @param source wikitext source code; null is treated as "no categories"
     */
    private void extractCategories(String source) {
        // An earlier fetch() attempt may have failed after this step (e.g. while
        // processing the parse tree); start clean so a retry cannot duplicate entries.
        categories.clear();
        if (source == null) {
            return;
        }
        Matcher matcher = CATEGORY_LINK.matcher(source);
        while (matcher.find()) {
            categories.add(matcher.group(1).trim());
        }
    }

    /**
     * Parse the preprocessor XML and extract description, author and license.
     *
     * @param source preprocessor parse tree as an XML string
     * @throws IOException if the XML is missing or malformed, or an expected
     *                     template parameter is absent
     */
    private void processWikiParseTree(String source) throws IOException {
        Element root = parseXml(source).getDocumentElement();
        extractInformation(root);
        extractLicense(root);
    }

    /** Parse an XML string into a DOM document, reporting bad input as IOException. */
    private Document parseXml(String source) throws IOException {
        if (source == null) {
            throw new IOException("No parse tree found in API result.");
        }
        try {
            DocumentBuilder docBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            return docBuilder.parse(new ByteArrayInputStream(source.getBytes("UTF-8")));
        } catch (ParserConfigurationException e) {
            // Default parser configuration is broken: a programming/platform error.
            throw new RuntimeException(e);
        } catch (IllegalStateException e) {
            throw new IOException(e);
        } catch (SAXException e) {
            throw new IOException(e);
        }
    }

    /** Read description and author from the top-level 'information' template, if there is one. */
    private void extractInformation(Element root) throws IOException {
        Node templateNode = findTemplate(root, "information");
        if (templateNode == null) {
            return;
        }
        Node descriptionNode = findTemplateParameter(templateNode, "description");
        descriptions = getMultilingualText(descriptionNode);

        Node authorNode = findTemplateParameter(templateNode, "author");
        author = getFlatText(authorNode);
    }

    /**
     * Pull up the license data list...
     * look for the templates in two ways:
     *     * look for 'self' template and check its first parameter
     *     * if none, look for any of the known templates
     */
    private void extractLicense(Element root) throws IOException {
        Log.d(TAG, "MediaDataExtractor searching for license");
        Node selfLicenseNode = findTemplate(root, "self");
        if (selfLicenseNode != null) {
            Node firstNode = findTemplateParameter(selfLicenseNode, 1);
            String licenseTemplate = getFlatText(firstNode);
            License selfLicense = licenseList.licenseForTemplate(licenseTemplate);
            if (selfLicense == null) {
                Log.d(TAG, "MediaDataExtractor found no matching license for self parameter: " + licenseTemplate + "; faking it");
                this.license = licenseTemplate; // hack hack! For non-selectable licenses that are still in the system.
            } else {
                // fixme: record the self-ness in here too... sigh
                // all this needs better server-side metadata
                this.license = selfLicense.getKey();
                Log.d(TAG, "MediaDataExtractor found self-license " + this.license);
            }
            return;
        }

        for (License candidate : licenseList.values()) {
            Node template = findTemplate(root, candidate.getTemplate());
            if (template != null) {
                // Found!
                this.license = candidate.getKey();
                Log.d(TAG, "MediaDataExtractor found non-self license " + this.license);
                break;
            }
        }
    }

    /**
     * Find the first direct child 'template' element whose title matches.
     * Titles are compared after passing both through Utils.capitalize().
     *
     * @return the template node, or null if none matches
     */
    private Node findTemplate(Element parentNode, String title) throws IOException {
        String ucTitle = Utils.capitalize(title);
        NodeList nodes = parentNode.getChildNodes();
        for (int i = 0; i < nodes.getLength(); i++) {
            Node node = nodes.item(i);
            if (node.getNodeName().equals("template")) {
                String foundTitle = getTemplateTitle(node);
                if (Utils.capitalize(foundTitle).equals(ucTitle)) {
                    return node;
                }
            }
        }
        return null;
    }

    /**
     * @return trimmed text of the template's 'title' child element
     * @throws IOException if the template has no title element
     */
    private String getTemplateTitle(Node templateNode) throws IOException {
        NodeList nodes = templateNode.getChildNodes();
        for (int i = 0; i < nodes.getLength(); i++) {
            Node node = nodes.item(i);
            if (node.getNodeName().equals("title")) {
                return node.getTextContent().trim();
            }
        }
        throw new IOException("Template has no title element.");
    }

    /** Decides whether a template parameter's 'name' node is the one being looked for. */
    private static abstract class TemplateChildNodeComparator {
        abstract public boolean match(Node node);
    }

    /** Find the value node of a named template parameter (name compared via Utils.capitalize()). */
    private Node findTemplateParameter(Node templateNode, String name) throws IOException {
        // Loop-invariant: capitalize the wanted name once rather than per candidate.
        final String wantedName = Utils.capitalize(name);
        return findTemplateParameter(templateNode, new TemplateChildNodeComparator() {
            @Override
            public boolean match(Node node) {
                return Utils.capitalize(node.getTextContent().trim()).equals(wantedName);
            }
        });
    }

    /**
     * Find the value node of a positional template parameter. The position may be
     * given either as the name text ({{en|1=foo}}) or as an 'index' attribute
     * on the name element ({{en|foo}}).
     */
    private Node findTemplateParameter(Node templateNode, int index) throws IOException {
        final String theIndex = "" + index;
        return findTemplateParameter(templateNode, new TemplateChildNodeComparator() {
            @Override
            public boolean match(Node node) {
                Element el = (Element) node;
                // Element.getAttribute() returns "" (never null) for a missing attribute.
                return el.getTextContent().trim().equals(theIndex)
                        || el.getAttribute("index").trim().equals(theIndex);
            }
        });
    }

    /**
     * Walk the template's 'part' children, and return the 'value' node following
     * the first 'name' node accepted by the comparator.
     *
     * @throws IOException if no parameter matches, or the matched one has no value node
     */
    private Node findTemplateParameter(Node templateNode, TemplateChildNodeComparator comparator) throws IOException {
        NodeList nodes = templateNode.getChildNodes();
        for (int i = 0; i < nodes.getLength(); i++) {
            Node node = nodes.item(i);
            if (!node.getNodeName().equals("part")) {
                continue;
            }
            NodeList childNodes = node.getChildNodes();
            for (int j = 0; j < childNodes.getLength(); j++) {
                Node childNode = childNodes.item(j);
                if (childNode.getNodeName().equals("name") && comparator.match(childNode)) {
                    // yay! Now fetch the value node.
                    for (int k = j + 1; k < childNodes.getLength(); k++) {
                        Node siblingNode = childNodes.item(k);
                        if (siblingNode.getNodeName().equals("value")) {
                            return siblingNode;
                        }
                    }
                    throw new IOException("No value node found for matched template parameter.");
                }
            }
        }
        throw new IOException("No matching template parameter node found.");
    }

    /** @return the concatenated text content of the node and its descendants */
    private String getFlatText(Node parentNode) throws IOException {
        return parentNode.getTextContent();
    }

    // Extract a dictionary of multilingual texts from a subset of the parse tree.
    // Texts are wrapped in things like {{en|foo}} or {{en|1=foo bar}}.
    // Text outside those wrappers is stuffed into a 'default' faux language key if present.
    private Map<String, String> getMultilingualText(Node parentNode) throws IOException {
        Map<String, String> texts = new HashMap<String, String>();
        StringBuilder localText = new StringBuilder();

        NodeList nodes = parentNode.getChildNodes();
        for (int i = 0; i < nodes.getLength(); i++) {
            Node node = nodes.item(i);
            if (node.getNodeName().equals("template")) {
                // process a template node
                String title = getTemplateTitle(node);
                if (title.length() < 3) {
                    // Hopefully a language code. Nasty hack!
                    Node valueNode = findTemplateParameter(node, 1);
                    String value = valueNode.getTextContent(); // hope there's no subtemplates or formatting for now
                    texts.put(title, value);
                }
            } else if (node.getNodeType() == Node.TEXT_NODE) {
                localText.append(node.getTextContent());
            }
        }

        // Some descriptions don't list multilingual variants.
        // Store the trimmed text: the surrounding whitespace/newlines are only
        // wikitext layout around the parameter value, not part of the description.
        String defaultText = localText.toString().trim();
        if (defaultText.length() > 0) {
            texts.put("default", defaultText);
        }
        return texts;
    }

    /**
     * Take our metadata and inject it into a live Media object.
     * Media object might contain stale or cached data, or emptiness.
     *
     * The license is only set when one was found; categories and descriptions
     * are always set.
     *
     * @param media object to update
     * @throws IllegalStateException if fetch() has not completed successfully
     */
    public void fill(Media media) {
        if (!fetched) {
            throw new IllegalStateException("Tried to call MediaDataExtractor.fill() before fetch().");
        }

        media.setCategories(categories);
        media.setDescriptions(descriptions);
        if (license != null) {
            media.setLicense(license);
        }

        // add author, date, etc fields
    }
}




Java Source Code List

com.gst_sdk.GStreamer.java
com.gst_sdk.GStreamer.java
org.wikimedia.commons.AboutActivity.java
org.wikimedia.commons.CommonsApplication.java
org.wikimedia.commons.EventLog.java
org.wikimedia.commons.HandlerService.java
org.wikimedia.commons.LicenseList.java
org.wikimedia.commons.License.java
org.wikimedia.commons.MediaDataExtractor.java
org.wikimedia.commons.MediaWikiImageView.java
org.wikimedia.commons.Media.java
org.wikimedia.commons.Prefs.java
org.wikimedia.commons.SettingsActivity.java
org.wikimedia.commons.Utils.java
org.wikimedia.commons.WelcomeActivity.java
org.wikimedia.commons.api.MWApi.java
org.wikimedia.commons.auth.AuthenticatedActivity.java
org.wikimedia.commons.auth.LoginActivity.java
org.wikimedia.commons.auth.WikiAccountAuthenticatorService.java
org.wikimedia.commons.auth.WikiAccountAuthenticator.java
org.wikimedia.commons.campaigns.CampaignActivity.java
org.wikimedia.commons.campaigns.CampaignContribution.java
org.wikimedia.commons.campaigns.Campaign.java
org.wikimedia.commons.campaigns.CampaignsContentProvider.java
org.wikimedia.commons.campaigns.CampaignsListAdapter.java
org.wikimedia.commons.campaigns.CampaignsSyncAdapter.java
org.wikimedia.commons.campaigns.CampaignsSyncService.java
org.wikimedia.commons.category.CategorizationFragment.java
org.wikimedia.commons.category.CategoryContentProvider.java
org.wikimedia.commons.category.Category.java
org.wikimedia.commons.contributions.ContributionController.java
org.wikimedia.commons.contributions.ContributionViewHolder.java
org.wikimedia.commons.contributions.Contribution.java
org.wikimedia.commons.contributions.ContributionsActivity.java
org.wikimedia.commons.contributions.ContributionsContentProvider.java
org.wikimedia.commons.contributions.ContributionsListAdapter.java
org.wikimedia.commons.contributions.ContributionsListFragment.java
org.wikimedia.commons.contributions.ContributionsSyncAdapter.java
org.wikimedia.commons.contributions.ContributionsSyncService.java
org.wikimedia.commons.contributions.MediaListAdapter.java
org.wikimedia.commons.data.DBOpenHelper.java
org.wikimedia.commons.media.CategoryImagesLoader.java
org.wikimedia.commons.media.MediaDetailFragment.java
org.wikimedia.commons.media.MediaDetailPagerFragment.java
org.wikimedia.commons.media.MediaDetailSpacer.java
org.wikimedia.commons.modifications.CategoryModifier.java
org.wikimedia.commons.modifications.ModificationsContentProvider.java
org.wikimedia.commons.modifications.ModificationsSyncAdapter.java
org.wikimedia.commons.modifications.ModificationsSyncService.java
org.wikimedia.commons.modifications.ModifierSequence.java
org.wikimedia.commons.modifications.PageModifier.java
org.wikimedia.commons.modifications.TemplateRemoveModifier.java
org.wikimedia.commons.upload.MultipleShareActivity.java
org.wikimedia.commons.upload.MultipleUploadListFragment.java
org.wikimedia.commons.upload.ShareActivity.java
org.wikimedia.commons.upload.SingleUploadFragment.java
org.wikimedia.commons.upload.UploadController.java
org.wikimedia.commons.upload.UploadService.java