Java URL Read getUrlInfos(String urlAsString, int timeout)

Here you can find the source of getUrlInfos(String urlAsString, int timeout)

Description

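Downloads up to the first 4096 bytes of the given URL and returns a two-element array holding the page's HTML title and its meta description (empty strings when they cannot be determined).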

License

Apache License

Declaration

public static String[] getUrlInfos(String urlAsString, int timeout) 

Method Source Code

//package com.java2s;
/**
 * Copyright (C) 2010 Peter Karich <jetwick_@_pannous_._info>
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.Proxy;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.Arrays;

/**
 * Helpers for extracting the HTML title and meta description from the
 * beginning of a web page.
 *
 * <p>All public methods report "nothing found" as an array of two empty
 * strings rather than by throwing or returning {@code null}.
 */
public class Main {
    final static String DESCRIPTION = "<meta name=\"description\" content=\"";
    final static String DESCRIPTION2 = "<meta name=\"Description\" content=\"";

    /** Maximum number of response bytes that are downloaded and inspected. */
    private static final int MAX_BYTES = 4096;

    private static final String TITLE_OPEN = "<title>";
    private static final String TITLE_OPEN_WITH_ATTRS = "<title ";
    private static final String TITLE_CLOSE = "</title>";
    private static final String CHARSET_ATTR = "charset=";

    /** Decoding used when the page does not declare a usable charset. */
    private static final Charset DEFAULT_CHARSET = Charset.forName("UTF-8");

    /**
     * Fetches the head of the given URL and extracts its title and description.
     *
     * <p>At most {@value #MAX_BYTES} bytes of the response body are read. This
     * is a best-effort call: any failure (malformed URL, non-HTTP URL, network
     * error, timeout before any data arrived) yields two empty strings.
     *
     * @param urlAsString the URL to fetch
     * @param timeout connect and read timeout in milliseconds
     * @return a two-element array {@code {title, description}}; never {@code null}
     */
    public static String[] getUrlInfos(String urlAsString, int timeout) {
        HttpURLConnection hConn = null;
        InputStream in = null;
        try {
            URL url = new URL(urlAsString);
            //using proxy may increase latency
            hConn = (HttpURLConnection) url.openConnection(Proxy.NO_PROXY);
            hConn.setRequestProperty("User-Agent",
                    "Mozilla/5.0 Gecko/20100915 Firefox/3.6.10");
            hConn.setConnectTimeout(timeout);
            hConn.setReadTimeout(timeout);
            in = new BufferedInputStream(hConn.getInputStream(), MAX_BYTES);
            return getUrlInfosFromText(readHead(in, MAX_BYTES));
        } catch (Exception ex) {
            // Deliberately broad: callers rely on this method never throwing,
            // so every failure is reported as an empty result below.
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // nothing useful can be done if closing fails
                }
            }
            if (hConn != null) {
                // Only the head of the body was consumed, so release the
                // connection instead of leaving it half-read.
                hConn.disconnect();
            }
        }
        return emptyResult();
    }

    /**
     * Returns title and description of a specified string (as byte array).
     *
     * <p>The bytes are first decoded as UTF-8. If the markup contains a
     * {@code charset=} declaration naming a charset this JVM supports, the
     * bytes are decoded again with that charset. Tag matching ignores case.
     *
     * @param arr raw bytes of (the beginning of) an HTML document; may be
     *            {@code null} or empty
     * @return a two-element array {@code {title, description}}. Both entries
     *         are empty when no non-empty title is found; the description is
     *         empty when no description meta tag is found.
     */
    public static String[] getUrlInfosFromText(byte[] arr) {
        if (arr == null || arr.length == 0) {
            return emptyResult();
        }

        String res = new String(arr, DEFAULT_CHARSET);
        int index = getStartTitleEndPos(res);
        if (index < 0) {
            return emptyResult();
        }

        Charset declared = findDeclaredCharset(res);
        if (declared != null) {
            res = new String(arr, declared);
            // Offsets may shift once multi-byte sequences decode differently.
            index = getStartTitleEndPos(res);
            if (index < 0) {
                return emptyResult();
            }
        }

        // Search from the title start so an unrelated earlier "</title>"
        // cannot hide the real one.
        int lastIndex = indexOfIgnoreCase(res, TITLE_CLOSE, index);
        if (lastIndex <= index) {
            return emptyResult();
        }
        String title = res.substring(index, lastIndex);

        // Try the two exact spellings first so pages that matched before keep
        // the same result, then fall back to a case-insensitive match.
        int descStart = res.indexOf(DESCRIPTION);
        if (descStart < 0) {
            descStart = res.indexOf(DESCRIPTION2);
        }
        if (descStart < 0) {
            descStart = indexOfIgnoreCase(res, DESCRIPTION, 0);
        }
        if (descStart < 0) {
            return new String[] { title, "" };
        }

        // DESCRIPTION and DESCRIPTION2 have the same length.
        descStart += DESCRIPTION.length();
        int descEnd = res.indexOf('"', descStart);
        if (descEnd < 0) {
            return new String[] { title, "" };
        }
        return new String[] { title, res.substring(descStart, descEnd) };
    }

    /**
     * Finds where the text content of the title element begins.
     *
     * @param res the HTML text to search
     * @return the index just past the {@code >} that closes the opening
     *         {@code <title>} / {@code <title ...>} tag (matched ignoring
     *         case), or -1 if there is no such tag
     */
    public static int getStartTitleEndPos(String res) {
        int index = indexOfIgnoreCase(res, TITLE_OPEN, 0);
        if (index >= 0) {
            return index + TITLE_OPEN.length();
        }

        index = indexOfIgnoreCase(res, TITLE_OPEN_WITH_ATTRS, 0);
        if (index < 0) {
            return -1;
        }

        int close = res.indexOf('>', index);
        return close < 0 ? -1 : close + 1;
    }

    /** Returns a fresh "nothing found" result so callers may safely mutate it. */
    private static String[] emptyResult() {
        return new String[] { "", "" };
    }

    /**
     * Reads until {@code limit} bytes were received or the stream ended.
     * If reading fails after some bytes already arrived, those bytes are
     * returned instead of propagating the error.
     */
    private static byte[] readHead(InputStream in, int limit) throws IOException {
        byte[] buf = new byte[limit];
        int filled = 0;
        while (filled < limit) {
            int n;
            try {
                n = in.read(buf, filled, limit - filled);
            } catch (IOException ex) {
                if (filled == 0) {
                    throw ex;
                }
                break;
            }
            if (n < 0) {
                break;
            }
            filled += n;
        }
        return Arrays.copyOf(buf, filled);
    }

    /**
     * Extracts the charset named by the first {@code charset=} occurrence.
     * Accepts double-quoted, single-quoted and unquoted values. Returns
     * {@code null} when nothing is declared or the name is not supported.
     */
    private static Charset findDeclaredCharset(String html) {
        int attr = indexOfIgnoreCase(html, CHARSET_ATTR, 0);
        if (attr < 0) {
            return null;
        }

        int start = attr + CHARSET_ATTR.length();
        if (start < html.length()) {
            char first = html.charAt(start);
            if (first == '"' || first == '\'') {
                start++;
            }
        }

        int end = start;
        while (end < html.length() && isCharsetNameChar(html.charAt(end))) {
            end++;
        }
        if (end == start) {
            return null;
        }

        try {
            return Charset.forName(html.substring(start, end));
        } catch (IllegalArgumentException ex) {
            // illegal or unsupported charset name: caller keeps the default
            return null;
        }
    }

    /** Tells whether {@code c} may appear in a charset name. */
    private static boolean isCharsetNameChar(char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
                || (c >= '0' && c <= '9')
                || c == '-' || c == '_' || c == '.' || c == ':' || c == '+';
    }

    /**
     * Case-insensitive {@link String#indexOf(String, int)}. Works on the
     * original string so returned offsets are valid for it.
     */
    private static int indexOfIgnoreCase(String text, String needle, int from) {
        int last = text.length() - needle.length();
        for (int i = Math.max(from, 0); i <= last; i++) {
            if (text.regionMatches(true, i, needle, 0, needle.length())) {
                return i;
            }
        }
        return -1;
    }
}

Related

  1. getURL(URL url, String params)
  2. getUrlContent(String url)
  3. getURLContent_old(final String uri, final StringBuffer content)
  4. getUrlContentWithRetries(String url, long timeoutMs, long retryDelayMs)
  5. getUrlFollowingRedirects(String possibleRedirectionUrl)
  6. getUrlSource(String url)
  7. getUrlStatus(String url)
  8. getUrlTxt(String url)
  9. readAsString(final URL url)