Java URL Connection getHTML(String url, boolean removeNonLatinChars)

Here you can find the source of getHTML(String url, boolean removeNonLatinChars)

Description

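Fetches the content at the given URL and returns it as a single string with line breaks removed, optionally stripping every non-ASCII character.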

License

GNU General Public License, version 2 or later (open source)

Declaration

public static String getHTML(String url, boolean removeNonLatinChars) throws IOException 

Method Source Code


//package com.java2s;
/* From www.java2s.com
 * Copyright (C) 2014 desrever <desrever at nubits.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;

public class Main {
    /** Connect and read timeout applied to every request, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 1000 * 8;

    /** Parameter prefix that introduces the charset inside a Content-Type value. */
    private static final String CHARSET_PREFIX = "charset=";

    /**
     * Downloads the resource at {@code url} and returns its text as one string.
     *
     * <p>The body is read line by line and the lines are concatenated
     * <em>without</em> their line terminators, so the result never contains
     * {@code \n} or {@code \r}. The bytes are decoded with the charset declared
     * in the response's Content-Type; when none is declared (or the declared
     * one is unknown) UTF-8 is used rather than the platform default, so the
     * result does not depend on the machine the code runs on.
     *
     * @param url the address to fetch; any scheme supported by {@link URL}
     * @param removeNonLatinChars when {@code true}, every character outside the
     *        ASCII range U+0000..U+007F is removed from the result
     * @return the decoded content with line terminators stripped; empty if the
     *         resource has no content
     * @throws IOException if the URL is malformed, the connection cannot be
     *         opened, or reading times out or fails
     */
    public static String getHTML(String url, boolean removeNonLatinChars) throws IOException {
        URLConnection con = new URL(url).openConnection();
        con.setConnectTimeout(TIMEOUT_MILLIS);
        con.setReadTimeout(TIMEOUT_MILLIS);

        StringBuilder all = new StringBuilder();
        // The stream is opened first so connection failures surface as IOException
        // before the response headers are consulted for the charset.
        try (InputStream in = con.getInputStream();
                BufferedReader br = new BufferedReader(new InputStreamReader(in, charsetOf(con)))) {
            String line;
            while ((line = br.readLine()) != null) {
                all.append(line);
            }
        }

        String html = all.toString();
        if (removeNonLatinChars) {
            html = html.replaceAll("[^\\x00-\\x7F]", "");
        }
        return html;
    }

    /** Returns the charset named in the connection's Content-Type, or UTF-8 if absent or unusable. */
    private static Charset charsetOf(URLConnection con) {
        String contentType = con.getContentType();
        if (contentType != null) {
            for (String rawParam : contentType.split(";")) {
                String param = rawParam.trim();
                if (!param.regionMatches(true, 0, CHARSET_PREFIX, 0, CHARSET_PREFIX.length())) {
                    continue;
                }
                String name = param.substring(CHARSET_PREFIX.length()).trim();
                // The value may be a quoted string, e.g. charset="utf-8".
                if (name.length() >= 2 && name.startsWith("\"") && name.endsWith("\"")) {
                    name = name.substring(1, name.length() - 1);
                }
                try {
                    return Charset.forName(name);
                } catch (IllegalArgumentException ignored) {
                    // Illegal or unsupported charset name sent by the server:
                    // fall back to UTF-8 instead of failing the whole download.
                    break;
                }
            }
        }
        return StandardCharsets.UTF_8;
    }
}

Related

  1. getFeedReader(URL feedUrl)
  2. getFromUrl(String url)
  3. getGlobalAddress(String url)
  4. getGlobalIPAddress(URL automationPage)
  5. getHeaderFieldLong(URLConnection conn, String name, long Default)
  6. getHTML(URL url)
  7. getHttpGetContent(String strUrl, String charSet)
  8. getHttpHeaders(URLConnection connection)
  9. getHttpResponseHeader(URLConnection http)