Java tutorial
/* * Copyright (C) 2015 hu * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ package cn.edu.hfut.dmic.webcollector.model; //import cn.edu.hfut.dmic.webcollector.util.CrawlDatumFormater; import cn.edu.hfut.dmic.webcollector.util.CrawlDatumFormater; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import java.util.HashMap; import java.util.Map.Entry; import org.apache.hadoop.io.Writable; /** * ?? * @author hu */ public class CrawlDatum implements Writable { public final static int STATUS_DB_UNFETCHED = 0; public final static int STATUS_DB_INJECT = 1; public final static int STATUS_DB_FORCED_INJECT = 2; public final static int STATUS_DB_FETCHED = 5; private String url = null; private long fetchTime = System.currentTimeMillis(); private int httpCode = -1; private int status = STATUS_DB_UNFETCHED; private int retry = 0; /** * WebCollector 2.5????URL??key? * ?getKey()CrawlDatumkey,keynull,getKey()URL * ?keyURL?key? */ private String key = null; /** * WebCollector 2.5???CrawlDatum?metaData * ????? * visit?page.getMetaData()?CrawlDatummetaData */ private HashMap<String, String> metaData = new HashMap<String, String>(); public CrawlDatum() { } public CrawlDatum(String url) { this.url = url; } public CrawlDatum(String url, String[] metas) throws Exception { this(url); if (metas.length % 2 != 0) { throw new Exception("length of metas must be even"); } else { for (int i = 0; i < metas.length; i += 2) { putMetaData(metas[i * 2], metas[i * 2 + 1]); } } } public int incrRetry(int count) { retry = retry + count; return retry; } public int getHttpCode() { return httpCode; } public void setHttpCode(int httpCode) { this.httpCode = httpCode; } public String getUrl() { return url; } public CrawlDatum setUrl(String url) { this.url = url; return this; } public long getFetchTime() { return fetchTime; } public void setFetchTime(long fetchTime) { this.fetchTime = fetchTime; } public int getRetry() { return retry; } public void setRetry(int retry) { this.retry = retry; } public int getStatus() { return status; } public void setStatus(int status) { this.status = status; } public HashMap<String, String> getMetaData() { return metaData; } public void setMetaData(HashMap<String, String> metaData) { this.metaData = metaData; } public CrawlDatum putMetaData(String key, String value) { this.metaData.put(key, value); return this; } public String getMetaData(String key) { return this.metaData.get(key); } public String getKey() { if (key == null) { return getUrl(); } else { return key; } } public CrawlDatum setKey(String key) { this.key = key; return this; } @Override public String toString() { return CrawlDatumFormater.datumToString(this); } @Override public void write(DataOutput d) throws IOException { d.writeUTF(getKey()); d.writeUTF(url); d.writeInt(status); d.writeLong(fetchTime); d.writeInt(httpCode); d.writeInt(retry); int metaLen = metaData.size(); d.writeInt(metaLen); for (Entry<String, String> entry : metaData.entrySet()) { d.writeUTF(entry.getKey()); d.writeUTF(entry.getValue()); } } @Override public void readFields(DataInput di) throws IOException { key = di.readUTF(); url = di.readUTF(); status = di.readInt(); fetchTime = di.readLong(); httpCode = di.readInt(); retry = di.readInt(); int metaLen = di.readInt(); for (int i = 0; i < metaLen; i++) { String key = di.readUTF(); String value = di.readUTF(); metaData.put(key, value); } } public CrawlDatum copy() { CrawlDatum datum = new CrawlDatum(url); datum.setKey(key); datum.setStatus(status); datum.setFetchTime(fetchTime); datum.setHttpCode(httpCode); for (Entry<String, String> entry : metaData.entrySet()) { datum.putMetaData(entry.getKey(), entry.getValue()); } return datum; } }