// WebExtractHandler.java :  » HTML-Parser » HTMLParser2 » org » vietspider » html » renderer » extractor » Java Open Source
//
// Java Open Source » HTML Parser » HTMLParser2
// HTMLParser2 » org » vietspider » html » renderer » extractor » WebExtractHandler.java
/***************************************************************************
 * Copyright 2001-2009 The VietSpider         All rights reserved.       *
 **************************************************************************/
package org.vietspider.html.renderer.extractor;

import org.vietspider.html.HTMLNode;
import org.vietspider.html.renderer.checker.ContentChecker;

/** 
 * Author : Nhu Dinh Thuan
 *          nhudinhthuan@yahoo.com
 * Feb 10, 2009  
 */
public class WebExtractHandler {

  // Shared checker that isTextElement(HTMLNode) delegates to.
  protected ContentChecker contentChecker = new ContentChecker();

  /**
   * Detaches {@code node} from its parent.
   * <p>
   * Nothing happens when {@code node} is {@code null}, when it has no parent,
   * or when the parent reports that it has no children. The parent itself is
   * left in place even if this call removes its last child.
   *
   * @param node the node to remove; may be {@code null}
   */
  public void removeNode(HTMLNode node) {
    if (node != null) {
      HTMLNode owner = node.getParent();
      if (owner != null && owner.hasChildren()) {
        owner.removeChild(node);
      }
    }
  }

  /**
   * Tells whether {@code node} is named {@code DIV} or {@code TABLE}.
   *
   * @param node the node to inspect
   * @return {@code true} for {@code DIV} and {@code TABLE} nodes,
   *         {@code false} for every other name
   */
  protected boolean isBlockElement(HTMLNode node) {
    boolean block;
    switch (node.getName()) {
      case DIV:
      case TABLE:
        block = true;
        break;
      default:
        block = false;
        break;
    }
    return block;
  }

  /**
   * Tells whether {@code node} is a {@code CONTENT}, {@code SPAN} or {@code P}
   * node that {@link ContentChecker#isTextBlock} accepts.
   * <p>
   * Nodes with any other name are rejected without consulting the checker.
   *
   * @param node the node to inspect
   * @return the checker's verdict for {@code CONTENT}, {@code SPAN} and
   *         {@code P} nodes, {@code false} otherwise
   */
  protected boolean isTextElement(HTMLNode node) {
    switch (node.getName()) {
      case CONTENT:
      case SPAN:
      case P:
        break;
      default:
        return false;
    }
    // NOTE(review): the meaning of (false, 10, 2) is defined by ContentChecker
    // and is not visible here -- presumably thresholds; confirm there.
    return contentChecker.isTextBlock(node, false, 10, 2);
  }
}
// java2s.com  | Contact Us | Privacy Policy
// Copyright 2009 - 12 Demo Source and Support. All rights reserved.
// All other trademarks are property of their respective owners.