Python自带网页解析器
from html.entities import name2codepoint
from html.parser import HTMLParser
class MyHTMLParser(HTMLParser):
    """HTMLParser subclass that prints a labelled line for each parse event.

    Every handler overrides the matching ``HTMLParser`` hook and writes what
    it received to standard output; nothing is stored or returned.
    """

    def handle_starttag(self, tag, attrs):
        """Print the start tag's name, then each of its attributes.

        Args:
            tag: Name of the opening tag.
            attrs: Iterable of attribute entries; each entry is printed as-is.
        """
        print("Start tag:", tag)
        for attr in attrs:
            print(" attr:", attr)

    def handle_endtag(self, tag):
        """Print the name of a closing tag."""
        print("End tag:", tag)

    def handle_data(self, data):
        """Print a run of text data."""
        print("Data :", data)

    def handle_comment(self, data):
        """Print the text of a comment."""
        print("Comment:", data)

    def handle_entityref(self, name):
        """Print the character denoted by a named entity reference.

        Args:
            name: Entity name without the surrounding ``&`` and ``;``
                (for example ``"gt"``).
        """
        # name2codepoint is a mapping keyed by entity name, so it has to be
        # looked up by name rather than passed whole to chr(). An unknown
        # name is echoed back in its raw form instead of raising KeyError
        # and aborting the parse.
        codepoint = name2codepoint.get(name)
        if codepoint is None:
            c = "&" + name + ";"
        else:
            c = chr(codepoint)
        print("Named ent:", c)

    def handle_charref(self, name):
        """Print the character denoted by a numeric character reference.

        Args:
            name: Reference body without ``&#`` and ``;``: decimal digits
                (``"62"``) or hex digits prefixed by ``x``/``X`` (``"x3E"``).

        Raises:
            ValueError: If the digits are malformed or the code point is
                outside the range ``chr`` accepts.
        """
        if name.startswith(("x", "X")):
            # Drop the x/X marker: int() cannot parse it as a hex digit.
            c = chr(int(name[1:], 16))
        else:
            c = chr(int(name))
        print("Num ent:", c)

    def handle_decl(self, data):
        """Print the contents of a declaration such as a doctype."""
        print("Decl :", data)
# Instantiate the event-printing parser with HTMLParser's default settings.
parser = MyHTMLParser()
页:
[1]