Use SGML parser : sgmllib « XML « Python Tutorial

Home
Python Tutorial
1.Introduction
2.Data Type
3.Statement
4.Operator
5.String
6.Tuple
7.List
8.Dictionary
9.Collections
10.Function
11.Class
12.File
13.Built-in Function
14.Built-in Module
15.Database
16.Regular Expressions
17.Thread
18.Tkinter
19.wxPython
20.XML
21.Network
22.CGI Web
23.Windows
Python Tutorial » XML » sgmllib 
20.7.1.Use SGML parser
import sgmllib
import string

# Path of the HTML document that is opened at the bottom of the script
# and scanned for its <title> element.
filename = "index.html"
class CleanExit(Exception):
    """Signal raised to stop parsing early once the title has been captured.

    It carries no payload; raising it is purely a control-flow device so
    the rest of the document does not have to be fed to the parser.
    """

class Titlefinder(sgmllib.SGMLParser):
    def __init__(self, verbose=0):
        sgmllib.SGMLParser.__init__(self, verbose)
        self.title = self.data = None
    def start_title(self, attributes):
        self.data = []
    def end_title(self):
        self.title = string.join(self.data, "")
        raise CleanExit
    def handle_data(self, data):
        if self.data is not None:
            self.data.append(data)

def get_title(filehandle):
    """Return the ``<title>`` text of the document read from *filehandle*.

    The document is fed to a ``Titlefinder`` in 1024-byte chunks. The
    parser raises ``CleanExit`` once the title has been stored, at which
    point reading stops and the title is returned.

    Args:
        filehandle: Object with a ``read(size)`` method yielding the
            document text; it is not closed here.

    Returns:
        The title string, or None if the input was exhausted without the
        parser signalling that a title was found.
    """
    finder = Titlefinder()
    try:
        chunk = filehandle.read(1024)
        while chunk:
            finder.feed(chunk)
            chunk = filehandle.read(1024)
        # Input exhausted: let the parser process any buffered data.
        finder.close()
    except CleanExit:
        return finder.title
    return None

# Open the document, extract its title, and always release the file
# handle, even if parsing fails.
filehandle = open(filename)
try:
    title = get_title(filehandle)
finally:
    filehandle.close()

# The original omitted the "%" operator, which made Python try to call the
# string and raise "TypeError: 'str' object is not callable".
# The parenthesised single-argument form prints identically under the
# Python 2 print statement and the Python 3 print function.
print("The page's title is: %s" % title)
20.7.sgmllib
20.7.1.Use SGML parser
java2s.com  | Contact Us | Privacy Policy
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.