diff options
author | Gunnar Wrobel <wrobel@gentoo.org> | 2006-09-24 08:28:38 +0000 |
---|---|---|
committer | Gunnar Wrobel <wrobel@gentoo.org> | 2006-09-24 08:28:38 +0000 |
commit | 017aa77364b3174d8037b68e5541c631eb74a444 (patch) | |
tree | 5dc7a57a5b45e6ae90db80c44302aa17fac07f9c | |
parent | Updated pardus library to 0.9. Bug fix 2. (diff) | |
download | overlay-017aa77364b3174d8037b68e5541c631eb74a444.tar.gz overlay-017aa77364b3174d8037b68e5541c631eb74a444.tar.bz2 overlay-017aa77364b3174d8037b68e5541c631eb74a444.zip |
Added modified autoping.py
svn path=/stable/; revision=771
-rw-r--r-- | www-apps/pyblosxom-plugins/Manifest | 12 | ||||
-rwxr-xr-x | www-apps/pyblosxom-plugins/files/autoping.py | 252 | ||||
-rw-r--r-- | www-apps/pyblosxom-plugins/pyblosxom-plugins-1.3.2.ebuild | 1 |
3 files changed, 261 insertions, 4 deletions
diff --git a/www-apps/pyblosxom-plugins/Manifest b/www-apps/pyblosxom-plugins/Manifest index 8a28a91..ad77504 100644 --- a/www-apps/pyblosxom-plugins/Manifest +++ b/www-apps/pyblosxom-plugins/Manifest @@ -1,3 +1,7 @@ +AUX autoping.py 8211 RMD160 c55abdb05fe28e3857ed6006310466b075986fec SHA1 e462cc942be77ca390019c6f6b7fbb5b41e370ac SHA256 86ece2a6b5d38ed3b4fd02b9fad098418b7120dd38d7e517f0133c2eb32a7918 +MD5 9c27438017d0539f56b59799ca242d73 files/autoping.py 8211 +RMD160 c55abdb05fe28e3857ed6006310466b075986fec files/autoping.py 8211 +SHA256 86ece2a6b5d38ed3b4fd02b9fad098418b7120dd38d7e517f0133c2eb32a7918 files/autoping.py 8211 AUX blocks.py 6567 RMD160 471467203f5f06cc9e48898f1f26aa6781593386 SHA1 b5c6cd8ee99acaaef2d5ceb1294c9893f26294cb SHA256 ff6b9848689b897a280580305901f1295ea1d4abe7484d098b83d9de0f1c622a MD5 f67ef44f565d2cfa49ce8e728267081b files/blocks.py 6567 RMD160 471467203f5f06cc9e48898f1f26aa6781593386 files/blocks.py 6567 @@ -74,10 +78,10 @@ AUX xmlrpc_pingback.py 5696 RMD160 5454bc762bdf5e4bfc41e1a7600f7487de4111e0 SHA1 MD5 9d1afb745bb12ea8b1ab7a2ab2be7e6a files/xmlrpc_pingback.py 5696 RMD160 5454bc762bdf5e4bfc41e1a7600f7487de4111e0 files/xmlrpc_pingback.py 5696 SHA256 c71abde791d91e0ebabbc52af83a2704b74429f7fbb18ec3441c8d7a9688d876 files/xmlrpc_pingback.py 5696 -EBUILD pyblosxom-plugins-1.3.2.ebuild 1128 RMD160 75561d056473acfcf34d3e22ed2a54ebef205ad6 SHA1 7806a8ab9dfd588c4373c458fd50efac2d6c23e3 SHA256 2d8cfa1f7235c0798b52a73b5dbcd9d028415ff46a45935b977b2cb25ded1437 -MD5 3a1cd48b2ee5f5175cf74f8ad606ad13 pyblosxom-plugins-1.3.2.ebuild 1128 -RMD160 75561d056473acfcf34d3e22ed2a54ebef205ad6 pyblosxom-plugins-1.3.2.ebuild 1128 -SHA256 2d8cfa1f7235c0798b52a73b5dbcd9d028415ff46a45935b977b2cb25ded1437 pyblosxom-plugins-1.3.2.ebuild 1128 +EBUILD pyblosxom-plugins-1.3.2.ebuild 1153 RMD160 90d9959970749f61df6c63d88bf31a2cd2bec086 SHA1 c9c5a48daa2c3b9b0da22aea7ae8525530daef16 SHA256 87e99c7d7ac53f57ef6c4ebcdcd7678baa67fffe31f49e5ae760b350778b2c63 +MD5 0a397fcd33e9d4a37b13eea5b46abd4b pyblosxom-plugins-1.3.2.ebuild 1153 +RMD160 90d9959970749f61df6c63d88bf31a2cd2bec086 pyblosxom-plugins-1.3.2.ebuild 1153 +SHA256 87e99c7d7ac53f57ef6c4ebcdcd7678baa67fffe31f49e5ae760b350778b2c63 pyblosxom-plugins-1.3.2.ebuild 1153 MISC trac.BAK 135 RMD160 a0176def7e99c80344eee0a78e0e5ef4886255ad SHA1 8d33f26243a6b05b91b8aeb67bd219030785f94e SHA256 fc951abe2f3dad748fab09d07ba7e9ac1d20d8c9da0d68e12ce68aa07d1156c3 MD5 890d871165b6e92e46908e5f49b37216 trac.BAK 135 RMD160 a0176def7e99c80344eee0a78e0e5ef4886255ad trac.BAK 135 diff --git a/www-apps/pyblosxom-plugins/files/autoping.py b/www-apps/pyblosxom-plugins/files/autoping.py new file mode 100755 index 0000000..87be026 --- /dev/null +++ b/www-apps/pyblosxom-plugins/files/autoping.py @@ -0,0 +1,252 @@ +#!/usr/bin/env python +"""Ping all traceback-eligable or pingback-elibable servers associated with +hrefs found in a given blog entry - Most code is by Sam Ruby + +One requirement for this is that you run this code in your py['datadir']. This +script can be placed anywhere. If your entry is placed in, +technology/stuff.txt, run autoping this way. + +cd /your/blog/dir +/path/to/autoping.py technology/stuff.txt + +Autoping will try to send a trackback and/or pingback based on the URLs it +found on technology/stuff.txt. There's a limitation that, if the autodiscovery +in the trackback RDF in the site does not properly point to the correct URL to +ping, autoping will not be able to send out the trackback. Alert the author of +the site. +""" + +# Modify this to where your pyblosxom and config.py is installed +import sys + +import re, sgmllib, sys, urllib, xmlrpclib +from xml.sax import parseString, SAXParseException +from xml.sax.handler import ContentHandler +import cPickle, os, os.path + +# Get our pyblosxom specifics here +from Pyblosxom import tools +from Pyblosxom.pyblosxom import blosxom_entry_parser +from Pyblosxom.pyblosxom import Request +from config import py as cfg + +logdir = cfg.get("logdir", "/tmp") +logfile = os.path.normpath(logdir + os.sep + "autoping.log") +tools.initialize(cfg) +logger = tools.getLogger(logfile) + +def excerpt(filename, title, body, blogname): + """ filename,title,body => url,args + + Excerpt the body and urlencode the trackback arguments. + """ + + body = re.split('<div\s+class="excerpt">(.*?)<\/div>',body)[:2][-1] + + body = re.sub('\n',' ',body) + body = re.sub(' ',' ',body) + body = re.sub('^(<p>)?<a\s+href="\S+">[\w\s\.]+<\/a>:\s*','',body) + body = re.sub('<em>.*?<\/em>\.?\s*','',body) + body = re.sub('<.*?>','',body) + + body = body[:252] + + url = cfg.get("base_url") + '/' + filename + url = re.sub('\.[a-zA-Z]+$','',url) + + arg = {} + arg['url'] = url + arg['title'] = title + arg['blog_name'] = blogname + arg['excerpt'] = body + + return url, urllib.urlencode(arg) + + +class link(sgmllib.SGMLParser): + """ source -> list of trackbacks, list of pingbacks + + Parse a given html page, and retrieve the trackbacks associated with + pages referenced via href found. + """ + + def __init__(self, name, title, body, blogname): + sgmllib.SGMLParser.__init__(self) + self.trackbacks = [] + self.pingbacks = [] + self.title = title + (self.url,self.args) = excerpt(name, title, body, blogname) + #print 'In link constructor: %s %s %s %s' % (name, title, body, blogname) + self.feed(body) + + def start_a(self, attrs): + attrs = dict(attrs) + print 'attrs = %s ' % attrs + if attrs.has_key('href'): + try: + href = attrs['href'] + print 'href = %s' % href + trackback,pingback = backrefs(href) + self.trackbacks = self.trackbacks + trackback + self.pingbacks = self.pingbacks + pingback + print trackback + print pingback + except: + logger.error("Caught Exception") + + +tb_re=re.compile('(<rdf:RDF .*?</rdf:RDF>)') +pb_re=re.compile('<link rel="pingback" href="([^"]+)" ?/?>') +def backrefs(href): + """ href -> ([trackbacks],[pingbacks]) + + Parse a given html page, and retrieve the rdf:about, X-Pingback header, + or pingback link information associated with a given href. At most + one is returned (in the above priority). + """ + + base = href.split("#")[0] + file = urllib.urlopen(base) + info = file.info() + data = file.read().replace('\n',' ') + print base + file.close() + + trackback = [] + pingback = pb_re.findall(data)[:1] + + for x in tb_re.findall(data): + try: + parseString(x, rdf()) + except SAXParseException: + pass + + if info.has_key("X-Pingback"): pingback=[info["X-Pingback"]] + if rdf.ids.has_key(href): trackback = [rdf.ids[href]] + if not trackback and not pingback and href.find("#")>0: + if rdf.ids.has_key(base): trackback = [rdf.ids[base]] + + if trackback: pingback=[] + if pingback: pingback=[(href, pingback[0])] + + return (trackback, pingback) + + +class rdf(ContentHandler): + """ xml -> dictionary of {dc:identifier => trackback:ping|rdf:about} + + Parse a given html page, and retrieve the rdf:about information associated + with a given href. + """ + + ids = {} + def startElement(self, name, attrs): + if name == 'rdf:Description': + attrs=dict(attrs) + if attrs.has_key('dc:identifier'): + if attrs.has_key('trackback:ping'): + self.ids[attrs['dc:identifier']] = attrs['trackback:ping'] + elif attrs.has_key('about'): + self.ids[attrs['dc:identifier']] = attrs['about'] + elif attrs.has_key('rdf:about'): + self.ids[attrs['dc:identifier']] = attrs['rdf:about'] + +def trackback(parser): + """ parser -> None + + Ping all trackbacks encountered with the url, title, blog_name, and + excerpt. + """ + print 'Entered trackback parse' + for url in parser.trackbacks: + try: + logger.info("") + logger.info("*** Trackback " + url) + logger.info(parser.args) + if url.find('?tb_id=') >= 0: + file=urllib.urlopen(url + "&" + parser.args) + else: + file=urllib.urlopen(url, parser.args) + logger.info(file.read()) + file.close() + except Exception, e: + logger.error(e) + + +def pingback(parser): + """ parser -> None + + Ping all pingbacks encountered with the source and targets + """ + + for target,server in parser.pingbacks: + try: + logger.info("") + logger.info("*** Pingback " + server) + server=xmlrpclib.Server(server) + print 'Parser: ' + parser.url + target + logger.info(server.pingback.ping(parser.url,target)) + except Exception, e: + logger.error(e) + +def autoping(name): + import os, sys + from Pyblosxom.pyblosxom import PyBlosxom + from config import py as cfg + + if cfg.has_key("codebase"): + import sys + sys.path.insert(0, cfg["codebase"]) + + env = {} + # names taken from wsgi instead of inventing something new + env['wsgi.input'] = sys.stdin + env['wsgi.errors'] = sys.stderr + env['wsgi.url_scheme'] = "http" + if os.environ.get("HTTPS") in ('yes','on','1'): + env['wsgi.url_scheme'] = "https" + # setup url_scheme for static rendering + if not os.environ.get("REQUEST_METHOD", ""): + if 'base_url' in cfg: + env['wsgi.url_scheme'] = cfg['base_url'][:cfg['base_url'].find("://")] + + for mem in ["HTTP_HOST", "HTTP_USER_AGENT", "HTTP_REFERER", "PATH_INFO", + "QUERY_STRING", "REMOTE_ADDR", "REQUEST_METHOD", "REQUEST_URI", + "SCRIPT_NAME", "HTTP_IF_NONE_MATCH", "HTTP_IF_MODIFIED_SINCE", + "HTTP_COOKIE", "CONTENT_LENGTH", "HTTP_ACCEPT", "HTTP_ACCEPT_ENCODING"]: + env[mem] = os.environ.get(mem, "") + + data=None + request = Request(cfg, env, data) + # Load up the cache (You can just import the base cache here) + cache_driver = tools.importName('Pyblosxom.cache', cfg.get('cacheDriver', 'base')) + cache = cache_driver.BlosxomCache(request, cfg.get('cacheConfig', '')) + try: + filename = os.path.join(cfg['datadir'], name) + logger.error(filename) + entryData = {} + cache.load(filename) + # Look for cached documents + if cache.isCached(): + entryData = cache.getEntry() + + # Cached? Try our entryparsers then. + if not entryData: + fileExt = re.search(r'\.([\w]+)$', filename) + try: + entryData = blosxom_entry_parser(filename, request) + except IOError, e: + logger.error(e) + + name = re.sub(cfg['datadir'],'',name) + parser = link(name, entryData['title'].strip(), entryData['body'].strip(), cfg['blog_title']) + logger.error(parser) + trackback(parser) + pingback(parser) + except Exception, e: + logger.error(e) + + +if __name__ == '__main__': + for name in sys.argv[1:]: + autoping(name) diff --git a/www-apps/pyblosxom-plugins/pyblosxom-plugins-1.3.2.ebuild b/www-apps/pyblosxom-plugins/pyblosxom-plugins-1.3.2.ebuild index 44026b9..c3a909b 100644 --- a/www-apps/pyblosxom-plugins/pyblosxom-plugins-1.3.2.ebuild +++ b/www-apps/pyblosxom-plugins/pyblosxom-plugins-1.3.2.ebuild @@ -40,6 +40,7 @@ src_install() { blocks.py trackback.py commentAPI.py + autoping.py xmlrpc_pingback.py wbglast10summary.py" |