Added modified autoping.py

svn path=/stable/; revision=771
author: Gunnar Wrobel <wrobel@gentoo.org> 2006-09-24 08:28:38 +0000
committer: Gunnar Wrobel <wrobel@gentoo.org> 2006-09-24 08:28:38 +0000
commit: 017aa77364b3174d8037b68e5541c631eb74a444 (patch)
tree: 5dc7a57a5b45e6ae90db80c44302aa17fac07f9c
parent: Updated pardus library to 0.9. Bug fix 2. (diff)
download: overlay-017aa77364b3174d8037b68e5541c631eb74a444.tar.gz
overlay-017aa77364b3174d8037b68e5541c631eb74a444.tar.bz2
overlay-017aa77364b3174d8037b68e5541c631eb74a444.zip
3 files changed, 261 insertions, 4 deletions
diff --git a/www-apps/pyblosxom-plugins/Manifest b/www-apps/pyblosxom-plugins/Manifest
index 8a28a91..ad77504 100644
--- a/www-apps/pyblosxom-plugins/Manifest
+++ b/www-apps/pyblosxom-plugins/Manifest
@@ -1,3 +1,7 @@
+AUX autoping.py 8211 RMD160 c55abdb05fe28e3857ed6006310466b075986fec SHA1 e462cc942be77ca390019c6f6b7fbb5b41e370ac SHA256 86ece2a6b5d38ed3b4fd02b9fad098418b7120dd38d7e517f0133c2eb32a7918
+MD5 9c27438017d0539f56b59799ca242d73 files/autoping.py 8211
+RMD160 c55abdb05fe28e3857ed6006310466b075986fec files/autoping.py 8211
+SHA256 86ece2a6b5d38ed3b4fd02b9fad098418b7120dd38d7e517f0133c2eb32a7918 files/autoping.py 8211
 AUX blocks.py 6567 RMD160 471467203f5f06cc9e48898f1f26aa6781593386 SHA1 b5c6cd8ee99acaaef2d5ceb1294c9893f26294cb SHA256 ff6b9848689b897a280580305901f1295ea1d4abe7484d098b83d9de0f1c622a
 MD5 f67ef44f565d2cfa49ce8e728267081b files/blocks.py 6567
 RMD160 471467203f5f06cc9e48898f1f26aa6781593386 files/blocks.py 6567
@@ -74,10 +78,10 @@ AUX xmlrpc_pingback.py 5696 RMD160 5454bc762bdf5e4bfc41e1a7600f7487de4111e0 SHA1
 MD5 9d1afb745bb12ea8b1ab7a2ab2be7e6a files/xmlrpc_pingback.py 5696
 RMD160 5454bc762bdf5e4bfc41e1a7600f7487de4111e0 files/xmlrpc_pingback.py 5696
 SHA256 c71abde791d91e0ebabbc52af83a2704b74429f7fbb18ec3441c8d7a9688d876 files/xmlrpc_pingback.py 5696
-EBUILD pyblosxom-plugins-1.3.2.ebuild 1128 RMD160 75561d056473acfcf34d3e22ed2a54ebef205ad6 SHA1 7806a8ab9dfd588c4373c458fd50efac2d6c23e3 SHA256 2d8cfa1f7235c0798b52a73b5dbcd9d028415ff46a45935b977b2cb25ded1437
-MD5 3a1cd48b2ee5f5175cf74f8ad606ad13 pyblosxom-plugins-1.3.2.ebuild 1128
-RMD160 75561d056473acfcf34d3e22ed2a54ebef205ad6 pyblosxom-plugins-1.3.2.ebuild 1128
-SHA256 2d8cfa1f7235c0798b52a73b5dbcd9d028415ff46a45935b977b2cb25ded1437 pyblosxom-plugins-1.3.2.ebuild 1128
+EBUILD pyblosxom-plugins-1.3.2.ebuild 1153 RMD160 90d9959970749f61df6c63d88bf31a2cd2bec086 SHA1 c9c5a48daa2c3b9b0da22aea7ae8525530daef16 SHA256 87e99c7d7ac53f57ef6c4ebcdcd7678baa67fffe31f49e5ae760b350778b2c63
+MD5 0a397fcd33e9d4a37b13eea5b46abd4b pyblosxom-plugins-1.3.2.ebuild 1153
+RMD160 90d9959970749f61df6c63d88bf31a2cd2bec086 pyblosxom-plugins-1.3.2.ebuild 1153
+SHA256 87e99c7d7ac53f57ef6c4ebcdcd7678baa67fffe31f49e5ae760b350778b2c63 pyblosxom-plugins-1.3.2.ebuild 1153
 MISC trac.BAK 135 RMD160 a0176def7e99c80344eee0a78e0e5ef4886255ad SHA1 8d33f26243a6b05b91b8aeb67bd219030785f94e SHA256 fc951abe2f3dad748fab09d07ba7e9ac1d20d8c9da0d68e12ce68aa07d1156c3
 MD5 890d871165b6e92e46908e5f49b37216 trac.BAK 135
 RMD160 a0176def7e99c80344eee0a78e0e5ef4886255ad trac.BAK 135
diff --git a/www-apps/pyblosxom-plugins/files/autoping.py b/www-apps/pyblosxom-plugins/files/autoping.py
new file mode 100755
index 0000000..87be026
--- /dev/null
+++ b/www-apps/pyblosxom-plugins/files/autoping.py
@@ -0,0 +1,252 @@
+#!/usr/bin/env python
+"""Ping all trackback-eligible or pingback-eligible servers associated with
+hrefs found in a given blog entry - Most code is by Sam Ruby
+
+One requirement for this is that you run this code in your py['datadir']. This
+script can be placed anywhere. If your entry is placed in
+technology/stuff.txt, run autoping this way:
+
+cd /your/blog/dir
+/path/to/autoping.py technology/stuff.txt
+
+Autoping will try to send a trackback and/or pingback based on the URLs it
+finds in technology/stuff.txt. One limitation: if the trackback RDF
+autodiscovery on the target site does not point to the correct URL to
+ping, autoping will not be able to send out the trackback. In that case,
+alert the author of the site.
+"""
+
+# Adjust sys.path here if pyblosxom and config.py are not already importable
+import sys
+
+import re, sgmllib, sys, urllib, xmlrpclib
+from xml.sax import parseString, SAXParseException
+from xml.sax.handler import ContentHandler
+import cPickle, os, os.path
+
+# Get our pyblosxom specifics here
+from Pyblosxom import tools
+from Pyblosxom.pyblosxom import blosxom_entry_parser
+from Pyblosxom.pyblosxom import Request
+from config import py as cfg
+
+logdir = cfg.get("logdir", "/tmp")
+logfile = os.path.normpath(logdir + os.sep + "autoping.log")
+tools.initialize(cfg)
+logger = tools.getLogger(logfile)
+
+def excerpt(filename, title, body, blogname):
+    """ filename,title,body => url,args
+
+    Excerpt the body and urlencode the trackback arguments.
+    """
+
+    body = re.split('<div\s+class="excerpt">(.*?)<\/div>',body)[:2][-1]
+
+    body = re.sub('\n',' ',body)
+    body = re.sub('&nbsp;',' ',body)
+    body = re.sub('^(<p>)?<a\s+href="\S+">[\w\s\.]+<\/a>:\s*','',body)
+    body = re.sub('<em>.*?<\/em>\.?\s*','',body)
+    body = re.sub('<.*?>','',body)
+
+    body = body[:252]
+
+    url = cfg.get("base_url") + '/' + filename
+    url = re.sub('\.[a-zA-Z]+$','',url)
+
+    arg = {}
+    arg['url'] = url
+    arg['title'] = title
+    arg['blog_name'] = blogname
+    arg['excerpt'] = body
+
+    return url, urllib.urlencode(arg)
+
+
+class link(sgmllib.SGMLParser):
+    """ source -> list of trackbacks, list of pingbacks
+
+    Parse a given html page, and retrieve the trackbacks and pingbacks
+    associated with the pages referenced by the hrefs found in it.
+    """
+
+    def __init__(self, name, title, body, blogname):
+        sgmllib.SGMLParser.__init__(self)
+        self.trackbacks = []
+        self.pingbacks  = []
+        self.title = title
+        (self.url,self.args) = excerpt(name, title, body, blogname)
+	#print 'In link constructor: %s %s %s %s' % (name, title, body, blogname)
+        self.feed(body)
+
+    def start_a(self, attrs):
+        attrs = dict(attrs)
+	print 'attrs = %s ' % attrs
+        if attrs.has_key('href'):
+            try:
+                href = attrs['href']
+		print 'href = %s' % href
+                trackback,pingback = backrefs(href)
+                self.trackbacks = self.trackbacks + trackback
+                self.pingbacks  = self.pingbacks  + pingback
+		print trackback
+		print pingback
+            except:
+                logger.error("Caught Exception")
+
+
+tb_re=re.compile('(<rdf:RDF .*?</rdf:RDF>)')
+pb_re=re.compile('<link rel="pingback" href="([^"]+)" ?/?>')
+def backrefs(href):
+    """ href -> ([trackbacks],[pingbacks])
+
+    Parse a given html page, and retrieve the rdf:about, X-Pingback header,
+    or pingback link information associated with a given href.  At most
+    one is returned (in the above priority).
+    """
+
+    base = href.split("#")[0]
+    file = urllib.urlopen(base)
+    info = file.info()
+    data = file.read().replace('\n',' ')
+    print base
+    file.close()
+
+    trackback = []
+    pingback = pb_re.findall(data)[:1]
+
+    for x in tb_re.findall(data):
+        try:
+            parseString(x, rdf())
+        except SAXParseException:
+            pass
+
+    if info.has_key("X-Pingback"): pingback=[info["X-Pingback"]]
+    if rdf.ids.has_key(href): trackback = [rdf.ids[href]]
+    if not trackback and not pingback and href.find("#")>0:
+        if rdf.ids.has_key(base): trackback = [rdf.ids[base]]
+
+    if trackback: pingback=[]
+    if pingback:  pingback=[(href, pingback[0])]
+
+    return (trackback, pingback)
+
+
+class rdf(ContentHandler):
+    """ xml -> dictionary of {dc:identifier => trackback:ping|rdf:about}
+
+    Parse an RDF fragment taken from an html page, and record the
+    trackback:ping (or about / rdf:about) value for each dc:identifier.
+    """
+
+    ids = {}
+    def startElement(self, name, attrs):
+        if name == 'rdf:Description':
+            attrs=dict(attrs)
+            if attrs.has_key('dc:identifier'):
+                if attrs.has_key('trackback:ping'):
+                        self.ids[attrs['dc:identifier']] = attrs['trackback:ping']
+                elif attrs.has_key('about'):
+                        self.ids[attrs['dc:identifier']] = attrs['about']
+                elif attrs.has_key('rdf:about'):
+                        self.ids[attrs['dc:identifier']] = attrs['rdf:about']
+
+def trackback(parser):
+    """ parser -> None
+
+    Ping all trackbacks encountered with the url, title, blog_name, and 
+    excerpt.
+    """
+    print 'Entered trackback parse'
+    for url in parser.trackbacks:
+        try:
+            logger.info("")
+            logger.info("*** Trackback " + url)
+            logger.info(parser.args)
+            if url.find('?tb_id=') >= 0:
+                file=urllib.urlopen(url + "&" + parser.args)
+            else:
+                file=urllib.urlopen(url, parser.args)
+            logger.info(file.read())
+            file.close()
+        except Exception, e:
+            logger.error(e)
+
+
+def pingback(parser):
+    """ parser -> None
+
+    Ping all pingbacks encountered with the source and targets
+    """
+
+    for target,server in parser.pingbacks:
+        try:
+            logger.info("")
+            logger.info("*** Pingback " + server)
+            server=xmlrpclib.Server(server)
+	    print 'Parser: ' + parser.url + target
+            logger.info(server.pingback.ping(parser.url,target))
+        except Exception, e:
+            logger.error(e)
+
+def autoping(name):
+    import os, sys
+    from Pyblosxom.pyblosxom import PyBlosxom
+    from config import py as cfg
+    
+    if cfg.has_key("codebase"):
+        import sys
+        sys.path.insert(0, cfg["codebase"])
+
+    env = {}
+    # names taken from wsgi instead of inventing something new
+    env['wsgi.input'] = sys.stdin
+    env['wsgi.errors'] = sys.stderr
+    env['wsgi.url_scheme'] = "http"
+    if os.environ.get("HTTPS") in ('yes','on','1'):
+    	env['wsgi.url_scheme'] = "https"
+    # setup url_scheme for static rendering
+    if not os.environ.get("REQUEST_METHOD", ""):
+    	if 'base_url' in cfg:
+		env['wsgi.url_scheme'] = cfg['base_url'][:cfg['base_url'].find("://")]
+		
+    for mem in ["HTTP_HOST", "HTTP_USER_AGENT", "HTTP_REFERER", "PATH_INFO",
+                "QUERY_STRING", "REMOTE_ADDR", "REQUEST_METHOD", "REQUEST_URI",
+		"SCRIPT_NAME", "HTTP_IF_NONE_MATCH", "HTTP_IF_MODIFIED_SINCE",
+		"HTTP_COOKIE", "CONTENT_LENGTH", "HTTP_ACCEPT", "HTTP_ACCEPT_ENCODING"]:
+	env[mem] = os.environ.get(mem, "")
+
+    data=None
+    request = Request(cfg, env, data)
+    # Load up the cache (You can just import the base cache here)
+    cache_driver = tools.importName('Pyblosxom.cache', cfg.get('cacheDriver', 'base'))
+    cache = cache_driver.BlosxomCache(request, cfg.get('cacheConfig', ''))
+    try:
+        filename = os.path.join(cfg['datadir'], name)
+	logger.error(filename)
+        entryData = {}
+        cache.load(filename)
+        # Look for cached documents
+        if cache.isCached():
+            entryData = cache.getEntry()
+         
+        # Not cached? Try our entryparsers then.
+        if not entryData:
+            fileExt = re.search(r'\.([\w]+)$', filename)
+            try:
+                entryData = blosxom_entry_parser(filename, request)
+            except IOError, e:
+                logger.error(e)
+		
+        name = re.sub(cfg['datadir'],'',name)
+        parser = link(name, entryData['title'].strip(), entryData['body'].strip(), cfg['blog_title'])
+	logger.error(parser)
+        trackback(parser)
+        pingback(parser)
+    except Exception, e:
+        logger.error(e)
+    
+
+if __name__ == '__main__':
+    for name in sys.argv[1:]:
+        autoping(name)
diff --git a/www-apps/pyblosxom-plugins/pyblosxom-plugins-1.3.2.ebuild b/www-apps/pyblosxom-plugins/pyblosxom-plugins-1.3.2.ebuild
index 44026b9..c3a909b 100644
--- a/www-apps/pyblosxom-plugins/pyblosxom-plugins-1.3.2.ebuild
+++ b/www-apps/pyblosxom-plugins/pyblosxom-plugins-1.3.2.ebuild
@@ -40,6 +40,7 @@ src_install() {
              blocks.py
              trackback.py
              commentAPI.py
+             autoping.py
              xmlrpc_pingback.py
              wbglast10summary.py"
author	Gunnar Wrobel <wrobel@gentoo.org>	2006-09-24 08:28:38 +0000
committer	Gunnar Wrobel <wrobel@gentoo.org>	2006-09-24 08:28:38 +0000
commit	017aa77364b3174d8037b68e5541c631eb74a444 (patch)
tree	5dc7a57a5b45e6ae90db80c44302aa17fac07f9c
parent	Updated pardus library to 0.9. Bug fix 2. (diff)
download	overlay-017aa77364b3174d8037b68e5541c631eb74a444.tar.gz overlay-017aa77364b3174d8037b68e5541c631eb74a444.tar.bz2 overlay-017aa77364b3174d8037b68e5541c631eb74a444.zip