root/confluence/feedfilters/twitterfilter.py

Revision 797, 2.1 kB (checked in by confluence, 10 days ago)

added keywords to filter

  • Property svn:executable set to *
Line 
1#!/usr/bin/env python
2
3# This filter removes posts which contain any of the specified keywords
4# from a Twitter feed.  It also makes urls and @usernames into clickable
5# links, and shortens item titles to poster usernames.
6#
7# Written by Adrianna Pinska
8# Licence: GPLv3
9
10import sys
11import re
12import xml.dom.minidom
13
# Keywords / regular expressions marking tweets that should be dropped.
# Every entry is regex source (joined with "|" below), so literal
# punctuation such as "." must be escaped.  Raw strings keep the
# backslashes intact without relying on Python passing unknown escape
# sequences through.
noise = [
    r"#postcrossing",
    r"Just helped someone .* \.\.\.on Aardvark!",
    r"#ebz",
]

# One alternation matching any noise entry.  NOTE: with an empty ``noise``
# list this compiles to "()" which matches everything, so users must check
# ``noise`` for emptiness before trusting a match.
noisepattern = re.compile("(%s)" % "|".join(noise))
# Captures everything before the first ":" of an item title.
titlepattern = re.compile(r"^([^:]*):.*$", re.DOTALL)
# A URL runs up to the next whitespace character of any kind, so a link
# followed by a newline or tab does not swallow the text after it.
linkurls = re.compile(r"(https?://\S*)")
# "@username" mentions; group 1 is the bare username.
linknames = re.compile(r"@([a-zA-Z0-9_]+)")
# "#tag" at the start of the text or after a space; group 1 is the leading
# space (or empty), group 2 is the tag without the "#".
hashtags = re.compile(r"(^| )#([a-zA-Z0-9_]+)")
# Extracts the numeric status id (group 1) from an item guid URL.
statusidpattern = re.compile(r"http://twitter.com/.*/statuses/([0-9]*)")
23
def url_replace(mobj):
    """Return an HTML anchor for the URL captured in group 1 of *mobj*.

    The same (encoded) URL is used as both the ``href`` value and the
    visible link text.

    "@" is percent-encoded, presumably so that a later @username
    linkifying pass over the same text does not rewrite part of the URL.
    Quote and angle-bracket characters are percent-encoded as well: the
    URL comes from untrusted feed text and is placed inside a
    single-quoted attribute, so a raw "'" would otherwise terminate the
    attribute and allow markup injection.
    """
    url = mobj.group(1)
    for char, encoded in (
        ("@", "%40"),
        ("'", "%27"),
        ('"', "%22"),
        ("<", "%3C"),
        (">", "%3E"),
    ):
        url = url.replace(char, encoded)
    return "<a href='%s'>%s</a>" % (url, url)
27
# Parse the RSS feed from standard input.  Bytes are read where the stream
# exposes a byte-level ``buffer`` (Python 3) so the parser can honour the
# encoding declared in the XML itself; otherwise the stream is read
# directly, as on Python 2.  The document is bound to ``doc`` rather than
# ``xml`` so the imported ``xml`` package is not shadowed.
in_stream = getattr(sys.stdin, "buffer", sys.stdin)
doc = xml.dom.minidom.parseString(in_stream.read())

channel = doc.getElementsByTagName("rss")[0].getElementsByTagName("channel")[0]
for item in channel.getElementsByTagName("item"):
    description = item.getElementsByTagName("description")[0].firstChild
    text = description.data

    # Drop the first georss:point element from the item.
    # NOTE(review): the reason is not recorded here; presumably a
    # downstream consumer cannot handle it -- confirm.
    weird_tags = item.getElementsByTagName("georss:point")
    if weird_tags:
        item.removeChild(weird_tags[0])

    # Remove the whole item when its text matches any noise keyword.  The
    # ``noise`` check matters: an empty list compiles to a pattern that
    # matches everything.
    if noise and noisepattern.search(text):
        channel.removeChild(item)
        item.unlink()
        continue

    # Shorten the title to the part before the first ":" (the poster).
    title = item.getElementsByTagName("title")[0].firstChild
    title.data = titlepattern.sub(r"\1", title.data)
    username = title.data.strip(":")

    # The guid URL carries the status id needed for the Reply link.  It is
    # None when the guid does not have the expected shape.
    guid = item.getElementsByTagName("guid")[0].firstChild
    status_match = statusidpattern.match(guid.data)

    # URLs are linkified first; url_replace encodes "@" so the username
    # pass below leaves the generated anchors alone.
    text = linkurls.sub(url_replace, text)
    text = linknames.sub(r"<a href='http://twitter.com/\1'>@\1</a>", text)
    # Group 1 is the leading space (kept in front of the anchor); group 2
    # is the tag itself and is what the search query must contain.
    text = hashtags.sub(r"\1<a href='http://search.twitter.com/search?q=%23\2'>#\2</a>", text)

    # Embolden the leading "username:" prefix.  A plain prefix test is used
    # instead of a regex built from the username so that characters special
    # to regular expressions in a title cannot change the match.
    prefix = username + ":"
    if text.startswith(prefix):
        text = "<b>%s</b>%s" % (prefix, text[len(prefix):])

    # Only offer a Reply link when a status id could be extracted; a guid
    # in an unexpected format no longer aborts the whole run.
    if status_match is not None:
        statusid = status_match.group(1)
        text += " <a href='http://twitter.com/home?status=@%s%%20&in_reply_to_status_id=%s&in_reply_to=%s'>Reply</a>" % (username, statusid, username)
    description.data = text

# Emit the filtered feed as UTF-8 bytes followed by a newline (what the
# Python 2 ``print`` statement produced), in a way that also works where
# sys.stdout is a text stream with a ``buffer`` attribute.
out_stream = getattr(sys.stdout, "buffer", sys.stdout)
out_stream.write(doc.toxml(encoding="UTF-8") + b"\n")
Note: See TracBrowser for help on using the browser.