| 1 | #!/usr/bin/env python |
|---|
| 2 | |
|---|
| 3 | # This filter removes posts which contain any of the specified keywords |
|---|
| 4 | # from a Twitter feed. It also makes urls and @usernames into clickable |
|---|
| 5 | # links, and shortens item titles to poster usernames. |
|---|
| 6 | # |
|---|
| 7 | # Written by Adrianna Pinska |
|---|
| 8 | # Licence: GPLv3 |
|---|
| 9 | |
|---|
| 10 | import sys |
|---|
| 11 | import re |
|---|
| 12 | import xml.dom.minidom |
|---|
| 13 | |
|---|
# Add anything you don't want to see to this list. Each entry is a regular
# expression (not a plain keyword): the entries are joined into a single
# alternation below, so regex metacharacters must be escaped. Raw strings keep
# backslash escapes such as "\." intact without relying on Python passing
# unknown escape sequences through.
noise = [
    r"#postcrossing",
    r"Just helped someone .* \.\.\.on Aardvark!",
    r"#ebz",
]

# Matches when any noise entry occurs anywhere in the searched text. With an
# empty `noise` list this compiles to "()", which matches every string, so
# users of this pattern must check that `noise` is non-empty first.
noisepattern = re.compile("(%s)" % "|".join(noise))

# Captures everything before the first colon of a title; DOTALL lets the
# remainder span several lines.
titlepattern = re.compile(r"^([^:]*):.*$", re.DOTALL)

# An http(s) URL, running up to the next space character.
linkurls = re.compile(r"(https?://[^ ]*)")

# An @username mention; group 1 is the name without the "@".
linknames = re.compile(r"@([a-zA-Z0-9_]+)")

# A hashtag at the start of the text or after a space. Group 1 is that leading
# space (or the empty string), group 2 is the tag without the "#".
hashtags = re.compile(r"(^| )#([a-zA-Z0-9_]+)")

# Captures the numeric status id from a status permalink. The dot in the host
# is escaped so it only matches a literal ".", and both http and https
# permalinks are accepted.
statusidpattern = re.compile(r"https?://twitter\.com/.*/statuses/([0-9]*)")
|---|
| 23 | |
|---|
# Characters percent-encoded before a URL is embedded in generated markup.
# "@" is encoded so that a pass looking for @username mentions in the
# resulting text finds nothing to rewrite inside the URL. The quote and angle
# brackets are encoded because the URL comes from feed text and would
# otherwise be able to close the single-quoted href attribute or open a tag.
# None of the replacements contains a character that is itself replaced, so
# the order of the pairs does not matter.
_URL_ESCAPES = (
    ("@", "%40"),
    ("'", "%27"),
    ("<", "%3C"),
    (">", "%3E"),
)


def url_replace(mobj):
    """Return an HTML anchor for the URL captured by a regex match.

    Intended for use as the replacement callback of ``re.sub``.

    Args:
        mobj: a match object whose group 1 holds the URL text.

    Returns:
        A string ``<a href='URL'>URL</a>`` in which the URL has had the
        characters listed in ``_URL_ESCAPES`` percent-encoded, in both the
        href and the visible link text.
    """
    url = mobj.group(1)
    for char, encoded in _URL_ESCAPES:
        url = url.replace(char, encoded)
    return "<a href='%s'>%s</a>" % (url, url)
|---|
| 27 | |
|---|
# Parse the whole feed from standard input. Where the stream exposes an
# underlying byte buffer (Python 3) the raw bytes are handed to the parser;
# on Python 2 sys.stdin has no such attribute and is read directly, exactly
# as before.
# NOTE: this rebinds the module-level name `xml` from the package to the
# parsed document, so the `xml` package is not reachable under that name
# after this line. The name is kept in case other code relies on it.
xml = xml.dom.minidom.parseString(getattr(sys.stdin, "buffer", sys.stdin).read())

channel = xml.getElementsByTagName("rss")[0].getElementsByTagName("channel")[0]
for item in channel.getElementsByTagName("item"):
    description = item.getElementsByTagName("description")[0].firstChild
    text = description.data

    # Strip the first georss:point element from the item, if there is one.
    weird_tags = item.getElementsByTagName("georss:point")
    if weird_tags:
        item.removeChild(weird_tags[0])

    # Drop the whole item when its description matches a noise pattern. The
    # `noise` check matters: with an empty list the compiled pattern would
    # match every description.
    if noise and noisepattern.search(text):
        channel.removeChild(item)
        item.unlink()
        continue

    # Shorten the title to the part before the first colon (the poster's
    # username).
    title = item.getElementsByTagName("title")[0].firstChild
    title.data = titlepattern.sub(r"\1", title.data)
    username = title.data.strip(":")

    # The guid is expected to be the status permalink; the match is None
    # when it has some other form.
    guid = item.getElementsByTagName("guid")[0].firstChild
    status_match = statusidpattern.match(guid.data)

    # URLs first: url_replace encodes "@" inside them, so the username pass
    # below cannot rewrite part of a link.
    text = linkurls.sub(url_replace, text)
    text = linknames.sub(r"<a href='http://twitter.com/\1'>@\1</a>", text)
    # Group 1 is the space (or nothing) before the "#", group 2 is the tag.
    # The search query must use the tag, i.e. group 2.
    text = hashtags.sub(r"\1<a href='http://search.twitter.com/search?q=%23\2'>#\2</a>", text)

    # Embolden the leading "username:" prefix. A literal prefix comparison
    # is used so that characters in the username are never interpreted as
    # regex or replacement-template syntax.
    prefix = username + ":"
    if text.startswith(prefix):
        text = "<b>%s</b>%s" % (prefix, text[len(prefix):])

    # Append a Reply link; skipped when no status id could be read from the
    # guid rather than failing on the missing match.
    if status_match is not None:
        statusid = status_match.group(1)
        text += " <a href='http://twitter.com/home?status=@%s%%20&in_reply_to_status_id=%s&in_reply_to=%s'>Reply</a>" % (username, statusid, username)
    description.data = text

# Emit the document as UTF-8 followed by a newline. toxml() with an encoding
# returns a byte string, so it is written to the byte-level stdout where one
# exists (Python 3); on Python 2 this is the same output the print statement
# produced.
output = xml.toxml(encoding="UTF-8")
getattr(sys.stdout, "buffer", sys.stdout).write(output + b"\n")
|---|