#!/usr/bin/env python

# This filter removes posts which contain any of the specified keywords
# from a Twitter feed. It also makes urls, @usernames and #hashtags into
# clickable links, appends a "Reply" link to each post, and shortens item
# titles to poster usernames.
#
# The feed is read from standard input and the filtered feed is written to
# standard output as UTF-8.
#
# Written by Adrianna Pinska
# Licence: GPLv3

import sys
import re
import xml.dom.minidom

# Add crap you don't want to see to this list:
noise = ["#postcrossing", "[Nn]et[Pp]rophet", "#[Aa]frica09"]

# One alternation of all noise entries; each entry is a regular expression.
noisepattern = re.compile("(%s)" % "|".join(noise))
# Captures everything before the first colon of a title (the poster's name).
titlepattern = re.compile(r"^([^:]*):.*$", re.DOTALL)
linkurls = re.compile(r"(https?://[^ ]*)")
linknames = re.compile(r"@([a-zA-Z0-9_]+)")
hashtags = re.compile(r"#([a-zA-Z0-9_]+)")
statusidpattern = re.compile(r"http://twitter.com/.*/statuses/([0-9]*)")


def linkify(text):
    """Return text with urls, @usernames and #hashtags wrapped in <a> tags.

    Urls are located first and the @username / #hashtag substitutions are
    applied only to the text *between* urls. Running them over the whole
    string would re-link any "#fragment" or "@" inside a url that has
    already been turned into an anchor and produce broken nested markup.
    """
    # linkurls has exactly one capturing group, so split() returns
    # alternating pieces: even indexes are plain text, odd indexes are urls.
    pieces = linkurls.split(text)
    for index, piece in enumerate(pieces):
        if index % 2:
            pieces[index] = "<a href='%s'>%s</a>" % (piece, piece)
        else:
            piece = linknames.sub(
                r"<a href='http://twitter.com/\1'>@\1</a>", piece)
            piece = hashtags.sub(
                r"<a href='http://search.twitter.com/search?q=%23\1'>#\1</a>",
                piece)
            pieces[index] = piece
    return "".join(pieces)


def _text_node(item, tag):
    """Return the first child of item's first <tag> element if it carries
    character data, otherwise None (element missing, empty, or non-text)."""
    elements = item.getElementsByTagName(tag)
    if not elements:
        return None
    node = elements[0].firstChild
    # hasattr(None, "data") is False, so an empty element also yields None.
    return node if hasattr(node, "data") else None


def _process_item(item):
    """Drop item if it is noise, otherwise rewrite its title and description.

    Items without description text are left untouched. The Reply link is
    only appended when both the poster's username and the status id could
    be determined.
    """
    description = _text_node(item, "description")
    if description is None:
        return
    text = description.data

    # With an empty noise list the compiled pattern would match every post,
    # hence the explicit check on the list itself.
    if noise and noisepattern.search(text):
        item.parentNode.removeChild(item)
        item.unlink()
        return

    username = None
    title = _text_node(item, "title")
    if title is not None:
        title.data = titlepattern.sub(r"\1", title.data)
        username = title.data.strip(":")

    statusid = None
    guid = _text_node(item, "guid")
    if guid is not None:
        found = statusidpattern.match(guid.data)
        if found:
            statusid = found.group(1)

    text = linkify(text)
    if username is not None and statusid is not None:
        text += " <a href='http://twitter.com/home?status=@%s%%20&in_reply_to_status_id=%s&in_reply_to=%s'>Reply</a>" % (username, statusid, username)
    description.data = text


def filter_feed(source):
    """Parse the RSS document in source, filter it, and return the document.

    source is the raw feed (bytes or text) as accepted by
    xml.dom.minidom.parseString. Raises IndexError if the document has no
    <rss> or no <channel> element.
    """
    doc = xml.dom.minidom.parseString(source)
    channel = doc.getElementsByTagName("rss")[0].getElementsByTagName("channel")[0]
    # minidom builds the list of items up front, so removing items from the
    # tree while looping over it is safe.
    for item in channel.getElementsByTagName("item"):
        _process_item(item)
    return doc


def main():
    """Filter the feed on standard input and write it to standard output."""
    # Use the underlying byte streams where they exist (Python 3) so the XML
    # parser sees the raw bytes and the UTF-8 output is written unmodified;
    # on Python 2 the standard streams already handle byte strings.
    stdin = getattr(sys.stdin, "buffer", sys.stdin)
    stdout = getattr(sys.stdout, "buffer", sys.stdout)
    doc = filter_feed(stdin.read())
    stdout.write(doc.toxml(encoding="UTF-8"))
    stdout.write(b"\n")


if __name__ == "__main__":
    main()