root/confluence/feedfilters/onemanga.py

Revision 706, 1.6 kB (checked in by confluence, 7 months ago)

added stuff

  • Property svn:executable set to *
Line 
#!/usr/bin/env python

# This is a generic filter for selecting items in an rss feed on the basis of
# subelement blacklists and whitelists.
#
# The feed is read from stdin and the filtered feed is written to stdout as
# UTF-8 encoded XML.
#
# Written by Adrianna Pinska
# Licence: GPLv3

import sys
import re
import xml.dom.minidom

# Maps an <item> sub-element name to a list of regular-expression fragments.
# When any whitelist is configured, an item is kept only if at least one
# whitelisted sub-element matches one of its fragments.  Fragments are joined
# with "|" and searched for anywhere in the text (they are not anchored).
include = {
    "title": ["Ressentiment", "Pluto", "Vinland Saga", "Twin Spica", "Historie", "Full Metal Alchemist", "Billy Bat", "Cherry", "Cesare"],
}

# Same shape as `include`; an item is dropped if any listed sub-element
# matches one of its fragments.
exclude = {
}


def _compile_filters(patterns):
    """Return a dict mapping each element name to one compiled alternation.

    `patterns` maps an element name to a list of regex fragments; the
    fragments are joined with "|" inside a single group.
    """
    return dict(
        (element, re.compile("(%s)" % "|".join(fragments)))
        for element, fragments in patterns.items()
    )


whitelist = _compile_filters(include)
blacklist = _compile_filters(exclude)

# Kept as a module-level name for backward compatibility; the filtering code
# derives the set from whichever filters it is actually given.
elements_of_interest = set(whitelist) | set(blacklist)


def _element_text(item, name):
    """Return the data of the first child node of the first `name` element.

    Returns an empty string when `item` has no such element, when the element
    is empty, or when its first child carries no character data, so that a
    sparse feed item cannot crash the filter.
    """
    nodes = item.getElementsByTagName(name)
    if not nodes or nodes[0].firstChild is None:
        return ""
    return getattr(nodes[0].firstChild, "data", "")


def _is_wanted(item, allow, deny):
    """Return True if `item` passes the `allow` and `deny` regex filters."""
    text = dict(
        (name, _element_text(item, name)) for name in set(allow) | set(deny)
    )
    # An empty whitelist means "no whitelist": everything is allowed through.
    if allow and not any(rx.search(text[name]) for name, rx in allow.items()):
        return False
    return not any(rx.search(text[name]) for name, rx in deny.items())


def filter_feed(source, allow=None, deny=None):
    """Return the RSS document `source` with unwanted items removed.

    `source` is the feed as a string (bytes are preferred so that the parser
    can honour the document's own encoding declaration).  `allow` and `deny`
    map element names to compiled regexes and default to the module-level
    `whitelist` and `blacklist`.

    The result is UTF-8 encoded XML with trailing spaces before newlines
    stripped and runs of newlines collapsed to one.

    Raises IndexError if the document has no <rss>/<channel> element, and
    whatever the XML parser raises on malformed input.
    """
    allow = whitelist if allow is None else allow
    deny = blacklist if deny is None else deny

    doc = xml.dom.minidom.parseString(source)
    channel = doc.getElementsByTagName("rss")[0].getElementsByTagName("channel")[0]

    # getElementsByTagName returns a list built up front, so removing nodes
    # while walking it is safe.
    for item in channel.getElementsByTagName("item"):
        if not _is_wanted(item, allow, deny):
            # Decide once, remove once: removing the same node a second time
            # would raise because it is no longer a child of its parent.
            item.parentNode.removeChild(item)
            item.unlink()

    # toxml(encoding=...) yields a byte string, hence the bytes patterns.
    text = doc.toxml(encoding="UTF-8")
    text = re.sub(b" *\n", b"\n", text)
    return re.sub(b"\n+", b"\n", text)


def main():
    """Filter the feed on stdin and write the result plus a newline to stdout."""
    # Python 3 exposes the raw byte streams as `.buffer`; Python 2 streams
    # are already byte-oriented.
    stdin = getattr(sys.stdin, "buffer", sys.stdin)
    stdout = getattr(sys.stdout, "buffer", sys.stdout)
    stdout.write(filter_feed(stdin.read()) + b"\n")


if __name__ == "__main__":
    main()
Note: See TracBrowser for help on using the browser.