1
2
3
4 """
5 Contains a plugin that harvests all events from the Veranstaltungskalender from
6 C{http://www.uni-freiburg.de/universitaet/presse/veranstaltungskalender} .
7
8 @author: Johannes Schwenk
9 @copyright: 2010, Johannes Schwenk
10 @version: 1.0
11 @date: 2010-09-15
12
13
14 """
15
16 import sys
17 import shutil
18
19
20 reload(sys)
21 sys.setdefaultencoding('utf-8')
22
23
24 from tools.BeautifulSoup import BeautifulStoneSoup
25 from xmlgetter.plugin import BaseSyncPlugin
26 from xmlgetter.xml import XMLEntry
27
28
30 """
31 A plugin that harvests all events from the Veranstaltungskalender from
32 C{http://www.uni-freiburg.de/universitaet/presse/veranstaltungskalender} .
33
34 There is no need for consolidation, so L{_getData} is the only Function
35 that is overridden.
36
37 """
38
39
def _getData(self):
    """
    Get all events from the Veranstaltungskalender.

    Requests C{self._url}, parses the response as XML and turns every
    C{veranstaltung} element into one L{XMLEntry}. For each event a link
    to its detail page is built from its C{langbeschreibungid} and the
    C{id} attribute of its C{termin}; the detail page itself is not
    requested here. Entries are appended to C{self._entries} and handed
    to C{self._writeEntries()} whenever the list length is a multiple of
    ten, and once more after the loop. On success the intermediate
    temporary file is moved to C{self._temp_filename}.

    Events lacking a C{termin} or C{langbeschreibungid} element cannot be
    linked and are skipped with a warning instead of aborting the run.

    @return: C{True} if at least one entry was harvested, C{False} if the
        request failed, the feed contained no events or no entry could
        be built.
    @rtype: C{bool}

    """

    def text(tag, default=None):
        # Attribute-style child lookup yields None for an absent element,
        # and ``.string`` is None for an element without a single text
        # child; both cases collapse to ``default``.
        value = tag.string if tag is not None else None
        if value is None:
            return default
        return value

    self._stats.entries = 0
    self._stats.new_entries = 0

    response = self._requestURL(self._url)
    if not response:
        return False

    soup = BeautifulStoneSoup(response.read())
    if not soup:
        self.logger.warn(u'No soup!')
        return False

    veranstaltungen = soup.findAll('veranstaltung')
    if not veranstaltungen:
        # Distinct from the parse failure above, so the log shows which
        # of the two checks actually failed.
        self.logger.warn(u'No veranstaltungen!')
        return False

    for veranstaltung in veranstaltungen:
        termin = veranstaltung.termin
        beschreibung_id = veranstaltung.langbeschreibungid
        if termin is None or beschreibung_id is None:
            # Both are needed for the detail URL; one malformed event
            # must not abort the whole harvest.
            self.logger.warn(u'Skipping veranstaltung without termin or '
                             u'langbeschreibungid!')
            continue

        url = (u"http://www.uni-freiburg.de/universitaet/presse/"
               u"veranstaltungskalender/?layout=v3&modus=9&"
               u"beschreibungid=%s&terminid=%s"
               % (beschreibung_id.string, termin["id"]))

        # Free-text content: empty or absent fields contribute an empty
        # string rather than the literal text "None".
        content_tags = (
            veranstaltung.publikationsdatum,
            termin.zeitpunkt,
            termin.ende,
            termin.ort,
            termin.referent,
            veranstaltung.telefon,
            veranstaltung.fax,
            veranstaltung.email,
            veranstaltung.url,
            veranstaltung.ansprechpartner,
            veranstaltung.veranstalter,
        )
        content = u" ".join([text(tag, u'') for tag in content_tags])

        entry = XMLEntry(url,
                         text(termin.thema),
                         content,
                         text(veranstaltung.beschreibung),
                         created=text(veranstaltung.publikationsdatum),
                         creator=text(veranstaltung.veranstalter),
                         portal_type=u'_vkal_entry',
                         sources=[self._source_name])
        self._entries.append(entry)

        self._stats.entries += 1
        # NOTE(review): only ``new_entries`` is reset at the top of this
        # method, yet ``static_entries`` is the counter incremented here
        # -- confirm which one is intended.
        self._stats.static_entries += 1

        # Hand entries over in batches of ten.
        # NOTE(review): presumably _writeEntries() empties self._entries
        # -- verify against the base class.
        if len(self._entries) % 10 == 0:
            self._writeEntries()

    self.logger.debug(u'Writing remaining entries')
    self._writeEntries()

    if self._stats.entries == 0:
        self.logger.warn(u'No entries!')
        self._stats.messages.append(u'WARNING: No entries! This could be'
            u' due to the webpage beeing unreachable!')
        self._stats.status = u'W'
        return False

    shutil.move(self._intermediate_temp_filename, self._temp_filename)
    return True
117