1
2
3
4 """
5 Contains a plugin that harvests all vacancies from the Stellenbörse from
6 C{http://www.uni-freiburg.de/universitaet/organisation/stellenboerse} .
7
8 @author: Johannes Schwenk
9 @copyright: 2010, Johannes Schwenk
10 @version: 1.0
11 @date: 2010-09-15
12
13
14 """
15
16 import sys
17 import shutil
18
19
# HACK (Python 2 only): site.py deletes sys.setdefaultencoding() at startup;
# reload(sys) restores it so the process-wide default encoding can be forced
# to UTF-8. This is widely discouraged and has no Python 3 equivalent --
# kept as-is because downstream unicode handling presumably relies on it
# (TODO confirm before porting).
20 reload(sys)
21 sys.setdefaultencoding('utf-8')
22
23 from tools.BeautifulSoup import BeautifulStoneSoup
24 from xmlgetter.plugin import BaseSyncPlugin
25 from xmlgetter.xml import XMLEntry
26
27
29 """
30 A plugin that harvests all vacancies from the Stellenbörse from
31 C{http://www.uni-freiburg.de/universitaet/organisation/stellenboerse} .
32
33 There is no need for consolidation, so L{_getData} is the only function
34 that is overridden.
35
36 """
37
def _getData(self):
    """
    Get all vacancies from the Stellenboerse.

    It loads all vacancies from the URL provided in L{config.PLUGINS} as
    XML and then fetches the details for each entry separately. Every
    harvested vacancy is appended to C{self._entries} as an L{XMLEntry}
    and flushed to disk in batches of ten via L{_writeEntries}.

    @return: C{True} if at least one entry was harvested and the
        intermediate file was moved into place, C{False} otherwise.
    """
    # Reset the per-run counters before harvesting.
    self._stats.entries = 0
    self._stats.new_entries = 0

    # Fetch the XML index listing all vacancies.
    response = self._requestURL(self._url)
    if not response:
        return False

    index_soup = BeautifulStoneSoup(response.read())
    stellen = index_soup.findAll(u'stelle')
    if not stellen:
        self.logger.warn(u'No soup!')
        return False

    def _text(tag):
        # Return the tag's string content, or u'' when the tag is missing
        # or has no (or an empty) string. Replaces the bug-prone
        # `tag and tag.string or u''` idiom with identical semantics.
        return tag.string if tag and tag.string else u''

    # Template for the full-description URL of a single vacancy; the
    # entry's stellenid is interpolated for each index entry. Formatting
    # the whole template replaces the old `+`-concatenation where the
    # `%` operator bound only to the last fragment.
    detail_url_template = (
        u'http://info.verwaltung.uni-freiburg.de/servuni/'
        u'stellenuni.abfr1?ausgabeart=xml&stellenid=%s&layout=v3')

    for stelle in stellen:
        url = detail_url_template % stelle[u'stellenid']
        response = self._requestURL(url)
        if not response:
            self.logger.warn(u'Could not retrieve entry!')
            continue
        detail_soup = BeautifulStoneSoup(response.read())

        title = _text(detail_soup.titel)
        descr = _text(detail_soup.beschreibung)
        pubdat = _text(detail_soup.publikationsdatum)
        creator = _text(detail_soup.ansprechpartner)
        # The searchable content is the concatenation of the remaining
        # detail fields (header, short description, application address,
        # inquiry contact and addresses).
        content = u'%s %s %s %s %s %s %s' % (
            _text(detail_soup.kopfzeile),
            _text(detail_soup.kurzbeschreibung),
            _text(detail_soup.bewerbungsadresse),
            _text(detail_soup.rueckfragen),
            _text(detail_soup.rueckemail),
            _text(detail_soup.email),
            _text(detail_soup.telefon),)

        entry = XMLEntry(url, title, content, descr, created=pubdat,
                         creator=creator, portal_type=u'_stb_entry',
                         sources=[self._source_name, ])

        self._stats.entries += 1
        # NOTE(review): static_entries is incremented but never reset
        # here -- presumably initialized by the base class; confirm.
        self._stats.static_entries += 1

        self._entries.append(entry)
        # Flush in batches of ten to keep memory usage bounded.
        if len(self._entries) % 10 == 0:
            self._writeEntries()

    self.logger.debug(u'Writing remaining entries')
    self._writeEntries()

    if self._stats.entries == 0:
        self.logger.warn(u'No entries!')
        self._stats.messages.append(u'WARNING: No entries! This could be'
            u' due to the webpage being unreachable!')
        self._stats.status = u'W'
        return False

    # Atomically publish the harvested data.
    shutil.move(self._intermediate_temp_filename, self._temp_filename)
    return True
131