1
2
3
4 """
This is the configuration file for the package.
6
7 The variables L{PORTALS} and L{PLUGINS} together hold all sources to be
8 included in the search index.
9
10
11 @author: Johannes Schwenk
12 @copyright: 2010, Johannes Schwenk
13 @version: 2.0
14 @date: 2010-09-15
15
16
17 """
18
19 import os
20 import os.path
21 import codecs
22 import logging
23
24 from datetime import datetime
25
26
27
28
29
USER_AGENT = (
    u'Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.1.5) '
    u'Gecko/20091102 Firefox/3.5.5'
)
"""
User agent string the client sends to identify itself to servers.

@type: string

"""
38
39
40
41
42
PORTAL_PLUGIN_NAME = u'portal'
"""
Module name of the generic plugin that handles Plone portals.

@type: string

"""

PORTAL_RETRY_WAIT = 10
"""
Seconds to wait before retrying when the portal's server could not fulfill
a request.

@type: int

"""

MAX_PORTAL_RETRIES = 3
"""
Maximum number of retries before a portal request counts as failed. Between
two attempts the client waits for L{PORTAL_RETRY_WAIT} seconds.

@type: int

"""

PORTAL_REQUEST_INCREMENT = 3
"""
How many entries to fetch from the portal's server per incremental request.

@type: int

"""

REQUEST_TIMEOUT = 20 * 60  # 1200 seconds
"""
Seconds to wait for the server's response.

@type: int

"""

LAST_QUERY_DEFAULT = datetime(year=1970, month=1, day=1)
"""
Date from which portals are queried when no last update is specified, e.g.
on the first run.

@see: L{BaseSyncPlugin} and L{PortalSourceState}

@type: datetime

"""
96
PORTALS = [
    {
        u'url': (u'http://cmsdev.rektorat.uni-freiburg.de:23456/'
                 u'remoteSyncQueryXML'),
        u'name': u'cmsdev',
    },
    {
        u'url': (u'http://zope5.ruf.uni-freiburg.de:12285/exzellenz/'
                 u'remoteSyncQueryXML'),
        u'name': u'exzellenz',
    },
    {
        u'url': (u'http://zope5.ruf.uni-freiburg.de:12285/podcasts/'
                 u'remoteSyncQueryXML'),
        u'name': u'podcasts',
    },
    {
        u'url': (u'http://zope5.ruf.uni-freiburg.de:12285/pr/'
                 u'remoteSyncQueryXML'),
        u'name': u'pr',
    },
    {
        u'url': (u'http://zope5.ruf.uni-freiburg.de:12285/mw/'
                 u'remoteSyncQueryXML'),
        u'name': u'mw',
    },
    {
        u'url': (u'http://zope3.ruf.uni-freiburg.de:12281/uni/'
                 u'remoteSyncQueryXML'),
        u'name': u'uni',
    },
    {
        u'url': (u'http://zope5.ruf.uni-freiburg.de:12285/studium/'
                 u'remoteSyncQueryXML'),
        u'name': u'studium',
    },
    {
        u'url': (u'http://zope5.ruf.uni-freiburg.de:12285/alumni/'
                 u'remoteSyncQueryXML'),
        u'name': u'alumni',
    },
]
"""
Portals to query. Every entry is a dictionary with the keys C{url} and
C{name}: C{url} is the URL of the portal's C{remoteSyncQueryXML} script,
C{name} is being used in statistics and logging.

@type: list of dict

"""
136
137
138
139
PLUGIN_DIR_NAME = u'plugins'
"""
Name of the directory that contains the plugins.

@type: string

"""


PLUGINS = [
    {
        u'name': u'stb',
        u'url': (u'http://info.verwaltung.uni-freiburg.de/servuni/'
                 u'stellenuni.abfr1?kategorieid=alle&layout=v3'
                 u'&sprache=d&ausgabeart=xml'),
    },
    {
        u'name': u'vkal',
        u'url': (u'http://info.verwaltung.uni-freiburg.de/servuni/'
                 u'vkaluni.abfr1?layout=v3&ausgabeart=xml&'
                 u'modus=2&zeitpunkt=4'),
    },
    {
        u'name': u'studentenwerk',
        u'url': (u'http://www.studentenwerk.uni-freiburg.de/'
                 u'index.php?id=272'),
    },
    {
        u'name': u'forschdb',
        # ${fac} and ${to_year} are kept verbatim as placeholders in the URL.
        u'url': (u'http://forschdb.verwaltung.uni-freiburg.de/servuni/'
                 u'forschdbuni.fdbfbr1?Fakultaet=${fac}&Dokumentart='
                 u'Publikation&Ausgabeart=xml&Jahr=1900-${to_year}'),
    },
]
"""
Plugins to load and query. Every entry is a dictionary with the keys C{name}
and C{url}: C{url} is passed to the plugin, usually as the starting point for
the data retrieval process, and C{name} is being used in statistics and
logging.

@type: list of dict

"""
179
180
181
182
183
LOG_LEVEL = logging.DEBUG
"""
Log level to be used by e.g. L{BaseLogger}. One of C{DEBUG}, C{INFO},
C{WARNING}, C{ERROR} or C{CRITICAL}.

@type: int

"""

LOG_FILE_DIR = u'./log'
"""
Directory that holds the logfile.

@type: string

"""

LOG_FILENAME = LOG_FILE_DIR + u'/getXML.log'
"""
Full path and filename of the logfile, located inside L{LOG_FILE_DIR}.

@type: string

"""

LOG_BACKUP_COUNT = 9
"""
How many logfile backups to keep.

@see: L{LOG_ROLLOVER_SIZE}

@type: int

"""

# While LOG_LEVEL is DEBUG the rollover size is forced to 0; every other
# level uses the 10 MiB (10485760 bytes) limit.
# NOTE(review): presumably this value ends up as maxBytes of a
# logging.handlers.RotatingFileHandler, where 0 means "never roll over" --
# confirm against the logger setup.
LOG_ROLLOVER_SIZE = 0 if LOG_LEVEL == logging.DEBUG else 10 * 1024 * 1024
"""
Once the logfile exceeds this size (in bytes), the logger starts a new
logfile and keeps up to L{LOG_BACKUP_COUNT} old logfiles around. Set to C{0}
when L{LOG_LEVEL} is C{DEBUG}.

@type: int

"""
231
232
233
234
235
236
STATE_FILE_DIR = u'./state'
"""
Directory in which the state of portals and plugins is saved.

@see: L{xmlgetter.state}

@type: string

"""

STATE_FILE_EXT = u'dat'
"""
Extension of the state files written to L{STATE_FILE_DIR}. A state file is
named after the plugin or portal as defined in L{PORTALS} or L{PLUGINS}.

@see: L{PortalSourceState}

@type: string

"""
258
259
260
261
TEMP_DIR = u'./tmp'
"""
Directory for temporary data, e.g. retrieval data.

@type: string

"""

TEMP_FILE_EXT = u'tmp'
"""
Extension used for temporary files.

@type: string

"""
277
278
279
280
281
TEMPLATES_DIR = u'./templates'
"""
Directory in which templates and text snippets are looked up.

@type: string

"""

XML_FILENAME = u'unifr.xml'
"""
Filename of the resulting XML document that is fed to the parser for search
index generation. The file is built in L{TEMP_DIR} and moved to L{OUT_DIR}
once it has been generated successfully.

@type: string

"""

OUT_DIR = u'/home/schwenk/dipl/completesearch/databases/unifr'
"""
Location the output file (L{XML_FILENAME}) is moved to after the retrieval
process has finished successfully. Must be an absolute path!

@type: string

"""

ALWAYS_OUTPUT_STATS_ON_EXIT = True
"""
Whether to write the stats to stderr when getXML.py exits, regardless of
whether an error or warning has occurred. Useful for getting notified about
every completed acquisition process.

@type: bool

"""
318
COMPLETION_SERVER_PROGRAM = u'./codebase/server/startCompletionServer'
"""
Command that starts the CompletionServer.

@type: string

"""

COMPLETION_SERVER = [COMPLETION_SERVER_PROGRAM] + [
    u'-d', u'0d',
    u'-w', u'0d',
    u'-S', u'SSSS',
    u'-r',
    u'-p', u'12345',
    u'-l', u'unifr.log',
    u'unifr.hybrid',
]
"""
Program and arguments for starting the CompletionServer, joined into a list
so they can be passed to C{subprocess.call()}.

@type: list

"""

COMPLETION_SERVER_START_DIR = (
    u'/home/schwenk/dipl/completesearch/databases/unifr'
)
"""
Working directory from which the CompletionServer is started.

@type: string

"""
353
PARSER_DIR = u'/home/schwenk/dipl/completesearch/databases/unifr'
"""
Directory in which the parser is located. Absolute path!

@type: string

"""

PARSER = [u'make', u'pall']
"""
Command that starts the parsing of the XML file. As configured here
("make pall") it also rebuilds the index.

@type: list

"""
372