Package buildxml :: Module config
[hide private]
[frames | no frames]

Source Code for Module buildxml.config

  1  #!/usr/bin/python 
  2  # -*- coding: utf-8 -*- 
  3   
  4  """ 
  5  This is the configuration file for the package. 
  6   
  7  The variables L{PORTALS} and L{PLUGINS} together hold all sources to be 
  8  included in the search index. 
  9   
 10   
 11  @author: Johannes Schwenk 
 12  @copyright: 2010, Johannes Schwenk 
 13  @version: 2.0 
 14  @date: 2010-09-15 
 15   
 16   
 17  """ 
 18   
 19  import os 
 20  import os.path 
 21  import codecs 
 22  import logging 
 23   
 24  from datetime import datetime 
 25   
 26   
 27  #--------------------------- HTTP CLIENT -------------------------------- 
 28   
 29   
 30  USER_AGENT = u'Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.1.5) ' + \ 
 31          u'Gecko/20091102 Firefox/3.5.5' 
 32  """ 
 33  The user agent string the client should use to identify itself to servers. 
 34   
 35  @type: string 
 36   
 37  """ 
 38   
 39   
 40  #---------------------------- PORTALS ----------------------------------- 
 41   
 42   
 43  PORTAL_PLUGIN_NAME = u'portal' 
 44  """ 
 45  The name of the generic plugin module for Plone portals. 
 46   
 47  @type: string 
 48   
 49  """ 
 50   
 51  PORTAL_RETRY_WAIT = 10 
 52  """ 
 53  If the portal's server could not fulfill the request, wait for C{X} seconds 
 54  before retrying. 
 55   
 56  @type: int 
 57   
 58  """ 
 59   
 60  MAX_PORTAL_RETRIES = 3 
 61  """ 
 62  Number of retries before failure. If the portal's server could not fulfill the 
 63  request, wait for L{PORTAL_RETRY_WAIT} seconds before retrying. 
 64  Retry a maximum of C{X} times. 
 65   
 66  @type: int 
 67   
 68  """ 
 69   
 70  PORTAL_REQUEST_INCREMENT = 3 
 71  """ 
 72  Number of entries to get from the portal's server for each incremental request. 
 73   
 74  @type: int 
 75   
 76  """ 
 77   
 78  REQUEST_TIMEOUT = 1200 
 79  """ 
 80  Number of seconds to wait for the server's response. 
 81   
 82  @type: int 
 83   
 84  """ 
 85   
 86  LAST_QUERY_DEFAULT = datetime(1970, 1, 1) 
 87  """ 
 88  The date from which to start the querying of portals if no last update is 
 89  specified, e.g. on the first run. 
 90   
 91  @see: L{BaseSyncPlugin} and L{PortalSourceState} 
 92   
 93  @type: datetime 
 94   
 95  """ 
 96   
 97  PORTALS = [ 
 98          {u'url': 
 99              u'http://cmsdev.rektorat.uni-freiburg.de:23456/' + \ 
100                      u'remoteSyncQueryXML', 
101              u'name': u'cmsdev'}, 
102          {u'url': 
103              u'http://zope5.ruf.uni-freiburg.de:12285/exzellenz/' + \ 
104                      u'remoteSyncQueryXML', 
105              u'name': u'exzellenz'}, 
106          {u'url': 
107              u'http://zope5.ruf.uni-freiburg.de:12285/podcasts/' + \ 
108                      u'remoteSyncQueryXML', 
109              u'name': u'podcasts'}, 
110          {u'url': 
111              u'http://zope5.ruf.uni-freiburg.de:12285/pr/remoteSyncQueryXML', 
112              u'name': u'pr'}, 
113          {u'url': 
114              u'http://zope5.ruf.uni-freiburg.de:12285/mw/remoteSyncQueryXML', 
115              u'name': u'mw'}, 
116          {u'url': 
117              u'http://zope3.ruf.uni-freiburg.de:12281/uni/remoteSyncQueryXML', 
118              u'name': u'uni'}, 
119          {u'url': 
120              u'http://zope5.ruf.uni-freiburg.de:12285/studium/' + \ 
121                      u'remoteSyncQueryXML', 
122              u'name': u'studium'}, 
123          {u'url': 
124              u'http://zope5.ruf.uni-freiburg.de:12285/alumni/' + \ 
125                      u'remoteSyncQueryXML', 
126              u'name': u'alumni'}, 
127          ] 
128  """ 
129  List of portals to query. Each entry is a dictionary with C{url} and C{name}, 
130  where C{url} is the URL to the portal's C{remoteSyncQueryXML} script and 
131  C{name} is used in statistics and logging. 
132   
133  @type: list of dict 
134   
135  """ 
136   
137   
138  #----------------------------- PLUGINS ---------------------------------- 
139   
140  PLUGIN_DIR_NAME = u'plugins' 
141  """ 
142  Name of the directory containing the plugins. 
143   
144  @type: string 
145   
146  """ 
147   
148   
149  PLUGINS = [ 
150              {u'name': u'stb', 
151                  u'url': 
152                      u'http://info.verwaltung.uni-freiburg.de/servuni/' + \ 
153                      u'stellenuni.abfr1?kategorieid=alle&layout=v3' + \ 
154                      u'&sprache=d&ausgabeart=xml'}, 
155              {u'name': u'vkal', 
156                  u'url': 
157                      u'http://info.verwaltung.uni-freiburg.de/servuni/' + \ 
158                      u'vkaluni.abfr1?layout=v3&ausgabeart=xml&' + \ 
159                      u'modus=2&zeitpunkt=4'}, 
160              {u'name': u'studentenwerk', 
161                  u'url': 
162                      u'http://www.studentenwerk.uni-freiburg.de/' + \ 
163                      u'index.php?id=272',}, 
164              {u'name': u'forschdb', 
165                  u'url': 
166                      (u'http://forschdb.verwaltung.uni-freiburg.de/servuni/' 
167                      u'forschdbuni.fdbfbr1?Fakultaet=${fac}&Dokumentart=' 
168                      u'Publikation&Ausgabeart=xml&Jahr=1900-${to_year}')}, 
169          ] 
170  """ 
171  List of plugins to load and query. Each entry is a dictionary with C{url} and 
172  C{name}, where C{url} is passed to the plugin, usually as starting point for 
173  the data retrieval process, and C{name} is used in statistics and 
174  logging. 
175   
176  @type: list of dict 
177   
178  """ 
179   
180   
181  #---------------------------- LOGGING ----------------------------------- 
182   
183   
184  LOG_LEVEL = logging.DEBUG 
185  """ 
186  The log level to be used by e.g. L{BaseLogger}. Can be one of C{DEBUG}, 
187  C{INFO}, C{WARNING}, C{ERROR} or C{CRITICAL} . 
188   
189  @type: int 
190   
191  """ 
192   
193  LOG_FILE_DIR = u'./log' 
194  """ 
195  The directory of the logfile. 
196   
197  @type: string 
198   
199  """ 
200   
201  LOG_FILENAME = u'%s/getXML.log' % LOG_FILE_DIR 
202  """ 
203  The full path and filename of the logfile. 
204   
205  @type: string 
206   
207  """ 
208   
209  LOG_BACKUP_COUNT = 9 
210  """ 
211  Number of logfile backups to keep. 
212   
213  @see: L{LOG_ROLLOVER_SIZE} 
214   
215  @type: int 
216   
217  """ 
218   
219  LOG_ROLLOVER_SIZE = 10485760 
220  """ 
221  If the logfile exceeds this size (in bytes), the logger will start a new 
222  logfile and keep up to L{LOG_BACKUP_COUNT} old logfiles around. 
223   
224  @type: int 
225   
226  """ 
227   
228  # Disable rollover in logging if debug mode is enabled. 
229  if LOG_LEVEL == logging.DEBUG: 
230      LOG_ROLLOVER_SIZE = 0 
231   
232   
233   
234  #----------------------------- STATES ----------------------------------- 
235   
236   
237  STATE_FILE_DIR = u'./state' 
238  """ 
239  The directory where to save the state for portals and plugins. 
240   
241  @see: L{xmlgetter.state} 
242   
243  @type: string 
244   
245  """ 
246   
247  STATE_FILE_EXT = u'dat' 
248  """ 
249  The extension of the state files written to L{STATE_FILE_DIR}. The name of the 
250  state files will be the name of the plugin or portal defined in L{PORTALS} 
251  or L{PLUGINS}. 
252   
253  @see: L{PortalSourceState} 
254   
255  @type: string 
256   
257  """ 
258   
259   
260  #---------------------------- TEMPORARY FILES --------------------------- 
261   
262  TEMP_DIR = u'./tmp' 
263  """ 
264  Name of the directory for temporary data, e.g. retrieval data. 
265   
266  @type: string 
267   
268  """ 
269   
270  TEMP_FILE_EXT = u'tmp' 
271  """ 
272  The extension of temporary files. 
273   
274  @type: string 
275   
276  """ 
277   
278   
279  #---------------------- OUTPUT AND OUTPUT GENERATION -------------------- 
280   
281   
282  TEMPLATES_DIR = u'./templates' 
283  """ 
284  Name of a directory where to find templates and text snippets. 
285   
286  @type: string 
287   
288  """ 
289   
290  XML_FILENAME = u'unifr.xml' 
291  """ 
292  The filename of the resulting XML document ready to be fed to the parser for 
293  search index generation. It will be built in L{TEMP_DIR} and on successful 
294  generation moved to L{OUT_DIR} . 
295   
296  @type: string 
297   
298  """ 
299   
300  OUT_DIR = u'/home/schwenk/dipl/completesearch/databases/unifr' 
301  """ 
302  The output file (L{XML_FILENAME}) will be moved to this location once the 
303  retrieval process has finished successfully. Must be an absolute path! 
304   
305  @type: string 
306   
307  """ 
308   
309  ALWAYS_OUTPUT_STATS_ON_EXIT = True 
310  """ 
311  Whether to output the stats to stderr on exit of getXML.py, regardless 
312  of whether an error or warning has occurred. Useful if one wants to get 
313  notified about every completed acquisition process. 
314   
315  @type: bool 
316   
317  """ 
318   
319  COMPLETION_SERVER_PROGRAM = u'./codebase/server/startCompletionServer' 
320  """ 
321  Command to start the CompletionServer. 
322   
323  @type: string 
324   
325  """ 
326   
327  COMPLETION_SERVER = [ 
328      COMPLETION_SERVER_PROGRAM, 
329      u'-d', u'0d', 
330      u'-w', u'0d', 
331      u'-S', u'SSSS', 
332      u'-r', 
333      u'-p', u'12345', 
334      u'-l', u'unifr.log', 
335      u'unifr.hybrid' 
336  ] 
337  """ 
338  The command and its arguments to start the CompletionServer, as a list that 
339  can be passed to C{subprocess.call()}. 
340   
341  @type: list 
342   
343  """ 
344   
345  COMPLETION_SERVER_START_DIR = (u'/home/schwenk/dipl/completesearch/databases' 
346          u'/unifr') 
347  """ 
348  Working directory from which to start the CompletionServer. 
349   
350  @type: string 
351   
352  """ 
353   
354  PARSER_DIR = u'/home/schwenk/dipl/completesearch/databases/unifr' 
355  """ 
356  The directory where the parser is located. Absolute path! 
357   
358  @type: string 
359   
360  """ 
361   
362  PARSER = [ 
363      u'make', u'pall' 
364  ] 
365  """ 
366  The command to start the parsing of the XML file. As configured here (set to 
367  execute "make pall"), it also rebuilds the index. 
368   
369  @type: list 
370   
371  """ 
372