Package buildxml :: Package xmlgetter :: Module plugin
[hide private]
[frames] | no frames]

Source Code for Module buildxml.xmlgetter.plugin

  1  #!/usr/bin/python 
  2  # -*- coding: utf-8 -*- 
  3   
  4  """ 
  5  This module provides classes and functions that can be used to build new 
  6  plugins. 
  7   
  8  @author: Johannes Schwenk 
  9  @copyright: 2010, Johannes Schwenk 
 10  @version: 2.0 
 11  @date: 2010-09-15 
 12   
 13   
 14  """ 
 15   
 16  import sys 
 17   
 18  # Important! reload(sys) makes sys.setdefaultencoding() available again. 
 19  reload(sys) 
 20  sys.setdefaultencoding('utf-8') 
 21   
 22  import codecs 
 23  import os 
 24  import shutil 
 25   
 26  from string import Template 
 27  from datetime import datetime, timedelta 
 28  from urlparse import urlsplit 
 29   
 30  from log import BaseLogger 
 31  from stats import Stats 
 32  from xml import XMLEntry 
 33  from state import PortalSourceState 
 34  from request import BaseRequester 
 35  from config import USER_AGENT, TEMP_DIR, TEMP_FILE_EXT, LAST_QUERY_DEFAULT 
class BaseSyncPlugin(BaseRequester):
    """
    This is the base class for all sync plugins.

    A plugin is driven through L{run()}. Concrete plugins customise the
    behaviour by overriding L{_getData()}, L{_consolidate()},
    L{_loadState()} and L{_writeState()}.

    C{self.logger} and C{self._source_name} are not set in this class;
    presumably they are provided by C{BaseRequester.__init__} (to be
    confirmed against that class).

    """

    _from_date = LAST_QUERY_DEFAULT
    """
    @ivar: The date to start the query from.
    @type: datetime

    """

    _url = None
    """
    @ivar: The URL that is the starting point for the data acquisition.
    @type: string

    """

    _base_url = None
    """
    @ivar: The base url of the server (scheme and network location, i.e.
        the url up to the location part), extracted from L{_url} .
    @type: string

    """

    _intermediate_temp_filename = None
    """
    @ivar: The temporary file to which all data from the queries will be
        written. This file is necessary, to prevent destruction of useful
        data from a previous run if an error in L{_getData()} should occur.
    @type: string

    """

    _temp_filename = None
    """
    @ivar: The temporary file to which all data from the queries will be
        written.
    @type: string

    """

    _intermediate_xml_filename = None
    """
    @ivar: The filename of the intermediate consolidated XML data. Necessary,
        because errors during consolidation would destruct usable existing
        data.
    @type: string

    """

    _xml_filename = None
    """
    @ivar: The filename of the consolidated XML data.
    @type: string

    """

    _entries = None
    """
    @ivar: A temporary storage for L{XMLEntry} instances.
    @type: list of L{XMLEntry}

    """

    _entries_written = -1
    """
    @ivar: Overall number of elements written. Reset to C{0} by
        L{__init__()}.
    @type: int

    """

    _stats = None
    """
    @ivar: The statistics for the plugin.
    @type: L{Stats}

    """

    _NO_NET = None
    """
    @ivar: Should we actually get the data from the net, or should we use
        the data from the previous run?
    @type: bool

    """


    def __init__(self, source_name, url, NO_NET=False):
        """
        Initialize the plugin.

        All file names are derived from C{TEMP_DIR}, the source name and
        C{TEMP_FILE_EXT}. If temporary data exists and C{NO_NET} is
        C{False}, the temporary data from a previous run is deleted.

        @param source_name: The name of the source.
        @param url: The starting point for the retrieval of the source's data.
        @param NO_NET: Should we operate on the data of the last run, or
            retrieve new data?

        @type source_name: string
        @type url: string
        @type NO_NET: bool

        """
        BaseRequester.__init__(self, source_name)
        self._url = url
        self._temp_filename = (u'%s/%s.%s'
            % (TEMP_DIR, self._source_name, TEMP_FILE_EXT))
        self._intermediate_temp_filename = (u'%s/%s.1.%s'
            % (TEMP_DIR, self._source_name, TEMP_FILE_EXT))
        # Old data is only kept when the plugin is asked to work offline.
        if not NO_NET and os.path.exists(self._temp_filename):
            os.remove(self._temp_filename)
        self._xml_filename = u'%s/%s.xml' % (TEMP_DIR, self._source_name)
        self._intermediate_xml_filename = (u'%s/%s.2.%s'
            % (TEMP_DIR, self._source_name, TEMP_FILE_EXT))
        self._entries = []
        self._entries_written = 0
        self._stats = Stats(source_name)
        self._NO_NET = NO_NET
        self._split_url = urlsplit(url)
        self._base_url = (u'%s://%s' %
            (self._split_url.scheme, self._split_url.netloc))


    def _getData(self):
        """
        Should be overwritten by the individual plugin's implementation.
        All data has to be written to L{_intermediate_temp_filename}.

        This default implementation retrieves nothing and reports success.

        @return: C{True} or C{False} depending on the success of the
            retrieval process.
        @rtype: bool

        """
        return True


    def _consolidate(self):
        """
        Should be overwritten by the individual plugin's implementation if
        a consolidation of the data is necessary.

        This function copies the temporary data from L{_temp_filename} to
        L{_xml_filename} . If the temporary file does not exist, a warning
        is logged and recorded in L{_stats} and the status is set to C{W}.

        @return: C{True} if the file was copied, C{False} if the temporary
            file was not found.
        @rtype: bool

        """
        if os.path.isfile(self._temp_filename):
            self.logger.info(u'Copying temporary file to %s'
                % self._xml_filename)
            shutil.copy(self._temp_filename, self._xml_filename)
            return True
        else:
            # Use warning() like run() does, instead of the warn() alias.
            self.logger.warning(u'Temporary file %s not found!'
                % self._temp_filename)
            self._stats.messages.append(u'WARNING: Temporary file %s not'
                u' found!' % self._temp_filename)
            self._stats.status = u'W'
            return False


    def _writeState(self):
        """
        Should be overwritten by the individual plugin's implementation if
        it makes use of a state variable like e.g. the portal plugin.

        This default implementation writes nothing and reports success.

        @return: C{True} or C{False} depending on the success of the write
            process.
        @rtype: bool

        """
        return True


    def _loadState(self):
        """
        Should be overwritten by the individual plugin's implementation if
        it makes use of a state variable like e.g. the portal plugin.

        This default implementation loads nothing and reports success.

        @return: C{True} or C{False} depending on the success of the read
            process.
        @rtype: bool

        """
        return True


    def _writeEntries(self):
        """
        Write all entries in L{self._entries} to the file specified through
        C{self._intermediate_temp_filename}.

        The entries are appended to the file and removed from
        L{self._entries}, last entry first. If the file cannot be opened,
        the error is logged and recorded in L{_stats} and the status is set
        to C{F}. The file is closed even if writing an entry raises; such an
        exception is not handled here and propagates to the caller.

        @return: C{False} if the file could not be opened, C{True} once all
            entries have been written.
        @rtype: bool

        """
        try:
            f = codecs.open(self._intermediate_temp_filename, u'a', u'utf-8')
        except IOError as e:
            self.logger.exception(u'Error opening file "%s": %s'
                % (self._intermediate_temp_filename, e))
            self._stats.messages.append(u'ERROR: Error opening file "%s": %s'
                % (self._intermediate_temp_filename, e))
            self._stats.status = u'F'
            return False
        number_of_entries = len(self._entries)
        try:
            while self._entries:
                entry = self._entries.pop()
                # NOTE(review): str() yields a byte string on Python 2;
                # presumably the utf-8 default encoding set at module import
                # is what lets the codecs writer accept it - confirm.
                f.write(str(entry))
        finally:
            # Never leak the file handle, not even when a write fails.
            f.close()
        self.logger.info(u'%s entries written to file %s'
            % (number_of_entries, self._intermediate_temp_filename))
        self._entries_written = self._entries_written + number_of_entries
        return True


    @property
    def source_name(self):
        """
        Get the sources name.

        @return: The value of L{self._source_name} .
        @rtype: string

        """
        return self._source_name


    @property
    def url(self):
        """
        Get the URL of the plugin.

        @return: The value of L{self._url} .
        @rtype: string

        """
        return self._url


    @property
    def stats(self):
        """
        Get the statistics of the plugin.

        @return: The value of L{self._stats} .
        @rtype: L{Stats}

        """
        return self._stats


    @property
    def entries_written(self):
        """
        Get the overall number of written entries.

        @return: The value of L{self._entries_written} .
        @rtype: int

        """
        return self._entries_written


    def run(self):
        """
        Will be called by the controlling script.

        B{Procedure}:

            - If L{_NO_NET} is set, check that the file containing the data
              from a previous run (L{_temp_filename}) exists. If it does
              not, set the status to C{F} and return C{False} immediately.
            - Try to load the state of the plugin using L{_loadState()}.
            - If L{_NO_NET} is B{not} set, try to get the data using
              L{_getData()}.
            - Try to consolidate the data using L{_consolidate()}.
            - If the consolidation succeeded, try to write the state of the
              plugin using L{_writeState()}.

        If one of these points fail return C{False} in the end, otherwise
        return C{True}.

        This function does not call L{_writeEntries()} itself; presumably
        the plugin's L{_getData()} implementation is expected to flush its
        entries (to be confirmed against the concrete plugins). Likewise,
        nothing in this class moves L{_intermediate_temp_filename} to
        L{_temp_filename}, which is the file L{_consolidate()} reads.

        This function should normally not be overridden.

        @return: C{False} if an error or warning occurred, C{True} otherwise.
        @rtype: bool

        """
        self._stats.update_time_start = datetime.now()

        retval = True

        if self._NO_NET:
            self.logger.info(u'--nonet set - will be skipping data retrieval')
            if not os.path.exists(self._temp_filename):
                self.logger.warning(u'No old data (%s) '
                    u'present, but --nonet set - aborting!'
                    % self._temp_filename)
                self._stats.messages.append(u'WARNING: No old data (%s) '
                    u'present, but --nonet set - aborting!'
                    % self._temp_filename)
                self._stats.status = u'F'
                self._stats.update_time_end = datetime.now()
                return False
        self.logger.info(u'Loading plugin state')
        retval = self._loadState() and retval
        if not self._NO_NET:
            self.logger.info(u'Retrieving data')
            retval = self._getData() and retval
        self.logger.info(u'Consolidating data')
        consolidated = self._consolidate()
        retval = consolidated and retval
        # The state is only persisted when there is consolidated data.
        if consolidated:
            self.logger.info(u'Writing plugin state')
            retval = self._writeState() and retval
        self.logger.info(u'Plugin run finished')
        self._stats.update_time_end = datetime.now()
        return retval
364