1
2
3
4 """
5 This module provides classes and functions that can be used to build new
6 plugins.
7
8 @author: Johannes Schwenk
9 @copyright: 2010, Johannes Schwenk
10 @version: 2.0
11 @date: 2010-09-15
12
13
14 """
15
16 import sys
17
18
19 reload(sys)  # Python 2 idiom: site.py removes sys.setdefaultencoding at startup; reloading sys makes it available again
20 sys.setdefaultencoding('utf-8')  # process-wide default used for implicit str <-> unicode conversions
21
22 import codecs
23 import os
24 import shutil
25
26 from string import Template
27 from datetime import datetime, timedelta
28 from urlparse import urlsplit
29
30 from log import BaseLogger
31 from stats import Stats
32 from xml import XMLEntry
33 from state import PortalSourceState
34 from request import BaseRequester
35 from config import USER_AGENT, TEMP_DIR, TEMP_FILE_EXT, LAST_QUERY_DEFAULT
40 """
41 This is the base class for all sync plugins.
42
43 """
44
45 _from_date = LAST_QUERY_DEFAULT
46 """
47 @ivar: The date to start the query from.
48 @type: datetime
49
50 """
51
52 _url = None
53 """
54 @ivar: The URL that is the starting point for the data acquisition.
55 @type: string
56
57 """
58
59 _base_url = None
60 """
61 @ivar: The base url of the server, extracted from L{_url} .
62 @type: string
63
64 """
65
66 _intermediate_temp_filename = None
67 """
68 @ivar: The temporary file to which all data from the queries will be
69 written. This file is necessary to prevent destruction of useful
70 data from a previous run if an error in L{_getData()} should occur.
71 @type: string
72
73 """
74
75 _temp_filename = None
76 """
77 @ivar: The temporary file to which all data from the queries will be
78 written.
79 @type: string
80
81 """
82
83 _intermediate_xml_filename = None
84 """
85 @ivar: The filename of the intermediate consolidated XML data. Necessary
86 because errors during consolidation would destroy usable existing
87 data.
88 @type: string
89
90 """
91
92 _xml_filename = None
93 """
94 @ivar: The filename of the consolidated XML data.
95 @type: string
96
97 """
98
99 _entries = None
100 """
101 @ivar: A temporary storage for L{XMLEntry} instances.
102 @type: list of L{XMLEntry}
103
104 """
105
106 _entries_written = -1
107 """
108 @ivar: Overall number of elements written.
109 @type: int
110
111 """
112
113 _stats = None
114 """
115 @ivar: The statistics for the plugin.
116 @type: L{Stats}
117
118 """
119
120 _NO_NET = None
121 """
122 @ivar: Should we actually get the data from the net, or should we use
123 the data from the previous run?
124 @type: bool
125
126 """
127
128 _base_url = None  # NOTE(review): _base_url is already declared earlier in this class body with its own @ivar text; both assign None - consider merging the two declarations
129 """
130 @ivar: Stores the url up to the location part.
131 @type: string
132
133 """
134
135
136 - def __init__(self, source_name, url, NO_NET=False):
137 """
138 Initialize the plugin.
139
140 If temporary data exists and C{NO_NET} is C{False}, the temporary
141 data from a previous run is deleted.
142
143 @param source_name: The name of the source.
144 @param url: The starting point for the retrieval of the source's data.
145 @param NO_NET: If C{True}, operate on the data of the last run instead
146 of retrieving new data.
147
148 @type source_name: string
149 @type url: string
150 @type NO_NET: bool
151
152 """
153 BaseRequester.__init__(self, source_name)  # presumably sets self._source_name, which is read below - confirm in BaseRequester
154 self._url = url
155 self._temp_filename = (u'%s/%s.%s'
156 % (TEMP_DIR, self._source_name, TEMP_FILE_EXT))
157 self._intermediate_temp_filename = (u'%s/%s.1.%s'
158 % (TEMP_DIR, self._source_name, TEMP_FILE_EXT))
159 if not NO_NET and os.path.exists(self._temp_filename):  # fresh (networked) run: discard the previous run's temporary file
160 os.remove(self._temp_filename)
161 self._xml_filename = u'%s/%s.xml' % (TEMP_DIR, self._source_name)
162 self._intermediate_xml_filename = (u'%s/%s.2.%s'
163 % (TEMP_DIR, self._source_name, TEMP_FILE_EXT))
164 self._entries = []
165 self._entries_written = 0
166 self._stats = Stats(source_name)
167 self._NO_NET = NO_NET
168 self._split_url = urlsplit(url)  # the split result stays on the instance; _base_url is built from its scheme and netloc
169 self._base_url = (u'%s://%s' %
170 (self._split_url.scheme, self._split_url.netloc))
171
172
174 """
175 Should be overridden by the individual plugin's implementation.
176 All data has to be written to L{_intermediate_temp_filename}.
177
178 @return: C{True} or C{False} depending on the success of the
179 retrieval process.
180 @rtype: bool
181
182 """
183 return True
184
185
187 """
188 Should be overridden by the individual plugin's implementation if
189 a consolidation of the data is necessary.
190
191 This function copies the temporary data from L{_temp_filename} to
192 L{_xml_filename} .
193
194 """
195 if os.path.isfile(self._temp_filename):
196 self.logger.info(u'Copying temporary file to %s'
197 % self._xml_filename)
198 shutil.copy(self._temp_filename, self._xml_filename)
199 return True
200 else:
201 self.logger.warn(u'Temporary file %s not found!'
202 % self._temp_filename)
203 self._stats.messages.append(u'WARNING: Temporary file %s not'
204 u' found!' % self._temp_filename)
205 self._stats.status = u'W'
206 return False
207
208
210 """
211 Should be overridden by the individual plugin's implementation if
212 it makes use of a state variable like e.g. the portal plugin.
213
214 @return: C{True} or C{False} depending on the success of the write
215 process.
216 @rtype: bool
217
218 """
219 return True
220
221
223 """
224 Should be overridden by the individual plugin's implementation if
225 it makes use of a state variable like e.g. the portal plugin.
226
227 @return: C{True} or C{False} depending on the success of the read
228 process.
229 @rtype: bool
230
231 """
232 return True
233
234
235
261
262
263 @property
265 """
266 Get the source's name.
267
268 @return: The value of L{self._source_name} .
269 @rtype: string
270
271 """
272 return self._source_name
273
274
275 @property
277 """
278 Get the URL of the plugin.
279
280 @return: The value of L{self._url} .
281 @rtype: string
282
283 """
284 return self._url
285
286
287 @property
289 """
290 Get the statistics of the plugin.
291
292 @return: The value of L{self._stats} .
293 @rtype: L{Stats}
294
295 """
296 return self._stats
297
298
299 @property
301 """
302 Get the overall number of written entries.
303
304 @return: The value of L{self._entries_written} .
305 @rtype: int
306
307 """
308 return self._entries_written
309
310
312 """
313 Will be called by the controlling script.
314
315 B{Procedure}:
316
317 - If L{_NO_NET} is set, try to open the file containing
318 the data from a previous run. If this fails, return C{False}.
319 - Try to load the state of the plugin using L{_loadState()}.
320 - If L{_NO_NET} is not set, try to get the data using L{_getData()}.
321 - Try to write remaining entries using L{_writeEntries()}.
322 - Try to consolidate the data using L{_consolidate()}.
323 - Try to write the state of the plugin using L{_writeState()}.
324
325 If one of these points fail return C{False} in the end, otherwise
326 return C{True}.
327
328 This function should normally not be overridden.
329
330 @return: C{False} if an error or warning occurred, C{True} otherwise.
331 @rtype: bool
332
333 """
334 self._stats.update_time_start = datetime.now()
335
336 retval = True
337
338 if self._NO_NET:
339 self.logger.info(u'--nonet set - will be skipping data retrieval')
340 if not os.path.exists(self._temp_filename):
341 self.logger.warning(u'No old data (%s) '
342 u'present, but --nonet set - aborting!'
343 % self._temp_filename)
344 self._stats.messages.append(u'WARNING: No old data (%s) '
345 u'present, but --nonet set - aborting!'
346 % self._temp_filename)
347 self._stats.status = u'F'
348 self._stats.update_time_end = datetime.now()
349 return False
350 self.logger.info(u'Loading plugin state')
351 retval = self._loadState() and retval
352 if not self._NO_NET:
353 self.logger.info(u'Retrieving data')
354 retval = self._getData() and retval
355 self.logger.info(u'Consolidating data')
356 consolidated = self._consolidate()
357 retval = consolidated and retval
358 if consolidated:
359 self.logger.info(u'Writing plugin state')
360 retval = self._writeState() and retval
361 self.logger.info(u'Plugin run finished')
362 self._stats.update_time_end = datetime.now()
363 return retval
364