1
2
3
4 """
5 This module provides classes for the representation of a source's state.
6
7 @author: Johannes Schwenk
8 @copyright: 2010, Johannes Schwenk
9 @version: 1.0
10 @date: 2010-09-15
11
12
13 """
14
15 import sys
16
17
18 reload(sys)
19 sys.setdefaultencoding('utf-8')
20
21 import os
22 import cPickle
23
24 from logging import DEBUG
25
26 from log import BaseLogger
27 from config import LOG_LEVEL, STATE_FILE_DIR, STATE_FILE_EXT, \
28 LAST_QUERY_DEFAULT
29
30
32 """
33 This class encapsulates a source's state.
34
35 """
36
37
38 last_query = None
39 """
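40 @ivar: The last query recorded for this source (shown as "Last query" in the
41 printable representation). NOTE(review): this text previously described the source's name, required only to make L{BaseLogger} pick up the correct name for the logger - confirm the intended meaning.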
42 @type: string
43 """
44
45
46 url_map = None
47 """
48 @ivar: A dictionary mapping URLs to related information, e.g. the
49 position of the content in a file retrieved in a previous run.
50 @type: dict
51 """
52
53
54 _filename = None
55 """
56 @ivar: The filename to which to write the state.
57 @type: string
58 """
59
60 source_name = None
61 """
62 @ivar: The source's name again. This is a public property that is only used
63 for display purposes, so it can be changed if necessary.
64 @type: string
65 """
66
81
82
84 """
85 Read state from the file L{self._filename}. The highest available
86 pickling protocol is used, which is most certainly a binary
87 representation.
88
89 @return: Returns C{False} if the state file does not exist or reading
90 failed, the unpickled state otherwise.
91 @rtype: L{PortalSourceState}
92
93 """
94 if os.path.exists(self._filename):
95 try:
96 f = open(self._filename, u'r')
97 except IOError, e:
98 self.logger.exception(
99 u'I/O error, could not read state file %s'
100 % self._filename)
101 return False
102 else:
103 try:
104 new_state = cPickle.load(f)
105 except cPickle.UnpicklingError, e:
106 self.logger.exception(u'Unpickle error: %s' % e)
107 return False
108 except Exception, e:
109 self.logger.exception(u'Error: %s' % e)
110 return False
111 else:
112 self.logger.debug(u'Unpickled state')
113 return new_state
114 else:
115 self.logger.info(u'Statefile does not exist yet')
116 return False
117
118
120 """
121 Write state to the file L{self._filename}. The highest available
122 pickling protocol is used, which is most certainly a binary
123 representation.
124
125 @return: C{True} if all went fine, C{False} otherwise.
126 @rtype: bool
127
128 """
129 try:
130 f = open(self._filename, u'w')
131 except IOError, e:
132 self.logger.exception(
133 u'I/O error, could not write state file %s' % e)
134 return False
135 else:
136 self.logger.debug(u'State file %s opened for writing'
137 % self._filename)
138 try:
139 cPickle.dump(self, f, cPickle.HIGHEST_PROTOCOL)
140 except cPickle.PicklingError, e:
141 self.logger.exception(u'Pickling error: %s' % e)
142 return False
143 self.logger.debug(u'Pickled state to %s' % self._filename)
144 f.close()
145 return True
146
147
149 """
150 Make C{State} a dict-like object...
151
152 @param key: The key for the dict entry.
153 @param item: The item that will be stored under C{key}.
154
155 """
156 self.url_map[key] = item
157
158
160 """
161 Make C{State} a dict-like object...
162
163 @param key: Get the item that is stored under C{key}.
164 @return: The stored item.
165
166 """
167 return self.url_map[key]
168
169
170
171
172
173
174
175
176
177
179 """
180 Return a printable and readable representation. Having set the logging
181 level to DEBUG increases verbosity so that all URLs of the
182 L{url_map} are included.
183
184 @return: A readable and printable representation of the instance.
185 @rtype: string
186
187 """
188 state = u'State for "%s":' % self.source_name
189 state = u'%s\n\tFile: %s' % (state, self._filename)
190 state = u'%s\n\tLast query: %s' % (state, self.last_query)
191 if LOG_LEVEL == DEBUG:
192 state = u'%s\n\tURL map:' % state
193 for k,v in self.url_map.items():
194 state = u'%s\n\t\t%s : %s' % (state, k, v)
195 else:
196 state = u'%s\n\tURL map length: %s' % (state, len(self.url_map))
197 return state
198