1
2
3
4 """
5 This is the main script to gather all information from the various sources
6 to build the XML file with all the data to be fed to the parser.
7
8 For command line usage see L{usage()} .
9
10 @author: Johannes Schwenk
11 @copyright: 2010, Johannes Schwenk
12 @version: 2.0
13 @date: 2010-09-15
14
15
16 """
17
18 import sys
19
20
# Python 2 only: the interpreter start-up (site.py) deletes
# sys.setdefaultencoding(), so the module has to be reloaded to get the
# function back. This must stay directly in front of the call below.
reload(sys)
# Make UTF-8 the process-wide default codec used for implicit conversions
# between byte strings and unicode strings.
sys.setdefaultencoding('utf-8')
23
24 import os
25 import getopt
26 import subprocess
27
28 from shutil import copyfile, move
29
30 from tools import removeInvalidUTF8
31 from tools.functions import du
32 from xmlgetter.log import BaseLogger
33 from xmlgetter.controller import Controller
34 from config import TEMPLATES_DIR, TEMP_DIR, OUT_DIR, XML_FILENAME, \
35 PORTALS, PLUGINS, ALWAYS_OUTPUT_STATS_ON_EXIT, COMPLETION_SERVER, \
36 COMPLETION_SERVER_START_DIR, PARSER_DIR, PARSER
37
# Insert ".raw" in front of the extension only. os.path.splitext() keeps dots
# in the rest of the name untouched and still yields a distinct name when
# XML_FILENAME has no extension at all, so the raw file can never coincide
# with the cleaned file that is written to TEMP_DIR/XML_FILENAME.
XML_TEMP_FILENAME = (u'%s/%s.raw%s'
                     % ((TEMP_DIR,) + os.path.splitext(XML_FILENAME)))
"""
Path of the temporary, unfiltered XML file holding the acquired data, e.g.
C{unifr.raw.xml} for an L{config.XML_FILENAME} of C{unifr.xml}. It is created
in L{config.TEMP_DIR}. After L{removeInvalidUTF8} has been called on it, the
cleaned result is moved to C{./}L{config.OUT_DIR}C{/}L{config.XML_FILENAME}.
"""
45
46
def version():
    """
    Write the program's name and version number to stdout as a single line.

    """
    banner = """getXML.py - Version 1.0"""
    print(banner)
53
54
def usage():
    """
    Show the version banner (see L{version}) followed by the command line
    help text on stdout.

    Invocation::
        $./getXML.py [options]

    Recognised options:
        - C{-v | --version} : Print version information, then exit.
        - C{-h | --help} : Print the help text, then exit.
        - C{-n | --nonet} : Do a run without fetching data from the net.
        - C{-p <PLUGIN_NAME> | --plugin=<PLUGIN_NAME>} : Load only the
          specified plugin. Requires presence of the -s parameter.
        - C{-s <SOURCE_NAME> | --source=<SOURCE_NAME>} : Together with the
          -p parameter, loads only the plugin for the specified source.

    """
    # One entry per output line; the leading empty entry produces the blank
    # line that separates the help text from the version banner.
    help_lines = (
        '',
        'This program fetches data for Plone "portals" and "plugins" and builds a merged',
        'XML file with all data, so it can be fed to the completion server.',
        '',
        'Usage:',
        '$./getXML.py [options]',
        '',
        'Command line options:',
        '-v | --version : Print version information, then exit.',
        '-h | --help : Print this text, then exit.',
        '-n | --nonet : Do a run without fetching data from the net.',
        '-p <PLUGIN_NAME> | --plugin=<PLUGIN_NAME> : Load only the specified plugin.',
        'Requires presence of the -s parameter.',
        '-s <SOURCE_NAME> | --source=<SOURCE_NAME> : Together with the -p parameter,',
        'loads only the plugin for the specified source.',
    )

    version()
    print("\n".join(help_lines))
88
89
90
def _append_file(source_path, target_path, mode):
    """
    Copy the raw bytes of C{source_path} into C{target_path}.

    @param source_path: Path of the file to read.
    @param target_path: Path of the file to write.
    @param mode: C{'wb'} to start the target from scratch, C{'ab'} to append
        to what is already there.
    @raise EnvironmentError: If one of the files cannot be opened, read or
        written.

    """
    # Imported here because the module itself only imports copyfile and move.
    from shutil import copyfileobj

    # The target is opened first so that 'wb' always discards stale content,
    # even when the source turns out to be unreadable.
    with open(target_path, mode) as target_file:
        with open(source_path, 'rb') as source_file:
            copyfileobj(source_file, target_file)


def _call_and_report(command, cwd=None):
    """
    Run an external command and report an abnormal outcome on stderr.

    @param command: Program and arguments as accepted by C{subprocess.call}.
    @param cwd: Working directory for the child process, C{None} to inherit.
    @return: The exit status of the command, or C{None} if it could not be
        started at all.

    """
    try:
        retcode = subprocess.call(command, cwd=cwd)
    except OSError as e:
        sys.stderr.write("Execution failed: %s\n" % e)
        return None

    if retcode < 0:
        sys.stderr.write("Child was terminated by signal %s\n" % -retcode)
    elif retcode > 0:
        sys.stderr.write("Child returned %s\n" % retcode)
    return retcode


def run(argv):
    """
    Collect all needed information from command line arguments and act
    accordingly.

    Compares the available disk space with the space L{du} reports as used
    and writes a warning to the logfile if less than 6 times that amount is
    left.

    If the command line parameters are not -h, -v or their long versions,
    a L{Controller} is instantiated and run. Afterwards the XML head
    template, the XML files of all plugins and portals and the XML tail
    template are concatenated, invalid UTF-8 is removed and the result is
    checked with C{xmllint}. Only a file accepted by C{xmllint} is moved to
    L{config.OUT_DIR}; after that the parser and the completion server are
    started.

    @param argv: Command line arguments without the program name.
    @raise SystemExit: With status 2 for unknown options, 1 if only one of
        -p / -s is given, 0 after -h or -v, and with the collected messages
        as exit text if problems occurred (or statistics were requested).

    """
    logger = BaseLogger(source_name=u'getXML.py').logger
    logger.info(u'==== START ====')

    # Disk space check; both values are converted to MB for the log.
    disk = os.statvfs(u'.')
    used = du() / 1048576
    # f_bavail is counted in fragments (f_frsize), which may differ from the
    # preferred I/O block size f_bsize. Fall back if f_frsize is not reported.
    block_size = disk.f_frsize or disk.f_bsize
    available = block_size * disk.f_bavail / 1048576

    logger.debug(u'Used disk space: %sMB' % used)
    logger.debug(u'Available disk space: %sMB' % available)

    if available < (6 * used):
        logger.warn(u'Low disk space: %sMB' % available)

    try:
        # 'source=' has to be listed here, otherwise the documented
        # --source option is rejected as unknown.
        opts, _ = getopt.getopt(argv, u'hvnp:s:',
                                [u'help', u'version', u'nonet',
                                 u'plugin=', u'source='])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    no_net = False
    plugin = None
    source = None

    for opt, arg in opts:
        if opt in (u'-h', u'--help'):
            usage()
            sys.exit()
        if opt in (u'-v', u'--version'):
            version()
            sys.exit()
        if opt in (u'-n', u'--nonet'):
            no_net = True
            logger.info(u'Running in NO_NET mode, no new data will '
                        u'be retrieved')
        if opt in (u'-p', u'--plugin'):
            plugin = arg
        if opt in (u'-s', u'--source'):
            source = arg

    # -p and -s are only meaningful as a pair.
    if bool(plugin) != bool(source):
        usage()
        sys.exit(1)

    # Everything that has to be shown on exit. A list is used so that no
    # placeholder value ever leaks into the final text.
    problems = []

    def report(errmsg):
        # Log a failure and remember it for the exit message.
        logger.critical(errmsg)
        problems.append(errmsg)

    ctrl = Controller(no_net, plugin, source)
    if not ctrl.run() or ALWAYS_OUTPUT_STATS_ON_EXIT:
        problems.append(u'%s' % ctrl.stats)

    # Build the raw XML file: head template, one file per plugin and portal,
    # tail template. The files are copied in Python so that a missing or
    # unreadable part raises and gets reported.
    try:
        _append_file(u'%s/xmlhead.txt' % TEMPLATES_DIR,
                     XML_TEMP_FILENAME, 'wb')
    except EnvironmentError as e:
        report(u'Error concatenating '
               u'%s/xmlhead.txt: %s' % (TEMPLATES_DIR, e))

    names_list = ([p[u'name'] for p in PLUGINS]
                  + [p[u'name'] for p in PORTALS])
    for name in names_list:
        xml_filename = u'%s/%s.xml' % (TEMP_DIR, name)
        try:
            _append_file(xml_filename, XML_TEMP_FILENAME, 'ab')
        except EnvironmentError as e:
            report(u'[%s] : Error concatenating '
                   u'%s: %s' % (name, xml_filename, e))

    try:
        _append_file(u'%s/xmltail.txt' % TEMPLATES_DIR,
                     XML_TEMP_FILENAME, 'ab')
    except EnvironmentError as e:
        report(u'Error concatenating '
               u'%s/xmltail.txt: %s' % (TEMPLATES_DIR, e))

    cleaned_path = u'%s/%s' % (TEMP_DIR, XML_FILENAME)
    final_path = u'%s/%s' % (OUT_DIR, XML_FILENAME)

    # Remove invalid utf-8 characters from the raw file and save the result
    # next to it under the final file name.
    try:
        logger.info(u'Removing invalid UTF-8 entities from XML file...')
        removeInvalidUTF8.main(["--input", XML_TEMP_FILENAME,
                                "--output", cleaned_path])
    except Exception as e:
        report(u'Error while removing invalid utf-8: %s' % e)

    # Validate the freshly built file, which is still in TEMP_DIR at this
    # point, and publish it only if xmllint accepts it.
    logger.info(u'Checking output with xmllint...')
    published = False
    if _call_and_report([u'xmllint', u'--noout', cleaned_path]) == 0:
        try:
            move(cleaned_path, final_path)
            published = True
        except EnvironmentError as e:
            report(u'Error moving %s to %s: %s'
                   % (cleaned_path, final_path, e))
    else:
        report(u'xmllint did not accept %s, the file was not moved to %s'
               % (cleaned_path, OUT_DIR))

    if published:
        logger.info(u'Parsing and rebuilding index...')
        _call_and_report(PARSER, cwd=PARSER_DIR)

        logger.info(u'Restarting CompletionServer...')
        _call_and_report(COMPLETION_SERVER, cwd=COMPLETION_SERVER_START_DIR)

    logger.info(ctrl.stats)
    logger.info(u'---- END ----')

    exit_message = u'\n\n'.join(problems)
    if exit_message:
        sys.exit(exit_message)
255
256
# Script entry point: only when the file is executed directly, hand the
# command line arguments (without the program name) to run().
if __name__ == "__main__":
    run(sys.argv[1:])
259