Package buildxml :: Module getXML
[hide private]
[frames | no frames]

Source Code for Module buildxml.getXML

  1  #!/usr/bin/python 
  2  # -*- coding: utf-8 -*- 
  3   
  4  """ 
  5  This is the main script to gather all information from the various sources 
  6  to build the XML file with all the data to be fed to the parser. 
  7   
  8  For command line usage see L{usage()} . 
  9   
 10  @author: Johannes Schwenk 
 11  @copyright: 2010, Johannes Schwenk 
 12  @version: 2.0 
 13  @date: 2010-09-15 
 14   
 15   
 16  """ 
 17   
 18  import sys 
 19   
 20  # Important! 
 21  reload(sys) 
 22  sys.setdefaultencoding('utf-8') 
 23   
 24  import os 
 25  import getopt 
 26  import subprocess 
 27   
 28  from shutil import copyfile, move 
 29   
 30  from tools import removeInvalidUTF8 
 31  from tools.functions import du 
 32  from xmlgetter.log import BaseLogger 
 33  from xmlgetter.controller import Controller 
 34  from config import TEMPLATES_DIR, TEMP_DIR, OUT_DIR, XML_FILENAME, \ 
 35  PORTALS, PLUGINS, ALWAYS_OUTPUT_STATS_ON_EXIT, COMPLETION_SERVER, \ 
 36  COMPLETION_SERVER_START_DIR, PARSER_DIR, PARSER 
 37   
 38  XML_TEMP_FILENAME = (u'%s/%s' 
 39          % (TEMP_DIR, u'.raw.'.join(XML_FILENAME.split(u'.')))) 
 40  """ 
 41  Temporary XML file of the acquired data. Will be moved to 
 42  C{./}L{config.OUT_DIR}C{/}L{config.XML_FILENAME} after L{removeInvalidUTF8} has been 
 43  called on it. 
 44  """ 
 45   
 46   
def version():
    """
    Write the program's version banner to stdout.

    """
    banner = "getXML.py - Version 1.0"
    print(banner)
53 54
def usage():
    """
    Print the version banner followed by the command line help text.

    Usage::
        $./getXML.py [options]

    Command line options:
        -v | --version : Print version information, then exit.
        -h | --help : Print this text, then exit.
        -n | --nonet : Do a run without fetching data from the net.
        -p <PLUGIN_NAME> | --plugin=<PLUGIN_NAME> : Load only the specified
            plugin. Requires presence of the -s parameter.
        -s <SOURCE_NAME> | --source=<SOURCE_NAME> : Together with the -p
            parameter, loads only the plugin for the specified source.

    """
    # The help text starts with a newline so that it is separated from the
    # version banner by an empty line.
    help_text = """
This program fetches data for Plone "portals" and "plugins" and builds a merged
XML file with all data, so it can be fed to the completion server.

Usage:
    $./getXML.py [options]

Command line options:
    -v | --version : Print version information, then exit.
    -h | --help : Print this text, then exit.
    -n | --nonet : Do a run without fetching data from the net.
    -p <PLUGIN_NAME> | --plugin=<PLUGIN_NAME> : Load only the specified plugin.
        Requires presence of the -s parameter.
    -s <SOURCE_NAME> | --source=<SOURCE_NAME> : Together with the -p parameter,
        loads only the plugin for the specified source."""
    version()
    print(help_text)
88 89 90
def _run_command(command, cwd):
    """
    Run C{command} in the directory C{cwd} and report problems on stderr.

    @param command: The command as accepted by C{subprocess.call}.
    @param cwd: Working directory for the child process.
    @return: The exit status of the child, or C{None} if the child could
        not be started at all.
    """
    try:
        retcode = subprocess.call(command, cwd=cwd)
    except OSError as e:
        sys.stderr.write("Execution failed: %s\n" % e)
        return None
    if retcode < 0:
        sys.stderr.write("Child was terminated by signal %s\n" % -retcode)
    elif retcode > 0:
        sys.stderr.write("Child returned %s\n" % retcode)
    return retcode


def _append_file(source_path, target_path, mode):
    """
    Copy the bytes of C{source_path} into C{target_path}.

    The target is opened before the source, like the shell does for
    C{cat source > target}: the target is created/truncated (or created for
    appending) even when the source turns out to be unreadable.

    @param source_path: File to read.
    @param target_path: File to write to.
    @param mode: C{'wb'} to start a new file, C{'ab'} to append.
    @raise IOError: If either file cannot be opened, read or written.
    """
    from shutil import copyfileobj

    with open(target_path, mode) as target:
        with open(source_path, 'rb') as source:
            copyfileobj(source, target)


def run(argv):
    """
    Collect all needed information from command line arguments and act
    accordingly.

    Checks for available disk space and issues a warning to the logfile
    if the remaining space is less than 6 times that of the space already
    used by the scripts directory and subdirectories.

    If the command line parameters are not -h, -v or their long versions,
    instantiate a L{Controller} and call its run method. Afterwards the
    XML head template, the per plugin/portal XML files and the XML tail
    template are concatenated into L{XML_TEMP_FILENAME}, invalid UTF-8 is
    removed, and the result is checked with C{xmllint}. Only a file that
    passes the check is moved to L{config.OUT_DIR}. Finally the parser is
    run and the completion server is restarted.

    @param argv: The command line arguments without the program name.
    @raise SystemExit: After printing help or version (status 0), on an
        unknown option (status 2), when only one of -p / -s was given
        (status 1), or at the very end with the collected messages if
        there are any.
    """
    # Get a BaseLogger instance to be able to log messages.
    logger = BaseLogger(source_name=u'getXML.py').logger
    logger.info(u'==== START ====')

    # Enough available disk space ?
    disk = os.statvfs(u'.')
    used = du() / 1048576  # in MB
    available = disk.f_bsize * disk.f_bavail / 1048576  # in MB

    logger.debug(u'Used disk space: %sMB' % used)
    logger.debug(u'Available disk space: %sMB' % available)

    if available < (6 * used):
        logger.warn(u'Low disk space: %sMB' % available)

    try:
        # 'source=' has to be registered here, otherwise the documented
        # --source=<SOURCE_NAME> option is rejected as unknown.
        opts, _ = getopt.getopt(
            argv, u'hvnp:s:',
            [u'help', u'version', u'nonet', u'plugin=', u'source='])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    no_net = False
    plugin = None
    source = None

    for opt, arg in opts:
        if opt in (u'-h', u'--help'):
            usage()
            sys.exit()
        if opt in (u'-v', u'--version'):
            version()
            sys.exit()
        if opt in (u'-n', u'--nonet'):
            no_net = True
            logger.info(u'Running in NO_NET mode, no new data will '
                        u'be retrieved')
        if opt in (u'-p', u'--plugin'):
            plugin = arg
        if opt in (u'-s', u'--source'):
            source = arg

    # -p and -s are only meaningful together.
    if bool(plugin) != bool(source):
        usage()
        sys.exit(1)

    # Everything that should be reported when the process exits. A list is
    # used so that the first entry is not prefixed by a placeholder value.
    problems = []

    def record(errmsg):
        logger.critical(errmsg)
        problems.append(errmsg)

    # Initialize and run the controller.
    ctrl = Controller(no_net, plugin, source)
    if not ctrl.run() or ALWAYS_OUTPUT_STATS_ON_EXIT:
        problems.append(u'%s' % ctrl.stats)

    # Build the final xml in a raw version: head, then all output of
    # plugins and portals, then tail. The files are copied in-process so
    # that a missing or unreadable part is actually detected and logged.
    head_path = u'%s/xmlhead.txt' % TEMPLATES_DIR
    try:
        _append_file(head_path, XML_TEMP_FILENAME, 'wb')
    except (IOError, OSError) as e:
        record(u'Error concatenating %s: %s' % (head_path, e))

    names_list = ([p[u'name'] for p in PLUGINS]
                  + [p[u'name'] for p in PORTALS])
    for name in names_list:
        xml_filename = u'%s/%s.xml' % (TEMP_DIR, name)
        try:
            _append_file(xml_filename, XML_TEMP_FILENAME, 'ab')
        except (IOError, OSError) as e:
            record(u'[%s] : Error concatenating %s: %s'
                   % (name, xml_filename, e))

    tail_path = u'%s/xmltail.txt' % TEMPLATES_DIR
    try:
        _append_file(tail_path, XML_TEMP_FILENAME, 'ab')
    except (IOError, OSError) as e:
        record(u'Error concatenating %s: %s' % (tail_path, e))

    # Remove invalid utf-8 characters from the raw file and save the
    # resulting xml file under its final name in the temp directory.
    cleaned_xml = u'%s/%s' % (TEMP_DIR, XML_FILENAME)
    try:
        logger.info(u'Removing invalid UTF-8 entities from XML file...')
        removeInvalidUTF8.main(["--input", XML_TEMP_FILENAME,
                                "--output", "%s/%s"
                                % (TEMP_DIR, XML_FILENAME)])
    except Exception as e:
        record(u'Error while removing invalid utf-8: %s' % e)

    # Validate the freshly built file (not the previously published one).
    # The path is made absolute because the child runs with cwd=OUT_DIR.
    logger.info(u'Checking output with xmllint...')
    lint_status = _run_command(
        [u'xmllint', u'--noout', os.path.abspath(cleaned_xml)], OUT_DIR)
    if lint_status == 0:
        move(cleaned_xml, u'%s/%s' % (OUT_DIR, XML_FILENAME))
    else:
        record(u'xmllint check of %s failed, keeping previous %s/%s'
               % (cleaned_xml, OUT_DIR, XML_FILENAME))

    logger.info(u'Parsing and rebuilding index...')
    _run_command(PARSER, PARSER_DIR)

    logger.info(u'Restarting CompletionServer...')
    _run_command(COMPLETION_SERVER, COMPLETION_SERVER_START_DIR)

    # Log statistics information.
    logger.info(ctrl.stats)
    logger.info(u'---- END ----')

    summary = u'\n\n'.join(problems)
    if summary:
        sys.exit(summary)
if __name__ == "__main__":
    # Hand everything after the program name to run().
    cli_arguments = sys.argv[1:]
    run(cli_arguments)