buildxml.tools.functions

Source Code for Module buildxml.tools.functions

1 # -*- coding: utf-8 -*- 2 3 4 """ 5 This module holds some utility functions. 6 7 @author: Johannes Schwenk 8 @copyright: 2010, Johannes Schwenk 9 @version: 1.0 10 @date: 2010-09-15 11 12 """ 13 14 import sys 15 16 # Important! 17 reload(sys) 18 sys.setdefaultencoding('utf-8') 19 20 import re 21 import htmlentitydefs 22 import os 23 24 25 from datetime import datetime, timedelta 26 27 try: 28 from email.utils import parsedate_tz 29 except ImportError: 30 from email.Utils import parsedate_tz 31 32 33

def du(start_path=u'.'):
    """
    Computes the size of the files in the file system subtree starting at
    C{start_path}.

    Files whose size cannot be determined (for example broken symbolic
    links, or files that are removed while the walk is in progress) are
    skipped instead of aborting the whole computation.

    @param start_path: Root directory of the subtree to measure.
    @return: The recursive size of C{start_path} in bytes.
    @rtype: int

    """
    total_size = 0
    for dirpath, _dirnames, filenames in os.walk(start_path):
        for filename in filenames:
            try:
                total_size += os.path.getsize(os.path.join(dirpath, filename))
            except OSError:
                # Asking for the size directly, instead of testing
                # os.path.exists() first, closes the window in which the
                # file could vanish between the check and the stat call.
                continue
    return total_size

50 51

def unescape(text):
    """
    Unescape HTML Entities #
    © October 28, 2006 | Fredrik Lundh

    http://effbot.org/zone/re-sub.htm#unescape-html

    Removes HTML or XML character references and entities from a text string.

    References that cannot be resolved (unknown entity names, malformed or
    out of range numeric references) are left in the text unchanged.

    @param text: The HTML (or XML) source text.
    @return: The plain text, as a Unicode string, if necessary.
    """
    # Resolve the interpreter specific names locally so the function works
    # on Python 2 (htmlentitydefs / unichr) as well as on Python 3
    # (html.entities / chr).
    try:
        from htmlentitydefs import name2codepoint
    except ImportError:
        from html.entities import name2codepoint
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    def fixup(m):
        ref = m.group(0)
        try:
            if ref[:3] == "&#x":
                # hexadecimal character reference
                return to_char(int(ref[3:-1], 16))
            if ref[:2] == "&#":
                # decimal character reference
                return to_char(int(ref[2:-1]))
            # named entity
            return to_char(name2codepoint[ref[1:-1]])
        except (KeyError, ValueError, OverflowError):
            # OverflowError is raised for absurdly large numeric references
            # and must not abort the whole substitution.
            return ref  # leave as is

    return re.sub(r"&#?\w+;", fixup, text)


"""
================== License for parse_date: ===================================

Copyright (c) 2010 by the Werkzeug Team, see AUTHORS for more details.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

* Redistributions of source code must retain the above copyright
  notice, this list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above
  copyright notice, this list of conditions and the following
  disclaimer in the documentation and/or other materials provided
  with the distribution.

* The names of the contributors may not be used to endorse or
  promote products derived from this software without specific
  prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""

def parse_date(date):
    """
    Parses date strings of formats:

    Sun, 06 Nov 1994 08:49:37 GMT
    Sunday, 06-Nov-94 08:49:37 GMT
    Sun Nov 6 08:49:37 1994

    A time zone offset found in the string is subtracted from the parsed
    time, so the returned naive C{datetime} is expressed in UTC.

    @param date: Date as string.
    @return: A C{datetime.datetime} object or C{None} if parsing failed.
    """
    if not date:
        return None

    parsed_date = parsedate_tz(date.strip())
    if parsed_date is None:
        return None

    year = parsed_date[0]
    # Unfortunately parsedate_tz() does not tell us if two digit years were
    # part of the string, or if they were prefixed with two zeroes. So what
    # we do is to assume that 69-99 refer to 1900, and everything below
    # to 2000.
    if 0 <= year <= 68:
        year += 2000
    elif 69 <= year <= 99:
        year += 1900

    try:
        # Only month .. second are handed to datetime(); the following slot
        # of the tuple is the weekday and must not end up as microseconds.
        # The last slot is the UTC offset in seconds (or None if unknown).
        return datetime(year, *parsed_date[1:6]) - \
            timedelta(seconds=parsed_date[-1] or 0)
    except (ValueError, OverflowError):
        return None

150