1
2
3
4 """
5 This module holds some utility functions.
6
7 @author: Johannes Schwenk
8 @copyright: 2010, Johannes Schwenk
9 @version: 1.0
10 @date: 2010-09-15
11
12 """
13
14 import sys
15
16
17 reload(sys)
18 sys.setdefaultencoding('utf-8')
19
20 import re
21 import htmlentitydefs
22 import os
23
24
25 from datetime import datetime, timedelta
26
27 try:
28 from email.utils import parsedate_tz
29 except ImportError:
30 from email.Utils import parsedate_tz
31
32
33
def du(start_path=u'.'):
    """
    Computes the size of the files in the file system subtree starting at
    C{start_path}.

    Files whose size cannot be determined (dangling symbolic links, files
    that are removed while the walk is in progress, entries that may not be
    stat'ed) are skipped instead of aborting the whole computation. A
    C{start_path} that does not exist results in a size of 0.

    @param start_path: Root directory of the subtree to measure.
    @return: The recursive size of C{start_path} in bytes.
    @rtype: int

    """
    total_size = 0
    for dirpath, _dirnames, filenames in os.walk(start_path):
        for filename in filenames:
            try:
                total_size += os.path.getsize(os.path.join(dirpath, filename))
            except OSError:
                # Asking for the size directly (instead of checking for
                # existence first) closes the window in which the file could
                # disappear between the check and the stat call.
                continue
    return total_size
50
51
# Interpreter compatibility shims, in the same spirit as the
# email.utils / email.Utils fallback used for parsedate_tz in this module.
try:
    from htmlentitydefs import name2codepoint as _name2codepoint
except ImportError:  # Python 3 moved the table to html.entities
    from html.entities import name2codepoint as _name2codepoint

try:
    _unichr = unichr
except NameError:  # Python 3: chr() covers the whole Unicode range
    _unichr = chr


# NOTE(review): the ``def`` line of this function was missing from the
# reviewed listing; name and signature follow the effbot recipe cited in the
# docstring -- confirm against the callers.
def unescape(text):
    """
    Unescape HTML Entities
    (c) October 28, 2006 | Fredrik Lundh

    http://effbot.org/zone/re-sub.htm#unescape-html

    Removes HTML or XML character references and entities from a text string.

    Decimal (C{&#65;}) and hexadecimal (C{&#x41;} or C{&#X41;}) character
    references as well as named entities (C{&amp;}) are replaced by the
    character they stand for. References that cannot be resolved (unknown
    entity name, malformed number, code point outside the range supported by
    the interpreter) are left in the text unchanged.

    @param text: The HTML (or XML) source text.
    @return: The plain text, as a Unicode string, if necessary.
    """
    def fixup(m):
        ref = m.group(0)
        if ref[:2] == "&#":
            # Numeric character reference; the regex guarantees that at
            # least one character follows the "&#" prefix.
            try:
                if ref[2] in "xX":
                    return _unichr(int(ref[3:-1], 16))
                return _unichr(int(ref[2:-1]))
            except (ValueError, OverflowError):
                # ValueError: not a number / code point out of range.
                # OverflowError: number too large for the C level
                # conversion, e.g. "&#99999999999999999999;".
                pass
        else:
            # Named entity
            try:
                return _unichr(_name2codepoint[ref[1:-1]])
            except KeyError:
                pass
        return ref  # leave unresolved references as they are
    return re.sub(r"&#?\w+;", fixup, text)
83
84
85 """
86 ================== License for parse_date: ===================================
87
88 Copyright (c) 2010 by the Werkzeug Team, see AUTHORS for more details.
89
90 Redistribution and use in source and binary forms, with or without
91 modification, are permitted provided that the following conditions are
92 met:
93
94 * Redistributions of source code must retain the above copyright
95 notice, this list of conditions and the following disclaimer.
96
97 * Redistributions in binary form must reproduce the above
98 copyright notice, this list of conditions and the following
99 disclaimer in the documentation and/or other materials provided
100 with the distribution.
101
102 * The names of the contributors may not be used to endorse or
103 promote products derived from this software without specific
104 prior written permission.
105
106 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
107 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
108 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
109 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
110 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
111 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
112 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
113 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
114 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
115 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
116 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
117
118 """
119
# NOTE(review): the ``def`` line of this function was missing from the
# reviewed listing; the name is taken from the license notice above and the
# parameter from the docstring -- confirm against the callers.
def parse_date(date):
    """
    Parses date strings of formats:

        Sun, 06 Nov 1994 08:49:37 GMT
        Sunday, 06-Nov-94 08:49:37 GMT
        Sun Nov 6 08:49:37 1994

    The time zone offset found in the string (if any) is subtracted from the
    parsed time, so the result is a naive C{datetime.datetime} expressed in
    UTC. Strings without zone information are taken as they are.

    @param date: Date as string.
    @return: A C{datetime.datetime} object or C{None} if parsing failed.
    """
    if not date:
        return None

    parsed_date = parsedate_tz(date.strip())
    if parsed_date is None:
        return None

    # Unfortunately parsedate_tz does not tell us if two digit years were
    # part of the string, or if they were prefixed with two zeroes. So what
    # we do is to assume that 69-99 refer to 1900, and everything below to
    # 2000.
    year = parsed_date[0]
    if 0 <= year <= 68:
        year += 2000
    elif 69 <= year <= 99:
        year += 1900

    try:
        # Fields 1..5 are month, day, hour, minute and second. Field 6 is
        # the weekday placeholder of parsedate_tz and must not be passed on,
        # it would end up as the microsecond argument of datetime().
        return datetime(year, *parsed_date[1:6]) - \
            timedelta(seconds=parsed_date[-1] or 0)
    except (ValueError, OverflowError):
        # Syntactically valid, but not an existing point in time
        # (e.g. the 31st of February).
        return None
150