Package buildxml :: Package plugins :: Module vkal
[hide private]
[frames | no frames]

Source Code for Module buildxml.plugins.vkal

  1  #!/usr/bin/python 
  2  # -*- coding: utf-8 -*- 
  3   
  4  """ 
  5  Contains a plugin that harvests all events from the Veranstaltungskalender from 
  6  C{http://www.uni-freiburg.de/universitaet/presse/veranstaltungskalender} . 
  7   
  8  @author: Johannes Schwenk 
  9  @copyright: 2010, Johannes Schwenk 
 10  @version: 1.0 
 11  @date: 2010-09-15 
 12   
 13   
 14  """ 
 15   
 16  import sys 
 17  import shutil 
 18   
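  19  # Important! reload(sys) restores sys.setdefaultencoding (removed at startup by site.py) so UTF-8 can be set as default. 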
 20  reload(sys) 
 21  sys.setdefaultencoding('utf-8') 
 22   
 23   
 24  from tools.BeautifulSoup import BeautifulStoneSoup 
 25  from xmlgetter.plugin import BaseSyncPlugin 
 26  from xmlgetter.xml import XMLEntry 
 27   
 28   
class SyncPlugin_vkal(BaseSyncPlugin):
    """
    A plugin that harvests all events from the Veranstaltungskalender from
    C{http://www.uni-freiburg.de/universitaet/presse/veranstaltungskalender} .

    There is no need for consolidation, so L{_getData} is the only function
    of the base class that is overridden. L{_childString} and L{_buildEntry}
    are private helpers of L{_getData}.

    """

    # Template for the detail page URL of a single event. It is filled with
    # the string of C{<langbeschreibungid>} and the C{id} attribute of
    # C{<termin>}.
    _DETAIL_URL = (u"http://www.uni-freiburg.de/universitaet/presse/"
                   u"veranstaltungskalender/?layout=v3&modus=9&"
                   u"beschreibungid=%s&terminid=%s")

    # Number of collected entries after which they are written out.
    _WRITE_BATCH_SIZE = 10

    def _childString(self, parent, name):
        """
        Return the C{.string} of the child tag C{name} of C{parent}.

        A missing child tag is reported as C{None} instead of raising
        C{AttributeError}. This is the same value C{.string} yields for a
        tag without a single string child, so missing and empty tags are
        treated alike.

        @param parent: The tag to search in.
        @param name: Name of the child tag.
        @return: The string of the child tag or C{None}.

        """
        child = getattr(parent, name)
        if child is None:
            return None
        return child.string

    def _buildEntry(self, veranstaltung):
        """
        Generate a L{XMLEntry} from one C{<veranstaltung>} tag.

        @param veranstaltung: The tag of the event.
        @return: The new L{XMLEntry}, or C{None} if the event has no
            C{<termin>} child carrying an C{id} attribute, in which case
            no detail URL can be built.

        """
        termin = veranstaltung.termin
        try:
            termin_id = termin["id"]
        except (TypeError, KeyError):
            # TypeError: there is no <termin> child at all (termin is None).
            # KeyError: <termin> exists but has no id attribute.
            return None

        text = self._childString
        url = self._DETAIL_URL % (text(veranstaltung, 'langbeschreibungid'),
                                  termin_id)

        published = text(veranstaltung, 'publikationsdatum')
        organizer = text(veranstaltung, 'veranstalter')

        # Values without a string are rendered as "None", exactly like
        # the former u"%s %s ..." format string did.
        parts = (
            published,
            text(termin, 'zeitpunkt'),
            text(termin, 'ende'),
            text(termin, 'ort'),
            text(termin, 'referent'),
            text(veranstaltung, 'telefon'),
            text(veranstaltung, 'fax'),
            text(veranstaltung, 'email'),
            text(veranstaltung, 'url'),
            text(veranstaltung, 'ansprechpartner'),
            organizer,
        )
        content = u" ".join([u"%s" % (part,) for part in parts])

        return XMLEntry(url,
                        text(termin, 'thema'),
                        content,
                        text(veranstaltung, 'beschreibung'),
                        created=published,
                        creator=organizer,
                        portal_type=u'_vkal_entry',
                        sources=[self._source_name, ])

    def _getData(self):
        """
        Get all events from the Veranstaltungskalender.

        It requests C{self._url}, parses the response as XML and generates
        a L{XMLEntry} for every C{<veranstaltung>} tag. Each entry points to
        the detail page of its event. Entries are written out in batches of
        L{_WRITE_BATCH_SIZE}. On success the intermediate temp file is moved
        to the temp file.

        Events without a usable C{<termin>} are skipped with a warning
        instead of aborting the whole run.

        @return: C{True} if at least one entry was generated, C{False} if
            the request failed, nothing could be parsed or no entry was
            generated.
        @rtype: bool

        """
        # Initialize statistics.
        # NOTE(review): new_entries is reset here, but the loop increments
        # static_entries - presumably that one is initialized elsewhere;
        # confirm against the statistics class.
        self._stats.entries = 0
        self._stats.new_entries = 0

        response = self._requestURL(self._url)
        if not response:
            return False

        soup = BeautifulStoneSoup(response.read())
        if not soup:
            self.logger.warn(u'No soup!')
            return False

        # Find all tags C{<veranstaltung>}
        veranstaltungen = soup.findAll('veranstaltung')
        if not veranstaltungen:
            self.logger.warn(u'No events found!')
            return False

        # Generate a L{XMLEntry} for each found event.
        for veranstaltung in veranstaltungen:
            entry = self._buildEntry(veranstaltung)
            if entry is None:
                self.logger.warn(u'Skipping event without usable <termin>!')
                continue
            self._entries.append(entry)

            # Update statistics.
            self._stats.entries = self._stats.entries + 1
            self._stats.static_entries = self._stats.static_entries + 1

            # Keep memory profile low.
            if len(self._entries) % self._WRITE_BATCH_SIZE == 0:
                self._writeEntries()

        self.logger.debug(u'Writing remaining entries')
        self._writeEntries()

        if self._stats.entries == 0:
            self.logger.warn(u'No entries!')
            self._stats.messages.append(u'WARNING: No entries! This could be'
                u' due to the webpage beeing unreachable!')
            self._stats.status = u'W'
            return False

        shutil.move(self._intermediate_temp_filename, self._temp_filename)
        return True
117