#!/usr/local/bin/python # itunes-url-decoder.py # # A quick and dirty Python hack to retrieve the RSS feed URL from an # iTunes Music Store (ITMS) podcast URL. # # Written in August 2007 by Andrew Clarke # and released to the public domain. __MYVERSION__ = "1.3" __MYDATE__ = "2007-08-14" import sys, urllib2, gzip, StringIO, xml.parsers.expat feedURL = '' have_key = False def usage(): print "Usage: " + sys.argv[0] + " url [url...]" sys.exit(1) if len(sys.argv) < 2: usage() def http_get(uri): request = urllib2.Request(uri) request.add_header("Accept-encoding", "gzip") usock = urllib2.urlopen(request) data = usock.read() if usock.headers.get('content-encoding', None) == 'gzip': data = gzip.GzipFile(fileobj=StringIO.StringIO(data)).read() return data def get_xml_url(data): # HTML scraping... open_str = "itmsOpen('" start = data.find(open_str) if start == -1: return '' data = data[start + len(open_str):] data = data.replace("itms://", "http://") end = data.find("'") return data[:end] def char_data(data): # This feels like a terrible hack global have_key, feedURL if feedURL != '': return if have_key: feedURL = data else: if data == 'feedURL': have_key = True for arg in sys.argv[1:]: feedURL = '' have_key = False xml_url = get_xml_url(http_get(arg)) if xml_url == '': print >> sys.stderr, sys.argv[0] + ' error: itmsOpen() not found in ' + arg continue xml_data = http_get(xml_url) x = xml.parsers.expat.ParserCreate() x.CharacterDataHandler = char_data x.Parse(xml_data, True) if feedURL == '': print >> sys.stderr, sys.argv[0] + ' error: feedURL not found in XML file ' + xml_url else: print feedURL