Put the following in a file named url_scrape.py.
#!/usr/bin/env python
'''Print the URLs that are found in standard input, one per line.

It will only find URLs between quotes ("" or '') and starting with http://
'''

import re
import sys

# Pattern for fully-qualified URLs: a quote character, "http://", then the
# shortest run of characters other than "+" up to the next quote character.
# The opening and closing quote are not required to be the same character.
url_pattern = re.compile('''["']http://[^+]*?['"]''')


def find_urls(text):
    '''Return every quoted http:// URL in *text*, in order of appearance.

    Each match from ``url_pattern`` begins and ends with a quote character;
    those two characters are removed so only the URL itself is returned.
    Duplicates are kept.
    '''
    return [quoted[1:-1] for quoted in url_pattern.findall(text)]


def main():
    '''Read all of standard input and print each URL found on its own line.'''
    # NOTE(review): the loop body was missing from the snippet this script
    # was recovered from; printing one bare URL per line is assumed from the
    # module docstring and the "sort | uniq" pipeline usage -- confirm.
    for url in find_urls(sys.stdin.read()):
        print(url)


if __name__ == '__main__':
    main()
Example usage (after making the script executable with chmod +x url_scrape.py):
wget -O - http://madphilosopher.ca/ | ./url_scrape.py | sort | uniq