import os
import sys
import getopt
import ConfigParser
from urllib2 import urlopen
def download_file(url, filepath):
    """Download the resource at ``url`` and store it in ``filepath``.

    A progress message naming the target path and the URL is printed first.
    The response is copied to disk in fixed-size chunks, so the payload is
    never held in memory as a whole.

    Args:
        url: Address handed to ``urlopen``.
        filepath: Path of the local file to (over)write, opened in binary mode.

    Returns:
        True when the whole response has been written, False when any step
        failed. On failure, a file that this call had already started
        writing is removed, so an incomplete download cannot later be
        mistaken for a finished one.
    """
    file_created = False
    try:
        print('')
        print('Downloading "%s" from url:\n"%s" ...' % (filepath, url))
        response = urlopen(url)
        try:
            local_file = open(filepath, 'wb')
            file_created = True
            try:
                while True:
                    chunk = response.read(64 * 1024)
                    if not chunk:
                        break
                    local_file.write(chunk)
            finally:
                local_file.close()
        finally:
            response.close()
        return True
    except Exception:
        # Any failure is reported through the return value, but unlike a bare
        # "except:" this lets KeyboardInterrupt and SystemExit propagate.
        if file_created:
            try:
                os.remove(filepath)
            except OSError:
                # Best effort: the partial file may already be gone.
                pass
        return False
def retrieve_single_file(url, target_folder, skip_existing):
    """Download a single URL into ``target_folder`` and describe the outcome.

    Everything from the first "?" onwards is dropped from the URL and spaces
    are replaced by "%20" before it is used. The local file name is the last
    "/"-separated component of the cleaned URL.

    Args:
        url: Remote address of the file.
        target_folder: Folder that receives the file; it is created when
            missing. An empty string resolves the file name relative to the
            current working directory.
        skip_existing: When true, a path that already exists in
            ``target_folder`` is left untouched and reported as skipped.

    Returns:
        A ``(success, message)`` tuple. ``success`` is False only when the
        download was attempted and failed; ``message`` is a one-line report
        ("SKIPPED", "DOWNLOADED" or "NOT FOUND") naming the file.
    """
    # remove parameters from query string
    url = url.split('?', 1)[0]
    url = url.replace(' ', '%20')

    # build filepath; os.path.join copes with an empty folder name and with
    # a folder name that already ends with a separator
    filename = url.split('/')[-1]
    filepath = os.path.join(target_folder, filename)

    try:
        os.makedirs(target_folder)
    except OSError:
        # Typically the folder already exists. If it really could not be
        # created, the download below fails and is reported as such.
        pass

    if skip_existing and os.path.exists(filepath):
        return (True, '--> SKIPPED : %s' % filename)
    if download_file(url, filepath):
        return (True, '--> DOWNLOADED : %s' % filename)
    return (False, '--> NOT FOUND : %s' % filename)
def usage():
    """Print the help text: purpose, options and a sample config. file."""
    help_text = """
Downloads a list of remote files
Usage: $ python batch_download.py [options]
Options:
-h = help
-i = debug with ipython
The list of files to be downloaded is deduced from config file "batch_download.cfg"
Sample config. file:
[general]
target_folder = ./output
skip_existing = False
files = files
[files]
url_01=http://www.brainstorm.it/downloads/setup_dbkit_3.1.0.exe
url_02=http://www.brainstorm.it/downloads/setup_protocol_simulator_3.3.0.exe
"""
    print(help_text)
def fail(message):
    """Print ``message`` prefixed with "ERROR: " and exit with status 1."""
    report = 'ERROR: ' + message
    print(report)
    # An exception carrying the message could be raised here instead of
    # terminating the interpreter.
    raise SystemExit(1)
def main():
    """Script entry point: parse options, read the config. file, download.

    Options ``-h``, ``--help`` and ``-?`` print the help text and exit;
    ``-i`` tries to route ``pdb.set_trace`` through IPython. An unknown
    option prints the getopt error and the help text, then exits with
    status 2.

    The config. file sits next to the script and has the script's name with
    a "cfg" extension. Its [general] section supplies ``target_folder``,
    ``skip_existing`` and ``files``, the latter naming the section whose
    values are the URLs to download. A config. file that is missing, or
    that lacks a required section or option, is reported through ``fail``.

    One result line is printed per URL, followed by "Done". Failed
    downloads do not change the exit status.
    """
    try:
        opts, _args = getopt.getopt(sys.argv[1:], "hi?", ["help", ])
    except getopt.GetoptError:
        # sys.exc_info() avoids the version-specific "except X, err" and
        # "except X as err" spellings.
        err = sys.exc_info()[1]
        # print help information and exit:
        print(str(err))  # will print something like "option -a not recognized"
        usage()
        sys.exit(2)

    # scan options
    for option, _value in opts:
        if option in ("-h", "--help", "-?"):
            usage()
            sys.exit()
        elif option == "-i":
            try:
                import IPython
                from IPython.Debugger import Tracer
                IPython.Shell.IPShell(argv=[])
                ipdb_set_trace = Tracer(colors='Linux')
                import pdb
                pdb.set_trace = ipdb_set_trace
            except Exception:
                # IPython debugging is optional: carry on without it when
                # it is unavailable, but let Ctrl-C and exits through.
                pass
        else:
            # Raised explicitly so the check survives "python -O".
            raise AssertionError("unhandled option")

    # read config. file
    config_filename = os.path.splitext(sys.argv[0])[0] + os.path.extsep + "cfg"
    config = ConfigParser.ConfigParser()
    if not config.read(config_filename):
        fail('Config. file "%s" not found' % (config_filename))

    # collect settings; report an incomplete config. file as a plain error
    # message instead of a traceback
    try:
        general_section = dict(config.items('general'))
        target_folder = general_section['target_folder']
        skip_existing = general_section['skip_existing'].lower() in ['true', '1', 't', 'y', 'yes', ]
        files_section = general_section['files']
        urls = [item[1] for item in config.items(files_section)]
    except ConfigParser.Error:
        fail('Invalid config. file "%s": %s' % (config_filename, sys.exc_info()[1]))
    except KeyError:
        fail('Invalid config. file "%s": missing option %s in section [general]'
             % (config_filename, sys.exc_info()[1]))

    # process files
    for url in urls:
        _succeeded, message = retrieve_single_file(url, target_folder, skip_existing)
        print(message)
    print('Done')
# Run the downloader only when this file is executed as a script, so that
# importing it has no side effects.
if __name__ == "__main__":
    main()