import urllib
import re
import time

# Page fetched below; the file names matched by `p` are searched for in its text.
# NOTE(review): the "?C=M;O=D" query string looks like a directory-listing
# sort request (by modification time, descending) -- confirm against the server.
ARCHIVE_URL = 'http://bitly.measuredvoice.com/bitly_archive/?C=M;O=D'

# NOTE: urllib.urlopen is the Python 2 API; this script targets Python 2.
# Close the response explicitly instead of relying on garbage collection.
_listing = urllib.urlopen(ARCHIVE_URL)
try:
    data = _listing.read()
finally:
    _listing.close()
#print data

# Data file name pattern: "usagov_bitly_data" + YYYY-MM-DD + "-" + 10 digits,
# e.g. usagov_bitly_data2011-07-29-1311919454
# (the 10-digit suffix is presumably a Unix timestamp -- confirm).
# Raw string so the regex escapes (\d) are not treated as string escapes.
p = re.compile(r'usagov_bitly_data\d{4}-\d{2}-\d{2}-\d{10}')
#print p.findall('[   ]usagov_bitly_data2011-07-29-131191945429-Jul-2011 07:04