"""Download USGS daily mean discharge data for a list of gauge sites.

Reads gauge IDs from ``ref_file`` (one per line), downloads each site's
data through ulmo's NWIS HDF5 cache, and writes the daily mean discharge
series (parameter 00060, statistic code 00003) to one CSV per site in
``data_dir``.  Sites whose CSV already exists are skipped, so the script
is resumable after an interruption.
"""
import os

import numpy as np
import pandas as pd
from ulmo.usgs import nwis

ref_file = 'ref_gauge_ids.txt'
data_dir = 'usgs_data_ulmo'
# cache_db = 'cache/usgs_cache.h5'

if not os.path.exists(data_dir):
    os.makedirs(data_dir)

# Read site IDs as strings so leading zeros in gauge numbers survive.
# (np.character was removed from NumPy; plain ``str`` is the supported spelling.)
ids = pd.read_csv(ref_file, header=None, names=['site_id'],
                  dtype={'site_id': str})

for site_id in ids['site_id']:
    data_file = os.path.join(data_dir, site_id + '.csv')
    if os.path.exists(data_file):
        continue  # already downloaded -- skip to keep the run resumable
    # A site may not exist, or the NWIS service may fail; log and move on.
    try:
        # Download and cache site data (slow on the first run).
        # Currently downloads all available parameters.
        nwis.hdf5.update_site_data(site_id)
        # Daily mean discharge: parameter 00060, statistics code 00003.
        data = nwis.hdf5.get_site_data(
            site_id, parameter_code='00060:00003')['00060:00003']
        # Convert to a DataFrame, dropping ulmo's bookkeeping columns.
        df = pd.DataFrame(data['values']).drop(
            ['last_checked', 'last_modified'], axis=1)
        # np.float was removed from NumPy; the builtin float is equivalent.
        df['value'] = df['value'].apply(float)
        df['datetime'] = pd.to_datetime(df['datetime'])
        # NWIS uses -999999 as its missing-data sentinel; mark those NaN.
        # (Original compared ``df.values`` -- the whole underlying array --
        # instead of the 'value' column; fixed to target the column.)
        df.loc[df['value'] == -999999, 'value'] = np.nan
        # df.to_sql('flow', con, if_exists='append')
        df.to_csv(data_file, index=False)
    except Exception as exc:
        # Narrowed from a bare except; report the cause instead of hiding it.
        print('Could not get data for site %s: %s\n' % (site_id, exc))