import xarray as xr
import numpy as np
import pandas as pd
import os, glob, shutil, urllib3
Download data from NOAA ERDDAP using GRIDDAP
Written by Minh Phan
WE DO NOT USE ANY DATA GENERATED FROM THIS NOTEBOOK
This tutorial shows one of many ways a user can download data from NOAA’s ERDDAP using its HTML link template. Unlike NASA’s EarthData, you don’t need to register an account to download data from NOAA ERDDAP.
Get to know GRIDDAP URL link template
One of the datasets that we used in this project is Reanalysis Data ERA5 monthly 3d Wind velocities. For the default options, select file type as .nc (NetCDF3-binary file) and click ‘Just generate the URL,’ you can see the URL is then https://apdrc.soest.hawaii.edu/erddap/griddap/hawaii_soest_66d3_10d8_0f3c.nc?u[(2023-04-01T00:00:00Z):1:(2023-04-01T00:00:00Z)][(10.0):1:(1000.0)][(-90.0):1:(90.0)][(0.0):1:(359.75)],v[(2023-04-01T00:00:00Z):1:(2023-04-01T00:00:00Z)][(10.0):1:(1000.0)][(-90.0):1:(90.0)][(0.0):1:(359.75)] and it follows this template:
/DATASET_ID.FILE_EXTENSION
followed by
VARIABLE_NAME1 [(START_DIM1):STRIDE_DIM1:(STOP_DIM1)][(START_DIM2):STRIDE_DIM2:(STOP_DIM2)][(START_DIM3):STRIDE_DIM3:(STOP_DIM3)]…
and then
VARIABLE_NAME2[(START_DIM1):STRIDE_DIM1:(STOP_DIM1)][(START_DIM2):STRIDE_DIM2:(STOP_DIM2)][(START_DIM3):STRIDE_DIM3:(STOP_DIM3)]…
and so on for all the variables. To illustrate this template, you can liken it to downloading every variable as an Xarray DataArray and then combining them based on their coordinates. In the URL above, the order and specification of the dimension slices must be consistent across all variables, just as Xarray's combine-by-coords only works when the coordinates agree.
The template is also well-explained in GRIDDAP documentation.
We can all agree that writing out the template by hand for every dataset is error-prone and tedious, so you are more than welcome to copy the URL produced by the dataset's original file generator and turn it into a Python formatted string to help generate URLs for downloads. Since URLs vary greatly from dataset to dataset, there is no single ideal URL template, but we will still develop a function that generalizes the URL generator to the best of my knowledge.
NOTE: for the URL template, we focus on downloading smaller chunks of the dataset, sliced temporally just like when downloading from the EarthData database, so that the remote server is not overwhelmed by the request. We also recommend downloading variables together in one NetCDF3 file (multiple variable names in one URL) instead of as individual variables, since piecing them together afterwards takes time.
Import necessary libraries
Download data
Unlike NASA Earthdata, we can slice dataset spatially directly for every server request.
def download_ERDDAP_URLs(var):
    """Download one NetCDF file per month for an ERDDAP griddap request.

    The request is described by ``var``, which is passed unchanged to
    ``ERDDAP_URL_template_generator`` to obtain the URLs and the month each
    URL covers. Every response is streamed to
    ``demonstrated data/ERDDAP/<datasetID>/<YYYYMM>.nc``.

    Parameters
    ----------
    var : dict
        Request description. This function itself only reads
        ``var['datasetID']`` (used as the output sub-folder name); the
        remaining keys are consumed by ``ERDDAP_URL_template_generator``.

    Returns
    -------
    None
        Files are written to disk and progress is printed.
    """
    # get list of URLs
    URLs, months = ERDDAP_URL_template_generator(var)
    http = urllib3.PoolManager()

    MAIN_FOLDER = 'demonstrated data/ERDDAP'
    TEMP_FOLDER = var['datasetID']
    path_temp_folder = os.path.join(MAIN_FOLDER, TEMP_FOLDER)

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    os.makedirs(path_temp_folder, exist_ok=True)

    for URL, month in zip(URLs, months):
        fileout = os.path.join(path_temp_folder, f"{month.strftime('%Y%m')}.nc")

        # download: stream the body straight to disk instead of holding it in memory
        with http.request('GET', URL, preload_content=False) as resp:
            succeeded = resp.status == 200
            if succeeded:
                with open(fileout, 'wb') as out_file:
                    shutil.copyfileobj(resp, out_file)
        resp.release_conn()

        if not succeeded:
            # urllib3 does not raise on HTTP error codes; without this check
            # the server's error page would be saved under a .nc name.
            print(f'Failed to download {URL} (HTTP status {resp.status})')
            continue

        print('Downloaded ' + fileout)
def ERDDAP_URL_template_generator(var) -> tuple:
    """Generate one ERDDAP griddap download URL per month.

    Parameters
    ----------
    var : dict
        Request description with the keys

        - ``'datasetID'``: ERDDAP dataset id, e.g.
          ``'hawaii_soest_66d3_10d8_0f3c'``
        - ``'variable_names'``: list of variable names, e.g. ``['u', 'v']``
        - ``'coordinates'``: dict mapping each dimension to a
          ``'start:stride:stop'`` string, e.g.
          ``{'time': '2000-01:1:2000-03', 'LEV': '10:1:10',
          'latitude': '-12:1:12', 'longitude': '42:1:102'}``

        NOTE: ``'time'`` must be the first entry of ``'coordinates'`` and the
        remaining dimensions must follow the dataset's own dimension order
        (typical of ERDDAP URLs). Values are split on ``':'``, so bounds that
        themselves contain colons (e.g. ``T00:00:00Z``) are not supported.

    Returns
    -------
    tuple
        ``(list_of_URLS, months)``: the list of URL strings and the pandas
        ``DatetimeIndex`` of month-end dates, one per URL. Only months whose
        last day lies between the time start and stop are produced, so a
        ``'YYYY-MM'`` stop (parsed as the first of that month) is not itself
        included.
    """
    coordinates = var['coordinates']
    start_month, stride_time, end_month = coordinates['time'].split(':')

    # MonthEnd offset yields the same dates as the 'M' alias, which newer
    # pandas releases deprecate.
    months = pd.date_range(start_month, end_month, freq=pd.offsets.MonthEnd())

    # The non-time slices are identical for every variable and every month,
    # so build that part of the query once. The first entry (time) is skipped.
    other_dims = ''
    for bounds in list(coordinates.values())[1:]:
        d_ele = bounds.split(':')
        other_dims += f"[({d_ele[0]}):{d_ele[1]}:({d_ele[2]})]"

    base_URL = (
        'https://apdrc.soest.hawaii.edu/erddap/griddap/'
        + var['datasetID'] + '.nc?'
    )

    list_of_URLS = []
    for month in months:
        # each request spans the first through the last day of the month
        time_dim = (
            f"[({month.strftime('%Y-%m')}-01):{stride_time}:"
            f"({month.strftime('%Y-%m-%d')})]"
        )
        query = ','.join(
            name + time_dim + other_dims for name in var['variable_names']
        )
        list_of_URLS.append(base_URL + query)

    return list_of_URLS, months
# Example request description: every coordinate is a 'start:stride:stop' string.
var = {
    'datasetID': 'hawaii_soest_66d3_10d8_0f3c',
    'variable_names': ['u', 'v'],
    'coordinates': {
        'time': '2000-01:1:2000-10',  # must be in order of the ERDDAP dataset dimension list
        'LEV': '10:1:10',
        'latitude': '-12:1:12',
        'longitude': '42:1:102',
    },
}
# Example: the generator takes a single dict describing the request.
ERDDAP_URL_template_generator({
    'datasetID': 'hawaii_soest_66d3_10d8_0f3c',
    'variable_names': ['u', 'v'],
    'coordinates': {
        'time': '2000-01:1:2000-10',
        'LEV': '10:1:10',
        'latitude': '-12:1:12',
        'longitude': '42:1:102',
    },
})
['https://apdrc.soest.hawaii.edu/erddap/griddap/hawaii_soest_66d3_10d8_0f3c.nc?u[(2000-01-01):1:(2000-01-31)][(10):1:(10)][(-12):1:(12)][(42):1:(102)],v[(2000-01-01):1:(2000-01-31)][(10):1:(10)][(-12):1:(12)][(42):1:(102)]',
'https://apdrc.soest.hawaii.edu/erddap/griddap/hawaii_soest_66d3_10d8_0f3c.nc?u[(2000-02-01):1:(2000-02-29)][(10):1:(10)][(-12):1:(12)][(42):1:(102)],v[(2000-02-01):1:(2000-02-29)][(10):1:(10)][(-12):1:(12)][(42):1:(102)]',
'https://apdrc.soest.hawaii.edu/erddap/griddap/hawaii_soest_66d3_10d8_0f3c.nc?u[(2000-03-01):1:(2000-03-31)][(10):1:(10)][(-12):1:(12)][(42):1:(102)],v[(2000-03-01):1:(2000-03-31)][(10):1:(10)][(-12):1:(12)][(42):1:(102)]',
'https://apdrc.soest.hawaii.edu/erddap/griddap/hawaii_soest_66d3_10d8_0f3c.nc?u[(2000-04-01):1:(2000-04-30)][(10):1:(10)][(-12):1:(12)][(42):1:(102)],v[(2000-04-01):1:(2000-04-30)][(10):1:(10)][(-12):1:(12)][(42):1:(102)]',
'https://apdrc.soest.hawaii.edu/erddap/griddap/hawaii_soest_66d3_10d8_0f3c.nc?u[(2000-05-01):1:(2000-05-31)][(10):1:(10)][(-12):1:(12)][(42):1:(102)],v[(2000-05-01):1:(2000-05-31)][(10):1:(10)][(-12):1:(12)][(42):1:(102)]',
'https://apdrc.soest.hawaii.edu/erddap/griddap/hawaii_soest_66d3_10d8_0f3c.nc?u[(2000-06-01):1:(2000-06-30)][(10):1:(10)][(-12):1:(12)][(42):1:(102)],v[(2000-06-01):1:(2000-06-30)][(10):1:(10)][(-12):1:(12)][(42):1:(102)]',
'https://apdrc.soest.hawaii.edu/erddap/griddap/hawaii_soest_66d3_10d8_0f3c.nc?u[(2000-07-01):1:(2000-07-31)][(10):1:(10)][(-12):1:(12)][(42):1:(102)],v[(2000-07-01):1:(2000-07-31)][(10):1:(10)][(-12):1:(12)][(42):1:(102)]',
'https://apdrc.soest.hawaii.edu/erddap/griddap/hawaii_soest_66d3_10d8_0f3c.nc?u[(2000-08-01):1:(2000-08-31)][(10):1:(10)][(-12):1:(12)][(42):1:(102)],v[(2000-08-01):1:(2000-08-31)][(10):1:(10)][(-12):1:(12)][(42):1:(102)]',
'https://apdrc.soest.hawaii.edu/erddap/griddap/hawaii_soest_66d3_10d8_0f3c.nc?u[(2000-09-01):1:(2000-09-30)][(10):1:(10)][(-12):1:(12)][(42):1:(102)],v[(2000-09-01):1:(2000-09-30)][(10):1:(10)][(-12):1:(12)][(42):1:(102)]']