import earthaccess
from pprint import pprint
import xarray as xr
import geopandas as gpd
import regionmask
import os
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter
import warnings
"ignore") warnings.filterwarnings(
Extract data within a boundary
📘 Learning Objectives
- How to trim satellite data to specific bounding coordinates
- How to apply shapefiles as masks to satellite data
- How to compute and plot means within the shapefiles
Summary
This example is adapted from the NOAA CoastWatch Tutorial Github repository.. It shows you how to get a time series of daily SST within a region defined by a shapefile.
For those not working in the JupyterHub
and then create a code cell and run pip install earthaccess
, pip install regionmask
, pip install cartopy
Datasets used
GHRSST Level 4 AVHRR_OI Global Blended Sea Surface Temperature Analysis (GDS2) from NCEI
This NOAA blended SST is a moderate resolution satellite-based gap-free sea surface temperature (SST) product. We will use the daily data. https://cmr.earthdata.nasa.gov/search/concepts/C2036881712-POCLOUD.html
Longhurst Marine Provinces
The dataset represents the division of the world oceans into provinces as defined by Longhurst (1995; 1998; 2006). This division has been based on the prevailing role of physical forcing as a regulator of phytoplankton distribution.
The Longhurst Marine Provinces dataset is available online (https://www.marineregions.org/downloads.php) and within the shapes folder associated with this repository. For this exercise we will use the Gulf Stream province (ProvCode: GFST)
Import packages and authenticate
"HOME"] = "/home/jovyan" os.environ[
= earthaccess.login()
auth # are we authenticated?
if not auth.authenticated:
# ask for credentials and persist them in a .netrc file
="interactive", persist=True) auth.login(strategy
Load the Longhurst Provinces shape files into a geopandas dataframe
#shape_path = '../resources/longhurst_v4_2010/Longhurst_world_v4_2010.shp'
= os.path.join('..',
shape_path 'resources',
'longhurst_v4_2010',
'Longhurst_world_v4_2010.shp'
)= gpd.read_file(shape_path)
shapefiles 8) shapefiles.head(
ProvCode | ProvDescr | geometry | |
---|---|---|---|
0 | BPLR | Polar - Boreal Polar Province (POLR) | MULTIPOLYGON (((-161.18426 63.50000, -161.5000... |
1 | ARCT | Polar - Atlantic Arctic Province | MULTIPOLYGON (((-21.51305 64.64409, -21.55945 ... |
2 | SARC | Polar - Atlantic Subarctic Province | MULTIPOLYGON (((11.26472 63.96082, 11.09548 63... |
3 | NADR | Westerlies - N. Atlantic Drift Province (WWDR) | POLYGON ((-11.50000 57.50000, -11.50000 56.500... |
4 | GFST | Westerlies - Gulf Stream Province | POLYGON ((-43.50000 43.50000, -43.50000 42.500... |
5 | NASW | Westerlies - N. Atlantic Subtropical Gyral Pro... | POLYGON ((-39.50000 25.50000, -40.50000 25.500... |
6 | NATR | Trades - N. Atlantic Tropical Gyral Province (... | MULTIPOLYGON (((-72.34673 18.53597, -72.36877 ... |
7 | WTRA | Trades - Western Tropical Atlantic Province | POLYGON ((-19.50000 -6.50000, -20.50000 -6.500... |
Isolate the Gulf Stream Province
The Gulf Stream Province can be isolated using its ProvCode (GFST)
= "GFST"
ProvCode
# Locate the row with the ProvCode code
= shapefiles.loc[shapefiles["ProvCode"] == ProvCode]
gulf_stream gulf_stream
ProvCode | ProvDescr | geometry | |
---|---|---|---|
4 | GFST | Westerlies - Gulf Stream Province | POLYGON ((-43.50000 43.50000, -43.50000 42.500... |
Find the coordinates of the bounding box
- The bounding box is the smallest rectangle that will completely enclose the province.
- We will use the bounding box coordinates to subset the satellite data
= gulf_stream.bounds
gs_bnds gs_bnds
minx | miny | maxx | maxy | |
---|---|---|---|---|
4 | -73.5 | 33.5 | -43.5 | 43.5 |
Search and access NASA Earthdata with the Collection Concept ID
Shortname | Collection Concept ID | DOI |
---|---|---|
AVHRR_OI-NCEI-L4-GLOB-v2.1 | C2036881712-POCLOUD | 10.5067/GHAAO-4BC21 |
# Search Dataset Unique ID
= 'C2036881712-POCLOUD'
collection_id = earthaccess.search_data(
results = collection_id,
concept_id )
Granules found: 3053
# Define date range and bounding box and search
= ("2020-01-16", "2020-3-16")
date_range # (xmin=-73.5, ymin=33.5, xmax=-43.5, ymax=43.5)
= (gs_bnds.minx, gs_bnds.miny, gs_bnds.maxx, gs_bnds.maxy) bbox
# Get results based on date range and bbox
= earthaccess.search_data(
results = collection_id,
concept_id = True,
cloud_hosted = date_range,
temporal = bbox,
bounding_box )
Granules found: 62
#Examine results
= results[0]
item #type(item)
#item.keys()
'umm'] item[
{'TemporalExtent': {'RangeDateTime': {'EndingDateTime': '2020-01-16T00:00:00.000Z',
'BeginningDateTime': '2020-01-15T00:00:00.000Z'}},
'MetadataSpecification': {'URL': 'https://cdn.earthdata.nasa.gov/umm/granule/v1.6.5',
'Name': 'UMM-G',
'Version': '1.6.5'},
'GranuleUR': '20200115120000-NCEI-L4_GHRSST-SSTblend-AVHRR_OI-GLOB-v02.0-fv02.1',
'ProviderDates': [{'Type': 'Insert', 'Date': '2021-05-11T17:17:16.602Z'},
{'Type': 'Update', 'Date': '2021-05-11T17:17:16.603Z'}],
'SpatialExtent': {'HorizontalSpatialDomain': {'Geometry': {'BoundingRectangles': [{'WestBoundingCoordinate': -179.875,
'SouthBoundingCoordinate': -89.875,
'EastBoundingCoordinate': 179.875,
'NorthBoundingCoordinate': 89.875}]}}},
'DataGranule': {'ArchiveAndDistributionInformation': [{'SizeUnit': 'MB',
'Size': 9.72747802734375e-05,
'Checksum': {'Value': 'f7f272ac28fd5563da4a03bc9c74a9c2',
'Algorithm': 'MD5'},
'SizeInBytes': 102,
'Name': '20200115120000-NCEI-L4_GHRSST-SSTblend-AVHRR_OI-GLOB-v02.0-fv02.1.nc.md5'},
{'SizeUnit': 'MB',
'Size': 0.9890298843383789,
'Checksum': {'Value': '779e063e913ff1df721b252bbc4eb3b9',
'Algorithm': 'MD5'},
'SizeInBytes': 1037073,
'Name': '20200115120000-NCEI-L4_GHRSST-SSTblend-AVHRR_OI-GLOB-v02.0-fv02.1.nc'}],
'DayNightFlag': 'Unspecified',
'ProductionDateTime': '2020-02-11T00:00:00.000Z'},
'CollectionReference': {'Version': '2.1',
'ShortName': 'AVHRR_OI-NCEI-L4-GLOB-v2.1'},
'RelatedUrls': [{'URL': 's3://podaac-ops-cumulus-protected/AVHRR_OI-NCEI-L4-GLOB-v2.1/20200115120000-NCEI-L4_GHRSST-SSTblend-AVHRR_OI-GLOB-v02.0-fv02.1.nc',
'Type': 'GET DATA VIA DIRECT ACCESS',
'Description': 'This link provides direct download access via S3 to the granule.'},
{'URL': 'https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-public/AVHRR_OI-NCEI-L4-GLOB-v2.1/20200115120000-NCEI-L4_GHRSST-SSTblend-AVHRR_OI-GLOB-v02.0-fv02.1.nc.md5',
'Description': 'Download 20200115120000-NCEI-L4_GHRSST-SSTblend-AVHRR_OI-GLOB-v02.0-fv02.1.nc.md5',
'Type': 'EXTENDED METADATA'},
{'URL': 'https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/AVHRR_OI-NCEI-L4-GLOB-v2.1/20200115120000-NCEI-L4_GHRSST-SSTblend-AVHRR_OI-GLOB-v02.0-fv02.1.nc',
'Description': 'Download 20200115120000-NCEI-L4_GHRSST-SSTblend-AVHRR_OI-GLOB-v02.0-fv02.1.nc',
'Type': 'GET DATA'},
{'URL': 'https://archive.podaac.earthdata.nasa.gov/s3credentials',
'Description': 'api endpoint to retrieve temporary credentials valid for same-region direct s3 access',
'Type': 'VIEW RELATED INFORMATION'},
{'URL': 'https://opendap.earthdata.nasa.gov/providers/POCLOUD/collections/GHRSST%20Level%204%20AVHRR_OI%20Global%20Blended%20Sea%20Surface%20Temperature%20Analysis%20(GDS2)%20from%20NCEI/granules/20200115120000-NCEI-L4_GHRSST-SSTblend-AVHRR_OI-GLOB-v02.0-fv02.1',
'Type': 'USE SERVICE API',
'Subtype': 'OPENDAP DATA',
'Description': 'OPeNDAP request URL'}]}
# Get the first 30 files
= earthaccess.open(results[1:30]) fileset
Opening 29 granules, approx size: 0.03 GB
Load the satellite data
# Load data; takes awhile
= xr.open_mfdataset(fileset, chunks = {}) ds
# Get SST data
= ds['analysed_sst'] ds_subset
Visualize the unmasked data on a map
The map shows the full extent of the bounding box
=(14, 10))
plt.figure(figsize
# Label axes of a Plate Carree projection with a central longitude of 180:
= plt.subplot(211, projection=ccrs.PlateCarree(central_longitude=180))
ax1
# Use the lon and lat ranges to set the extent of the map
# the 120, 260 lon range will show the whole Pacific
# the 15, 55 lat range with capture the range of the data
260, 350, 15, 55], ccrs.PlateCarree())
ax1.set_extent([
# set the tick marks to be slightly inside the map extents
# add feature to the map
='0.6')
ax1.add_feature(cfeature.LAND, facecolor
ax1.coastlines()
# format the lat and lon axis labels
= LongitudeFormatter(zero_direction_label=True)
lon_formatter = LatitudeFormatter()
lat_formatter
ax1.xaxis.set_major_formatter(lon_formatter)
ax1.yaxis.set_major_formatter(lat_formatter)
= ds_subset[0].plot.pcolormesh(ax=ax1, transform=ccrs.PlateCarree(), cmap='jet')
sst_cm 'Satellite Data Before Masking') plt.title(
Text(0.5, 1.0, 'Satellite Data Before Masking')
Create the region from the shape file
The plot shows the shape of the region and its placement along the US East Coast.
= regionmask.from_geopandas(gulf_stream)
region region.plot()
Mask the satellite data
# Create the mask
= region.mask(ds_subset.lon, ds_subset.lat)
mask
# Apply mask the the satellite data
= ds_subset.where(mask == region.numbers[0]) masked_ds
Visualize the masked data on a map
These data have been trimmed to contain only values within the Gulf Stream Province
=(14, 10))
plt.figure(figsize
# Label axes of a Plate Carree projection with a central longitude of 180:
= plt.subplot(211, projection=ccrs.PlateCarree(central_longitude=180))
ax1
# Use the lon and lat ranges to set the extent of the map
# the 120, 260 lon range will show the whole Pacific
# the 15, 55 lat range with capture the range of the data
260, 350, 15, 55], ccrs.PlateCarree())
ax1.set_extent([
# add feature to the map
='0.6')
ax1.add_feature(cfeature.LAND, facecolor
ax1.coastlines()
0].plot.pcolormesh(ax=ax1,
masked_ds[=ccrs.PlateCarree(),
transform='jet')
cmap
'Satellite Data After Masking for Longhurst GFST'); plt.title(
Text(0.5, 1.0, 'Satellite Data After Masking for Longhurst GFST')
Calculate the mean temperature over all timesteps
# Calculate mean over all time steps
= masked_ds.mean(dim='time')
sst_mean_grid
# Map the SST mean
sst_mean_grid.plot()
Calculate the mean temperature over all grids (lat, lon)
# Calculate mean over all grids
= masked_ds.mean(dim=['lat', 'lon'])
sst_mean_time
# Plot the SST mean
=(10, 5))
plt.figure(figsize
plt.plot_date(sst_mean_time.time,
sst_mean_time, 'o', markersize=8,
='gulf stream', c='black',
label='-', linewidth=2)
linestyle
'Gulf Stream Province Daily Mean Temperature 2020')
plt.title('SST (degrees C)')
plt.ylabel('Dates')
plt.xlabel( plt.legend()
References
- AVHRR SST Data
- NASA Earthdata catalog
- To explore a full range of tutorials on accessing and utilizing oceanographic satellite data, visit the NOAA CoastWatch Tutorial Github repository.. These tutorials use ERDDAP servers but code for minulating the data cubes is the same as that use with data from NASA Earthdata.
- Sources for marine shape files: https://www.marineregions.org/downloads.php