# set up bucket client
# stop annoying warnings
import warnings
"ignore", message="Your application has authenticated using end user credentials")
warnings.filterwarnings(
from google.cloud import storage
from pathlib import Path
# === Set these ===
= "nmfs_odp_nwfsc"
bucket_name
# Create client and bucket
= storage.Client(project="noaa-gcs-public-data")
client = client.bucket(bucket_name) bucket
Uploading to Google Cloud Storage
Prerequisites
The py-rocket-geospatial-2 image on the NMFS Openscapes JupyterHub is already set up with these tools. Otherwise, install them with:
pip install google-cloud-storage
sudo apt-get install google-cloud-sdk
You need the Storage Admin role on the bucket or on a folder within a bucket. For example, to upload to the NOAA Fisheries Google NODD public buckets, you will need to be granted the Storage Admin role on a specific folder.
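One way to confirm you have write access to your folder is to round-trip a tiny test object. This check is not from the original tutorial; it assumes the client and bucket created in the setup code above, that you have already authenticated (next section), and uses a hypothetical object name:

# Sketch: verify write access by uploading and deleting a tiny object
check = bucket.blob("CB/test/permissions_check.txt")  # hypothetical object name
check.upload_from_string("write-access check")
check.delete()
print("Write access confirmed")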
Authenticate
Run the following in a terminal. A browser window will open where you authenticate, and the credentials will be saved as application_default_credentials.json in ~/.config/gcloud. If you do not have google-cloud-sdk installed, you can install it somewhere else (such as on your local machine), run the command there, and then copy that file to the same location wherever you are running this tutorial (for example, on the hub).
gcloud auth application-default login
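If the credentials file ends up somewhere other than the default location, the Google client libraries can be pointed at it explicitly through the GOOGLE_APPLICATION_CREDENTIALS environment variable, and gcsfs (used later in this tutorial) accepts the path directly via token=. A minimal sketch, assuming the default path on the hub:

# Sketch: point client libraries at the credentials file explicitly
import os
creds = os.path.expanduser("~/.config/gcloud/application_default_credentials.json")
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = creds  # read by google-cloud-storage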
Upload a netCDF file

The code used to create littlecube.nc is in the Create a test file section below.
# Set the file you want to test with
test_file = Path("littlecube.nc")  # change this if using a different file
destination_prefix = "CB/test"

# Create blob and upload
blob_path = f"{destination_prefix}/{test_file.name}"
blob = bucket.blob(blob_path)
blob.upload_from_filename(str(test_file))
print(f"Uploaded {test_file.name} → gs://{bucket_name}/{blob_path}")
Uploaded littlecube.nc → gs://nmfs_odp_nwfsc/CB/test/littlecube.nc
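The summary below also mentions uploading a Zarr directory; that code is not shown in this excerpt. A minimal sketch of one way to do it with the same bucket client, assuming a hypothetical local store named littlecube.zarr:

# Sketch: upload every file in a local Zarr directory, preserving its layout.
# "littlecube.zarr" is an assumed name for illustration.
zarr_dir = Path("littlecube.zarr")
for path in zarr_dir.rglob("*"):
    if path.is_file():
        blob = bucket.blob(f"{destination_prefix}/{zarr_dir.name}/{path.relative_to(zarr_dir)}")
        blob.upload_from_filename(str(path))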
Lazy loading one file
import xarray as xr
import fsspec
= "gcs://nmfs_odp_nwfsc/CB/test/littlecube.nc"
url = fsspec.filesystem("gcs", anon=True) # anon=True since this is a public bucket
fs = fs.open(url, mode="rb") # Open file
f = xr.open_dataset(f) # lazy load ds
ds
<xarray.Dataset> Size: 8kB
Dimensions:       (lat: 8, lon: 8, time: 31)
Coordinates:
  * lat           (lat) float32 32B 33.62 33.88 34.12 ... 34.88 35.12 35.38
  * lon           (lon) float32 32B -75.38 -75.12 -74.88 ... -73.88 -73.62
  * time          (time) datetime64[ns] 248B 2020-01-01 ... 2020-01-31
Data variables:
    analysed_sst  (time, lat, lon) float32 8kB ...
"analysed_sst"].mean(dim="time").plot() ds[
"analysed_sst"].mean(dim=["lat", "lon"]).plot() ds[
# close the file when you're completely done
f.close()
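An alternative pattern, not from the original, is to use the fsspec file object as a context manager so the file is closed automatically, and to subset before loading so only the needed bytes are read. The date below is just an example:

# Sketch: open, subset, and load inside a context manager (no f.close() needed)
with fs.open(url, mode="rb") as f:
    ds_sub = xr.open_dataset(f)
    one_day = ds_sub["analysed_sst"].sel(time="2020-01-15").load()  # pull one day
print(one_day.shape)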
Summary
We uploaded a netCDF file and a Zarr directory to Google Cloud. Some workflows are based on downloading netCDF files, so I uploaded those, but if you want to interact with the data by retrieving only the subsets you need, you will want to work with the Zarr files. Unfortunately, R tooling does not yet work well with Zarr, but it is catching up.
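The Zarr read code is not included in this excerpt. For reference, lazily opening a Zarr store on GCS so that only the needed chunks are pulled looks roughly like this; the store path is hypothetical:

# Sketch: lazily open a Zarr store on GCS and read only a subset.
# The store path is a hypothetical example, not one created in this excerpt.
import xarray as xr
import fsspec
store = fsspec.get_mapper("gcs://nmfs_odp_nwfsc/CB/test/littlecube.zarr", anon=True)
dz = xr.open_zarr(store)                 # lazy: only metadata is read here
dz["analysed_sst"].isel(time=0).load()   # reads just the chunks for one time step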
Delete files from the bucket

To clean up, list everything under a prefix and delete each object.

import gcsfs

fs = gcsfs.GCSFileSystem(token="/home/jovyan/.config/gcloud/application_default_credentials.json")

bucket_prefix = "nmfs_odp_nwfsc/CB/nwm_daily_means/wr18"

# List all files under the prefix
files = fs.ls(bucket_prefix)

# Delete each file
for f in files:
    print(f"Deleting {f}")
    fs.rm(f, recursive=True)
print("✅ Folder deleted.")
Create a test file
import earthaccess

short_name = 'AVHRR_OI-NCEI-L4-GLOB-v2.1'
version = "2.1"
date_range = ("2020-01-02", "2020-01-31")

results = earthaccess.search_data(
    short_name=short_name,
    version=version,
    temporal=date_range,
    cloud_hosted=True
)
fileset = earthaccess.open(results)

import xarray as xr
ds = xr.open_mfdataset(fileset)
dc = ds['analysed_sst'].sel(lat=slice(33.5, 35.5), lon=slice(-75.5, -73.5))
dc.to_netcdf("littlecube.nc")