7  Example child images

7.1 py-rocket-geospatial

This image includes many geospatial R and Python packages, along with QGIS, Panoply, and the CoastWatch Utilities.

Dockerfile

# Base image for this child image.
# NOTE(review): ":latest" is a moving tag; pinning a dated tag or digest would make
# builds reproducible -- confirm which base tag should be used before changing it.
FROM ghcr.io/nmfs-opensci/py-rocket-base/test:latest

# Image metadata. The keys follow the OCI pre-defined annotations:
# "authors" (plural) is the standard key, and "licenses" takes an SPDX
# identifier (Apache-2.0). "maintainers" is not a pre-defined OCI key but is
# kept so that anything already reading it continues to work.
LABEL org.opencontainers.image.maintainers="eli.holmes@noaa.gov" \
      org.opencontainers.image.authors="eli.holmes@noaa.gov" \
      org.opencontainers.image.source="https://github.com/nmfs-opensci/container-images/py-rocket-2" \
      org.opencontainers.image.description="Geospatial Python (3.11) and R (4.4) image with Desktop (QGIS, Panoply, CWUtils)" \
      org.opencontainers.image.licenses="Apache-2.0" \
      org.opencontainers.image.version="2024.11.06"

# Copy the build context (environment.yml, install.R, apt.txt, Desktop, ...)
# into the image at /tmp2/ so the install scripts below can read the files
COPY . /tmp2/

# The scripts will switch to NB_USER for installation if needed
USER root

# Use install script to take care of installation tasks.
# Each script is given the path of one of the files copied to /tmp2 above.
# NOTE(review): /pyrocket_scripts is not created in this file; presumably it is
# provided by the base image -- confirm.
RUN /pyrocket_scripts/install-conda-packages.sh /tmp2/environment.yml
RUN /pyrocket_scripts/install-r-packages.sh /tmp2/install.R
RUN /pyrocket_scripts/install-apt-packages.sh /tmp2/apt.txt
RUN /pyrocket_scripts/install-desktop.sh /tmp2/Desktop

# Don't leave the files in the image.
# Note: this removes /tmp2 from the final filesystem, but because it runs in a
# later layer than the COPY, the files are still stored in the earlier layer.
RUN rm -rf /tmp2

# Run the rocker script that installs the geospatial libraries and R spatial.
# PATH is first reset to the plain system directories, because rocker scripts
# need to NOT have conda on the PATH.
RUN export PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \
 && /rocker_scripts/install_geospatial.sh

# Install cwutils: download the release tarball into /tmp, unpack it there, and
# delete the archive in the same layer so it does not add to the image size.
RUN cd /tmp && \
    wget https://www.star.nesdis.noaa.gov/socd/coastwatch/cwf/cwutils-4_0_0_198-linux-x86_64.tar.gz && \
    tar -zxf cwutils-4_0_0_198-linux-x86_64.tar.gz && \
    rm -rf cwutils-4_0_0_198-linux-x86_64.tar.gz
# Expose the cwutils executables and man pages.
# NOTE(review): assumes the tarball unpacks to /tmp/cwutils_4.0.0.198 -- confirm.
ENV PATH=${PATH}:/tmp/cwutils_4.0.0.198/bin
ENV MANPATH=${MANPATH}:/tmp/cwutils_4.0.0.198/doc/man
# Extra JVM option passed to install4j-generated launchers (UI scale factor 2.0).
# The variable name must be written with underscores: with spaces, Docker parses
# the line as the legacy "ENV key value" form and sets INSTALL4J instead.
ENV INSTALL4J_ADD_VM_PARAMS=-Dsun.java2d.uiScale=2.0

# Panoply: fetch the release tarball into /tmp (a browser-like user agent is
# sent with the request), unpack it, and remove the archive in the same layer.
RUN cd /tmp \
 && wget --user-agent="Mozilla/5.0" https://www.giss.nasa.gov/tools/panoply/download/PanoplyJ-5.5.5.tgz \
 && tar -xzf PanoplyJ-5.5.5.tgz \
 && rm -rf PanoplyJ-5.5.5.tgz
# Add the Panoply directory to the PATH
ENV PATH=${PATH}:/tmp/PanoplyJ

# Finish as the notebook user, working from its home directory
USER ${NB_USER}
WORKDIR ${HOME}

environment.yml

# Conda environment specification for the py-rocket-geospatial image.
# The Dockerfile passes this file to /pyrocket_scripts/install-conda-packages.sh.
name: py-rocket-geospatial 
# Last updated: 2024-11-01
channels:
  - conda-forge
  - nodefaults # do not fall back to the conda "defaults" channel

dependencies:
  # Core scientific python stack
  - cython # optimization, C API access
  - flox # optimization, xarray operations
  - hypothesis # needed by numpy testing tools
  - networkx
  - numba # high-performance numerics
  - numpy
  - pandas
  - scikit-image
  - scikit-learn
  - scipy
  - statsmodels
  - pymannkendall # non-parametric Mann-Kendall trend analysis
  - sympy
  - xarray>=2024.05.0

  # Visualization packages
  - bokeh
  - cartopy # geospatial plotting with matplotlib
  - geemap
  - geoviews
  - hvplot
  - ipyleaflet
  - ipympl # This enables matplotlib interaction with jupyter widgets
  - leafmap
  - lonboard
  - matplotlib
  - plotly
  - seaborn # statistical plotting with matplotlib
  - cmocean # colormaps for ocean
  - imageio # helps writing image files
  - apng # create animation from multiple png
  - holoviews
  - graphviz
  - bqplot # 2-D visualization system
  - regionmask # create masks of (geo)spatial regions

  # Machine Learning packages
  # version spec "~=2.1.1" plus build-string match "cpu*" (selects CPU builds)
  - py-xgboost~=2.1.1=cpu*

  # Geospatial data packages
  - geopandas>=0.14.4
  - pygmt
  - rasterio
  - rioxarray
  - rasterstats # summarize geospatial raster datasets based on vector geometries
  - pyresample # resampling geospatial image data
  - shapely # manipulation and analysis of planar geometric objects
  - pyproj
  - datashader

  # File formats and file management, download, dataset caching
  - h5py
  - h5netcdf
  - nco
  - pooch
  - zarr
  - kerchunk
  - rechunker
  - cftime # for decoding time units and variable values in a netCDF file
  - h5coro # reading HDF5 data stored in S3
  - hdf5plugin # provides HDF5 compression filters
  - lxml # processing XML and HTML
  - pynco # python style access to the NetCDF Operators (NCO)

  # Cloud access tools and libraries
  - awscli
  - awscliv2
  - boto3
  - s3fs>=2023.6.0
  # NOTE(review): a comment here described a package that handles login and
  # similar details for accessing earthdata protected data, but no package
  # follows it; earthaccess is listed under "Specific cloud access libraries".
  # Access datasets exposed via STAC
  - pystac-client
  - stackstac
  # Access datasets exposed via intake catalogs
  - intake
  - intake-esm>=2023.7.7
  - intake-stac==0.4.0
  - intake-xarray==0.6.1
  - gcsfs>=2023.5.0
  - certifi # Root Certificates for validating the trustworthiness of SSL certificates.

  # Specific cloud access libraries
  - copernicusmarine # get data from copernicus
  - earthaccess>=0.11.0 # get data from nasa earth access
  - pydap # OPeNDAP implementation
  - erddapy # connect to erddap servers
  - ecmwflibs # wraps some of European Centre for Medium-Range Weather Forecasts libraries
  - harmony-py

  # Distributed computing
  - dask>=2023.12.1
  - dask-labextension
  - dask-geopandas
  - coiled

  # Other useful generic python packages
  - pillow # Python imaging library, useful for many image-related tasks
  - pytest
  - pytest-cov
  - pep8
  - flake8
  - tqdm # progress bars, with notebook support
  - joblib # lightweight pipelining in Python

  # Packages specific to climate and ocean data work
  - esmpy
  - xmip
  - spectral # pure Python module for processing hyperspectral image data

  # Desktop tools
  - qgis 
  - pyopencl  # Maybe needed for qgis? https://github.com/conda-forge/qgis-feedstock/issues/263
  # Resolves warning "No ICDs were found": https://github.com/CryoInTheCloud/hub-image/issues/50
  - ocl-icd-system
  
  # Quarto
  - quarto
  # Packages below are installed with pip rather than conda
  - pip:
    - xq # Apply XPath expressions to XML
    - jupyterlab-quarto

install.R

#! /usr/local/bin/Rscript
# R package installation for the py-rocket-geospatial image

# Dated CRAN snapshot to install from. It should match rocker/verse:4.4, which
# py-rocket-base uses: look up the date that the Rocker image was created and
# put that date in the URL.
repo <- "https://p3m.dev/cran/__linux__/jammy/2024-05-13"

# Packages taken from the snapshot
install.packages(
  c("rstac", "quarto", "aws.s3", "reticulate", "gdalcubes", "rnaturalearth"),
  repos = repo
)
install.packages("rnaturalearthdata", repos = repo)

# Packages taken from GitHub
remotes::install_github("r-tmap/tmap", upgrade = FALSE)
# CRAN version is out of date
devtools::install_github("boettiger-lab/earthdatalogin")

# Required for CoastWatch
coastwatch_pkgs <- c(
  "parsedate", "reshape2", "gridGraphics", "PBSmapping",
  "date", "openair", "cmocean", "plotdap", "rerddapXtracto"
)
install.packages(coastwatch_pkgs, repos = repo)

apt.txt

# apt packages; the Dockerfile passes this file to /pyrocket_scripts/install-apt-packages.sh
# Needed for qgis
libgl1-mesa-glx