Icechunk, Xarray and Zarr Groups
Icechunk, Xarray and Zarr Groups
June 1, 2026
A comprehensive example that combines the following icechunk functionalities.
- Local filesystem storage
- Writing multiple
xarray.Datasets into individual Zarr groups - Creating empty arrays in another group
Local “store” and “repository”
import xarray as xr
import zarr
import rioxarray as xrrio
import icechunk as ic
from icechunk.xarray import to_icechunk
print(f"{ic.__version__ = }")
print(f"{zarr.__version__ = }")
print(f"{xr.__version__ = }")
# ic.__version__ = '1.1.21'
# zarr.__version__ = '3.1.1'
# xr.__version__ = '2025.6.0'
icstore = ic.storage.local_filesystem_storage("/home/alex/Temp/icechunk_repos/ioesri_z0_bydir.ic.zarr")
icrepo = ic.Repository.create(icstore) # Fails if the path already existsXarray to Zarr group with transaction
Using a Python context that Icechunk provides for a transaction, a series of individual operations can be performed and committed all together if they complete without throwing errors.
Here two xarray.DataArray objects are committed to a common group hierarchy Mean_2017_2022/02C.
# ...
# Bunch of stuff here, reading and/or computing the mask on a different grid, etc.
# ...
# pmask: xr.DataArray of global bool mask data
# Read GeoTIFF with `rioxarray`
gtifpath = "02C_my_input_tile.tif"
a = xrrio.open_rasterio(gtifpath)
a.name = "roughness_length"
# Clip global mask to tile extent
b = rasterio.warp.transform_bounds(a.rio.crs, "EPSG:4326", *a.rio.bounds())
_m = pmask.rio.clip_box(minx=b[0], miny=b[1], maxx=b[2], maxy=b[3], crs="EPSG:4326")
# Icechunk transaction context
with icrepo.transaction("main",
message=f"Adding mask and {gtifpath} data for a tile"
) as icstore:
# Write geotiff xarray object to group in `icstore`
to_icechunk(a.isel(band=0), icstore.session, group="Mean_2017_2022/02C/input_tif")
# Write mask subset to adjacent group in `.../02C`
to_icechunk(_m, icstore.session, group="Mean_2017_2022/02C/gebco_mask")
# Show current contents of zarr
icroot = zarr.group(icstore)
print(icroot.tree())
# /
# └── Mean_2017_2022
# └── 02C
# ├── gebco_mask
# │ ├── __xarray_dataarray_variable__ (1930, 2558) int8
# │ ├── lat (1930,) float64
# │ ├── lon (2558,) float64
# │ └── spatial_ref () int64
# └── input_tif
# ├── band () int64
# ├── roughness_length (89414, 20686) float32
# ├── spatial_ref () int64
# ├── x (20686,) float64
# └── y (89414,) float64Add empty array to Zarr
We can specify space for array data that we will need to write, but have no computed yet with a typical create_array method,
where group and array name are specified along with the desired shape and element type.
ashp = a.shape
ndirs = 16
# Icechunk transaction context
with icrepo.transaction("main",
message=f"Adding directional roughness output for a tile"
) as icstore:
outputs = icroot.create_array(
f"Mean_2017_2022/02C/directional_roughness",
shape=(ndirs, ashp[0], ashp[1]), dtype="f4"
)
print(icroot.tree())
# /
# └── Mean_2017_2022
# └── 02C
# ├── directional_roughness (16, 89414, 20686) float32
# ├── gebco_mask
# │ ├── __xarray_dataarray_variable__ (1930, 2558) int8
# │ ├── lat (1930,) float64
# │ ├── lon (2558,) float64
# │ └── spatial_ref () int64
# └── input_tif
# ├── band () int64
# ├── roughness_length (89414, 20686) float32
# ├── spatial_ref () int64
# ├── x (20686,) float64
# └── y (89414,) float64Fill new Zarr array
Within the same transaction, the new Zarr array is filled incrementally, from a temporary work array.
# ... still within the current transaction ...
workget = np.zeros((ashp[0], ashp[1]), dtype=np.float32)
for di in range(ndirs):
this_kernel = kerns16[di, :, :]
t = time.time()
dir_loop(a, this_kernel, workget)
t1 = time.time()-t
print(f"time for idir={di}:", t1, t1/3600.)
print("quantile", np.quantile(workget, [0.5, 0.99]), flush=True)
outputs[di, :, :] = workget
workget[:] = 0.