Skip to content
Icechunk, Xarray and Zarr Groups

Icechunk, Xarray and Zarr Groups

June 1, 2026

A comprehensive example that combines the following icechunk functionalities.

  • Local filesystem storage
  • Writing multiple xarray.Datasets into individual Zarr groups
  • Creating empty arrays in another group

Local “store” and “repository”

import xarray as xr
import zarr
import rioxarray as xrrio
import icechunk as ic
from icechunk.xarray import to_icechunk

print(f"{ic.__version__ = }")
print(f"{zarr.__version__ = }")
print(f"{xr.__version__ = }")

# ic.__version__ = '1.1.21'
# zarr.__version__ = '3.1.1'
# xr.__version__ = '2025.6.0'

icstore = ic.storage.local_filesystem_storage("/home/alex/Temp/icechunk_repos/ioesri_z0_bydir.ic.zarr")
icrepo = ic.Repository.create(icstore) # Fails if the path already exists

Xarray to Zarr group with transaction

Using a Python context that Icechunk provides for a transaction, a series of individual operations can be performed and committed all together if they complete without throwing errors.

Here two xarray.DataArray objects are committed to a common group hierarchy Mean_2017_2022/02C.

# ...
# Bunch of stuff here, reading and/or computing the mask on a different grid, etc.
# ...
# pmask: xr.DataArray of global bool mask data

# Read GeoTIFF with `rioxarray`
gtifpath = "02C_my_input_tile.tif"
a = xrrio.open_rasterio(gtifpath)
a.name = "roughness_length"

# Clip global mask to tile extent
b = rasterio.warp.transform_bounds(a.rio.crs, "EPSG:4326", *a.rio.bounds())
_m = pmask.rio.clip_box(minx=b[0], miny=b[1], maxx=b[2], maxy=b[3], crs="EPSG:4326")  

# Icechunk transaction context
with icrepo.transaction("main", 
    message=f"Adding mask and {gtifpath} data for a tile"
) as icstore:

    # Write geotiff xarray object to group in `icstore`
    to_icechunk(a.isel(band=0), icstore.session, group="Mean_2017_2022/02C/input_tif")
   
    # Write mask subset to adjacent group in `.../02C`
    to_icechunk(_m, icstore.session, group="Mean_2017_2022/02C/gebco_mask")
    
    # Show current contents of zarr
    icroot = zarr.group(icstore)
    print(icroot.tree())

    # /
    # └── Mean_2017_2022
    #     └── 02C
    #         ├── gebco_mask
    #         │   ├── __xarray_dataarray_variable__ (1930, 2558) int8
    #         │   ├── lat (1930,) float64
    #         │   ├── lon (2558,) float64
    #         │   └── spatial_ref () int64
    #         └── input_tif
    #             ├── band () int64
    #             ├── roughness_length (89414, 20686) float32
    #             ├── spatial_ref () int64
    #             ├── x (20686,) float64
    #             └── y (89414,) float64

Add empty array to Zarr

We can specify space for array data that we will need to write, but have no computed yet with a typical create_array method, where group and array name are specified along with the desired shape and element type.

ashp = a.shape
ndirs = 16

# Icechunk transaction context
with icrepo.transaction("main", 
    message=f"Adding directional roughness output for a tile"
) as icstore:

    outputs = icroot.create_array(
        f"Mean_2017_2022/02C/directional_roughness", 
        shape=(ndirs, ashp[0], ashp[1]), dtype="f4"
    )

    print(icroot.tree())

    # /
    # └── Mean_2017_2022
    #     └── 02C
    #         ├── directional_roughness (16, 89414, 20686) float32
    #         ├── gebco_mask
    #         │   ├── __xarray_dataarray_variable__ (1930, 2558) int8
    #         │   ├── lat (1930,) float64
    #         │   ├── lon (2558,) float64
    #         │   └── spatial_ref () int64
    #         └── input_tif
    #             ├── band () int64
    #             ├── roughness_length (89414, 20686) float32
    #             ├── spatial_ref () int64
    #             ├── x (20686,) float64
    #             └── y (89414,) float64

Fill new Zarr array

Within the same transaction, the new Zarr array is filled incrementally, from a temporary work array.

# ... still within the current transaction ...
    workget = np.zeros((ashp[0], ashp[1]), dtype=np.float32)
                    
    for di in range(ndirs):
        this_kernel = kerns16[di, :, :]
        t = time.time()
        dir_loop(a, this_kernel, workget)
        t1 = time.time()-t
        print(f"time for idir={di}:", t1, t1/3600.)
        print("quantile", np.quantile(workget, [0.5, 0.99]), flush=True)
        outputs[di, :, :] = workget
        workget[:] = 0.