Files
tono/backend/importers/hdf5.py
2026-03-30 20:33:28 -07:00

108 lines
3.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Generic HDF5 importer (.h5, .hdf5, .he5).

Each 2-D numeric dataset found in the file is returned as a DataField.
Physical dimensions are read from standard dataset attributes if present:

    xreal / yreal   physical scan size in metres   (fallback: 1e-6)
    xoff / yoff     position offset in metres      (fallback: 0)
    si_unit_xy      lateral unit string            (fallback: "m")
    si_unit_z       value unit string              (fallback: "m")

For Asylum Research / Ergo format files (which store scan metadata in a
sidecar group rather than as dataset attributes), use the ergo_hdf5 importer.

Requires:
    pip install h5py
"""
from __future__ import annotations
from pathlib import Path
import numpy as np
from backend.data_types import DataField
# File suffixes handled by this importer (compared as lowercase, dotted
# suffixes — presumably by the importer registry; verify against caller).
extensions = frozenset((".h5", ".hdf5", ".he5"))

# True because load() attempts to read physical metadata (scan size,
# offsets, units) from the dataset attributes.
calibrated = True
def _iter_2d_datasets(h5file):
    """Collect every 2-D numeric dataset reachable from *h5file*.

    The file is walked with ``visititems``; each object that is an
    ``h5py.Dataset`` of rank 2 with a numeric dtype is recorded.

    Returns:
        A list of ``(name, dataset)`` tuples in the order the visitor
        encountered them. Despite the ``_iter`` prefix this is a fully
        built list, not a generator.
    """
    import h5py

    found: list = []

    def _collect(item_name, item):
        # Checks run in the same order as a short-circuit ``and`` chain:
        # dataset type first, then rank, then dtype.
        if not isinstance(item, h5py.Dataset):
            return
        if item.ndim != 2:
            return
        if np.issubdtype(item.dtype, np.number):
            found.append((item_name, item))
        # Falls through returning None on every path, so the callback never
        # hands a value back to visititems.

    h5file.visititems(_collect)
    return found
def _attr_str(attrs, key: str, default: str) -> str:
val = attrs.get(key)
if val is None:
return default
if isinstance(val, bytes):
return val.decode("utf-8", errors="replace").strip() or default
return str(val).strip() or default
def _attr_float(attrs, key: str, default: float) -> float:
val = attrs.get(key)
if val is None:
return default
try:
return float(val)
except (TypeError, ValueError):
return default
def load(path: Path) -> list[DataField]:
    """Load every 2-D numeric dataset of an HDF5 file as a DataField.

    Args:
        path: Location of the HDF5 file.

    Returns:
        One DataField per dataset found by ``_iter_2d_datasets``, in the
        same order. Pixel data is converted to float64. Physical metadata
        comes from the dataset attributes ``xreal``, ``yreal``, ``xoff``,
        ``yoff``, ``si_unit_xy`` and ``si_unit_z``, with fallbacks of
        1e-6, 1e-6, 0.0, 0.0, "m" and "m" respectively.

    Raises:
        ImportError: If h5py is not installed (chained to the original
            import failure).
        ValueError: If the file contains no 2-D numeric datasets.
    """
    try:
        import h5py
    except ImportError as exc:
        # Chain the cause so the underlying import failure stays visible.
        raise ImportError(
            "Install 'h5py' to load HDF5 files: pip install h5py"
        ) from exc

    # All dataset reads must happen while the file is still open.
    with h5py.File(str(path), "r") as f:
        datasets = _iter_2d_datasets(f)
        if not datasets:
            # Path(...) keeps the message working for plain string paths.
            raise ValueError(
                f"No 2-D numeric datasets found in {Path(path).name}"
            )

        fields = []
        for _name, ds in datasets:
            data = np.asarray(ds, dtype=np.float64)
            attrs = ds.attrs
            fields.append(DataField(
                data=data,
                xreal=_attr_float(attrs, "xreal", 1e-6),
                yreal=_attr_float(attrs, "yreal", 1e-6),
                xoff=_attr_float(attrs, "xoff", 0.0),
                yoff=_attr_float(attrs, "yoff", 0.0),
                si_unit_xy=_attr_str(attrs, "si_unit_xy", "m"),
                si_unit_z=_attr_str(attrs, "si_unit_z", "m"),
            ))
    return fields
def channel_names(path: Path) -> list[str]:
    """Return a display name for each 2-D numeric dataset in the file.

    This is a best-effort listing: if h5py is unavailable, or anything goes
    wrong while opening or scanning the file, an empty list is returned
    instead of raising.

    Args:
        path: Location of the HDF5 file.

    Returns:
        One name per dataset, in the order produced by
        ``_iter_2d_datasets``. A dataset nested inside a group is named
        after its immediate parent (the second-to-last path component); a
        top-level dataset keeps its own name.
    """
    try:
        import h5py
    except ImportError:
        return []

    def _label(dataset_path):
        # "group/sub/data" -> "sub"; "data" -> "data".
        components = dataset_path.split("/")
        if len(components) < 2:
            return components[-1]
        return components[-2]

    try:
        with h5py.File(str(path), "r") as handle:
            labels = []
            for dataset_path, _dataset in _iter_2d_datasets(handle):
                labels.append(_label(dataset_path))
        return labels
    except Exception:
        # Deliberate catch-all: listing channels must never raise.
        return []