108 lines
3.2 KiB
Python
108 lines
3.2 KiB
Python
"""
|
|
Generic HDF5 importer (.h5, .hdf5, .he5).

Each 2-D dataset found in the file is returned as a DataField. Physical
dimensions are read from standard dataset attributes if present:

    xreal / yreal  - physical scan size in metres (fallback: 1e-6)
    xoff / yoff    - position offset in metres    (fallback: 0)
    si_unit_xy     - lateral unit string          (fallback: "m")
    si_unit_z      - value unit string            (fallback: "m")

For Asylum Research / Ergo format files (which store scan metadata in a
sidecar group rather than as dataset attributes), use the ergo_hdf5 importer.

Requires:
    pip install h5py
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
|
|
import numpy as np
|
|
|
|
from backend.data_types import DataField
|
|
|
|
|
|
# File suffixes handled by this importer (lower-case, with leading dot).
extensions = frozenset((".h5", ".hdf5", ".he5"))

# True because load() attempts to read physical metadata (scan size, offsets,
# units) from dataset attributes instead of returning bare pixel data.
calibrated = True
|
|
|
|
|
|
def _iter_2d_datasets(h5file):
    """Collect every 2-D numeric dataset reachable from *h5file*.

    The file is walked with ``h5file.visititems``.  The result is a list of
    ``(name, dataset)`` tuples in visiting order, where ``name`` is the path
    string that ``visititems`` hands to its callback.  An object is kept only
    if it is an ``h5py.Dataset``, has ``ndim == 2`` and its dtype is a subtype
    of ``np.number``; everything else is skipped.

    Despite the name, this returns a list rather than a generator, so callers
    can test it for emptiness.
    """
    import h5py  # deferred: h5py is only needed once a file is actually opened

    found: list = []

    def _collect(path, node):
        # Every branch must fall through returning None: visititems stops
        # walking as soon as its callback returns anything else.
        if not isinstance(node, h5py.Dataset):
            return
        if node.ndim != 2:
            return
        if np.issubdtype(node.dtype, np.number):
            found.append((path, node))

    h5file.visititems(_collect)
    return found
|
|
|
|
|
|
def _attr_str(attrs, key: str, default: str) -> str:
    """Return attribute *key* from *attrs* as a stripped string.

    Args:
        attrs: Mapping-like object exposing ``get`` (e.g. a dataset's
            ``attrs``).
        key: Attribute name to look up.
        default: Value returned when the attribute is missing, is an array
            with more than one element, or is empty after stripping.

    Returns:
        The attribute text with surrounding whitespace removed, or *default*.
        ``bytes`` values are decoded as UTF-8 with undecodable bytes replaced.
    """
    val = attrs.get(key)
    if val is None:
        return default

    if isinstance(val, np.ndarray):
        # A string attribute may arrive wrapped in a 0-d or 1-element array;
        # str() on the array itself would yield text like "[b'nm']" instead
        # of "nm".  Multi-element arrays have no single meaning here.
        if val.size != 1:
            return default
        val = val.item()

    if isinstance(val, bytes):
        text = val.decode("utf-8", errors="replace")
    else:
        text = str(val)
    return text.strip() or default
|
|
|
|
|
|
def _attr_float(attrs, key: str, default: float) -> float:
    """Return attribute *key* from *attrs* converted to ``float``.

    Args:
        attrs: Mapping-like object exposing ``get`` (e.g. a dataset's
            ``attrs``).
        key: Attribute name to look up.
        default: Value returned when the attribute is missing, is an array
            with more than one element, or cannot be converted to ``float``.

    Returns:
        The attribute as a Python ``float``, or *default*.
    """
    val = attrs.get(key)
    if val is None:
        return default

    if isinstance(val, np.ndarray):
        # Unwrap single-element arrays explicitly.  Calling float() on an
        # array with ndim > 0 is deprecated in NumPy (>= 1.25); once it raises
        # TypeError the except-clause below would silently discard valid
        # metadata in favour of the default.
        if val.size != 1:
            return default
        val = val.item()

    try:
        return float(val)
    except (TypeError, ValueError):
        return default
|
|
|
|
|
|
def load(path: Path) -> list[DataField]:
    """Load every 2-D numeric dataset in an HDF5 file as a DataField.

    Each dataset is read into a ``float64`` array.  Physical metadata comes
    from the dataset's own attributes, with fallbacks when an attribute is
    absent or unusable: ``xreal``/``yreal`` default to ``1e-6``,
    ``xoff``/``yoff`` to ``0.0`` and ``si_unit_xy``/``si_unit_z`` to ``"m"``.

    Args:
        path: Location of the HDF5 file.

    Returns:
        One DataField per matching dataset, in the order the file is walked.

    Raises:
        ImportError: If ``h5py`` is not installed.
        ValueError: If the file contains no 2-D numeric dataset.
    """
    try:
        import h5py
    except ImportError as exc:
        # Chain the original error so the underlying import failure stays
        # visible in the traceback.
        raise ImportError("Install 'h5py' to load HDF5 files: pip install h5py") from exc

    # Accept plain-string paths too; ``path.name`` is needed for the message.
    path = Path(path)

    with h5py.File(str(path), "r") as f:
        datasets = _iter_2d_datasets(f)
        if not datasets:
            raise ValueError(f"No 2-D numeric datasets found in {path.name}")

        fields = []
        for _, ds in datasets:
            attrs = ds.attrs
            fields.append(DataField(
                # Materialise the data while the file is still open.
                data=np.asarray(ds, dtype=np.float64),
                xreal=_attr_float(attrs, "xreal", 1e-6),
                yreal=_attr_float(attrs, "yreal", 1e-6),
                xoff=_attr_float(attrs, "xoff", 0.0),
                yoff=_attr_float(attrs, "yoff", 0.0),
                si_unit_xy=_attr_str(attrs, "si_unit_xy", "m"),
                si_unit_z=_attr_str(attrs, "si_unit_z", "m"),
            ))
    return fields
|
|
|
|
|
|
def channel_names(path: Path) -> list[str]:
    """Return a display name for each 2-D numeric dataset in the file.

    For a nested dataset the name is the second-to-last component of its
    path (the containing group); for a top-level dataset it is the dataset's
    own name.  Names are listed in the order the datasets are visited.

    This is best-effort: if ``h5py`` is not installed, or anything goes wrong
    while opening or walking the file, an empty list is returned instead of
    raising.
    """
    try:
        import h5py
    except ImportError:
        return []

    def _label(dataset_path):
        # Only the last two components matter, so split from the right.
        pieces = dataset_path.rsplit("/", 2)
        return pieces[-1] if len(pieces) < 2 else pieces[-2]

    try:
        with h5py.File(str(path), "r") as handle:
            return [_label(full) for full, _ in _iter_2d_datasets(handle)]
    except Exception:
        return []
|