import pickle
from dataclasses import dataclass
from pathlib import Path
from typing import Tuple

import h5py
import numpy as np
import pandas as pd
from pandas import DataFrame


@dataclass
class ParticlesMeta:
    particle_mass: float


def read_file(file: Path) -> Tuple[pd.DataFrame, ParticlesMeta]:
    cache_file = file.with_suffix(".cache.pickle")
    meta_cache_file = file.with_suffix(".cache_meta.pickle")
    if not (cache_file.exists() and meta_cache_file.exists()):
        reference_file = h5py.File(file, "r")
        has_fof = "FOFGroupIDs" in reference_file["PartType1"]
        # Read the masses into a NumPy array; comparing the raw h5py dataset
        # against a scalar would not broadcast elementwise.
        masses = reference_file["PartType1"]["Masses"][:]
        if not np.all(masses == masses[0]):
            raise ValueError("only equal-mass particles are supported for now")
        df = pd.DataFrame(
            reference_file["PartType1"]["Coordinates"], columns=["X", "Y", "Z"]
        )
        if has_fof:
            df2 = pd.DataFrame(
                reference_file["PartType1"]["FOFGroupIDs"], columns=["FOFGroupIDs"]
            ).astype("category")
            df = df.merge(df2, "outer", left_index=True, right_index=True)
            del df2
        df3 = pd.DataFrame(
            reference_file["PartType1"]["ParticleIDs"], columns=["ParticleIDs"]
        )
        df = df.merge(df3, "outer", left_index=True, right_index=True)
        del df3
        df.set_index("ParticleIDs", inplace=True)
        if has_fof:
            print("sorting")
            df.sort_values("FOFGroupIDs", inplace=True)
        meta = ParticlesMeta(particle_mass=masses[0])
        print("saving cache")
        with meta_cache_file.open("wb") as f:
            pickle.dump(meta, f)
        df.to_pickle(str(cache_file))
        reference_file.close()
        return df, meta
    print("from cache")
    df = pd.read_pickle(str(cache_file))
    with meta_cache_file.open("rb") as f:
        meta = pickle.load(f)
    return df, meta


def read_halo_file(file: Path) -> DataFrame:
    # file = path / "fof_output_0004.hdf5"
    reference_file = h5py.File(file, "r")
    df1 = pd.DataFrame(reference_file["Groups"]["Centres"], columns=["X", "Y", "Z"])
    df2 = pd.DataFrame(reference_file["Groups"]["GroupIDs"], columns=["GroupIDs"])
    df3 = pd.DataFrame(reference_file["Groups"]["Masses"], columns=["Masses"])
    df4 = pd.DataFrame(reference_file["Groups"]["Sizes"], columns=["Sizes"])
    df = df1.merge(df2, "outer", left_index=True, right_index=True)
    df = df.merge(df3, "outer", left_index=True, right_index=True)
    df = df.merge(df4, "outer", left_index=True, right_index=True)
    df.set_index("GroupIDs", inplace=True)
    # Close the HDF5 file once the data has been copied into the DataFrame.
    reference_file.close()
    return df


def read_fof_file(path: Path):
    # TODO: unfinished stub; the FOF output filename is still missing here.
    file = path / ""


def read_g4_file(file: Path, zoom_type: str) -> Tuple[np.ndarray, np.ndarray]:
    reference_file = h5py.File(file, "r")
    hubble_param = reference_file["Parameters"].attrs["HubbleParam"]
    if zoom_type == "pbh":
        highres_parttype = "PartType0"
        lowres_parttype = "PartType1"
    elif zoom_type == "cdm":
        highres_parttype = "PartType1"
        lowres_parttype = "PartType2"
        # Divide the MassTable entry by HubbleParam to remove the h factor.
        # Note: highres_mass is currently computed but not returned.
        highres_mass = reference_file["Header"].attrs["MassTable"][1] / hubble_param
    else:
        raise ValueError("Please select pbh or cdm as zoom_type!")
    # All coordinates are in Mpc/h, without any unit conversion applied.
    highres_coordinates = reference_file[highres_parttype]["Coordinates"][:]
    lowres_coordinates = reference_file[lowres_parttype]["Coordinates"][:]
    reference_file.close()
    return highres_coordinates, lowres_coordinates
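

# Hedged usage sketch showing how these readers are meant to be combined.
# The snapshot filename "snapshot_0004.hdf5" and the "run" directory are
# assumptions for illustration only; "fof_output_0004.hdf5" follows the
# naming hinted at in read_halo_file. Substitute your actual output files.
if __name__ == "__main__":
    run_dir = Path("run")  # hypothetical simulation output directory
    # Particles are cached as pickles on first read, so repeated runs are fast.
    particles, particles_meta = read_file(run_dir / "snapshot_0004.hdf5")
    print(f"read {len(particles)} particles of mass {particles_meta.particle_mass}")
    # Halo catalogue indexed by GroupIDs, matching the FOFGroupIDs column above.
    halos = read_halo_file(run_dir / "fof_output_0004.hdf5")
    print(halos.head())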