import pickle
from dataclasses import dataclass
from pathlib import Path
from typing import Tuple

import h5py
import numpy as np
import pandas as pd
from pandas import DataFrame


@dataclass
class ParticlesMeta:
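    """Metadata for a particle set; currently just the (uniform) particle mass."""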
    particle_mass: float


def read_file(file: Path) -> Tuple[pd.DataFrame, ParticlesMeta]:
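    """Read PartType1 particle data from an HDF5 snapshot.

    Returns a DataFrame of coordinates (plus FOF group IDs when present)
    indexed by particle ID, together with the particle-mass metadata.
    Results are cached as pickle files next to the input file.
    """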
    cache_file = file.with_suffix(".cache.pickle")
    meta_cache_file = file.with_suffix(".cache_meta.pickle")
    if not (cache_file.exists() and meta_cache_file.exists()):
        reference_file = h5py.File(file)
        has_fof = "FOFGroupIDs" in reference_file["PartType1"]
        try:
            # Read the masses into memory as a numpy array so the equality
            # check below is a plain element-wise numpy comparison.
            masses = reference_file["PartType1"]["Masses"][:]
            if not np.all(masses == masses[0]):
                raise ValueError("only equal-mass particles are supported for now")
            meta = ParticlesMeta(particle_mass=float(masses[0]))
        except KeyError:
            # The snapshot stores no per-particle masses.
            meta = ParticlesMeta(particle_mass=0)
        df = pd.DataFrame(
            reference_file["PartType1"]["Coordinates"], columns=["X", "Y", "Z"]
        )
        if has_fof:
            # FOF group IDs repeat heavily, so a categorical column is more compact.
            df2 = pd.DataFrame(
                reference_file["PartType1"]["FOFGroupIDs"], columns=["FOFGroupIDs"]
            ).astype("category")
            df = df.merge(df2, "outer", left_index=True, right_index=True)
            del df2
        df3 = pd.DataFrame(
            reference_file["PartType1"]["ParticleIDs"], columns=["ParticleIDs"]
        )
        df = df.merge(df3, "outer", left_index=True, right_index=True)
        del df3
        df.set_index("ParticleIDs", inplace=True)
        if has_fof:
            print("sorting")
            df.sort_values("FOFGroupIDs", inplace=True)
        print("saving cache")
        with meta_cache_file.open("wb") as f:
            pickle.dump(meta, f)
        df.to_pickle(str(cache_file))
        reference_file.close()
        return df, meta
    print("from cache")
    df = pd.read_pickle(str(cache_file))
    with meta_cache_file.open("rb") as f:
        meta = pickle.load(f)
    return df, meta


def read_halo_file(file: Path) -> DataFrame:
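    """Read an FOF halo catalogue into a DataFrame of group centres, masses
    and sizes, indexed by group ID."""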
    # file = path / "fof_output_0004.hdf5"
    with h5py.File(file) as reference_file:
        # pd.DataFrame copies the HDF5 data into memory, so the file can be
        # closed as soon as the columns are materialised.
        df1 = pd.DataFrame(reference_file["Groups"]["Centres"], columns=["X", "Y", "Z"])
        df2 = pd.DataFrame(reference_file["Groups"]["GroupIDs"], columns=["GroupIDs"])
        df3 = pd.DataFrame(reference_file["Groups"]["Masses"], columns=["Masses"])
        df4 = pd.DataFrame(reference_file["Groups"]["Sizes"], columns=["Sizes"])
    df = df1.merge(df2, "outer", left_index=True, right_index=True)
    df = df.merge(df3, "outer", left_index=True, right_index=True)
    df = df.merge(df4, "outer", left_index=True, right_index=True)
    df.set_index("GroupIDs", inplace=True)
    return df


def read_fof_file(path: Path):
    # Incomplete stub: the FOF output filename is left unspecified here.
    file = path / ""


def read_g4_file(
    file: Path, zoom_type: str
) -> Tuple[np.ndarray, np.ndarray, float, float]:
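    """Read high- and low-resolution particle coordinates and masses from a
    Gadget-4 zoom snapshot.

    zoom_type selects which particle types hold the high- and low-resolution
    particles ("pbh": PartType0/PartType1, "cdm": PartType1/PartType2).
    Masses come from the header MassTable, divided by the Hubble parameter;
    coordinates are returned as stored, in Mpc/h.
    """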
    with h5py.File(file) as reference_file:
        hubble_param = reference_file["Parameters"].attrs["HubbleParam"]
        masstable = reference_file["Header"].attrs["MassTable"]
        if zoom_type == "pbh":
            highres_parttype = "PartType0"
            lowres_parttype = "PartType1"
            highres_mass = masstable[0] / hubble_param
            lowres_mass = masstable[1] / hubble_param
        elif zoom_type == "cdm":
            highres_parttype = "PartType1"
            lowres_parttype = "PartType2"
            highres_mass = masstable[1] / hubble_param
            lowres_mass = masstable[2] / hubble_param
        else:
            raise ValueError("zoom_type must be 'pbh' or 'cdm'")
        # All coordinates are in Mpc/h; no unit conversion is applied here.
        highres_coordinates = reference_file[highres_parttype]["Coordinates"][:]
        lowres_coordinates = reference_file[lowres_parttype]["Coordinates"][:]

    return highres_coordinates, lowres_coordinates, highres_mass, lowres_mass
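

# Example usage, assuming a snapshot and its matching FOF catalogue exist in
# the working directory; both filenames here are placeholders (the FOF name
# follows the commented example in read_halo_file).
if __name__ == "__main__":
    particles, particles_meta = read_file(Path("snapshot_0004.hdf5"))
    halos = read_halo_file(Path("fof_output_0004.hdf5"))
    print(particles.head())
    print(halos.head())
    print("particle mass:", particles_meta.particle_mass)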