halo_comparison/readfiles.py

import pickle
from dataclasses import dataclass
from pathlib import Path
from typing import Tuple

import h5py
import numpy as np
import pandas as pd
from pandas import DataFrame


@dataclass
class ParticlesMeta:
    particle_mass: float


def read_file(file: Path) -> Tuple[pd.DataFrame, ParticlesMeta]:
    cache_file = file.with_suffix(".cache.pickle")
    meta_cache_file = file.with_suffix(".cache_meta.pickle")
    if not (cache_file.exists() and meta_cache_file.exists()):
        reference_file = h5py.File(file)
        has_fof = "FOFGroupIDs" in reference_file["PartType1"]
        try:
            # All particles must share a single mass; store it in the metadata.
            masses = reference_file["PartType1"]["Masses"]
            if not np.all(masses == masses[0]):
                raise ValueError("only equal-mass particles are supported for now")
            meta = ParticlesMeta(particle_mass=masses[0])
        except KeyError:
            # The snapshot has no Masses dataset; fall back to a dummy value.
            meta = ParticlesMeta(particle_mass=0)
        df = pd.DataFrame(
            reference_file["PartType1"]["Coordinates"], columns=["X", "Y", "Z"]
        )
        if has_fof:
            # FOF group IDs are stored as a categorical column to save memory.
            df2 = pd.DataFrame(
                reference_file["PartType1"]["FOFGroupIDs"], columns=["FOFGroupIDs"]
            ).astype("category")
            df = df.merge(df2, "outer", left_index=True, right_index=True)
            del df2
        df3 = pd.DataFrame(
            reference_file["PartType1"]["ParticleIDs"], columns=["ParticleIDs"]
        )
        df = df.merge(df3, "outer", left_index=True, right_index=True)
        del df3
        df.set_index("ParticleIDs", inplace=True)
        if has_fof:
            print("sorting")
            df.sort_values("FOFGroupIDs", inplace=True)
        # Cache both the DataFrame and the metadata so later calls skip the HDF5 read.
        print("saving cache")
        with meta_cache_file.open("wb") as f:
            pickle.dump(meta, f)
        df.to_pickle(str(cache_file))
        reference_file.close()
        return df, meta
    print("from cache")
    df = pd.read_pickle(str(cache_file))
    with meta_cache_file.open("rb") as f:
        meta = pickle.load(f)
    return df, meta
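
# Example usage (a minimal sketch; "snapshot_0004.hdf5" is a hypothetical
# snapshot filename, not one shipped with this repository):
#
#     df, meta = read_file(Path("snapshot_0004.hdf5"))
#     print(meta.particle_mass, df[["X", "Y", "Z"]].head())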


def read_halo_file(file: Path) -> DataFrame:
    # file = path / "fof_output_0004.hdf5"
    reference_file = h5py.File(file)
    # Combine centre, ID, mass and size of each FOF group into one DataFrame,
    # indexed by group ID.
    df1 = pd.DataFrame(reference_file["Groups"]["Centres"], columns=["X", "Y", "Z"])
    df2 = pd.DataFrame(reference_file["Groups"]["GroupIDs"], columns=["GroupIDs"])
    df3 = pd.DataFrame(reference_file["Groups"]["Masses"], columns=["Masses"])
    df4 = pd.DataFrame(reference_file["Groups"]["Sizes"], columns=["Sizes"])
    df = df1.merge(df2, "outer", left_index=True, right_index=True)
    df = df.merge(df3, "outer", left_index=True, right_index=True)
    df = df.merge(df4, "outer", left_index=True, right_index=True)
    df.set_index("GroupIDs", inplace=True)
    return df
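
# Example usage (sketch; "fof_output_0004.hdf5" follows the filename hinted
# at in the comment inside the function above):
#
#     halos = read_halo_file(Path("fof_output_0004.hdf5"))
#     print(halos.nlargest(10, "Masses"))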


def read_fof_file(path: Path):
    # Unfinished stub: the FOF output filename has not been filled in yet.
    file = path / ""


def read_g4_file(
    file: Path, zoom_type: str
) -> Tuple[np.ndarray, np.ndarray, float, float]:
    with h5py.File(file) as reference_file:
        hubble_param = reference_file["Parameters"].attrs["HubbleParam"]
        masstable = reference_file["Header"].attrs["MassTable"]
        # The zoom simulations store the high- and low-resolution particles
        # in different particle types depending on the type of run.
        if zoom_type == "pbh":
            highres_parttype = "PartType0"
            lowres_parttype = "PartType1"
            highres_mass = masstable[0] / hubble_param
            lowres_mass = masstable[1] / hubble_param
        elif zoom_type == "cdm":
            highres_parttype = "PartType1"
            lowres_parttype = "PartType2"
            highres_mass = masstable[1] / hubble_param
            lowres_mass = masstable[2] / hubble_param
        else:
            raise ValueError("Please select pbh or cdm as zoom_type!")
        # all coordinates in Mpc/h without adaptation!
        highres_coordinates = reference_file[highres_parttype]["Coordinates"][:]
        lowres_coordinates = reference_file[lowres_parttype]["Coordinates"][:]
    return highres_coordinates, lowres_coordinates, highres_mass, lowres_mass
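
# Example usage (sketch; "snapshot_000.hdf5" is a hypothetical snapshot
# filename for a Gadget-4-style output):
#
#     highres, lowres, m_high, m_low = read_g4_file(Path("snapshot_000.hdf5"), "cdm")
#     print(highres.shape, m_high)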