# halo_comparison/readfiles.py
# from https://github.com/Findus23/halo_comparison.git

import pickle
from dataclasses import dataclass
from pathlib import Path
from typing import Tuple

import h5py
import numpy as np
import pandas as pd
from pandas import DataFrame


@dataclass
class ParticlesMeta:
    particle_mass: float


def read_file(file: Path) -> Tuple[pd.DataFrame, ParticlesMeta]:
    # the particle DataFrame and its metadata are cached as pickle files
    # next to the snapshot, so repeated reads skip the HDF5 parsing
    cache_file = file.with_suffix(".cache.pickle")
    meta_cache_file = file.with_suffix(".cache_meta.pickle")
    if not (cache_file.exists() and meta_cache_file.exists()):
        reference_file = h5py.File(file)
        has_fof = "FOFGroupIDs" in reference_file["PartType1"]
        try:
            # read into memory so the comparison below is elementwise
            masses = reference_file["PartType1"]["Masses"][:]
            if not np.all(masses == masses[0]):
                raise ValueError("only equal mass particles are supported for now")
            meta = ParticlesMeta(particle_mass=masses[0])
        except KeyError:
            # the snapshot has no Masses dataset
            meta = ParticlesMeta(particle_mass=0)
        df = pd.DataFrame(
            reference_file["PartType1"]["Coordinates"], columns=["X", "Y", "Z"]
        )
        if has_fof:
            df2 = pd.DataFrame(
                reference_file["PartType1"]["FOFGroupIDs"], columns=["FOFGroupIDs"]
            ).astype("category")
            df = df.merge(df2, "outer", left_index=True, right_index=True)
            del df2
        df3 = pd.DataFrame(
            reference_file["PartType1"]["ParticleIDs"], columns=["ParticleIDs"]
        )
        df = df.merge(df3, "outer", left_index=True, right_index=True)
        del df3
        df.set_index("ParticleIDs", inplace=True)
        if has_fof:
            print("sorting")
            df.sort_values("FOFGroupIDs", inplace=True)
        print("saving cache")
        with meta_cache_file.open("wb") as f:
            pickle.dump(meta, f)
        df.to_pickle(str(cache_file))
        reference_file.close()
        return df, meta
    print("from cache")
    df = pd.read_pickle(str(cache_file))
    with meta_cache_file.open("rb") as f:
        meta = pickle.load(f)
    return df, meta


def read_halo_file(file: Path) -> DataFrame:
    # file = path / "fof_output_0004.hdf5"
    reference_file = h5py.File(file)
    df1 = pd.DataFrame(reference_file["Groups"]["Centres"], columns=["X", "Y", "Z"])
    df2 = pd.DataFrame(reference_file["Groups"]["GroupIDs"], columns=["GroupIDs"])
    df3 = pd.DataFrame(reference_file["Groups"]["Masses"], columns=["Masses"])
    df4 = pd.DataFrame(reference_file["Groups"]["Sizes"], columns=["Sizes"])
    df = df1.merge(df2, "outer", left_index=True, right_index=True)
    df = df.merge(df3, "outer", left_index=True, right_index=True)
    df = df.merge(df4, "outer", left_index=True, right_index=True)
    df.set_index("GroupIDs", inplace=True)
    return df


def read_gadget_halos(directory: Path) -> DataFrame:
    reference_file = h5py.File(directory / "output" / "fof_subhalo_tab_019.hdf5")
    df1 = pd.DataFrame(reference_file["Subhalo"]["SubhaloPos"], columns=["X", "Y", "Z"])
    df2 = pd.DataFrame(reference_file["Subhalo"]["SubhaloMass"], columns=["Masses"])
    df = df1.merge(df2, "outer", left_index=True, right_index=True)
    return df


def read_fof_file(path: Path):
    # unfinished stub: the FOF file name is missing in the original source
    file = path / ""


def read_g4_file(file: Path, zoom_type: str) -> Tuple[np.ndarray, np.ndarray, float, float]:
    with h5py.File(file) as reference_file:
        hubble_param = reference_file["Parameters"].attrs["HubbleParam"]
        masstable = reference_file["Header"].attrs["MassTable"]
        if zoom_type == "pbh":
            highres_parttype = "PartType0"
            lowres_parttype = "PartType1"
            highres_mass = masstable[0] / hubble_param
            lowres_mass = masstable[1] / hubble_param
        elif zoom_type == "cdm":
            highres_parttype = "PartType1"
            lowres_parttype = "PartType2"
            highres_mass = masstable[1] / hubble_param
            lowres_mass = masstable[2] / hubble_param
        else:
            raise ValueError("Please select pbh or cdm as zoom_type!")

        # all coordinates are in Mpc/h, without any unit conversion!
        highres_coordinates = reference_file[highres_parttype]["Coordinates"][:]
        lowres_coordinates = reference_file[lowres_parttype]["Coordinates"][:]

        return highres_coordinates, lowres_coordinates, highres_mass, lowres_mass
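

# A minimal usage sketch, assuming a snapshot and a matching FOF catalogue
# exist under the hypothetical file names below; adjust them to your own run.
if __name__ == "__main__":
    snapshot = Path("output_0004.hdf5")  # hypothetical snapshot file name
    fof_catalogue = Path("fof_output_0004.hdf5")  # as in the read_halo_file comment

    particles, particles_meta = read_file(snapshot)  # cached after the first call
    print(particles.head())
    print("particle mass:", particles_meta.particle_mass)

    halos = read_halo_file(fof_catalogue)
    print(halos.head())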