1
0
Fork 0
mirror of https://github.com/Findus23/halo_comparison.git synced 2024-09-19 16:03:50 +02:00
halo_comparison/readfiles.py

69 lines
2.4 KiB
Python
Raw Normal View History

2022-05-05 11:10:07 +02:00
import pickle
2022-05-06 09:51:43 +02:00
import sqlite3
2022-05-05 11:10:07 +02:00
from dataclasses import dataclass
2022-05-04 13:42:57 +02:00
from pathlib import Path
2022-05-05 11:10:07 +02:00
from typing import Tuple
2022-05-04 13:42:57 +02:00
import h5py
2022-05-05 11:10:07 +02:00
import numpy as np
2022-05-04 13:42:57 +02:00
import pandas as pd
from pandas import DataFrame
2022-05-05 11:10:07 +02:00
@dataclass
class ParticlesMeta:
    """Metadata accompanying a particle DataFrame loaded by read_file."""

    # Mass of one simulation particle; read_file enforces that all
    # particles share the same mass, so a single float suffices.
    particle_mass: float
def read_file(file: Path) -> Tuple[pd.DataFrame, ParticlesMeta]:
    """Load DM particle data from an HDF5 snapshot, with pickle caching.

    On the first call the ``PartType1`` coordinates, FOF group IDs and
    particle IDs are read from the HDF5 file, assembled into a DataFrame
    indexed by particle ID and sorted by FOF group, then cached next to
    the snapshot as ``*.cache.pickle`` / ``*.cache_meta.pickle``.
    Subsequent calls load straight from those caches.

    Args:
        file: path to the HDF5 snapshot.

    Returns:
        Tuple of (DataFrame indexed by ParticleIDs with columns
        X, Y, Z, FOFGroupIDs; ParticlesMeta with the particle mass).

    Raises:
        ValueError: if the snapshot contains particles of unequal mass.
    """
    cache_file = file.with_suffix(".cache.pickle")
    meta_cache_file = file.with_suffix(".cache_meta.pickle")
    if not (cache_file.exists() and meta_cache_file.exists()):
        # Use a context manager so the HDF5 handle is closed even on
        # error (the original leaked the open file).
        with h5py.File(file) as reference_file:
            masses = reference_file["PartType1"]["Masses"]
            if not np.all(masses == masses[0]):
                raise ValueError("only equal mass particles are supported for now")
            df = pd.DataFrame(reference_file["PartType1"]["Coordinates"], columns=["X", "Y", "Z"])
            # category dtype: group IDs repeat heavily, this saves memory
            df2 = pd.DataFrame(reference_file["PartType1"]["FOFGroupIDs"], columns=["FOFGroupIDs"]).astype("category")
            df = df.merge(df2, "outer", left_index=True, right_index=True)
            del df2
            df3 = pd.DataFrame(reference_file["PartType1"]["ParticleIDs"], columns=["ParticleIDs"])
            df = df.merge(df3, "outer", left_index=True, right_index=True)
            del df3
            df.set_index("ParticleIDs", inplace=True)
            # capture the scalar while the file is still open
            particle_mass = masses[0]
        print("sorting")
        df.sort_values("FOFGroupIDs", inplace=True)
        meta = ParticlesMeta(
            particle_mass=particle_mass
        )
        print("saving cache")
        with meta_cache_file.open("wb") as f:
            pickle.dump(meta, f)
        df.to_pickle(str(cache_file))
        return df, meta
    print("from cache")
    df = pd.read_pickle(str(cache_file))
    with meta_cache_file.open("rb") as f:
        meta = pickle.load(f)
    return df, meta
2022-05-04 13:42:57 +02:00
def read_halo_file(file: Path) -> DataFrame:
    """Load the FOF group catalogue from an HDF5 halo file.

    Args:
        file: path to the FOF/halo HDF5 file (e.g. fof_output_0004.hdf5).

    Returns:
        DataFrame indexed by GroupIDs with columns X, Y, Z (group
        centres), Masses and Sizes.
    """
    # Context manager closes the HDF5 handle (the original leaked it).
    with h5py.File(file) as reference_file:
        groups = reference_file["Groups"]
        # All four datasets share the same row order, so a column-wise
        # concat is equivalent to the chained index-aligned outer merges.
        df = pd.concat(
            [
                pd.DataFrame(groups["Centres"], columns=["X", "Y", "Z"]),
                pd.DataFrame(groups["GroupIDs"], columns=["GroupIDs"]),
                pd.DataFrame(groups["Masses"], columns=["Masses"]),
                pd.DataFrame(groups["Sizes"], columns=["Sizes"]),
            ],
            axis=1,
        )
    df.set_index("GroupIDs", inplace=True)
    return df
2022-05-09 15:20:10 +02:00
def read_fof_file(path:Path):
file=path/""