from pathlib import Path
from typing import Dict, Tuple, Optional

import h5py
import numpy as np
import pandas as pd
from h5py import Dataset
from matplotlib import pyplot as plt
from matplotlib.axes import Axes
from matplotlib.figure import Figure
from pandas import DataFrame

from halo_mass_profile import V
from paths import base_dir
from readfiles import read_file
from utils import print_progress

HaloParticleMapping = Dict[int, set[int]]


def all_children(df: DataFrame, id: int):
    """
    Recursively walk the subhalo tree rooted at the halo `id` and yield the IDs
    of all halos in it that have no subhalos of their own
    (which is just `id` itself if it has none).
    """
    subhalos: pd.DataFrame = df.loc[df.parent_halo_id == id]

    if len(subhalos) == 0:
        yield id

    for sh, value in subhalos.iterrows():
        yield from all_children(df, sh)


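# Hedged example of the traversal (made-up subhalo tree, not from any output):
# if halos 2 and 3 have parent_halo_id == 1 and halo 4 has parent_halo_id == 3,
# then list(all_children(df, 1)) yields [2, 4], i.e. only the halos without
# further subhalos.

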
def particles_in_halo(
    offsets: Dict[int, int], particle_ids: np.ndarray
) -> HaloParticleMapping:
    """
    Get the mapping from halo ID to particle ID set by using the offsets and a lookup in the particle array.
    """
    halo_particle_ids: Dict[int, set[int]] = {}
    pointer = 0
    for halo_id, offset in offsets.items():
        if halo_id % 100 == 0:
            print_progress(halo_id, len(offsets))
        if halo_id != len(offsets):
            end = offsets[halo_id + 1]
        else:  # special handling for last halo: slice to the end of the array
            end = len(particle_ids)
        ids = particle_ids[pointer:end]
        halo_particle_ids[halo_id] = set(ids)
        pointer = end
    return halo_particle_ids
    # if recursively:
    #     # add particles of subhalos to parent halos
    #     # maybe not such a good idea
    #     for halo_id in range(1, len(df) + 1):
    #         sub_n_children = list(all_children(df, halo_id))  # IDs of children, subchildren, ...
    #         if not sub_n_children:
    #             continue
    #         for child_id in sub_n_children:
    #             child_particles = halo_particle_ids[child_id]
    #             halo_particle_ids[halo_id].update(child_particles)


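# Hedged usage sketch for particles_in_halo with made-up offsets and IDs: each
# halo's particles are the slice between its own offset and the next halo's
# offset (or the end of the array for the last halo).
#
#   particle_ids = np.array([11, 12, 13, 14, 15, 16, 17])
#   offsets = {1: 0, 2: 3, 3: 5}  # first index of each halo's particles
#   particles_in_halo(offsets, particle_ids)
#   # -> {1: {11, 12, 13}, 2: {14, 15}, 3: {16, 17}}

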
def cached_particles_in_halo(file: Path, *args, **kwargs) -> HaloParticleMapping:
    """
    Save the mapping from halo ID to set of particle IDs into an HDF5 file.
    Every halo is a dataset with its ID as the name and the list of particles as value.

    Unfortunately this is orders of magnitude slower than doing the calculation itself,
    as HDF5 is not intended for 100K small datasets, making this whole function pointless.
    """
    if file.exists():
        print("loading from cache")
        with h5py.File(file) as data_file:
            halo_particle_ids: HaloParticleMapping = {}
            i = 0
            for name, dataset in data_file.items():
                halo_particle_ids[int(name)] = set(dataset)
                print_progress(i, len(data_file))
                i += 1
    else:
        halo_particle_ids = particles_in_halo(*args, **kwargs)
        print("saving to cache")
        with h5py.File(file, "w") as data_file:
            for key, valueset in halo_particle_ids.items():
                data_file.create_dataset(
                    str(key),
                    data=list(valueset),
                    compression="gzip",
                    compression_opts=5,
                )
    return halo_particle_ids


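# A hedged usage sketch for the (admittedly pointless) cache; the cache file
# name, halo_offsets and particle_ids are only example values:
#
#   cache_file = directory / "vrbound.cache.hdf5"
#   halo_particle_ids = cached_particles_in_halo(cache_file, halo_offsets, particle_ids)

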
def read_velo_halos(directory: Path, veloname="vroutput"):
    """
    Returns a dataframe containing all scalar properties of the halos
    (https://velociraptor-stf.readthedocs.io/en/latest/output.html).
    """
    if (directory / f"{veloname}.catalog_groups.0").exists():
        suffix = ".0"
    else:
        suffix = ""
    group_catalog = h5py.File(directory / f"{veloname}.catalog_groups{suffix}")
    group_properties = h5py.File(directory / f"{veloname}.properties{suffix}")
    scalar_properties = {}
    for k, v in group_properties.items():
        if not isinstance(v, Dataset):
            # skip groups
            continue
        if len(v.shape) != 1:
            # skip (and report) multi-dimensional properties
            print(v)
            continue
        if len(v) == 1:
            # skip global properties like Total_num_of_groups
            continue
        scalar_properties[k] = v
    scalar_properties["X"] = scalar_properties["Xc"]
    scalar_properties["Y"] = scalar_properties["Yc"]
    scalar_properties["Z"] = scalar_properties["Zc"]
    data = {
        "group_size": group_catalog["Group_Size"],
        "offset": group_catalog["Offset"],
        "offset_unbound": group_catalog["Offset_unbound"],
        "parent_halo_id": group_catalog["Parent_halo_ID"],
    }
    df = pd.DataFrame(
        {**data, **scalar_properties}
    )  # create dataframe from two merged dicts
    df.index += 1  # Halo IDs start at 1
    return df


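# A hedged usage sketch: the dataframe is indexed by halo ID (starting at 1), so
# a single halo's scalar properties can be looked up directly; which property
# columns exist beyond the ones added above depends on the VELOCIraptor
# configuration.
#
#   halos = read_velo_halos(directory)
#   print(halos.loc[1][["X", "Y", "Z", "group_size"]])

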
def read_velo_halo_particles(
    directory: Path, skip_halo_particle_ids=False, skip_unbound=True
) -> Tuple[DataFrame, Optional[HaloParticleMapping], Optional[HaloParticleMapping]]:
    """
    This reads the output files of VELOCIraptor
    and returns the halo data from read_velo_halos
    and two dictionaries mapping the halo IDs to sets of particle IDs
    """
    if (directory / "vroutput.catalog_particles.0").exists():
        suffix = ".0"
    else:
        suffix = ""
    df = read_velo_halos(directory)
    particle_catalog = h5py.File(directory / f"vroutput.catalog_particles{suffix}")
    particle_ids = np.asarray(particle_catalog["Particle_IDs"])

    particle_catalog_unbound = h5py.File(
        directory / f"vroutput.catalog_particles.unbound{suffix}"
    )
    particle_ids_unbound = particle_catalog_unbound["Particle_IDs"][:]
    if skip_halo_particle_ids:
        return df, None, None
    print("look up bound particle IDs")
    # particle_cache_file = directory / "vrbound.cache.hdf5"
    # ub_particle_cache_file = directory / "vrunbound.cache.hdf5"
    halo_offsets = dict(df["offset"])
    halo_particle_ids = particles_in_halo(halo_offsets, particle_ids)
    if skip_unbound:
        halo_particle_unbound_ids = {}
    else:
        print("look up unbound particle IDs")
        halo_unbound_offsets = dict(df["offset_unbound"])
        halo_particle_unbound_ids = particles_in_halo(
            halo_unbound_offsets, particle_ids_unbound
        )
    return df, halo_particle_ids, halo_particle_unbound_ids


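# A hedged usage sketch: select all bound particles of one halo from a particle
# dataframe that is indexed by particle ID (like the one returned by read_file).
#
#   df_halo, halo_particle_ids, _ = read_velo_halo_particles(directory)
#   particles_of_halo_1 = particles.loc[list(halo_particle_ids[1])]

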
def read_velo_profiles(directory: Path):
    """
    Read the VELOCIraptor radial profiles and return the bin edges,
    the mass profiles and the density profiles derived from them.
    """
    if (directory / "vroutput.profiles.0").exists():
        suffix = ".0"
    else:
        suffix = ""
    profiles_file = h5py.File(directory / f"vroutput.profiles{suffix}")
    bin_edges = profiles_file["Radial_bin_edges"][1:]

    bin_volumes = V(bin_edges)

    mass_profiles = profiles_file["Mass_profile"][:]

    density_profiles = mass_profiles / bin_volumes

    return bin_edges, mass_profiles, density_profiles


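# A hedged usage sketch: plot the density profile of the first halo on log-log
# axes; the halo index, the row-per-halo layout of the profiles and the axis
# labels are only illustrative.
#
#   bin_edges, mass_profiles, density_profiles = read_velo_profiles(directory)
#   plt.loglog(bin_edges, density_profiles[0])
#   plt.xlabel("r")
#   plt.ylabel("density")
#   plt.show()

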
def main():
    waveform = "shannon"
    Nres = 512
    directory = base_dir / f"{waveform}_{Nres}_100"

    # the unbound particles are plotted below, so they must not be skipped
    df_halo, halo_particle_ids, halo_particle_unbound_ids = read_velo_halo_particles(
        directory, skip_unbound=False
    )
    particles, meta = read_file(directory)
    HALO = 1000
    while True:
        fig: Figure = plt.figure()
        ax: Axes = fig.gca()

        bound_particles = particles.loc[list(halo_particle_ids[HALO])]
        unbound_particles = particles.loc[list(halo_particle_unbound_ids[HALO])]

        ax.scatter(bound_particles.X, bound_particles.Y, label="bound", s=1)
        ax.scatter(unbound_particles.X, unbound_particles.Y, label="unbound", s=1)
        ax.legend()

        plt.show()
        HALO += 1


if __name__ == "__main__":
    main()