Mirror of https://github.com/Findus23/halo_comparison.git

initial version

Lukas Winkler 2022-05-04 13:42:57 +02:00
commit ef8046ff86
5 changed files with 255 additions and 0 deletions

.gitignore vendored Normal file (+3)

@@ -0,0 +1,3 @@
.idea/
*.png
*.csv

main.py Normal file (+139)

@@ -0,0 +1,139 @@
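# Match FOF halos between a low-resolution reference run and a higher-resolution
# comparison run: for each reference halo, rescale its particle IDs with IDScaler,
# pick the comparison halo that shares the most particles, and save the matched
# sizes and masses to sizes.csv.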
from pathlib import Path

import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.axes import Axes
from matplotlib.figure import Figure
from pandas import DataFrame

from readfiles import read_file, read_halo_file
from remap_particle_IDs import IDScaler

REFERENCE_RESOLUTION = 128
COMPARISON_RESOLUTION = 512
PLOT = True
SINGLE = False

reference_dir = Path(f"/home/lukas/monofonic_tests/shannon_{REFERENCE_RESOLUTION}_100")
comparison_dir = Path(f"/home/lukas/monofonic_tests/shannon_{COMPARISON_RESOLUTION}_100/")
# comparison_dir = Path(f"/home/lukas/monofonic_tests/DB8_{COMPARISON_RESOLUTION}_100/")

ref_masses = []
comp_masses = []
ref_sizes = []
comp_sizes = []

print("reading reference file")
df_ref = read_file(reference_dir)
df_ref_halo = read_halo_file(reference_dir)

print("reading comparison file")
df_comp = read_file(comparison_dir)
df_comp_halo = read_halo_file(comparison_dir)

bytes_used = df_ref.memory_usage(index=True).sum()
print(f"Memory: {bytes_used / 1024 / 1024:.2f} MB")
print(df_ref.dtypes)

for index, original_halo in df_ref_halo[:5].iterrows():
    print(index)
    print(len(df_ref))
    particles_in_ref_halo = df_ref.loc[df_ref["FOFGroupIDs"] == index]
    ref_halo = df_ref_halo.loc[index]
    halo_particle_ids = set(particles_in_ref_halo.index.to_list())

    if REFERENCE_RESOLUTION < COMPARISON_RESOLUTION:
        print("upscaling IDs")
        upscaled_ids = set()
        prev_len = len(halo_particle_ids)
        print(prev_len)
        scaler = IDScaler(REFERENCE_RESOLUTION, COMPARISON_RESOLUTION)
        # i = 0
        for id in halo_particle_ids:
            # i += 1
            # if i % 1000 == 0:
            #     print(i)
            upscaled_ids.update(set(scaler.upscale(id)))
        halo_particle_ids = upscaled_ids
        after_len = len(upscaled_ids)
        print(after_len)
        print(after_len / prev_len)
        print("done")

    if COMPARISON_RESOLUTION < REFERENCE_RESOLUTION:
        print("downscaling IDs")
        prev_count = len(halo_particle_ids)
        print(prev_count)
        downscaled_ids = set()
        scaler = IDScaler(COMPARISON_RESOLUTION, REFERENCE_RESOLUTION)
        for id in halo_particle_ids:
            downscaled_ids.add(scaler.downscale(id))
        halo_particle_ids = downscaled_ids
        print("done")
        after_count = len(halo_particle_ids)
        print(after_count)
        print(prev_count / after_count)

    particles = df_comp.loc[list(halo_particle_ids)]
    # print(particles)
    halos_in_particles = set(particles["FOFGroupIDs"])
    # 2147483647 (2**31 - 1) presumably marks particles that belong to no FOF group
    halos_in_particles.discard(2147483647)
    # print(halos_in_particles)

    if PLOT:
        fig: Figure = plt.figure()
        ax: Axes = fig.gca()
        ax.scatter(particles["X"], particles["Y"], s=1, alpha=.3, label="Halo")
        # ax.scatter(particles_in_ref_halo["X"], particles_in_ref_halo["Y"], s=1, alpha=.3, label="RefHalo")
        # plt.legend()
        # plt.show()

    best_halo = None
    best_halo_match = 0

    for halo in halos_in_particles:
        # print("----------", halo, "----------")
        print(halo)
        halo_data = df_comp_halo.loc[halo]
        particles_in_comp_halo: DataFrame = df_comp.loc[df_comp["FOFGroupIDs"] == halo]
        halo_size = len(particles_in_comp_halo)
        df = particles_in_comp_halo.join(particles, how="inner", rsuffix="ref")
        shared_size = len(df)
        match = shared_size / halo_size
        # print(match, halo_size, shared_size)
        # print(df)
        if PLOT:
            ax.scatter(df["X"], df["Y"], s=1, alpha=.3, label=f"shared {halo}")
            # ax.scatter(particles_in_comp_halo["X"], particles_in_comp_halo["Y"], s=2, alpha=.3, label=f"shared {halo}")
        if shared_size > best_halo_match:
            best_halo_match = shared_size
            best_halo = halo
    # print("-------")
    # print(best_halo)

    comp_halo = df_comp_halo.loc[best_halo]
    print(ref_halo)
    print(comp_halo)
    ref_sizes.append(ref_halo["Sizes"])
    ref_masses.append(ref_halo["Masses"])
    comp_sizes.append(comp_halo["Sizes"])
    comp_masses.append(comp_halo["Masses"])
    # exit()

    if PLOT:
        ax.legend()
        ax.set_title(f"{reference_dir.name} vs. {comparison_dir.name} (Halo {index})")
        fig.savefig("out.png", dpi=300)
        plt.show()
    if SINGLE:
        break

df = DataFrame(np.array([ref_sizes, comp_sizes, ref_masses, comp_masses]).T,
               columns=["ref_sizes", "comp_sizes", "ref_masses", "comp_masses"])
print(df)
df.to_csv("sizes.csv", index=False)

readfiles.py Normal file (+41)

@@ -0,0 +1,41 @@
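# Load the simulation snapshot (output_0004.hdf5) and the FOF group catalogue
# (fof_output_0004.hdf5) into pandas DataFrames; read_file() pickles the particle
# table to a "cache" file next to the snapshot so later runs skip the HDF5 parsing.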
from pathlib import Path

import h5py
import pandas as pd
from pandas import DataFrame


def read_file(path: Path) -> pd.DataFrame:
    cache_file = path / "cache"
    if not cache_file.exists():
        file = path / "output_0004.hdf5"
        reference_file = h5py.File(file)
        df = pd.DataFrame(reference_file["PartType1"]["Coordinates"], columns=["X", "Y", "Z"])
        df2 = pd.DataFrame(reference_file["PartType1"]["FOFGroupIDs"], columns=["FOFGroupIDs"]).astype("category")
        df = df.merge(df2, "outer", left_index=True, right_index=True)
        del df2
        df3 = pd.DataFrame(reference_file["PartType1"]["ParticleIDs"], columns=["ParticleIDs"])
        df = df.merge(df3, "outer", left_index=True, right_index=True)
        del df3
        df.set_index("ParticleIDs", inplace=True)
        print("saving cache")
        df.to_pickle(str(cache_file))
        return df
    print("from cache")
    df = pd.read_pickle(str(cache_file))
    return df


def read_halo_file(path: Path) -> DataFrame:
    file = path / "fof_output_0004.hdf5"
    reference_file = h5py.File(file)
    df1 = pd.DataFrame(reference_file["Groups"]["Centres"], columns=["X", "Y", "Z"])
    df2 = pd.DataFrame(reference_file["Groups"]["GroupIDs"], columns=["GroupIDs"])
    df3 = pd.DataFrame(reference_file["Groups"]["Masses"], columns=["Masses"])
    df4 = pd.DataFrame(reference_file["Groups"]["Sizes"], columns=["Sizes"])
    df = df1.merge(df2, "outer", left_index=True, right_index=True)
    df = df.merge(df3, "outer", left_index=True, right_index=True)
    df = df.merge(df4, "outer", left_index=True, right_index=True)
    df.set_index("GroupIDs", inplace=True)
    return df

remap_particle_IDs.py Normal file (+53)

@@ -0,0 +1,53 @@
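# Translate particle IDs between two resolutions, assuming an ID encodes the initial
# lattice position as id = (i * Nres + j) * Nres + k: upscale() yields the N^3 fine-grid
# IDs covering one coarse cell, and downscale() maps a fine ID back to its coarse cell.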
from itertools import product

import numpy as np


class IDScaler:
    def __init__(self, Nres_min: int, Nres_max: int):
        assert Nres_max % Nres_min == 0
        self.Nres_min = Nres_min
        self.Nres_max = Nres_max
        self.N = Nres_max // Nres_min
        self.shifts = []
        for shift in product(range(self.N), repeat=3):
            self.shifts.append(np.array(shift))

    @staticmethod
    def original_position(Nres: int, particle_ID: int):
        particle_k = particle_ID % Nres
        particle_j = ((particle_ID - particle_k) // Nres) % Nres
        particle_i = ((particle_ID - particle_k) // Nres - particle_j) // Nres
        return np.array([particle_i, particle_j, particle_k])

    def upscale(self, particle_ID: int):
        orig = self.original_position(self.Nres_min, particle_ID)
        mult = orig * self.N
        for shift in self.shifts:
            variant = mult + shift
            yield ((variant[0] * self.Nres_max) + variant[1]) * self.Nres_max + variant[2]

    def downscale(self, particle_ID: int):
        orig = self.original_position(self.Nres_max, particle_ID)
        mult = np.floor_divide(orig, self.N)
        return ((mult[0] * self.Nres_min) + mult[1]) * self.Nres_min + mult[2]


if __name__ == "__main__":
    test_particle = np.array([0, 0, 127])
    # maximum_test = np.array([127, 127, 127])  # this works, Nres - 1 is the maximum for (i, j, k)
    Nres_1 = 128
    Nres_2 = 256
    test_particle_id = ((test_particle[0] * Nres_1) + test_particle[1]) * Nres_1 + test_particle[2]
    print(test_particle_id)
    scaler = IDScaler(Nres_1, Nres_2)
    particle_ID_1_converted = scaler.upscale(test_particle_id)
    for id in particle_ID_1_converted:
        reverse = scaler.downscale(id)
        print(id, reverse)
        assert reverse == test_particle_id
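For illustration, a minimal round trip under the same ID convention (a sketch; the values follow from the formula above, not from the repository):

from remap_particle_IDs import IDScaler

scaler = IDScaler(128, 256)             # coarse 128^3 grid -> fine 256^3 grid
fine_ids = sorted(scaler.upscale(127))  # coarse ID 127 is lattice position (0, 0, 127)
# fine_ids == [254, 255, 510, 511, 65790, 65791, 66046, 66047]: the 2x2x2 block of fine cells
assert all(scaler.downscale(fine_id) == 127 for fine_id in fine_ids)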

sizes.py Normal file (+19)

@@ -0,0 +1,19 @@
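# Plot reference vs. comparison halo masses from sizes.csv (produced by main.py)
# on log-log axes, limited to the first 50 matched halos.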
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib.axes import Axes
from matplotlib.figure import Figure

df = pd.read_csv("sizes.csv")
print(df)
df = df.iloc[:50]

fig: Figure = plt.figure()
ax: Axes = fig.gca()
# ax.scatter(df["ref_sizes"], df["comp_sizes"], s=1, alpha=.3)
ax.scatter(df["ref_masses"], df["comp_masses"], s=3)
ax.set_xscale("log")
ax.set_yscale("log")
plt.show()