mirror of
https://github.com/Findus23/halo_comparison.git
synced 2024-09-19 16:03:50 +02:00
initial version
This commit is contained in:
commit
ef8046ff86
5 changed files with 255 additions and 0 deletions
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
.idea/
|
||||
*.png
|
||||
*.csv
|
139
main.py
Normal file
139
main.py
Normal file
|
@ -0,0 +1,139 @@
|
|||
from pathlib import Path
|
||||
|
||||
import h5py
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from matplotlib.axes import Axes
|
||||
from matplotlib.figure import Figure
|
||||
from pandas import DataFrame
|
||||
|
||||
from readfiles import read_file, read_halo_file
|
||||
from remap_particle_IDs import IDScaler
|
||||
|
||||
# Match FOF halos between two simulations of the same box run at different
# resolutions: map each reference halo's particle IDs to the comparison
# resolution, then find the comparison halo sharing the most particles.
REFERENCE_RESOLUTION = 128
COMPARISON_RESOLUTION = 512
PLOT = True  # draw a scatter plot of the matched halo particles per reference halo
SINGLE = False  # if True, stop after the first reference halo

reference_dir = Path(f"/home/lukas/monofonic_tests/shannon_{REFERENCE_RESOLUTION}_100")
comparison_dir = Path(f"/home/lukas/monofonic_tests/shannon_{COMPARISON_RESOLUTION}_100/")

# comparison_dir = Path(f"/home/lukas/monofonic_tests/DB8_{COMPARISON_RESOLUTION}_100/")


# accumulated per matched halo pair; written to sizes.csv at the end
ref_masses = []
comp_masses = []
ref_sizes = []
comp_sizes = []

print("reading reference file")
df_ref = read_file(reference_dir)  # per-particle data, indexed by ParticleIDs
df_ref_halo = read_halo_file(reference_dir)  # per-halo data, indexed by GroupIDs

print("reading comparison file")
df_comp = read_file(comparison_dir)
df_comp_halo = read_halo_file(comparison_dir)

bytes_used = df_ref.memory_usage(index=True).sum()
print(f"Memory: {bytes_used / 1024 / 1024:.2f} MB")
print(df_ref.dtypes)

# only the first 5 reference halos are matched
for index, original_halo in df_ref_halo[:5].iterrows():
    print(index)
    print(len(df_ref))
    # particles whose FOF group ID equals this halo's catalogue index
    particles_in_ref_halo = df_ref.loc[df_ref["FOFGroupIDs"] == index]
    ref_halo = df_ref_halo.loc[index]
    halo_particle_ids = set(particles_in_ref_halo.index.to_list())

    if REFERENCE_RESOLUTION < COMPARISON_RESOLUTION:
        # each coarse particle ID expands to N^3 fine-grid IDs
        print("upscaling IDs")
        upscaled_ids = set()
        prev_len = len(halo_particle_ids)
        print(prev_len)
        scaler = IDScaler(REFERENCE_RESOLUTION, COMPARISON_RESOLUTION)
        # i = 0
        for id in halo_particle_ids:
            # i += 1
            # if i % 1000 == 0:
            #     print(i)
            upscaled_ids.update(set(scaler.upscale(id)))
        halo_particle_ids = upscaled_ids
        after_len = len(upscaled_ids)
        print(after_len)
        print(after_len / prev_len)  # should approach N^3 per particle
        print("done")
    if COMPARISON_RESOLUTION < REFERENCE_RESOLUTION:
        # many fine-grid IDs collapse onto one coarse ID (set deduplicates)
        print("downscaling IDs")
        prev_count = len(halo_particle_ids)
        print(prev_count)
        downscaled_ids = set()
        scaler = IDScaler(COMPARISON_RESOLUTION, REFERENCE_RESOLUTION)
        for id in halo_particle_ids:
            downscaled_ids.add(scaler.downscale(id))
        halo_particle_ids = downscaled_ids
        print("done")
        after_count = len(halo_particle_ids)
        print(after_count)
        print(prev_count / after_count)

    # look the (resolution-mapped) particle IDs up in the comparison run
    particles = df_comp.loc[list(halo_particle_ids)]
    # print(particles)

    # all comparison halos that contain at least one of those particles
    halos_in_particles = set(particles["FOFGroupIDs"])
    # 2147483647 == 2**31 - 1; presumably the "not in any group" sentinel
    # of the FOF output — TODO confirm against the file format docs
    halos_in_particles.discard(2147483647)
    # print(halos_in_particles)
    if PLOT:
        fig: Figure = plt.figure()
        ax: Axes = fig.gca()
        ax.scatter(particles["X"], particles["Y"], s=1, alpha=.3, label="Halo")
        # ax.scatter(particles_in_ref_halo["X"], particles_in_ref_halo["Y"], s=1, alpha=.3, label="RefHalo")
        # plt.legend()
        # plt.show()
    best_halo = None
    best_halo_match = 0

    for halo in halos_in_particles:
        # print("----------", halo, "----------")
        print(halo)
        halo_data = df_comp_halo.loc[halo]
        particles_in_comp_halo: DataFrame = df_comp.loc[df_comp["FOFGroupIDs"] == halo]
        halo_size = len(particles_in_comp_halo)

        # inner join on particle ID: particles in BOTH this comparison halo
        # and the mapped reference halo
        df = particles_in_comp_halo.join(particles, how="inner", rsuffix="ref")
        shared_size = len(df)
        match = shared_size / halo_size
        # print(match, halo_size, shared_size)
        # print(df)
        if PLOT:
            ax.scatter(df["X"], df["Y"], s=1, alpha=.3, label=f"shared {halo}")
            # ax.scatter(particles_in_comp_halo["X"], particles_in_comp_halo["Y"], s=2, alpha=.3, label=f"shared {halo}")
        # best match = largest absolute particle overlap (not the ratio)
        if shared_size > best_halo_match:
            best_halo_match = shared_size
            best_halo = halo

    # print("-------")
    # print(best_halo)
    # NOTE(review): if halos_in_particles is empty, best_halo stays None and
    # this .loc raises — apparently never hit for these halos; verify
    comp_halo = df_comp_halo.loc[best_halo]

    print(ref_halo)
    print(comp_halo)
    ref_sizes.append(ref_halo["Sizes"])
    ref_masses.append(ref_halo["Masses"])
    comp_sizes.append(comp_halo["Sizes"])
    comp_masses.append(comp_halo["Masses"])
    # exit()
    if PLOT:
        ax.legend()
        ax.set_title(f"{reference_dir.name} vs. {comparison_dir.name} (Halo {index})")
        fig.savefig("out.png", dpi=300)
        plt.show()
    if SINGLE:
        break

# one row per matched halo pair; consumed by sizes.py
df = DataFrame(np.array([ref_sizes, comp_sizes, ref_masses, comp_masses]).T,
               columns=["ref_sizes", "comp_sizes", "ref_masses", "comp_masses"])
print(df)
df.to_csv("sizes.csv", index=False)
|
41
readfiles.py
Normal file
41
readfiles.py
Normal file
|
@ -0,0 +1,41 @@
|
|||
from pathlib import Path
|
||||
|
||||
import h5py
|
||||
import pandas as pd
|
||||
from pandas import DataFrame
|
||||
|
||||
|
||||
def read_file(path: Path) -> pd.DataFrame:
    """Load per-particle data from the snapshot in *path*, indexed by particle ID.

    Columns: X, Y, Z (coordinates) and FOFGroupIDs (as a category dtype).
    The DataFrame is cached as a pickle file named ``cache`` next to the
    snapshot; later calls read the cache instead of re-parsing the HDF5 file.

    :param path: directory containing ``output_0004.hdf5``
       (presumably a SWIFT snapshot — TODO confirm)
    :return: DataFrame indexed by ParticleIDs
    """
    cache_file = path / "cache"
    if cache_file.exists():
        print("from cache")
        return pd.read_pickle(str(cache_file))

    file = path / "output_0004.hdf5"
    # context manager so the HDF5 handle is closed even on error
    # (previously the file object was never closed); "r" makes the
    # read-only intent explicit
    with h5py.File(file, "r") as reference_file:
        df = pd.DataFrame(reference_file["PartType1"]["Coordinates"], columns=["X", "Y", "Z"])
        # category dtype: group IDs repeat heavily, which saves a lot of memory
        df2 = pd.DataFrame(reference_file["PartType1"]["FOFGroupIDs"], columns=["FOFGroupIDs"]).astype("category")
        df = df.merge(df2, "outer", left_index=True, right_index=True)
        del df2
        df3 = pd.DataFrame(reference_file["PartType1"]["ParticleIDs"], columns=["ParticleIDs"])
        df = df.merge(df3, "outer", left_index=True, right_index=True)
        del df3
    df.set_index("ParticleIDs", inplace=True)
    print("saving cache")
    df.to_pickle(str(cache_file))
    return df
|
||||
|
||||
|
||||
def read_halo_file(path: Path) -> DataFrame:
    """Load the FOF halo catalogue from *path*, indexed by group ID.

    Columns: X, Y, Z (halo centres), Masses and Sizes.

    :param path: directory containing ``fof_output_0004.hdf5``
    :return: DataFrame indexed by GroupIDs
    """
    file = path / "fof_output_0004.hdf5"
    # context manager so the HDF5 handle is closed even on error
    # (previously the file object was never closed); the DataFrames read
    # the datasets eagerly, so merging can happen after the file is closed
    with h5py.File(file, "r") as reference_file:
        df1 = pd.DataFrame(reference_file["Groups"]["Centres"], columns=["X", "Y", "Z"])
        df2 = pd.DataFrame(reference_file["Groups"]["GroupIDs"], columns=["GroupIDs"])
        df3 = pd.DataFrame(reference_file["Groups"]["Masses"], columns=["Masses"])
        df4 = pd.DataFrame(reference_file["Groups"]["Sizes"], columns=["Sizes"])
    df = df1.merge(df2, "outer", left_index=True, right_index=True)
    df = df.merge(df3, "outer", left_index=True, right_index=True)
    df = df.merge(df4, "outer", left_index=True, right_index=True)
    df.set_index("GroupIDs", inplace=True)
    return df
|
53
remap_particle_IDs.py
Normal file
53
remap_particle_IDs.py
Normal file
|
@ -0,0 +1,53 @@
|
|||
from itertools import product
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
class IDScaler:
    """Translate grid particle IDs between two cubic resolutions.

    IDs encode a lattice position (i, j, k) as ``((i * Nres) + j) * Nres + k``.
    One coarse cell corresponds to a block of ``N**3`` fine cells, where
    ``N = Nres_max // Nres_min``.
    """

    def __init__(self, Nres_min: int, Nres_max: int):
        # the fine grid must be an integer refinement of the coarse one
        assert Nres_max % Nres_min == 0
        self.Nres_min = Nres_min
        self.Nres_max = Nres_max
        self.N = Nres_max // Nres_min
        # all N**3 sub-cell offsets inside one coarse cell
        self.shifts = [np.array(offset) for offset in product(range(self.N), repeat=3)]

    @staticmethod
    def original_position(Nres: int, particle_ID: int):
        """Decode *particle_ID* into its (i, j, k) lattice position on an
        ``Nres``-sided grid (inverse of the ID encoding above)."""
        rest, particle_k = divmod(particle_ID, Nres)
        particle_i, particle_j = divmod(rest, Nres)
        return np.array([particle_i, particle_j, particle_k])

    def upscale(self, particle_ID: int):
        """Yield the IDs of all ``N**3`` fine-grid cells covered by the
        coarse-grid cell *particle_ID*."""
        base = self.original_position(self.Nres_min, particle_ID) * self.N
        for offset in self.shifts:
            fine_i, fine_j, fine_k = base + offset
            yield (fine_i * self.Nres_max + fine_j) * self.Nres_max + fine_k

    def downscale(self, particle_ID: int):
        """Return the coarse-grid ID of the cell containing the fine-grid
        cell *particle_ID*."""
        coarse = self.original_position(self.Nres_max, particle_ID) // self.N
        return (coarse[0] * self.Nres_min + coarse[1]) * self.Nres_min + coarse[2]
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Round-trip demo: upscale one coarse particle ID to the fine grid and
    # check that every resulting fine ID maps back to the original coarse ID.
    coarse_position = np.array([0, 0, 127])
    # maximum_test = np.array([127, 127, 127]) #this works, Nres - 1 is the maximum for (i,j,k)

    coarse_res = 128
    fine_res = 256

    coarse_id = (coarse_position[0] * coarse_res + coarse_position[1]) * coarse_res + coarse_position[2]
    print(coarse_id)

    scaler = IDScaler(coarse_res, fine_res)

    for fine_id in scaler.upscale(coarse_id):
        round_tripped = scaler.downscale(fine_id)
        print(fine_id, round_tripped)
        assert round_tripped == coarse_id
|
19
sizes.py
Normal file
19
sizes.py
Normal file
|
@ -0,0 +1,19 @@
|
|||
import pandas as pd
|
||||
from matplotlib import pyplot as plt
|
||||
from matplotlib.axes import Axes
|
||||
from matplotlib.figure import Figure
|
||||
|
||||
# Quick look at the matching results produced by main.py (sizes.csv holds one
# row per matched reference/comparison halo pair).
df = pd.read_csv("sizes.csv")

print(df)

# only the first 50 matched pairs — presumably to keep the plot readable; verify
df = df.iloc[:50]

fig: Figure = plt.figure()
ax: Axes = fig.gca()
# ax.scatter(df["ref_sizes"], df["comp_sizes"], s=1, alpha=.3)
# one dot per matched halo pair; points on the diagonal mean the two runs agree
ax.scatter(df["ref_masses"], df["comp_masses"], s=3)

# halo masses span orders of magnitude, so log-log axes
ax.set_xscale("log")
ax.set_yscale("log")
plt.show()
|
Loading…
Reference in a new issue