mirror of
https://github.com/Findus23/halo_comparison.git
synced 2024-09-19 16:03:50 +02:00
initial version
This commit is contained in:
commit
ef8046ff86
5 changed files with 255 additions and 0 deletions
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
.idea/
|
||||||
|
*.png
|
||||||
|
*.csv
|
139
main.py
Normal file
139
main.py
Normal file
|
@ -0,0 +1,139 @@
|
||||||
|
from pathlib import Path

import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.axes import Axes
from matplotlib.figure import Figure
from pandas import DataFrame

from readfiles import read_file, read_halo_file
from remap_particle_IDs import IDScaler

# Grid resolutions of the two simulation runs being compared.
REFERENCE_RESOLUTION = 128
COMPARISON_RESOLUTION = 512
PLOT = True  # draw a scatter plot for every reference halo
SINGLE = False  # stop after the first reference halo

reference_dir = Path(f"/home/lukas/monofonic_tests/shannon_{REFERENCE_RESOLUTION}_100")
comparison_dir = Path(f"/home/lukas/monofonic_tests/shannon_{COMPARISON_RESOLUTION}_100/")

# comparison_dir = Path(f"/home/lukas/monofonic_tests/DB8_{COMPARISON_RESOLUTION}_100/")

# Per-halo properties collected over the loop and written to sizes.csv at the end.
ref_masses = []
comp_masses = []
ref_sizes = []
comp_sizes = []

print("reading reference file")
df_ref = read_file(reference_dir)
df_ref_halo = read_halo_file(reference_dir)

print("reading comparison file")
df_comp = read_file(comparison_dir)
df_comp_halo = read_halo_file(comparison_dir)

bytes_used = df_ref.memory_usage(index=True).sum()
print(f"Memory: {bytes_used / 1024 / 1024:.2f} MB")
print(df_ref.dtypes)

# For each of the first 5 reference halos: find the comparison-run halo that
# shares the most particles with it and record both halos' sizes and masses.
for index, original_halo in df_ref_halo[:5].iterrows():
    print(index)
    print(len(df_ref))
    particles_in_ref_halo = df_ref.loc[df_ref["FOFGroupIDs"] == index]
    ref_halo = df_ref_halo.loc[index]
    halo_particle_ids = set(particles_in_ref_halo.index.to_list())

    # Particle IDs encode grid coordinates, so they have to be remapped when
    # the two runs use different resolutions (see remap_particle_IDs.IDScaler).
    if REFERENCE_RESOLUTION < COMPARISON_RESOLUTION:
        print("upscaling IDs")
        upscaled_ids = set()
        prev_len = len(halo_particle_ids)
        print(prev_len)
        scaler = IDScaler(REFERENCE_RESOLUTION, COMPARISON_RESOLUTION)
        # loop variable renamed from `id` (shadowed the builtin);
        # set.update() takes the generator directly, no set() wrapper needed
        for particle_id in halo_particle_ids:
            upscaled_ids.update(scaler.upscale(particle_id))
        halo_particle_ids = upscaled_ids
        after_len = len(upscaled_ids)
        print(after_len)
        print(after_len / prev_len)
        print("done")
    if COMPARISON_RESOLUTION < REFERENCE_RESOLUTION:
        print("downscaling IDs")
        prev_count = len(halo_particle_ids)
        print(prev_count)
        downscaled_ids = set()
        scaler = IDScaler(COMPARISON_RESOLUTION, REFERENCE_RESOLUTION)
        for particle_id in halo_particle_ids:
            downscaled_ids.add(scaler.downscale(particle_id))
        halo_particle_ids = downscaled_ids
        print("done")
        after_count = len(halo_particle_ids)
        print(after_count)
        print(prev_count / after_count)

    # The same particles, looked up in the comparison run.
    particles = df_comp.loc[list(halo_particle_ids)]

    halos_in_particles = set(particles["FOFGroupIDs"])
    # 2147483647 (2**31 - 1) marks particles that belong to no FOF group —
    # presumably SWIFT's "no group" sentinel, TODO confirm.
    halos_in_particles.discard(2147483647)
    if PLOT:
        fig: Figure = plt.figure()
        ax: Axes = fig.gca()
        ax.scatter(particles["X"], particles["Y"], s=1, alpha=.3, label="Halo")
    best_halo = None
    best_halo_match = 0

    for halo in halos_in_particles:
        print(halo)
        particles_in_comp_halo: DataFrame = df_comp.loc[df_comp["FOFGroupIDs"] == halo]
        halo_size = len(particles_in_comp_halo)
        # inner join on the particle-ID index: particles this comparison halo
        # shares with the reference halo
        df = particles_in_comp_halo.join(particles, how="inner", rsuffix="ref")
        shared_size = len(df)
        match = shared_size / halo_size  # shared fraction (kept for debugging)
        if PLOT:
            ax.scatter(df["X"], df["Y"], s=1, alpha=.3, label=f"shared {halo}")
        if shared_size > best_halo_match:
            best_halo_match = shared_size
            best_halo = halo

    if best_halo is None:
        # no comparison halo overlaps this reference halo; previously this
        # crashed on df_comp_halo.loc[None]
        print("no matching comparison halo found, skipping")
        continue
    comp_halo = df_comp_halo.loc[best_halo]

    print(ref_halo)
    print(comp_halo)
    ref_sizes.append(ref_halo["Sizes"])
    ref_masses.append(ref_halo["Masses"])
    comp_sizes.append(comp_halo["Sizes"])
    comp_masses.append(comp_halo["Masses"])
    if PLOT:
        ax.legend()
        ax.set_title(f"{reference_dir.name} vs. {comparison_dir.name} (Halo {index})")
        fig.savefig("out.png", dpi=300)
        plt.show()
    if SINGLE:
        break

# Write the collected per-halo comparison table (plotted by sizes.py).
df = DataFrame(np.array([ref_sizes, comp_sizes, ref_masses, comp_masses]).T,
               columns=["ref_sizes", "comp_sizes", "ref_masses", "comp_masses"])
print(df)
df.to_csv("sizes.csv", index=False)
|
41
readfiles.py
Normal file
41
readfiles.py
Normal file
|
@ -0,0 +1,41 @@
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import h5py
|
||||||
|
import pandas as pd
|
||||||
|
from pandas import DataFrame
|
||||||
|
|
||||||
|
|
||||||
|
def read_file(path: Path) -> pd.DataFrame:
    """Load the particle data of one simulation run.

    Reads coordinates, FOF group IDs and particle IDs from
    ``output_0004.hdf5`` in *path* into a DataFrame indexed by particle ID,
    and caches the result as a pickle file next to the snapshot so later
    runs skip the HDF5 parsing.
    """
    cache_file = path / "cache"
    if cache_file.exists():
        print("from cache")
        return pd.read_pickle(str(cache_file))

    file = path / "output_0004.hdf5"
    # context manager closes the HDF5 handle (previously it was leaked)
    with h5py.File(file) as reference_file:
        df = pd.DataFrame(reference_file["PartType1"]["Coordinates"], columns=["X", "Y", "Z"])
        # category dtype keeps the memory footprint of the group column small
        df2 = pd.DataFrame(reference_file["PartType1"]["FOFGroupIDs"], columns=["FOFGroupIDs"]).astype("category")
        df = df.merge(df2, "outer", left_index=True, right_index=True)
        del df2
        df3 = pd.DataFrame(reference_file["PartType1"]["ParticleIDs"], columns=["ParticleIDs"])
        df = df.merge(df3, "outer", left_index=True, right_index=True)
        del df3
    df.set_index("ParticleIDs", inplace=True)
    print("saving cache")
    df.to_pickle(str(cache_file))
    return df
|
||||||
|
|
||||||
|
|
||||||
|
def read_halo_file(path: Path) -> DataFrame:
    """Load the FOF halo catalogue of one simulation run.

    Reads halo centres, group IDs, masses and sizes from
    ``fof_output_0004.hdf5`` in *path* and returns them as a single
    DataFrame indexed by group ID.
    """
    file = path / "fof_output_0004.hdf5"
    # context manager closes the HDF5 handle (previously it was leaked);
    # pd.DataFrame copies the datasets, so merging after close is safe
    with h5py.File(file) as reference_file:
        df1 = pd.DataFrame(reference_file["Groups"]["Centres"], columns=["X", "Y", "Z"])
        df2 = pd.DataFrame(reference_file["Groups"]["GroupIDs"], columns=["GroupIDs"])
        df3 = pd.DataFrame(reference_file["Groups"]["Masses"], columns=["Masses"])
        df4 = pd.DataFrame(reference_file["Groups"]["Sizes"], columns=["Sizes"])
    df = df1.merge(df2, "outer", left_index=True, right_index=True)
    df = df.merge(df3, "outer", left_index=True, right_index=True)
    df = df.merge(df4, "outer", left_index=True, right_index=True)
    df.set_index("GroupIDs", inplace=True)
    return df
|
53
remap_particle_IDs.py
Normal file
53
remap_particle_IDs.py
Normal file
|
@ -0,0 +1,53 @@
|
||||||
|
from itertools import product
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
|
class IDScaler:
    """Translates particle IDs between two cubic-grid resolutions.

    A particle ID encodes a grid position (i, j, k) at resolution Nres as
    ``((i * Nres) + j) * Nres + k``.  Upscaling maps one coarse-grid ID to
    all fine-grid IDs inside its cell; downscaling is the inverse.
    """

    def __init__(self, Nres_min: int, Nres_max: int):
        # the fine grid must be an integer multiple of the coarse one
        assert Nres_max % Nres_min == 0
        self.Nres_min = Nres_min
        self.Nres_max = Nres_max
        self.N = Nres_max // Nres_min
        # every (di, dj, dk) offset of a fine cell within one coarse cell
        self.shifts = [np.array(offset) for offset in product(range(self.N), repeat=3)]

    @staticmethod
    def original_position(Nres: int, particle_ID: int):
        """Invert ``((i * Nres) + j) * Nres + k`` back into (i, j, k)."""
        rest, k = divmod(particle_ID, Nres)
        i, j = divmod(rest, Nres)
        return np.array([i, j, k])

    def upscale(self, particle_ID: int):
        """Yield the fine-grid IDs of all N**3 cells covered by one coarse-grid ID."""
        base = self.original_position(self.Nres_min, particle_ID) * self.N
        for offset in self.shifts:
            fine = base + offset
            yield (fine[0] * self.Nres_max + fine[1]) * self.Nres_max + fine[2]

    def downscale(self, particle_ID: int):
        """Return the coarse-grid ID of the cell containing a fine-grid ID."""
        coarse = np.floor_divide(self.original_position(self.Nres_max, particle_ID), self.N)
        return (coarse[0] * self.Nres_min + coarse[1]) * self.Nres_min + coarse[2]
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Round-trip sanity check: upscale one coarse-grid ID, then verify that
    # every resulting fine-grid ID downscales back to the original ID.
    test_particle = np.array([0, 0, 127])
    # maximum_test = np.array([127, 127, 127]) #this works, Nres - 1 is the maximum for (i,j,k)

    Nres_1 = 128
    Nres_2 = 256

    # encode (i, j, k) the same way the scaler does
    test_particle_id = ((test_particle[0] * Nres_1) + test_particle[1]) * Nres_1 + test_particle[2]
    print(test_particle_id)

    scaler = IDScaler(Nres_1, Nres_2)

    particle_ID_1_converted = scaler.upscale(test_particle_id)

    # loop variable renamed from `id` (shadowed the builtin)
    for upscaled_id in particle_ID_1_converted:
        reverse = scaler.downscale(upscaled_id)
        print(upscaled_id, reverse)
        assert reverse == test_particle_id
|
19
sizes.py
Normal file
19
sizes.py
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib.axes import Axes
from matplotlib.figure import Figure

# Quick look at how halo masses agree between the reference and comparison
# runs, using the sizes.csv table written by main.py.
data = pd.read_csv("sizes.csv")

print(data)

# restrict the plot to the first 50 halos
data = data.iloc[:50]

fig: Figure = plt.figure()
ax: Axes = fig.gca()
# ax.scatter(data["ref_sizes"], data["comp_sizes"], s=1, alpha=.3)
ax.scatter(data["ref_masses"], data["comp_masses"], s=3)

# masses span orders of magnitude, so compare on log axes
ax.set_xscale("log")
ax.set_yscale("log")
plt.show()
|
Loading…
Reference in a new issue