优化独立N体仿真的方法
我对并行计算和 Numba 包比较陌生。我正在为我惊人的并行 N 体模拟寻找优化方法。到目前为止,我已经将我所知道的一切应用到 Numpy 数组、JIT 编译器和多处理中。但是,我仍然没有达到我想要的速度(我看过他们的代码仍然更快的视频)
我目前拥有的是一个使用 Runge-Kutta 积分和两个运动方程的相当简单的 python 积分器。我在我的领域经常与数值积分器合作,所以我肯定想从你们那里学到更多技巧。
我已经在下面发布了我的代码,但基本上,我有一个名为“Motion”的主要函数,它需要 2 个初始条件并将它们的运动整合一段时间。我已经 JITTED 这个函数和它迭代调用的所有函数:“RK4”、“ODE”、“电场”。最后,我从 Multiprocessing 调用池函数来并行化“运动”函数,并为其运行的每个模拟插入不同的初始条件。
同样,我已经实现了我所知道的所有类型的优化,但是我对它的速度仍然不是很满意。我已经在下面发布了我的代码。如果有人能发现可以进一步优化的算法,那将非常有帮助和教育意义(至少对我而言)!感谢您的时间。
import numpy as np
import matplotlib.pyplot as plt
from numba import njit, prange
from time import time
from tqdm import tqdm
import multiprocessing as mp
from IPython.display import clear_output
from scipy import interpolate
"Electric Field Information"
A = np.float32(1.00E-04)
N_waves = np.int(19)
frequency = np.linspace(37.5,46.5,N_waves)*1e-3 #Set of frequencies used for Electric Field
m = np.int(20) #Azimuthal Wave Number
sigma = np.float32(0.5) #Gaussian Width of E wave in L
zeta = np.float32(1)
"Particle Information"
N_Particles = np.int(10000)
q = np.float32(-1) #Charge of electron
mass = np.float32(0.511e6) #Mass of Proton eV/c^2
FirstAdiabatic = np.float32(2000e10) #MeV/Gauss Adiabatic Invariant
"Runge-Kutta Paramters"
Total_Time = np.float32(10) #hours
Step_Size = np.float32(0.2) #second
Plot_Time = np.float32(60) #seconds
time_array = np.arange(0, Total_Time*3600+Step_Size, Step_Size) #Convert to seconds and Add End Point
N_points = len(time_array)
Skip_How_Many = int(Plot_Time/Step_Size) #Used to shorten our data set and save RAM
"Constants"
Beq = np.float64(31221.60592e-9) #nT
Re = np.float32(6371e3) #Meters
c = np.float32(2.998e8) #m/s
"Start Electric Field Code"
def wave_peak(omega): #Called once so no need to JIT or Optimize this
L_sample = np.linspace(1,10,100)
phidot = -3*FirstAdiabatic / (q* (L_sample*Re)**2 * np.sqrt(1+ (2*FirstAdiabatic*Beq/ (mass*L_sample**3)) ) )
phidot_to_L = interpolate.interp1d(phidot,L_sample, kind = 'cubic')
L0i = phidot_to_L(omega/m)
return L0i
omega = 2*np.pi*frequency
L0i_wave = wave_peak(omega)
Phi0i_wave = np.linspace(0,2*np.pi,N_waves)
np.random.shuffle(Phi0i_wave)
@njit(nogil= True)
def Electric_Field(t,r):
E0 = A*np.exp(-(r[0]-L0i_wave)**2 / (2*sigma**2))
Delta = np.arctan2( (r[0] * (r[0]-L0i_wave)/sigma**2 - 1), (2*np.pi*r[0]/zeta) )
Er = E0/m * np.sqrt( (2*np.pi*r[0]/zeta)**2 + (r[0]*(r[0]-L0i_wave)/sigma**2 -1)**2 ) * np.cos(m*r[1] - omega*t + Phi0i_wave + 2*np.pi*r[0]/zeta + Delta)
Ephi = E0*np.cos(m*r[1] - omega*t + Phi0i_wave + 2*np.pi*r[0]/zeta)
return np.sum(Er),np.sum(Ephi)
"End of Electric Field Code"
"Particle's ODE - Equation of Motion"
@njit(nogil= True)
def ODE(t,r):
Er, Ephi = Electric_Field(t,r) #Pull out the electric so we only call it once.
Ldot = Ephi * r[0]**3 / (Re*Beq)
Phidot = -Er * r[0]**2 / (Re*Beq) - 3* FirstAdiabatic / (q*r[0]**2*Re**2) * 1/np.sqrt(2*FirstAdiabatic*Beq/ (r[0]**3*mass) + 1)
return np.array([Ldot,Phidot])
@njit(nogil= True)
def RK4(t,r): #Standard Runge-Kutta Integration Algorthim
k1 = Step_Size*ODE(t,r)
k2 = Step_Size*ODE(t+Step_Size/2, r+k1/2)
k3 = Step_Size*ODE(t+Step_Size/2, r+k2)
k4 = Step_Size*ODE(t+Step_Size, r+k3)
return r + k1/6 + k2/3 + k3/3 + k4/6
@njit(nogil= True)
def Motion(L0,Phi0): #Insert Inital Conditions and it will loop through the RK4 integrator and output all its positions.
L_Array = np.zeros_like(time_array)
Phi_Array = np.zeros_like(time_array)
L_Array[0] = L0
Phi_Array[0] = Phi0
for i in range(1,N_points):
L_Array[i], Phi_Array[i] = RK4(time_array[i-1], np.array([ L_Array[i-1],Phi_Array[i-1] ]) )
return L_Array[::Skip_How_Many], Phi_Array[::Skip_How_Many]
#Skip_How_Many is used to take up less RAM space since we don't need that kind of percsion in our data
# Location = Motion(5,0)
# x = Location[0]*np.cos(Location[1])
# y = Location[0]*np.sin(Location[1])
# plt.plot(x,y,"o", markersize = 0.5)
# ts = time()
# Motion(5,0)
# print('Solo Time:', time() - ts)
"Getting my Arrays ready so I can index it"
Split = int(np.sqrt(N_Particles))
L0i = np.linspace(4.4,5.5,Split)
Phi0i = np.linspace(0,360,Split) / 180 * np.pi
L0_Grid = np.repeat(L0i,Split)
# ^Here I want to run a meshgrid of L0i and Phi0, so I repeat L0i using this function and mod (%) the index on the Phi Function
#Create Appending Array
results = []
def get_results(result): #Call back to this array from Multiprocessing to append the results it gives per run.
results.append(result)
clear_output()
print("Getting Results %0.2f" %(len(results)/N_Particles * 100), end='r')
if __name__ == '__main__':
#Call In Multiprocessing
pool = mp.Pool(mp.cpu_count()) #Counting number of threads to start
ts = time() #Timing this process. Begins here
for ii in range(N_Particles): #Not too sure what this does, but it works - I assume it parallelizes this loop
pool.apply_async(Motion, args = (L0_Grid[ii],Phi0i[int(ii%Split)]), callback=get_results)
pool.close() #I'm not too sure what this does but everyone uses it, and it won't work without it
pool.join()
print('Time in MP parallel:', time() - ts) #Output Time
回答
我认为您的代码缓慢的主要原因是您的 Runge-Kutta 方法具有固定的时间步长。Fancy ODE 求解器将选择允许误差量的最大时间步长。一个例子是来自 ODEPACK 的 LSODA ODE 求解器。
下面我使用NumbaLSODA 重写了您的代码。在我的电脑上,它使您的代码速度提高了大约 200 倍。
import numpy as np
import matplotlib.pyplot as plt
from numba import njit, prange
from time import time
from tqdm import tqdm
import multiprocessing as mp
from scipy import interpolate
from NumbaLSODA import lsoda_sig, lsoda
from numba import cfunc
import numba as nb
"Electric Field Information"
A = np.float32(1.00E-04)
N_waves = np.int(19)
frequency = np.linspace(37.5,46.5,N_waves)*1e-3 #Set of frequencies used for Electric Field
m = np.int(20) #Azimuthal Wave Number
sigma = np.float32(0.5) #Gaussian Width of E wave in L
zeta = np.float32(1)
"Particle Information"
N_Particles = np.int(10000)
q = np.float32(-1) #Charge of electron
mass = np.float32(0.511e6) #Mass of Proton eV/c^2
FirstAdiabatic = np.float32(2000e10) #MeV/Gauss Adiabatic Invariant
"Runge-Kutta Paramters"
Total_Time = np.float32(10) #hours
Step_Size = np.float32(0.2) #second
Plot_Time = np.float32(60) #seconds
time_array = np.arange(0, Total_Time*3600+Step_Size, Step_Size) #Convert to seconds and Add End Point
N_points = len(time_array)
Skip_How_Many = int(Plot_Time/Step_Size) #Used to shorten our data set and save RAM
"Constants"
Beq = np.float64(31221.60592e-9) #nT
Re = np.float32(6371e3) #Meters
c = np.float32(2.998e8) #m/s
"Start Electric Field Code"
def wave_peak(omega): #Called once so no need to JIT or Optimize this
L_sample = np.linspace(1,10,100)
phidot = -3*FirstAdiabatic / (q* (L_sample*Re)**2 * np.sqrt(1+ (2*FirstAdiabatic*Beq/ (mass*L_sample**3)) ) )
phidot_to_L = interpolate.interp1d(phidot,L_sample, kind = 'cubic')
L0i = phidot_to_L(omega/m)
return L0i
omega = 2*np.pi*frequency
L0i_wave = wave_peak(omega)
Phi0i_wave = np.linspace(0,2*np.pi,N_waves)
np.random.shuffle(Phi0i_wave)
@njit
def Electric_Field(t,r):
E0 = A*np.exp(-(r[0]-L0i_wave)**2 / (2*sigma**2))
Delta = np.arctan2( (r[0] * (r[0]-L0i_wave)/sigma**2 - 1), (2*np.pi*r[0]/zeta) )
Er = E0/m * np.sqrt( (2*np.pi*r[0]/zeta)**2 + (r[0]*(r[0]-L0i_wave)/sigma**2 -1)**2 ) * np.cos(m*r[1] - omega*t + Phi0i_wave + 2*np.pi*r[0]/zeta + Delta)
Ephi = E0*np.cos(m*r[1] - omega*t + Phi0i_wave + 2*np.pi*r[0]/zeta)
return np.sum(Er),np.sum(Ephi)
"End of Electric Field Code"
"Particle's ODE - Equation of Motion"
@cfunc(lsoda_sig)
def ODE(t, r_, dr, p):
r = nb.carray(r_, (2,))
Er, Ephi = Electric_Field(t,r)
Ldot = Ephi * r[0]**3 / (Re*Beq)
Phidot = -Er * r[0]**2 / (Re*Beq) - 3* FirstAdiabatic / (q*r[0]**2*Re**2) * 1/np.sqrt(2*FirstAdiabatic*Beq/ (r[0]**3*mass) + 1)
dr[0] = Ldot
dr[1] = Phidot
funcptr = ODE.address
@njit
def Motion(L0,Phi0):
u0 = np.array([L0,Phi0],np.float64)
data = np.array([5.0])
usol, success = lsoda(funcptr, u0, time_array, data)
L_Array = usol[:,0]
Phi_Array = usol[:,1]
return L_Array[::Skip_How_Many], Phi_Array[::Skip_How_Many]
#Skip_How_Many is used to take up less RAM space since we don't need that kind of percsion in our data
Location = Motion(5,0)
x = Location[0]*np.cos(Location[1])
y = Location[0]*np.sin(Location[1])
plt.plot(x,y,"o", markersize = 0.5)
ts = time()
Motion(5,0)
print('Solo Time:', time() - ts)