Skip to content

Commit 7186a98

Browse files
John DennisJohn Dennis
John Dennis
authored and
John Dennis
committed
Performance optimization: types variables, created functions.
1 parent 4ffa2ab commit 7186a98

File tree

1 file changed

+203
-82
lines changed

1 file changed

+203
-82
lines changed

swm_julia/swm_julia.jl

+203-82
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,25 @@
1+
using Profile
12
using LinearAlgebra
3+
using LoopVectorization
4+
5+
const M::Int = 256
6+
const N::Int = 256
7+
const M_LEN::Int = M + 1
8+
const N_LEN::Int = N + 1
9+
const ITMAX::Int = 40
10+
11+
const dt::Float64 = 90.::Float64
12+
const dx::Float64 = 100000.::Float64
13+
const dy::Float64 = 100000.::Float64
14+
const fsdx::Float64 = 4.::Float64 / (dx)
15+
const fsdy::Float64 = 4.::Float64 / (dy)
16+
const a::Float64 = 1000000.::Float64
17+
const alpha::Float64 = 0.001::Float64
18+
L_OUT = false
219

3-
M = 32
4-
N = 32
5-
M_LEN = M + 1
6-
N_LEN = N + 1
7-
ITMAX = 1000
8-
dt = 90.
9-
dt = dt
10-
dx = 100000.
11-
dy = 100000.
12-
fsdx = 4. / (dx)
13-
fsdy = 4. / (dy)
14-
a = 1000000.
15-
alpha = 0.001
16-
L_OUT = true
1720

1821
function initialize(u, v, p, M, N, dx, dy, a)
19-
pi = 4. * atan(1.)
22+
pi = 4.::Float64 * atan(1.)
2023
tpi = 2. * pi
2124
d_i = tpi / M
2225
d_j = tpi / N
@@ -27,13 +30,12 @@ function initialize(u, v, p, M, N, dx, dy, a)
2730
N_LEN = N + 1
2831

2932
psi = zeros(Float64, M_LEN, N_LEN)
30-
global p, u, v
3133

3234
for i in 0:(M_LEN-1)
3335
for j in 0:(N_LEN-1)
3436
idx = i * N_LEN + j + 1
3537
psi[idx] = a * sin((i + 0.5) * d_i) * sin((j + 0.5) * d_j)
36-
p[idx] = pcf * (cos(2.0 * i * d_i) + cos(2.0 * j * d_j)) + 50000.0
38+
p[idx] = pcf * (cos(2.0 * i * d_i) + cos(2.0 * j * d_j)) + 50000.0::Float64
3739
end
3840
end
3941

@@ -65,9 +67,9 @@ global p = zeros(Float64, M_LEN, N_LEN)
6567
u, v, p = initialize(u, v, p, M, N, dx, dy, a)
6668
end
6769

68-
global uold = copy(u)
69-
global vold = copy(v)
70-
global pold = copy(p)
70+
uold = u
71+
vold = v
72+
pold = p
7173

7274
if L_OUT
7375
println(" Number of points in the x direction: ", M)
@@ -81,93 +83,212 @@ if L_OUT
8183
println(" Initial v:\n", diag(v)[1:end-1])
8284
end
8385

84-
global tdt = dt
86+
function turbo_dot(x::AbstractVector{Float64}, A::AbstractMatrix{Float64}, y::AbstractVector{Float64})
87+
s = 0.0;
88+
@turbo for j in eachindex(y), i in eachindex(x)
89+
s += x[i]*A[i,j]*y[j];
90+
end
91+
return s;
92+
end
93+
94+
function calc_cu!(p::Array{Float64},u::Array{Float64},cu::Array{Float64})
95+
96+
# cu[2:end, 1:end-1] = ((p[2:end, 1:end-1] .+ p[1:end-1,1:end-1]) .* u[2:end,1:end-1]) ./ 2.0::Float64
97+
@turbo for j = 1:N,i = 2:M+1
98+
@inbounds cu[i,j] = ((p[i,j] + p[i-1,j]) * u[i,j]) / 2.0
99+
end
100+
return
101+
end
102+
function calc_cv!(p::Array{Float64},v::Array{Float64},cv::Array{Float64})
103+
104+
# cv[1:end-1, 2:end] = ((p[1:end-1, 2:end] .+ p[1:end-1,1:end-1]) .* v[1:end-1,2:end]) ./ 2.0::Float64
105+
@turbo for j = 2:N+1,i = 1:M
106+
@inbounds cv[i,j] = ((p[i,j] + p[i,j-1]) * v[i,j]) / 2.0
107+
end
108+
return
109+
end
110+
function calc_z!(v::Array{Float64},u::Array{Float64},p::Array{Float64},z::Array{Float64},fsdx::Float64,fsdy::Float64)
111+
# z[2:end, 2:end] = (fsdx .* (v[2:end, 2:end] .- v[1:end-1, 2:end]) .-
112+
# fsdy .* (u[2:end, 2:end] .- u[2:end, 1:end-1])) ./
113+
# (p[1:end-1,1:end-1] .+ p[2:end,1:end-1] .+
114+
# p[2:end,2:end] .+ p[1:end-1,2:end])
115+
@turbo for j=2:N,i=2:M
116+
@inbounds z[i,j] = (fsdx * (v[i,j] - v[i-1,j]) - fsdy * (u[i,j]-u[i,j-1]))/
117+
(p[i-1,j-1] + p[i,j-1] + p[i,j] + p[i-1,j])
118+
end
119+
return
120+
end
121+
function calc_h!(p::Array{Float64},u::Array{Float64},v::Array{Float64},h::Array{Float64})
122+
# h[1:end-1,1:end-1] = p[1:end-1,1:end-1] .+ 0.25::Float64 .*
123+
# (u[2:end, 1:end-1] .* u[2:end, 1:end-1] .+
124+
# u[1:end-1,1:end-1] .* u[1:end-1,1:end-1] .+
125+
# v[1:end-1, 2:end] .* v[1:end-1, 2:end] .+
126+
# v[1:end-1,1:end-1] .* v[1:end-1,1:end-1])
127+
@turbo for j=1:N,i=1:M
128+
@inbounds h[i,j] = p[i,j] + 0.25* (u[i+1,j]^2 + u[i,j]^2 + v[i,j+2]^2 + v[i,j]^2)
129+
end
130+
end
131+
function calc_unew!(uold::Array{Float64},z::Array{Float64},cv::Array{Float64},h::Array{Float64},unew::Array{Float64},tdts8,tdtsdx::Float64)
132+
#unew[2:end,1:end-1]= uold[2:end,1:end-1] .+ tdts8 .*
133+
# (z[2:end, 2:end] .+ z[2:end, 1:end-1]) .*
134+
# (cv[2:end,2:end] .+ cv[2:end,1:end-1] .+
135+
# cv[1:end-1,2:end] .+ cv[1:end-1,1:end-1]) .-
136+
# tdtsdx .* (h[2:end, 1:end-1] .- h[1:end-1,1:end-1])
137+
@turbo for j=2:N+1,i=1:M
138+
@inbounds unew[i,j] = uold[i,j] + tdts8 * (z[i+1,j] + z[i+1,j-1]) *
139+
(cv[i+1,j] + cv[i+1,j-1] + cv[i,j] + cv[i,j-1]) -
140+
tdtsdx * (h[i+1,j-1]-h[i,j-1])
141+
end
142+
end
143+
function calc_vnew!(vold::Array{Float64},z::Array{Float64},cu::Array{Float64},h::Array{Float64},vnew::Array{Float64},tdts8::Float64,tdtsdy::Float64)
144+
# vnew[1:end-1,2:end] = vold[1:end-1,2:end] .- tdts8 .*
145+
# (z[2:end, 2:end] .+ z[1:end-1, 2:end]) .*
146+
# (cu[2:end,2:end] .+ cu[2:end,1:end-1] .+
147+
# cu[1:end-1,2:end] .+ cu[1:end-1,1:end-1]) .-
148+
# tdtsdy .* (h[1:end-1, 2:end] .- h[1:end-1,1:end-1])
149+
@turbo for j=2:N+1,i=1:M
150+
@inbounds vnew[i,j] = vold[i,j] - tdts8 * (z[i+1,j] + z[i,j]) *
151+
(cu[i+1,j] + cu[i+1,j-1] + cu[i,j] + cu[i,j-1]) -
152+
tdtsdy * (h[i,j] - h[i,j-1])
153+
end
154+
end
155+
function calc_pnew!(pold::Array{Float64},cu::Array{Float64},cv::Array{Float64},pnew::Array{Float64},tdtsdx::Float64)
156+
# pnew[1:end-1,1:end-1] = pold[1:end-1,1:end-1] .- tdtsdx .*
157+
# (cu[2:end, 1:end-1] .- cu[1:end-1,1:end-1]) .-
158+
# tdtsdy .* (cv[1:end-1, 2:end] .- cv[1:end-1,1:end-1])
159+
@turbo for j=1:N,i=1:M
160+
@inbounds pnew[i,j] = pold[i,j] - tdtsdx * (cu[i+2,j] - cu[i,j]) -
161+
tdtsdy * ( cv[i,j+1] - cv[i,j])
162+
end
163+
end
164+
function bndry1!(h::Array{Float64},z::Array{Float64},cu::Array{Float64},cv::Array{Float64})
165+
@inbounds cu[1,1] = cu[M, 1]
166+
@inbounds h[M,1] = h[1, 1]
167+
@turbo for i=2:N+1
168+
@inbounds cu[1,i] = cu[M, i]
169+
@inbounds h[M,i] = h[1, i]
170+
@inbounds cv[M, i] = cv[1, i]
171+
@inbounds z[1, i] = z[M, i]
172+
end
173+
end
85174

175+
#@views cenU(A) = A[2:end,1:end-1]
176+
#@views cenV(A) = A[1:end-1,2:end]
177+
#@views cenP(A) = A[1:end-1,1:end-1]
178+
#@views cenZ(A) = A[2:end,2:end]
179+
180+
#@views im1U(A) = A[1:end-1,1:end-1]
181+
#@views jm1V(A) = A[1:end-1,1:end-1]
182+
#@views allJ(n,A) = A[n,1:end]
183+
#@views allI(n,A) = A[1:end,n]
184+
185+
186+
global tdt = dt
187+
@time begin
86188
for ncycle in 1:ITMAX
87-
@time begin
88-
global p, u, v, cu, cv, z, h, uold, vold, pold, unew, vnew, pnew, tdt, tdtsdx, tdtsdy, time, tdts8
189+
global alpha, fsdx, fsdy
190+
global p, u, v, cu, cv, z, h, uold, vold, pold, unew, vnew, pnew, tdt, tdtsdx, tdtsdy, time, tdts
89191

90-
cu[2:end, 1:end-1] = ((p[2:end, 1:end-1] .+ p[1:end-1, 1:end-1]) .* u[2:end,1:end-1]) ./ 2.0
91-
cv[1:end-1, 2:end] = ((p[1:end-1, 2:end] .+ p[1:end-1, 1:end-1]) .* v[1:end-1,2:end]) ./ 2.0
92-
z[2:end, 2:end] = (fsdx .* (v[2:end, 2:end] .- v[1:end-1, 2:end]) .-
93-
fsdy .* (u[2:end, 2:end] .- u[2:end, 1:end-1])) ./
94-
(p[1:end-1, 1:end-1] .+ p[2:end,1:end-1] .+
95-
p[2:end,2:end] .+ p[1:end-1,2:end])
192+
#
193+
# cu[2:end, 1:end-1] = ((p[2:end, 1:end-1] .+ p[1:end-1,1:end-1]) .* u[2:end,1:end-1]) ./ 2.0::Float64
194+
calc_cu!(p,u,cu)
195+
#
196+
# cv[1:end-1, 2:end] = ((p[1:end-1, 2:end] .+ p[1:end-1,1:end-1]) .* v[1:end-1,2:end]) ./ 2.0::Float64
197+
calc_cv!(p,v,cv)
198+
#
199+
# end
200+
201+
202+
203+
# z[2:end, 2:end] = (fsdx .* (v[2:end, 2:end] .- v[1:end-1, 2:end]) .-
204+
# fsdy .* (u[2:end, 2:end] .- u[2:end, 1:end-1])) ./
205+
# (p[1:end-1,1:end-1] .+ p[2:end,1:end-1] .+
206+
# p[2:end,2:end] .+ p[1:end-1,2:end])
207+
calc_z!(v,u,p,z,fsdx,fsdy)
208+
96209

97-
h[1:end-1, 1:end-1] = p[1:end-1, 1:end-1] .+ 0.25 .*
98-
(u[2:end, 1:end-1] .* u[2:end, 1:end-1] .+
99-
u[1:end-1, 1:end-1] .* u[1:end-1, 1:end-1] .+
100-
v[1:end-1, 2:end] .* v[1:end-1, 2:end] .+
101-
v[1:end-1, 1:end-1] .* v[1:end-1, 1:end-1])
210+
# h[1:end-1,1:end-1] = p[1:end-1,1:end-1] .+ 0.25::Float64 .*
211+
# (u[2:end, 1:end-1] .* u[2:end, 1:end-1] .+
212+
# u[1:end-1,1:end-1] .* u[1:end-1,1:end-1] .+
213+
# v[1:end-1, 2:end] .* v[1:end-1, 2:end] .+
214+
# v[1:end-1,1:end-1] .* v[1:end-1,1:end-1])
215+
calc_h!(p,u,v,h)
102216

103-
end
104217

105-
cu[1, :] = cu[M, :]
106-
h[M,:] = h[1, :]
107-
cv[M, 2:end] = cv[1, 2:end]
108-
z[1, 2:end] = z[M, 2:end]
218+
# bndry1!(h,z,cu,cv)
219+
cu[1,1:N+1] = cu[M, 1:N+1]
220+
h[M,1:N+1] = h[1, 1:N+1]
221+
cv[M, 2:N+1] = cv[1, 2:N+1]
222+
z[M, 2:N+1] = z[1, 2:N+1]
109223

110-
cv[:, 1] = cv[:, N]
111-
h[:, N] = h[:, 1]
112-
cu[2:end, N] = cu[2:end, 1]
113-
z[2:end, 1] = z[2:end, N]
224+
cv[1:M+1, 1] = cv[1:M+1, N]
225+
h[1:M+1, N] = h[1:M+1, 1]
226+
cu[2:M+1, N] = cu[2:M+1, 1]
227+
z[2:M+1, 1] = z[2:M+1, N]
114228

115-
cu[1, N] = cu[M, 1]
116-
cv[M, 1] = cv[1, N]
117-
z[1, 1] = z[M, N]
118-
h[M, N] = h[1, 1]
229+
cu[1, N] = cu[M, 1]
230+
cv[M, 1] = cv[1, N]
231+
z[1, 1] = z[M, N]
232+
h[M, N] = h[1, 1]
119233

120-
tdts8 = tdt / 8.0
234+
tdts8 = tdt / 8.0::Float64
121235
tdtsdx = tdt / dx
122236
tdtsdy = tdt / dy
123237

124-
@time begin
125-
unew[2:end, 1:end-1] = uold[2:end, 1:end-1] .+ tdts8 .*
126-
(z[2:end, 2:end] .+ z[2:end, 1:end-1]) .*
127-
(cv[2:end,2:end] .+ cv[2:end,1:end-1] .+
128-
cv[1:end-1,2:end] .+ cv[1:end-1,1:end-1]) .-
129-
tdtsdx .* (h[2:end, 1:end-1] .- h[1:end-1, 1:end-1])
130-
vnew[1:end-1, 2:end] = vold[1:end-1, 2:end] .- tdts8 .*
131-
(z[2:end, 2:end] .+ z[1:end-1, 2:end]) .*
132-
(cu[2:end,2:end] .+ cu[2:end,1:end-1] .+
133-
cu[1:end-1,2:end] .+ cu[1:end-1,1:end-1]) .-
134-
tdtsdy .* (h[1:end-1, 2:end] .- h[1:end-1, 1:end-1])
135-
pnew[1:end-1, 1:end-1] = pold[1:end-1, 1:end-1] .- tdtsdx .*
136-
(cu[2:end, 1:end-1] .- cu[1:end-1, 1:end-1]) .-
137-
tdtsdy .* (cv[1:end-1, 2:end] .- cv[1:end-1, 1:end-1])
138-
end
238+
#unew[2:end,1:end-1]= uold[2:end,1:end-1] .+ tdts8 .*
239+
# (z[2:end, 2:end] .+ z[2:end, 1:end-1]) .*
240+
# (cv[2:end,2:end] .+ cv[2:end,1:end-1] .+
241+
# cv[1:end-1,2:end] .+ cv[1:end-1,1:end-1]) .-
242+
# tdtsdx .* (h[2:end, 1:end-1] .- h[1:end-1,1:end-1])
243+
calc_unew!(uold,z,cv,h,unew,tdts8,tdtsdx)
244+
245+
# vnew[1:end-1,2:end] = vold[1:end-1,2:end] .- tdts8 .*
246+
# (z[2:end, 2:end] .+ z[1:end-1, 2:end]) .*
247+
# (cu[2:end,2:end] .+ cu[2:end,1:end-1] .+
248+
# cu[1:end-1,2:end] .+ cu[1:end-1,1:end-1]) .-
249+
# tdtsdy .* (h[1:end-1, 2:end] .- h[1:end-1,1:end-1])
250+
calc_vnew!(vold,z,cu,h,vnew,tdts8,tdtsdy)
139251

140-
unew[1, :] = unew[M, :]
141-
pnew[M, :] = pnew[1, :]
252+
# pnew[1:end-1,1:end-1] = pold[1:end-1,1:end-1] .- tdtsdx .*
253+
# (cu[2:end, 1:end-1] .- cu[1:end-1,1:end-1]) .-
254+
# tdtsdy .* (cv[1:end-1, 2:end] .- cv[1:end-1,1:end-1])
255+
calc_pnew!(pold,cu,cv,pnew,tdtsdx)
256+
257+
258+
# @time begin
259+
unew[1, 1:end] = unew[M, 1:end]
260+
pnew[M, 1:end] = pnew[1, 1:end]
142261
vnew[M, 2:end] = vnew[1, 2:end]
143262

144263
unew[2:end, N] = unew[2:end, 1]
145-
vnew[:, 1] = vnew[:, N]
146-
pnew[:, N] = pnew[:, 1]
264+
vnew[1:end, 1] = vnew[1:end, N]
265+
pnew[1:end, N] = pnew[1:end, 1]
147266

148-
unew[1, N] = unew[M, 1]
149-
vnew[M, 1] = vnew[1, N]
150-
pnew[M, N] = pnew[1, 1]
267+
unew[1, N] = unew[M, 1]
268+
vnew[M, 1] = vnew[1, N]
269+
pnew[M, N] = pnew[1, 1]
270+
# end
151271

152272
if ncycle>1
153-
uold[:, :] = u[:, :] .+ alpha .* (unew[:, :] .- 2.0 .* u[:, :] .+ uold[:, :])
154-
vold[:, :] = v[:, :] .+ alpha .* (vnew[:, :] .- 2.0 .* v[:, :] .+ vold[:, :])
155-
pold[:, :] = p[:, :] .+ alpha .* (pnew[:, :] .- 2.0 .* p[:, :] .+ pold[:, :])
273+
uold = u .+ alpha .* (unew .- 2.0::Float64 .* u .+ uold)
274+
vold = v .+ alpha .* (vnew .- 2.0::Float64 .* v .+ vold)
275+
pold = p .+ alpha .* (pnew .- 2.0::Float64 .* p .+ pold)
156276

157-
u[:, :] = unew[:, :]
158-
v[:, :] = vnew[:, :]
159-
p[:, :] = pnew[:, :]
277+
u = unew
278+
v = vnew
279+
p = pnew
160280
else
161281
tdt = tdt + tdt
162-
uold = copy(u)
163-
vold = copy(v)
164-
pold = copy(p)
165-
u = copy(unew)
166-
v = copy(vnew)
167-
p = copy(pnew)
282+
uold = u
283+
vold = v
284+
pold = p
285+
u = unew
286+
v = vnew
287+
p = pnew
168288
end
169289

170290
end
291+
end
171292

172293
if L_OUT
173294
println(" Cycle number: ", ITMAX)

0 commit comments

Comments
 (0)