When solving a mixed problem I observe extremely strange results with a variety of direct linear solvers. It is not surprising that serial ones (UMFPACK/SuperLU/Petsc) eventually run out of memory, but what worries me is (1) SuperLU_dist results are completely nonsensical and (2) MUMPS results seem to deteriorate (i.e. produce NaNs earlier) with increasing number of cores
(3) Petsc solver just produces NaNs. Am I missing something/doing something incorrectly? How does one debug issues like these?
The example is unfortunately somewhat long, but this is just a particular Laplace discretization (and one cannot go much simpler than Laplace, can one?). The output is below the code.
Code:
from __future__ import print_function
from dolfin import *
import numpy as np
# Laplace problem using discontinuous Petrov-Galerkin method
parameters["ghost_mode"] = "shared_facet"
set_log_level(WARNING)
method = ('umfpack','petsc','superlu','mumps','superlu_dist')[3]
comm = mpi_comm_world()
mpiRank = MPI.rank(comm)
# Create mesh and define function space
degree = 0
delta_p = 2
# Test case
u_exact = Expression("cos(pi*x[0])*cos(2*pi*x[1])", degree=degree+2)
f_exact = Expression("cos(pi*x[0])*cos(2*pi*x[1])*5*pi*pi", degree=degree+2)
# bilinear forms
# inner product in the test space
def inner_V(V1,V2,mesh):
v1,tau1 = split(V1)
v2,tau2 = split(V2)
return inner(grad(v1)+tau1,grad(v2)+tau2)*dx + inner(div(tau1),div(tau2))*dx + inner(v1,v2)*dx + inner(tau1,tau2)*dx
# linear differential operator
def b(U,V,mesh):
u,sigma,uhat,sighat = split(U)
v,tau = split(V)
n = FacetNormal(mesh)
sighatn = dot(sighat,n)
taun = dot(tau, n)
# eq1: -div(sigma) = f
eq1 = inner(sigma,grad(v))*dx - inner(sighatn('+'),v('+')-v('-'))*dS - inner(dot(sighat,n),v)*ds
# eq2: sigma - grad(u) = 0
eq2 = inner(sigma,tau)*dx + inner(u,div(tau))*dx - inner(uhat('+'),taun('+')+taun('-'))*dS - inner(uhat,taun)*ds
#
return eq1+eq2
def a(UV,dUV,mesh):
U,V = split(UV)
dU,dV = split(dUV)
return inner_V(V,dV,mesh) + b(U,dV,mesh) + b(dU,V,mesh)
def L(UV):
U,V = split(UV)
v,tau = split(V)
return inner(f_exact,v)*dx
def laplace_dpg(N):
mesh = UnitSquareMesh(N,N)
# Trial spaces
u_element = FiniteElement("DG", mesh.ufl_cell(), degree) # solution
s_element = VectorElement("DG", mesh.ufl_cell(), degree) # gradient
uh_element= FiniteElement("CG", mesh.ufl_cell(), degree+1) #traces
sh_element= FiniteElement("RT", mesh.ufl_cell(), degree+1) #fluxes
U_element = MixedElement([u_element,s_element,uh_element,sh_element])
#uh_element= FiniteElement("CG", mesh.ufl_cell(), degree+1, restriction="facet") #traces - This is not possible in the latest version of FENICS... use lowest order degree=0
#sh_element= FiniteElement("RT", mesh.ufl_cell(), degree+1, restriction="facet") #fluxes
# Test spaces
v_element = FiniteElement("DG", mesh.ufl_cell(), degree+delta_p)
t_element = VectorElement("DG", mesh.ufl_cell(), degree+delta_p)
V_element = MixedElement([v_element,t_element])
#
UV_element = MixedElement([U_element,V_element])
E = FunctionSpace(mesh,UV_element)
# boundary conditions on uhat
bc = DirichletBC(E.sub(0).sub(2), u_exact, 'on_boundary')
# Solve the system
uSol = Function(E)
UV = TrialFunction(E)
dUV = TestFunction(E)
solve(a(UV,dUV,mesh)==L(dUV), uSol, bcs=bc, solver_parameters = {'linear_solver': method})
U_,V_ = uSol.split()
u_,sigma_,uhat_,sighat_ = U_.split()
# Estimate the error
L2_err = sqrt(assemble((u_-u_exact)*(u_-u_exact)*dx))
V_err = sqrt(assemble(inner_V(V_,V_,mesh)))
return L2_err, V_err
if __name__=='__main__':
u_error0 = np.Inf
v_error0 = np.Inf
for N in 10*2**np.arange(5):
u_error,v_error = laplace_dpg(N)
if mpiRank==0:
print('N = %4d, u_error = %e, v_error = %e, slope_u = %e, slope_v = %e' \
% (N,u_error,v_error,u_error0/u_error,v_error0/v_error))
u_error0,v_error0 = u_error,v_error
Output:
UMFPACK:
N = 10, u_error = 8.499598e-02, v_error = 1.005355e+00, slope_u = inf, slope_v = inf
N = 20, u_error = 4.170205e-02, v_error = 5.167252e-01, slope_u = 2.038173e+00, slope_v = 1.945627e+00
N = 40, u_error = 2.073663e-02, v_error = 2.604149e-01, slope_u = 2.011033e+00, slope_v = 1.984238e+00
out of memory
PETSC:
N = 10, u_error = nan, v_error = nan, slope_u = nan, slope_v = nan
N = 20, u_error = nan, v_error = nan, slope_u = nan, slope_v = nan
N = 40, u_error = nan, v_error = nan, slope_u = nan, slope_v = nan
SuperLU:
N = 10, u_error = 8.499598e-02, v_error = 1.005355e+00, slope_u = inf, slope_v = inf
N = 20, u_error = 4.170205e-02, v_error = 5.167252e-01, slope_u = 2.038173e+00, slope_v = 1.945627e+00
N = 40, u_error = 2.073663e-02, v_error = 2.604149e-01, slope_u = 2.011033e+00, slope_v = 1.984238e+00
N = 80, u_error = 1.035351e-02, v_error = 1.304945e-01, slope_u = 2.002860e+00, slope_v = 1.995600e+00
Not enough memory to perform factorization.
MUMPS:
mpirun -np 1 python laplace_dpg.py
N = 10, u_error = 8.499598e-02, v_error = 1.005355e+00, slope_u = inf, slope_v = inf
N = 20, u_error = 4.170205e-02, v_error = 5.167252e-01, slope_u = 2.038173e+00, slope_v = 1.945627e+00
N = 40, u_error = 2.073663e-02, v_error = 2.604149e-01, slope_u = 2.011033e+00, slope_v = 1.984238e+00
N = 80, u_error = 1.035351e-02, v_error = 1.304945e-01, slope_u = 2.002860e+00, slope_v = 1.995600e+00
N = 160, u_error = nan, v_error = nan, slope_u = nan, slope_v = nan
mpirun -np 2 python laplace_dpg.py
N = 10, u_error = 8.499598e-02, v_error = 1.005355e+00, slope_u = inf, slope_v = inf
N = 20, u_error = 4.170205e-02, v_error = 5.167252e-01, slope_u = 2.038173e+00, slope_v = 1.945627e+00
N = 40, u_error = 2.073663e-02, v_error = 2.604149e-01, slope_u = 2.011033e+00, slope_v = 1.984238e+00
N = 80, u_error = 1.035351e-02, v_error = 1.304945e-01, slope_u = 2.002860e+00, slope_v = 1.995600e+00
N = 160, u_error = nan, v_error = nan, slope_u = nan, slope_v = nan
mpirun -np 4 python laplace_dpg.py
N = 10, u_error = 8.499598e-02, v_error = 1.005355e+00, slope_u = inf, slope_v = inf
N = 20, u_error = 4.170205e-02, v_error = 5.167252e-01, slope_u = 2.038173e+00, slope_v = 1.945627e+00
N = 40, u_error = 2.073663e-02, v_error = 2.604149e-01, slope_u = 2.011033e+00, slope_v = 1.984238e+00
N = 80, u_error = nan, v_error = nan, slope_u = nan, slope_v = nan
...
mpirun -np 24 nice -n 15 python laplace_dpg.py
N = 10, u_error = 8.499598e-02, v_error = 1.005355e+00, slope_u = inf, slope_v = inf
N = 20, u_error = 4.170205e-02, v_error = 5.167252e-01, slope_u = 2.038173e+00, slope_v = 1.945627e+00
N = 40, u_error = nan, v_error = nan, slope_u = nan, slope_v = nan
...
SuperLU_dist:
mpirun -np 2 python laplace_dpg.py
N = 10, u_error = 2.512400e-01, v_error = 2.091658e+00, slope_u = inf, slope_v = inf
N = 20, u_error = 1.559787e+09, v_error = 2.141536e+01, slope_u = 1.610732e-10, slope_v = 9.767091e-02
N = 40, u_error = 4.527557e+01, v_error = 6.164801e+03, slope_u = 3.445097e+07, slope_v = 3.473813e-03
???