
FEniCS 2016.1 & PETSc & MPI


Hi,

Can someone tell me why my code is not running in parallel with MPI? I am using FEniCS version 2016.1, so I first use this script to save the original "xml" mesh to an HDF5 file:

from fenics import *
mesh = Mesh("tube_4.xml")

hdf = HDF5File(mesh.mpi_comm(), "file.h5", "w")
hdf.write(mesh, "/mesh")
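
As a quick sanity check (just a sketch, not part of the script above; mesh_h5 and mesh_xml are names I use only here), the converted mesh can be read back from "file.h5" in serial and compared with the original xml mesh:

from fenics import *

# Read the converted mesh back in (serial sanity check)
mesh_h5 = Mesh()
hdf = HDF5File(mesh_h5.mpi_comm(), "file.h5", "r")
hdf.read(mesh_h5, "/mesh", False)

# Compare cell and vertex counts against the original xml mesh
mesh_xml = Mesh("tube_4.xml")
print("cells: %d vs %d, vertices: %d vs %d"
      % (mesh_h5.num_cells(), mesh_xml.num_cells(),
         mesh_h5.num_vertices(), mesh_xml.num_vertices()))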

Then, I run the following script using "mpirun -np 2 ipcs.py":

from __future__ import print_function
from fenics import *
import numpy as np
from dolfin.mesh.boundarysubdomainfinder import find_subdomain
import math

class nlp(NonlinearProblem):
    def __init__(self, a, L, bcp):
        NonlinearProblem.__init__(self)
        self.L = L
        self.a = a
        self.bcp = bcp
    def F(self, b, x):
        assemble(self.L, tensor=b)
        [bc.apply(b) for bc in self.bcp]
    def J(self, A, x):
        assemble(self.a, tensor=A)
        [bc.apply(A) for bc in self.bcp]

# Generic problem variables
T = 10.0            # final time
num_steps = 1000   # number of time steps
dt = T / num_steps # time step size

# Fluid constants
mu = 0.035         # dynamic viscosity
rho = 1.05            # density

# Cylinder dimensions
xmin = 0.
xmax = 5.
rad = 0.5

# Form compiler options
parameters["form_compiler"]["optimize"]     = True
parameters["form_compiler"]["cpp_optimize"] = True
parameters["form_compiler"]["representation"] = "quadrature"

# Create mesh
mesh = Mesh()
hdf = HDF5File(mesh.mpi_comm(), "file.h5", "r")
hdf.read(mesh, "/mesh", False)

# Boundary conditions
# Sub domain for no-slip (mark whole boundary, inflow and outflow will overwrite)
class Noslip(SubDomain):
    def inside(self, x, on_boundary):
        return (math.sqrt(x[0]**2+x[1]**2) - rad) < DOLFIN_EPS and on_boundary

# Sub domain for outflow (right end, x[2] = xmax)
class Outflow(SubDomain):
    def inside(self, x, on_boundary):
        return math.fabs(x[2] - xmax) < DOLFIN_EPS and on_boundary

# Sub domain for inflow (left end, x[2] = xmin)
class Inflow(SubDomain):
    def inside(self, x, on_boundary):
        return math.fabs(x[2] - xmin) < DOLFIN_EPS and on_boundary

class InitialConditionVelocity(Expression):
    def eval_cell(self, value, x, ufc_cell):
        value[0] = 0.0

# Create mesh functions over the cell facets
sub_domains = MeshFunction("size_t", mesh, mesh.topology().dim() - 1)

# Mark all facets as sub domain 3
sub_domains.set_all(3)

# Mark no-slip facets as sub domain 0
noslip = Noslip()
noslip.mark(sub_domains, 0)

# Mark inflow facets as sub domain 1
inflow = Inflow()
inflow.mark(sub_domains, 1)

# Mark outflow facets as sub domain 2
outflow = Outflow()
outflow.mark(sub_domains, 2)

# Define subdomain integration
ds = Measure("ds", subdomain_data=sub_domains)

# Define function spaces
V = VectorFunctionSpace(mesh, 'P', 2)
Q = FunctionSpace(mesh, 'P', 1)

print("Velocity dofs = %i" % V.dim())
print("Pressure dofs = %i" % Q.dim())

# Define trial and test functions
u = TrialFunction(V)
v = TestFunction(V)
p = TrialFunction(Q)
q = TestFunction(Q)

# Define functions for solutions at previous and current time steps
u_n = Function(V)
u_vs = Function(V)
u_  = Function(V)
p_n = Function(Q)
p_  = Function(Q)

# Define expressions used in variational forms
U   = 0.5*(u_n + u)
f   = Constant((0, 0, 0))
k   = Constant(dt)
mu  = Constant(mu)

# Define no-slip boundary condition
g_noslip = Constant((0, 0, 0))
bc_noslip = DirichletBC(V, g_noslip, sub_domains, 0)
bcu = [bc_noslip]

# Define inlet and outlet boundary conditions
pInlet = 0.01*1333.22
pOutlet = 0.0*1333.22
bc_p_in = DirichletBC(Q, Constant(pInlet), sub_domains, 1)
bc_p_out = DirichletBC(Q, Constant(pOutlet), sub_domains, 2)
bcp = [bc_p_in, bc_p_out]

p_n = project(Constant(pOutlet), Q)

# Define symmetric gradient
def epsilon(u):
    return sym(nabla_grad(u))

# Define stress tensor
def sigma(u, p):
    return 2*mu*epsilon(u) - p*Identity(len(u))

# Define facet normals
n = FacetNormal(mesh)

# Define variational problem for step 1
F1 = rho*dot((u - u_n) / k, v)*dx \
   + rho*dot(dot(u, nabla_grad(u)), v)*dx \
   + inner(sigma(U, p_n), epsilon(v))*dx \
   + dot(p_n*n, v)*ds - dot(mu*nabla_grad(U)*n, v)*ds \
   - dot(f, v)*dx
F1  = action(F1, u_vs)
# Jacobian
dF1 = derivative(F1, u_vs, u)

# Nonlinear problem
problem = nlp(dF1, F1, bcu)
solver = PETScSNESSolver()
solver.parameters["relative_tolerance"] = 1e-6

# Define variational problem for step 2
a2 = dot(nabla_grad(p), nabla_grad(q))*dx
L2 = dot(nabla_grad(p_n), nabla_grad(q))*dx - (rho/k)*div(u_)*q*dx

# Define variational problem for step 3
a3 = dot(u, v)*dx
L3 = dot(u_, v)*dx - k/rho*dot(nabla_grad(p_ - p_n), v)*dx

# Assemble matrices
A2 = assemble(a2)
A3 = assemble(a3)

# Apply boundary conditions to matrices
[bc.apply(A2) for bc in bcp]

# Create VTK files for visualization output
vtkfile_u = File('solution/velocity.pvd')
vtkfile_p = File('solution/pressure.pvd')
u_.rename('u','vtkfile_u')
p_.rename('p','vtkfile_p')
# Create time series for saving solution for later
timeseries_u = TimeSeries('solution/velocity')
timeseries_p = TimeSeries('solution/pressure')

# Create progress bar
progress = Progress('Time-stepping')

# Time-stepping
t = 0
for n_step in range(num_steps):

    # Update current time
    t += dt

    # Step 1: Tentative velocity step
    solver.solve(problem, u_vs.vector())
    u_.assign(u_vs)

    # Step 2: Pressure correction step
    b2 = assemble(L2)
    [bc.apply(b2) for bc in bcp]
    # solve(A2, p_.vector(), b2, 'bicgstab', 'ilu')
    solve(A2, p_.vector(), b2, 'gmres', 'petsc_amg')

    # Step 3: Velocity correction step
    b3 = assemble(L3)
    # solve(A3, u_.vector(), b3, 'bicgstab')
    solve(A3, u_.vector(), b3, 'gmres', 'petsc_amg')

    # Save solution to file (VTK)
    vtkfile_u << u_
    vtkfile_p << p_

    # Save solution to file (HDF5)
    timeseries_u.store(u_.vector(), t)
    timeseries_p.store(p_.vector(), t)

    # Update previous solution
    u_n.assign(u_)
    p_n.assign(p_)

    # Update progress bar
    progress.update(t / T)
    print('time: %.4f, u_max: %.3f' % (t, u_.vector().array().max()))

Running the program with "mpirun -np 1 ipcs.py" reproduces the serial result, as expected. However, something in my code prevents FEniCS from splitting the problem across the requested number of processes. In the working examples I have found, the number of DoFs reported by each process should be roughly half of the total, since the mesh should be partitioned. I hope someone can point out what is going wrong here. I can share the "xml" file with anyone who wants to reproduce the problem.
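
To see how the mesh and DoFs are distributed, I would add a check along these lines (a minimal sketch, assuming DOLFIN's MPI.rank/MPI.size helpers and dofmap().dofs() behave as documented; with two processes I would expect each rank to report roughly half of the cells and DoFs):

from fenics import *

# Read the partitioned mesh exactly as in the main script
mesh = Mesh()
hdf = HDF5File(mesh.mpi_comm(), "file.h5", "r")
hdf.read(mesh, "/mesh", False)

V = VectorFunctionSpace(mesh, 'P', 2)

rank = MPI.rank(mesh.mpi_comm())
size = MPI.size(mesh.mpi_comm())
# Cells stored on this process and DoFs it owns, versus the global count
print("rank %d of %d: %d local cells, %d owned dofs, %d global dofs"
      % (rank, size, mesh.num_cells(), len(V.dofmap().dofs()), V.dim()))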

Best,

Lucas.

asked Nov 20, 2016 by Lucas Muller FEniCS Novice (140 points)
...