redistribute.f90 Source File


Source Code

! Modifications for optimised local copy in c_redist_22 and c_redist_32
! (and their inverse routines):
! (c) The Numerical Algorithms Group (NAG) Ltd, 2012
! on behalf of EPSRC for the HECToR project
module redistribute
!
! Redistribute distributed (integer, real, complex or logical)
! (1, 2, 3, or 4) dimensional arrays into two dimensional arrays with
! first index on local processor, and vice versa.
!
! The first operation is called 'gather' and the second is called 'scatter.'
!
! One can also do a 'fill' operation.  This consists of copying
! values from a (2, 3, or 4) dimensional array of
! (integer, real, complex, or logical ) values into
! another array with the same number of dimensions.
!
! One can also do a three index to four index redistribution for complex numbers.
!
   implicit none
   private

   public :: index_list_type, delete_list
   public :: redist_type, delete_redist
! TT>
   public :: report_map_property, measure_gather, measure_scatter
   public :: gather_count, scatter_count, time_redist
! <TT

   public :: init_redist, gather, scatter
   public :: init_fill, fill
   public :: set_redist_character_type

   interface gather
      module procedure c_redist_22, r_redist_22, i_redist_22, l_redist_22
      module procedure c_redist_32, r_redist_32, i_redist_32, l_redist_32
      module procedure c_redist_42, r_redist_42, i_redist_42, l_redist_42
      module procedure c_redist_23
      module procedure c_redist_34, r_redist_34
      module procedure c_redist_33
      module procedure c_redist_35, r_redist_35
   end interface

   interface scatter
      module procedure c_redist_12, r_redist_12, i_redist_12, l_redist_12
      module procedure c_redist_22_inv, r_redist_22_inv, i_redist_22_inv, l_redist_22_inv
      module procedure c_redist_32_inv, r_redist_32_inv, i_redist_32_inv, l_redist_32_inv
      module procedure c_redist_42_inv, r_redist_42_inv, i_redist_42_inv, l_redist_42_inv
      module procedure c_redist_33_inv
      module procedure c_redist_34_inv, r_redist_34_inv
      module procedure c_redist_35_inv, r_redist_35_inv
   end interface

! TT>
   interface measure_gather
      module procedure measure_gather_32, measure_gather_33
      module procedure measure_gather_22
   end interface

   interface measure_scatter
      module procedure measure_scatter_23, measure_scatter_33
      module procedure measure_scatter_22
   end interface

   integer :: gather_count = 0, scatter_count = 0
   real, save :: time_redist(2) = 0.
! <TT

   interface fill
      module procedure c_fill_2, c_fill_3, c_fill_4
      module procedure r_fill_2, r_fill_3, r_fill_4
      module procedure i_fill_2, i_fill_3, i_fill_4
      module procedure l_fill_2, l_fill_3, l_fill_4
   end interface

   type :: index_map
      integer :: nn
      integer, dimension(:), pointer :: k => null()
      integer, dimension(:), pointer :: l => null()
      integer, dimension(:), pointer :: m => null()
      integer, dimension(:), pointer :: n => null()
      integer, dimension(:), pointer :: o => null()
   end type index_map

   ! TT: want to add map name, from_layout and to_layout
   type :: redist_type
      private
      integer, dimension(5) :: to_low, from_low, to_high, from_high
      type(index_map), dimension(:), pointer :: to => null()
      type(index_map), dimension(:), pointer :: from => null()
      complex, dimension(:), pointer :: complex_buff => null()
      real, dimension(:), pointer :: real_buff => null()
      integer, dimension(:), pointer :: integer_buff => null()
      logical, dimension(:), pointer :: logical_buff => null()
      character(len=3) :: redistname = ""
   end type redist_type

   type :: index_list_type
      integer, dimension(:), pointer :: first => null()
      integer, dimension(:), pointer :: second => null()
      integer, dimension(:), pointer :: third => null()
      integer, dimension(:), pointer :: fourth => null()
      integer, dimension(:), pointer :: fifth => null()
   end type index_list_type

contains

   subroutine set_redist_character_type(r, chartype)

      type(redist_type), intent(inout) :: r
      character(3), intent(in) :: chartype

      r%redistname = chartype

   end subroutine set_redist_character_type

   subroutine init_redist(r, char, to_low, to_high, to_list, &
                          from_low, from_high, from_list, ierr)

      use mp, only: iproc, nproc, proc0
      type(redist_type), intent(inout) :: r
      character(1), intent(in) :: char
      type(index_list_type), dimension(0:nproc - 1), intent(in) :: to_list, from_list
      integer, dimension(:), intent(in) :: from_low, to_high, from_high, to_low

      integer :: j, ip, n_to, n_from, buff_size
      integer, optional, intent(out) :: ierr

      allocate (r%to(0:nproc - 1), r%from(0:nproc - 1))

      if (present(ierr)) ierr = 0
      buff_size = 0

      do ip = 0, nproc - 1
         if (associated(to_list(ip)%first)) then
            n_to = size(to_list(ip)%first)
            r%to(ip)%nn = n_to

            allocate (r%to(ip)%k(n_to))
            allocate (r%to(ip)%l(n_to))

            r%to(ip)%k = to_list(ip)%first
            r%to(ip)%l = to_list(ip)%second

            if (associated(to_list(ip)%third)) then
               allocate (r%to(ip)%m(n_to))
               r%to(ip)%m = to_list(ip)%third
            end if

            if (associated(to_list(ip)%fourth)) then
               allocate (r%to(ip)%n(n_to))
               r%to(ip)%n = to_list(ip)%fourth
            end if

            if (associated(to_list(ip)%fifth)) then
               allocate (r%to(ip)%o(n_to))
               r%to(ip)%o = to_list(ip)%fifth
            end if

            if (ip /= iproc) buff_size = max(buff_size, n_to)
         else
            r%to(ip)%nn = 0
         end if
      end do

      do j = 1, size(from_low)
         r%from_low(j) = from_low(j)
      end do

      do j = 1, size(from_high)
         r%from_high(j) = from_high(j)
      end do

      do j = 1, size(to_high)
         r%to_high(j) = to_high(j)
      end do

      do j = 1, size(to_low)
         r%to_low(j) = to_low(j)
      end do

      do ip = 0, nproc - 1
         if (associated(from_list(ip)%first)) then

            n_from = size(from_list(ip)%first)
            r%from(ip)%nn = n_from

            allocate (r%from(ip)%k(n_from))
            allocate (r%from(ip)%l(n_from))

            r%from(ip)%k = from_list(ip)%first
            r%from(ip)%l = from_list(ip)%second

            if (associated(from_list(ip)%third)) then
               allocate (r%from(ip)%m(n_from))
               r%from(ip)%m = from_list(ip)%third
            end if

            if (associated(from_list(ip)%fourth)) then
               allocate (r%from(ip)%n(n_from))
               r%from(ip)%n = from_list(ip)%fourth
            end if

            if (associated(from_list(ip)%fifth)) then
               allocate (r%from(ip)%o(n_from))
               r%from(ip)%o = from_list(ip)%fifth
            end if

            if (ip /= iproc) buff_size = max(buff_size, n_from)
         else
            r%from(ip)%nn = 0
         end if
      end do

      select case (char)
      case ('c')
         if (buff_size > 0) allocate (r%complex_buff(buff_size))
      case ('r')
         if (buff_size > 0) allocate (r%real_buff(buff_size))
      case ('i')
         if (buff_size > 0) allocate (r%integer_buff(buff_size))
      case ('l')
         if (buff_size > 0) allocate (r%logical_buff(buff_size))
      case default
         if (proc0) then
            write (*, *) 'Type to be redistributed invalid.  Must stop.'
            write (*, *) char
         end if
         stop
      end select

   end subroutine init_redist

   !  subroutine init_fill (f, char, to_low, to_high, to_list, &
   !       from_low, from_high, from_list, ierr)
   subroutine init_fill(f, char, to_low, to_list, &
                        from_low, from_list, ierr)

      use mp, only: nproc, proc0, iproc
      type(redist_type), intent(out) :: f
      character(1), intent(in) :: char
! TT> caused a problem on PGI compiler
!    type (index_list_type), dimension (0:) :: to_list, from_list
      type(index_list_type), dimension(0:nproc - 1), intent(in) :: to_list, from_list
! <TT
      !    integer, dimension(:), intent (in) :: to_low, from_low, to_high, from_high
      integer, dimension(:), intent(in) :: to_low, from_low
      integer, optional, intent(out) :: ierr

      integer :: j, ip, n_to, n_from, buff_size

      if (present(ierr)) ierr = 0
      do j = 1, size(to_low)
         f%to_low(j) = to_low(j)
      end do

      do j = 1, size(from_low)
         f%from_low(j) = from_low(j)
      end do

      allocate (f%to(0:nproc - 1), f%from(0:nproc - 1))

      buff_size = 0
      do ip = 0, nproc - 1
         if (associated(to_list(ip)%first)) then
            n_to = size(to_list(ip)%first)
            f%to(ip)%nn = n_to
            allocate (f%to(ip)%k(n_to))
            f%to(ip)%k = to_list(ip)%first

            if (associated(to_list(ip)%second)) then
               allocate (f%to(ip)%l(n_to))
               f%to(ip)%l = to_list(ip)%second
            end if

            if (associated(to_list(ip)%third)) then
               allocate (f%to(ip)%m(n_to))
               f%to(ip)%m = to_list(ip)%third
            end if

            if (associated(to_list(ip)%fourth)) then
               allocate (f%to(ip)%n(n_to))
               f%to(ip)%n = to_list(ip)%fourth
            end if

            if (associated(to_list(ip)%fifth)) then
               allocate (f%to(ip)%o(n_to))
               f%to(ip)%o = to_list(ip)%fifth
            end if

            if (ip /= iproc) buff_size = max(buff_size, n_to)
         else
            f%to(ip)%nn = 0
         end if
      end do

      do ip = 0, nproc - 1
         if (associated(from_list(ip)%first)) then
            n_from = size(from_list(ip)%first)
            f%from(ip)%nn = n_from
            allocate (f%from(ip)%k(n_from))
            f%from(ip)%k = from_list(ip)%first

            if (associated(from_list(ip)%second)) then
               allocate (f%from(ip)%l(n_from))
               f%from(ip)%l = from_list(ip)%second
            end if

            if (associated(from_list(ip)%third)) then
               allocate (f%from(ip)%m(n_from))
               f%from(ip)%m = from_list(ip)%third
            end if

            if (associated(from_list(ip)%fourth)) then
               allocate (f%from(ip)%n(n_from))
               f%from(ip)%n = from_list(ip)%fourth
            end if

            if (associated(from_list(ip)%fifth)) then
               allocate (f%from(ip)%o(n_from))
               f%from(ip)%o = from_list(ip)%fifth
            end if

            if (ip /= iproc) buff_size = max(buff_size, n_from)
         else
            f%from(ip)%nn = 0
         end if
      end do

      select case (char)
      case ('c')
         if (buff_size > 0) allocate (f%complex_buff(buff_size))
      case ('r')
         if (buff_size > 0) allocate (f%real_buff(buff_size))
      case ('i')
         if (buff_size > 0) allocate (f%integer_buff(buff_size))
      case ('l')
         if (buff_size > 0) allocate (f%logical_buff(buff_size))
      case default
         if (proc0) then
            write (*, *) 'Type to be redistributed invalid.  Must stop.'
            write (*, *) char
         end if
         stop
      end select

   end subroutine init_fill

   subroutine delete_redist(r)

      use mp, only: nproc
      type(redist_type), intent(in out) :: r

      integer :: i

      if (associated(r%to)) then
         do i = 0, nproc - 1
            if (associated(r%to(i)%k)) deallocate (r%to(i)%k)
            if (associated(r%to(i)%l)) deallocate (r%to(i)%l)
            if (associated(r%to(i)%m)) deallocate (r%to(i)%m)
            if (associated(r%to(i)%n)) deallocate (r%to(i)%n)
         end do
         deallocate (r%to)
      end if

      if (associated(r%from)) then
         do i = 0, nproc - 1
            if (associated(r%from(i)%k)) deallocate (r%from(i)%k)
            if (associated(r%from(i)%l)) deallocate (r%from(i)%l)
            if (associated(r%from(i)%m)) deallocate (r%from(i)%m)
            if (associated(r%from(i)%n)) deallocate (r%from(i)%n)
         end do
         deallocate (r%from)
      end if

      if (associated(r%complex_buff)) deallocate (r%complex_buff)
      if (associated(r%real_buff)) deallocate (r%real_buff)
      if (associated(r%integer_buff)) deallocate (r%integer_buff)
      if (associated(r%logical_buff)) deallocate (r%logical_buff)

   end subroutine delete_redist

   subroutine delete_list(list)
      use mp, only: nproc
! TT> caused a problem on PGI compiler
!    type (index_list_type), dimension(0:) :: list
      type(index_list_type), dimension(0:nproc - 1), intent(inout) :: list
! <TT

      integer :: ip

      do ip = 0, nproc - 1
         if (associated(list(ip)%first)) deallocate (list(ip)%first)
         if (associated(list(ip)%second)) deallocate (list(ip)%second)
         if (associated(list(ip)%third)) deallocate (list(ip)%third)
         if (associated(list(ip)%fourth)) deallocate (list(ip)%fourth)
         if (associated(list(ip)%fifth)) deallocate (list(ip)%fifth)
      end do

   end subroutine delete_list

   subroutine c_redist_12(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      complex, dimension(r%from_low(1):), intent(in) :: from_here

      complex, dimension(r%to_low(1):, r%to_low(2):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, r%from(iproc)%nn
         to_here(r%to(iproc)%k(i), r%to(iproc)%l(i)) &
            = from_here(r%from(iproc)%k(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%complex_buff(i) = from_here(r%from(ipto)%k(i))
               end do
               call send(r%complex_buff(1:r%from(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%complex_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i)) &
                     = r%complex_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%complex_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i)) &
                     = r%complex_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%complex_buff(i) = from_here(r%from(ipto)%k(i))
               end do
               call send(r%complex_buff(1:r%from(ipto)%nn), ipto, idp)
            end if
         end if
      end do

   end subroutine c_redist_12

   subroutine c_redist_22(r, from_here, to_here)

      type(redist_type), intent(in out) :: r

      complex, dimension(r%from_low(1):, &
                         r%from_low(2):), intent(in) :: from_here

      complex, dimension(r%to_low(1):, &
                         r%to_low(2):), intent(in out) :: to_here

      call c_redist_22_old_copy(r, from_here, to_here)
      call c_redist_22_mpi_copy(r, from_here, to_here)

   end subroutine c_redist_22

   subroutine c_redist_22_old_copy(r, from_here, to_here)

      use mp, only: iproc
      type(redist_type), intent(in out) :: r

      complex, dimension(r%from_low(1):, &
                         r%from_low(2):), intent(in) :: from_here

      complex, dimension(r%to_low(1):, &
                         r%to_low(2):), intent(in out) :: to_here

      integer :: i

!CMR
! In the stella standard FFT situation this routine maps
!         xxf(it,ixxf) to yxf(ik,iyxf) data type
!         where it is kx (or x) index, ik is ky (or y) index,
!         ixxf is (y,ig,isgn,"les") and iyxf is (x,ig,isgn,"les")
!
      do i = 1, r%from(iproc)%nn
!
! redistribute from local processor to local processor
! NB r%from(iproc)%nn is #elements sent by THIS processor to THIS processor
!    In this situation the data at (r%to(iproc)%k(i),r%to(iproc)%l(i))
!    should come from (r%from(iproc)%k(i), r%from(iproc)%l(i)).
!
! This do loop, in stella standard FFT situation, corresponds to:
!    to_here(ik,iyxf)=from_here(it,ixxf)
!
         to_here(r%to(iproc)%k(i), &
                 r%to(iproc)%l(i)) &
            = from_here(r%from(iproc)%k(i), &
                        r%from(iproc)%l(i))
      end do

   end subroutine c_redist_22_old_copy

   subroutine c_redist_22_mpi_copy(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      complex, dimension(r%from_low(1):, &
                         r%from_low(2):), intent(in) :: from_here

      complex, dimension(r%to_low(1):, &
                         r%to_low(2):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%complex_buff(i) = from_here(r%from(ipto)%k(i), &
                                                r%from(ipto)%l(i))
               end do
               call send(r%complex_buff(1:r%from(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%complex_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i)) &
                     = r%complex_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%complex_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i)) &
                     = r%complex_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%complex_buff(i) = from_here(r%from(ipto)%k(i), &
                                                r%from(ipto)%l(i))
               end do
               call send(r%complex_buff(1:r%from(ipto)%nn), ipto, idp)
            end if
         end if
      end do

   end subroutine c_redist_22_mpi_copy

   subroutine c_redist_22_inv(r, from_here, to_here)

      type(redist_type), intent(in out) :: r

      complex, dimension(r%to_low(1):, &
                         r%to_low(2):), intent(in) :: from_here

      complex, dimension(r%from_low(1):, &
                         r%from_low(2):), intent(in out) :: to_here

      call c_redist_22_inv_old_copy(r, from_here, to_here)
      call c_redist_22_inv_mpi_copy(r, from_here, to_here)

   end subroutine c_redist_22_inv

   subroutine c_redist_22_inv_old_copy(r, from_here, to_here)

      use mp, only: iproc
      type(redist_type), intent(in out) :: r

      complex, dimension(r%to_low(1):, &
                         r%to_low(2):), intent(in) :: from_here

      complex, dimension(r%from_low(1):, &
                         r%from_low(2):), intent(in out) :: to_here

      integer :: i

!CMR
! In the stella standard FFT situation this routine maps
!         yxf(ik,iyxf) to xxf(it,ixxf) data type
!         where it is kx (or x) index, ik is ky (or y) index,
!         ixxf is (y,ig,isgn,"les") and iyxf is (x,ig,isgn,"les")
!
      do i = 1, r%to(iproc)%nn
!
! redistribute from local processor to local processor
! NB r%from(iproc)%nn is #elements sent by THIS processor to THIS processor
!    In this situation the data at (r%from(iproc)%k(i),r%from(iproc)%l(i))
!    should come from (r%to(iproc)%k(i), r%to(iproc)%l(i)).
!
! This do loop, in stella standard FFT situation, corresponds to:
!    to_here(it,ixxf)=from_here(ik,iyxf)
!
         to_here(r%from(iproc)%k(i), &
                 r%from(iproc)%l(i)) &
            = from_here(r%to(iproc)%k(i), &
                        r%to(iproc)%l(i))
      end do

   end subroutine c_redist_22_inv_old_copy

   subroutine c_redist_22_inv_mpi_copy(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      complex, dimension(r%to_low(1):, &
                         r%to_low(2):), intent(in) :: from_here

      complex, dimension(r%from_low(1):, &
                         r%from_low(2):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%to(ipto)%nn > 0) then
               do i = 1, r%to(ipto)%nn
                  r%complex_buff(i) = from_here(r%to(ipto)%k(i), &
                                                r%to(ipto)%l(i))
               end do
               call send(r%complex_buff(1:r%to(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%from(ipfrom)%nn > 0) then
               call receive(r%complex_buff(1:r%from(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%from(ipfrom)%nn
                  to_here(r%from(ipfrom)%k(i), &
                          r%from(ipfrom)%l(i)) &
                     = r%complex_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%from(ipfrom)%nn > 0) then
               call receive(r%complex_buff(1:r%from(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%from(ipfrom)%nn
                  to_here(r%from(ipfrom)%k(i), &
                          r%from(ipfrom)%l(i)) &
                     = r%complex_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%to(ipto)%nn > 0) then
               do i = 1, r%to(ipto)%nn
                  r%complex_buff(i) = from_here(r%to(ipto)%k(i), &
                                                r%to(ipto)%l(i))
               end do
               call send(r%complex_buff(1:r%to(ipto)%nn), ipto, idp)
            end if

         end if
      end do

   end subroutine c_redist_22_inv_mpi_copy

   subroutine c_redist_32(r, from_here, to_here)

      use job_manage, only: time_message

      type(redist_type), intent(in out) :: r

      complex, dimension(r%from_low(1):, &
                         r%from_low(2):, &
                         r%from_low(3):), intent(in) :: from_here

      complex, dimension(r%to_low(1):, &
                         r%to_low(2):), intent(in out) :: to_here

      call c_redist_32_old_copy(r, from_here, to_here)
      call c_redist_32_mpi_copy(r, from_here, to_here)

   end subroutine c_redist_32

   subroutine c_redist_32_old_copy(r, from_here, to_here)

      use mp, only: iproc
      type(redist_type), intent(in out) :: r

      complex, dimension(r%from_low(1):, &
                         r%from_low(2):, &
                         r%from_low(3):), intent(in) :: from_here

      complex, dimension(r%to_low(1):, &
                         r%to_low(2):), intent(in out) :: to_here

      integer :: i

!CMR
! In the stella standard FFT situation this routine maps
!         g(ig, isgn, iglo) to xxf(it,ixxf) data type
!         where it is kx (or x) index, ixxf is (y,ig,isgn,"les")
!         and iglo is ("xyles")
      do i = 1, r%from(iproc)%nn
!
! redistribute from local processor to local processor
! NB r%from(iproc)%nn is #elements sent by THIS processor to THIS processor
!    In this situation the data at (r%to(iproc)%k(i),r%to(iproc)%l(i))
!    should come from (r%from(iproc)%k(i),r%from(iproc)%l(i),r%from(iproc)%m(i)).
!
! This do loop, in stella standard FFT situation, corresponds to:
!    to_here(it,ixxf)=from_here(ig,isgn,iglo)
!
         to_here(r%to(iproc)%k(i), &
                 r%to(iproc)%l(i)) &
            = from_here(r%from(iproc)%k(i), &
                        r%from(iproc)%l(i), &
                        r%from(iproc)%m(i))
      end do

   end subroutine c_redist_32_old_copy

   subroutine c_redist_32_mpi_copy(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      complex, dimension(r%from_low(1):, &
                         r%from_low(2):, &
                         r%from_low(3):), intent(in) :: from_here

      complex, dimension(r%to_low(1):, &
                         r%to_low(2):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%complex_buff(i) = from_here(r%from(ipto)%k(i), &
                                                r%from(ipto)%l(i), &
                                                r%from(ipto)%m(i))
               end do
               call send(r%complex_buff(1:r%from(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%complex_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i)) &
                     = r%complex_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%complex_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i)) &
                     = r%complex_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%complex_buff(i) = from_here(r%from(ipto)%k(i), &
                                                r%from(ipto)%l(i), &
                                                r%from(ipto)%m(i))
               end do
               call send(r%complex_buff(1:r%from(ipto)%nn), ipto, idp)
            end if
         end if
      end do

   end subroutine c_redist_32_mpi_copy

   subroutine c_redist_32_inv(r, from_here, to_here)

      use job_manage, only: time_message

      type(redist_type), intent(in out) :: r

      complex, dimension(r%to_low(1):, &
                         r%to_low(2):), intent(in) :: from_here

      complex, dimension(r%from_low(1):, &
                         r%from_low(2):, &
                         r%from_low(3):), intent(in out) :: to_here

      call c_redist_32_inv_old_copy(r, from_here, to_here)
      call c_redist_32_inv_mpi_copy(r, from_here, to_here)

   end subroutine c_redist_32_inv

   subroutine c_redist_32_inv_old_copy(r, from_here, to_here)

      use mp, only: iproc

      type(redist_type), intent(in out) :: r

      complex, dimension(r%to_low(1):, &
                         r%to_low(2):), intent(in) :: from_here

      complex, dimension(r%from_low(1):, &
                         r%from_low(2):, &
                         r%from_low(3):), intent(in out) :: to_here

      integer :: i

!CMR
! In the stella standard FFT situation this routine maps
!         xxf(it,ixxf) to g(ig, isgn, iglo) data type
!         where it is kx (or x) index, ixxf is (y,ig,isgn,"les")
!         and iglo is ("xyles")

      do i = 1, r%to(iproc)%nn
!
! redistribute from local processor to local processor
! NB r%to(iproc)%nn is #elements sent by THIS processor to THIS processor
!    In this situation the data at (r%from(iproc)%k(i),r%from(iproc)%l(i),r%from(iproc)%m(i))
!    should come from (r%to(iproc)%k(i),r%to(iproc)%l(i)).
!
! This do loop, in stella standard FFT situation, corresponds to:
!    to_here(ig,isgn,iglo)=from_here(it,ixxf)
!
         to_here(r%from(iproc)%k(i), &
                 r%from(iproc)%l(i), &
                 r%from(iproc)%m(i)) &
            = from_here(r%to(iproc)%k(i), &
                        r%to(iproc)%l(i))
      end do

   end subroutine c_redist_32_inv_old_copy

   subroutine c_redist_32_inv_mpi_copy(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive

      type(redist_type), intent(in out) :: r

      complex, dimension(r%to_low(1):, &
                         r%to_low(2):), intent(in) :: from_here

      complex, dimension(r%from_low(1):, &
                         r%from_low(2):, &
                         r%from_low(3):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%to(ipto)%nn > 0) then
               do i = 1, r%to(ipto)%nn
                  r%complex_buff(i) = from_here(r%to(ipto)%k(i), &
                                                r%to(ipto)%l(i))
               end do
               call send(r%complex_buff(1:r%to(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%from(ipfrom)%nn > 0) then
               call receive(r%complex_buff(1:r%from(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%from(ipfrom)%nn
                  to_here(r%from(ipfrom)%k(i), &
                          r%from(ipfrom)%l(i), &
                          r%from(ipfrom)%m(i)) &
                     = r%complex_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%from(ipfrom)%nn > 0) then
               call receive(r%complex_buff(1:r%from(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%from(ipfrom)%nn
                  to_here(r%from(ipfrom)%k(i), &
                          r%from(ipfrom)%l(i), &
                          r%from(ipfrom)%m(i)) &
                     = r%complex_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%to(ipto)%nn > 0) then
               do i = 1, r%to(ipto)%nn
                  r%complex_buff(i) = from_here(r%to(ipto)%k(i), &
                                                r%to(ipto)%l(i))
               end do
               call send(r%complex_buff(1:r%to(ipto)%nn), ipto, idp)
            end if

         end if
      end do

   end subroutine c_redist_32_inv_mpi_copy

   subroutine c_redist_42(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      complex, dimension(r%from_low(1):, &
                         r%from_low(2):, &
                         r%from_low(3):, &
                         r%from_low(4):), intent(in) :: from_here

      complex, dimension(r%to_low(1):, &
                         r%to_low(2):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, r%from(iproc)%nn
         to_here(r%to(iproc)%k(i), &
                 r%to(iproc)%l(i)) &
            = from_here(r%from(iproc)%k(i), &
                        r%from(iproc)%l(i), &
                        r%from(iproc)%m(i), &
                        r%from(iproc)%n(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%complex_buff(i) = from_here(r%from(ipto)%k(i), &
                                                r%from(ipto)%l(i), &
                                                r%from(ipto)%m(i), &
                                                r%from(ipto)%n(i))
               end do
               call send(r%complex_buff(1:r%from(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%complex_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i)) &
                     = r%complex_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%complex_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i)) &
                     = r%complex_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%complex_buff(i) = from_here(r%from(ipto)%k(i), &
                                                r%from(ipto)%l(i), &
                                                r%from(ipto)%m(i), &
                                                r%from(ipto)%n(i))
               end do
               call send(r%complex_buff(1:r%from(ipto)%nn), ipto, idp)
            end if
         end if
      end do

   end subroutine c_redist_42

   subroutine c_redist_42_inv(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      complex, dimension(r%to_low(1):, &
                         r%to_low(2):), intent(in) :: from_here

      complex, dimension(r%from_low(1):, &
                         r%from_low(2):, &
                         r%from_low(3):, &
                         r%from_low(4):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, r%to(iproc)%nn
         to_here(r%from(iproc)%k(i), &
                 r%from(iproc)%l(i), &
                 r%from(iproc)%m(i), &
                 r%from(iproc)%n(i)) &
            = from_here(r%to(iproc)%k(i), &
                        r%to(iproc)%l(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%to(ipto)%nn > 0) then
               do i = 1, r%to(ipto)%nn
                  r%complex_buff(i) = from_here(r%to(ipto)%k(i), &
                                                r%to(ipto)%l(i))
               end do
               call send(r%complex_buff(1:r%to(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%from(ipfrom)%nn > 0) then
               call receive(r%complex_buff(1:r%from(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%from(ipfrom)%nn
                  to_here(r%from(ipfrom)%k(i), &
                          r%from(ipfrom)%l(i), &
                          r%from(ipfrom)%m(i), &
                          r%from(ipfrom)%n(i)) &
                     = r%complex_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%from(ipfrom)%nn > 0) then
               call receive(r%complex_buff(1:r%from(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%from(ipfrom)%nn
                  to_here(r%from(ipfrom)%k(i), &
                          r%from(ipfrom)%l(i), &
                          r%from(ipfrom)%m(i), &
                          r%from(ipfrom)%n(i)) &
                     = r%complex_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%to(ipto)%nn > 0) then
               do i = 1, r%to(ipto)%nn
                  r%complex_buff(i) = from_here(r%to(ipto)%k(i), &
                                                r%to(ipto)%l(i))
               end do
               call send(r%complex_buff(1:r%to(ipto)%nn), ipto, idp)
            end if

         end if
      end do

   end subroutine c_redist_42_inv

   subroutine c_redist_23(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      complex, dimension(r%from_low(1):, &
                         r%from_low(2):), intent(in) :: from_here

      complex, dimension(r%to_low(1):, &
                         r%to_low(2):, &
                         r%to_low(3):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, r%from(iproc)%nn
         to_here(r%to(iproc)%k(i), &
                 r%to(iproc)%l(i), &
                 r%to(iproc)%m(i)) &
            = from_here(r%from(iproc)%k(i), &
                        r%from(iproc)%l(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%complex_buff(i) = from_here(r%from(ipto)%k(i), &
                                                r%from(ipto)%l(i))
               end do
               call send(r%complex_buff(1:r%from(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%complex_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i), &
                          r%to(ipfrom)%m(i)) &
                     = r%complex_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%complex_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i), &
                          r%to(ipfrom)%m(i)) &
                     = r%complex_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%complex_buff(i) = from_here(r%from(ipto)%k(i), &
                                                r%from(ipto)%l(i))
               end do
               call send(r%complex_buff(1:r%from(ipto)%nn), ipto, idp)
            end if
         end if
      end do

   end subroutine c_redist_23

   subroutine c_redist_34(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      complex, dimension(r%from_low(1):, &
                         r%from_low(2):, &
                         r%from_low(3):), intent(in) :: from_here

      complex, dimension(r%to_low(1):, &
                         r%to_low(2):, &
                         r%to_low(3):, &
                         r%to_low(4):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, r%from(iproc)%nn
         to_here(r%to(iproc)%k(i), &
                 r%to(iproc)%l(i), &
                 r%to(iproc)%m(i), &
                 r%to(iproc)%n(i)) &
            = from_here(r%from(iproc)%k(i), &
                        r%from(iproc)%l(i), &
                        r%from(iproc)%m(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%complex_buff(i) = from_here(r%from(ipto)%k(i), &
                                                r%from(ipto)%l(i), &
                                                r%from(ipto)%m(i))
               end do
               call send(r%complex_buff(1:r%from(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%complex_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i), &
                          r%to(ipfrom)%m(i), &
                          r%to(ipfrom)%n(i)) &
                     = r%complex_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%complex_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i), &
                          r%to(ipfrom)%m(i), &
                          r%to(ipfrom)%n(i)) &
                     = r%complex_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%complex_buff(i) = from_here(r%from(ipto)%k(i), &
                                                r%from(ipto)%l(i), &
                                                r%from(ipto)%m(i))
               end do
               call send(r%complex_buff(1:r%from(ipto)%nn), ipto, idp)
            end if
         end if
      end do

   end subroutine c_redist_34

   subroutine c_redist_34_inv(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      complex, dimension(r%to_low(1):, &
                         r%to_low(2):, &
                         r%to_low(3):, &
                         r%to_low(4):), intent(in) :: from_here

      complex, dimension(r%from_low(1):, &
                         r%from_low(2):, &
                         r%from_low(3):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, r%to(iproc)%nn
         to_here(r%from(iproc)%k(i), &
                 r%from(iproc)%l(i), &
                 r%from(iproc)%m(i)) &
            = from_here(r%to(iproc)%k(i), &
                        r%to(iproc)%l(i), &
                        r%to(iproc)%m(i), &
                        r%to(iproc)%n(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%to(ipto)%nn > 0) then
               do i = 1, r%to(ipto)%nn
                  r%complex_buff(i) = from_here(r%to(ipto)%k(i), &
                                                r%to(ipto)%l(i), &
                                                r%to(ipto)%m(i), &
                                                r%to(ipto)%n(i))
               end do
               call send(r%complex_buff(1:r%to(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%from(ipfrom)%nn > 0) then
               call receive(r%complex_buff(1:r%from(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%from(ipfrom)%nn
                  to_here(r%from(ipfrom)%k(i), &
                          r%from(ipfrom)%l(i), &
                          r%from(ipfrom)%m(i)) &
                     = r%complex_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%from(ipfrom)%nn > 0) then
               call receive(r%complex_buff(1:r%from(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%from(ipfrom)%nn
                  to_here(r%from(ipfrom)%k(i), &
                          r%from(ipfrom)%l(i), &
                          r%from(ipfrom)%m(i)) &
                     = r%complex_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%to(ipto)%nn > 0) then
               do i = 1, r%to(ipto)%nn
                  r%complex_buff(i) = from_here(r%to(ipto)%k(i), &
                                                r%to(ipto)%l(i), &
                                                r%to(ipto)%m(i), &
                                                r%to(ipto)%n(i))
               end do
               call send(r%complex_buff(1:r%to(ipto)%nn), ipto, idp)
            end if

         end if
      end do

   end subroutine c_redist_34_inv

   subroutine r_redist_34(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      real, dimension(r%from_low(1):, &
                      r%from_low(2):, &
                      r%from_low(3):), intent(in) :: from_here

      real, dimension(r%to_low(1):, &
                      r%to_low(2):, &
                      r%to_low(3):, &
                      r%to_low(4):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, r%from(iproc)%nn
         to_here(r%to(iproc)%k(i), &
                 r%to(iproc)%l(i), &
                 r%to(iproc)%m(i), &
                 r%to(iproc)%n(i)) &
            = from_here(r%from(iproc)%k(i), &
                        r%from(iproc)%l(i), &
                        r%from(iproc)%m(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%real_buff(i) = from_here(r%from(ipto)%k(i), &
                                             r%from(ipto)%l(i), &
                                             r%from(ipto)%m(i))
               end do
               call send(r%real_buff(1:r%from(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%real_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i), &
                          r%to(ipfrom)%m(i), &
                          r%to(ipfrom)%n(i)) &
                     = r%real_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%real_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i), &
                          r%to(ipfrom)%m(i), &
                          r%to(ipfrom)%n(i)) &
                     = r%real_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%real_buff(i) = from_here(r%from(ipto)%k(i), &
                                             r%from(ipto)%l(i), &
                                             r%from(ipto)%m(i))
               end do
               call send(r%real_buff(1:r%from(ipto)%nn), ipto, idp)
            end if
         end if
      end do

   end subroutine r_redist_34

   subroutine r_redist_34_inv(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      real, dimension(r%to_low(1):, &
                      r%to_low(2):, &
                      r%to_low(3):, &
                      r%to_low(4):), intent(in) :: from_here

      real, dimension(r%from_low(1):, &
                      r%from_low(2):, &
                      r%from_low(3):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, r%to(iproc)%nn
         to_here(r%from(iproc)%k(i), &
                 r%from(iproc)%l(i), &
                 r%from(iproc)%m(i)) &
            = from_here(r%to(iproc)%k(i), &
                        r%to(iproc)%l(i), &
                        r%to(iproc)%m(i), &
                        r%to(iproc)%n(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%to(ipto)%nn > 0) then
               do i = 1, r%to(ipto)%nn
                  r%real_buff(i) = from_here(r%to(ipto)%k(i), &
                                             r%to(ipto)%l(i), &
                                             r%to(ipto)%m(i), &
                                             r%to(ipto)%n(i))
               end do
               call send(r%real_buff(1:r%to(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%from(ipfrom)%nn > 0) then
               call receive(r%real_buff(1:r%from(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%from(ipfrom)%nn
                  to_here(r%from(ipfrom)%k(i), &
                          r%from(ipfrom)%l(i), &
                          r%from(ipfrom)%m(i)) &
                     = r%real_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%from(ipfrom)%nn > 0) then
               call receive(r%real_buff(1:r%from(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%from(ipfrom)%nn
                  to_here(r%from(ipfrom)%k(i), &
                          r%from(ipfrom)%l(i), &
                          r%from(ipfrom)%m(i)) &
                     = r%real_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%to(ipto)%nn > 0) then
               do i = 1, r%to(ipto)%nn
                  r%real_buff(i) = from_here(r%to(ipto)%k(i), &
                                             r%to(ipto)%l(i), &
                                             r%to(ipto)%m(i), &
                                             r%to(ipto)%n(i))
               end do
               call send(r%real_buff(1:r%to(ipto)%nn), ipto, idp)
            end if

         end if
      end do

   end subroutine r_redist_34_inv

   subroutine c_redist_35(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      complex, dimension(r%from_low(1):, &
                         r%from_low(2):, &
                         r%from_low(3):), intent(in) :: from_here

      complex, dimension(r%to_low(1):, &
                         r%to_low(2):, &
                         r%to_low(3):, &
                         r%to_low(4):, &
                         r%to_low(5):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, r%from(iproc)%nn
         to_here(r%to(iproc)%k(i), &
                 r%to(iproc)%l(i), &
                 r%to(iproc)%m(i), &
                 r%to(iproc)%n(i), &
                 r%to(iproc)%o(i)) &
            = from_here(r%from(iproc)%k(i), &
                        r%from(iproc)%l(i), &
                        r%from(iproc)%m(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%complex_buff(i) = from_here(r%from(ipto)%k(i), &
                                                r%from(ipto)%l(i), &
                                                r%from(ipto)%m(i))
               end do
               call send(r%complex_buff(1:r%from(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%complex_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i), &
                          r%to(ipfrom)%m(i), &
                          r%to(ipfrom)%n(i), &
                          r%to(ipfrom)%o(i)) &
                     = r%complex_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%complex_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i), &
                          r%to(ipfrom)%m(i), &
                          r%to(ipfrom)%n(i), &
                          r%to(ipfrom)%o(i)) &
                     = r%complex_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%complex_buff(i) = from_here(r%from(ipto)%k(i), &
                                                r%from(ipto)%l(i), &
                                                r%from(ipto)%m(i))
               end do
               call send(r%complex_buff(1:r%from(ipto)%nn), ipto, idp)
            end if
         end if
      end do

   end subroutine c_redist_35

   subroutine r_redist_35(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      real, dimension(r%from_low(1):, &
                      r%from_low(2):, &
                      r%from_low(3):), intent(in) :: from_here

      real, dimension(r%to_low(1):, &
                      r%to_low(2):, &
                      r%to_low(3):, &
                      r%to_low(4):, &
                      r%to_low(5):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, r%from(iproc)%nn
         to_here(r%to(iproc)%k(i), &
                 r%to(iproc)%l(i), &
                 r%to(iproc)%m(i), &
                 r%to(iproc)%n(i), &
                 r%to(iproc)%o(i)) &
            = from_here(r%from(iproc)%k(i), &
                        r%from(iproc)%l(i), &
                        r%from(iproc)%m(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%real_buff(i) = from_here(r%from(ipto)%k(i), &
                                             r%from(ipto)%l(i), &
                                             r%from(ipto)%m(i))
               end do
               call send(r%real_buff(1:r%from(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%real_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i), &
                          r%to(ipfrom)%m(i), &
                          r%to(ipfrom)%n(i), &
                          r%to(ipfrom)%o(i)) &
                     = r%real_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%real_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i), &
                          r%to(ipfrom)%m(i), &
                          r%to(ipfrom)%n(i), &
                          r%to(ipfrom)%o(i)) &
                     = r%real_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%real_buff(i) = from_here(r%from(ipto)%k(i), &
                                             r%from(ipto)%l(i), &
                                             r%from(ipto)%m(i))
               end do
               call send(r%real_buff(1:r%from(ipto)%nn), ipto, idp)
            end if
         end if
      end do

   end subroutine r_redist_35

   subroutine c_redist_35_inv(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      complex, dimension(r%to_low(1):, &
                         r%to_low(2):, &
                         r%to_low(3):, &
                         r%to_low(4):, &
                         r%to_low(5):), intent(in) :: from_here

      complex, dimension(r%from_low(1):, &
                         r%from_low(2):, &
                         r%from_low(3):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, r%to(iproc)%nn
         to_here(r%from(iproc)%k(i), &
                 r%from(iproc)%l(i), &
                 r%from(iproc)%m(i)) &
            = from_here(r%to(iproc)%k(i), &
                        r%to(iproc)%l(i), &
                        r%to(iproc)%m(i), &
                        r%to(iproc)%n(i), &
                        r%to(iproc)%o(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%to(ipto)%nn > 0) then
               do i = 1, r%to(ipto)%nn
                  r%complex_buff(i) = from_here(r%to(ipto)%k(i), &
                                                r%to(ipto)%l(i), &
                                                r%to(ipto)%m(i), &
                                                r%to(ipto)%n(i), &
                                                r%to(ipto)%o(i))
               end do
               call send(r%complex_buff(1:r%to(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%from(ipfrom)%nn > 0) then
               call receive(r%complex_buff(1:r%from(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%from(ipfrom)%nn
                  to_here(r%from(ipfrom)%k(i), &
                          r%from(ipfrom)%l(i), &
                          r%from(ipfrom)%m(i)) &
                     = r%complex_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%from(ipfrom)%nn > 0) then
               call receive(r%complex_buff(1:r%from(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%from(ipfrom)%nn
                  to_here(r%from(ipfrom)%k(i), &
                          r%from(ipfrom)%l(i), &
                          r%from(ipfrom)%m(i)) &
                     = r%complex_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%to(ipto)%nn > 0) then
               do i = 1, r%to(ipto)%nn
                  r%complex_buff(i) = from_here(r%to(ipto)%k(i), &
                                                r%to(ipto)%l(i), &
                                                r%to(ipto)%m(i), &
                                                r%to(ipto)%n(i), &
                                                r%to(ipto)%o(i))
               end do
               call send(r%complex_buff(1:r%to(ipto)%nn), ipto, idp)
            end if

         end if
      end do

   end subroutine c_redist_35_inv

   subroutine r_redist_35_inv(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      real, dimension(r%to_low(1):, &
                      r%to_low(2):, &
                      r%to_low(3):, &
                      r%to_low(4):, &
                      r%to_low(5):), intent(in) :: from_here

      real, dimension(r%from_low(1):, &
                      r%from_low(2):, &
                      r%from_low(3):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, r%to(iproc)%nn
         to_here(r%from(iproc)%k(i), &
                 r%from(iproc)%l(i), &
                 r%from(iproc)%m(i)) &
            = from_here(r%to(iproc)%k(i), &
                        r%to(iproc)%l(i), &
                        r%to(iproc)%m(i), &
                        r%to(iproc)%n(i), &
                        r%to(iproc)%o(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%to(ipto)%nn > 0) then
               do i = 1, r%to(ipto)%nn
                  r%real_buff(i) = from_here(r%to(ipto)%k(i), &
                                             r%to(ipto)%l(i), &
                                             r%to(ipto)%m(i), &
                                             r%to(ipto)%n(i), &
                                             r%to(ipto)%o(i))
               end do
               call send(r%real_buff(1:r%to(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%from(ipfrom)%nn > 0) then
               call receive(r%real_buff(1:r%from(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%from(ipfrom)%nn
                  to_here(r%from(ipfrom)%k(i), &
                          r%from(ipfrom)%l(i), &
                          r%from(ipfrom)%m(i)) &
                     = r%real_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%from(ipfrom)%nn > 0) then
               call receive(r%real_buff(1:r%from(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%from(ipfrom)%nn
                  to_here(r%from(ipfrom)%k(i), &
                          r%from(ipfrom)%l(i), &
                          r%from(ipfrom)%m(i)) &
                     = r%real_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%to(ipto)%nn > 0) then
               do i = 1, r%to(ipto)%nn
                  r%real_buff(i) = from_here(r%to(ipto)%k(i), &
                                             r%to(ipto)%l(i), &
                                             r%to(ipto)%m(i), &
                                             r%to(ipto)%n(i), &
                                             r%to(ipto)%o(i))
               end do
               call send(r%real_buff(1:r%to(ipto)%nn), ipto, idp)
            end if

         end if
      end do

   end subroutine r_redist_35_inv

   subroutine r_redist_12(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      real, dimension(r%from_low(1):), intent(in) :: from_here

      real, dimension(r%to_low(1):, r%to_low(2):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, r%from(iproc)%nn
         to_here(r%to(iproc)%k(i), r%to(iproc)%l(i)) &
            = from_here(r%from(iproc)%k(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%real_buff(i) = from_here(r%from(ipto)%k(i))
               end do
               call send(r%real_buff(1:r%from(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%real_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i)) &
                     = r%real_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%real_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i)) &
                     = r%real_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%real_buff(i) = from_here(r%from(ipto)%k(i))
               end do
               call send(r%real_buff(1:r%from(ipto)%nn), ipto, idp)
            end if
         end if
      end do

   end subroutine r_redist_12

   subroutine r_redist_22(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      real, dimension(r%from_low(1):, &
                      r%from_low(2):), intent(in) :: from_here

      real, dimension(r%to_low(1):, &
                      r%to_low(2):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, r%from(iproc)%nn
         to_here(r%to(iproc)%k(i), &
                 r%to(iproc)%l(i)) &
            = from_here(r%from(iproc)%k(i), &
                        r%from(iproc)%l(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%real_buff(i) = from_here(r%from(ipto)%k(i), &
                                             r%from(ipto)%l(i))
               end do
               call send(r%real_buff(1:r%from(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%real_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i)) &
                     = r%real_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%real_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i)) &
                     = r%real_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%real_buff(i) = from_here(r%from(ipto)%k(i), &
                                             r%from(ipto)%l(i))
               end do
               call send(r%real_buff(1:r%from(ipto)%nn), ipto, idp)
            end if
         end if
      end do

   end subroutine r_redist_22

   subroutine r_redist_22_inv(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      real, dimension(r%to_low(1):, &
                      r%to_low(2):), intent(in) :: from_here

      real, dimension(r%from_low(1):, &
                      r%from_low(2):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, r%to(iproc)%nn
         to_here(r%from(iproc)%k(i), &
                 r%from(iproc)%l(i)) &
            = from_here(r%to(iproc)%k(i), &
                        r%to(iproc)%l(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%to(ipto)%nn > 0) then
               do i = 1, r%to(ipto)%nn
                  r%real_buff(i) = from_here(r%to(ipto)%k(i), &
                                             r%to(ipto)%l(i))
               end do
               call send(r%real_buff(1:r%to(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%from(ipfrom)%nn > 0) then
               call receive(r%real_buff(1:r%from(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%from(ipfrom)%nn
                  to_here(r%from(ipfrom)%k(i), &
                          r%from(ipfrom)%l(i)) &
                     = r%real_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%from(ipfrom)%nn > 0) then
               call receive(r%real_buff(1:r%from(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%from(ipfrom)%nn
                  to_here(r%from(ipfrom)%k(i), &
                          r%from(ipfrom)%l(i)) &
                     = r%real_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%to(ipto)%nn > 0) then
               do i = 1, r%to(ipto)%nn
                  r%real_buff(i) = from_here(r%to(ipto)%k(i), &
                                             r%to(ipto)%l(i))
               end do
               call send(r%real_buff(1:r%to(ipto)%nn), ipto, idp)
            end if

         end if
      end do

   end subroutine r_redist_22_inv

   subroutine r_redist_32(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      real, dimension(r%from_low(1):, &
                      r%from_low(2):, &
                      r%from_low(3):), intent(in) :: from_here

      real, dimension(r%to_low(1):, &
                      r%to_low(2):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, r%from(iproc)%nn
         to_here(r%to(iproc)%k(i), &
                 r%to(iproc)%l(i)) &
            = from_here(r%from(iproc)%k(i), &
                        r%from(iproc)%l(i), &
                        r%from(iproc)%m(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%real_buff(i) = from_here(r%from(ipto)%k(i), &
                                             r%from(ipto)%l(i), &
                                             r%from(ipto)%m(i))
               end do
               call send(r%real_buff(1:r%from(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%real_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i)) &
                     = r%real_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%real_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i)) &
                     = r%real_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%real_buff(i) = from_here(r%from(ipto)%k(i), &
                                             r%from(ipto)%l(i), &
                                             r%from(ipto)%m(i))
               end do
               call send(r%real_buff(1:r%from(ipto)%nn), ipto, idp)
            end if
         end if
      end do

   end subroutine r_redist_32

   subroutine r_redist_32_inv(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      real, dimension(r%to_low(1):, &
                      r%to_low(2):), intent(in) :: from_here

      real, dimension(r%from_low(1):, &
                      r%from_low(2):, &
                      r%from_low(3):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, r%to(iproc)%nn
         to_here(r%from(iproc)%k(i), &
                 r%from(iproc)%l(i), &
                 r%from(iproc)%m(i)) &
            = from_here(r%to(iproc)%k(i), &
                        r%to(iproc)%l(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%to(ipto)%nn > 0) then
               do i = 1, r%to(ipto)%nn
                  r%real_buff(i) = from_here(r%to(ipto)%k(i), &
                                             r%to(ipto)%l(i))
               end do
               call send(r%real_buff(1:r%to(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%from(ipfrom)%nn > 0) then
               call receive(r%real_buff(1:r%from(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%from(ipfrom)%nn
                  to_here(r%from(ipfrom)%k(i), &
                          r%from(ipfrom)%l(i), &
                          r%from(ipfrom)%m(i)) &
                     = r%real_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%from(ipfrom)%nn > 0) then
               call receive(r%real_buff(1:r%from(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%from(ipfrom)%nn
                  to_here(r%from(ipfrom)%k(i), &
                          r%from(ipfrom)%l(i), &
                          r%from(ipfrom)%m(i)) &
                     = r%real_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%to(ipto)%nn > 0) then
               do i = 1, r%to(ipto)%nn
                  r%real_buff(i) = from_here(r%to(ipto)%k(i), &
                                             r%to(ipto)%l(i))
               end do
               call send(r%real_buff(1:r%to(ipto)%nn), ipto, idp)
            end if

         end if
      end do

   end subroutine r_redist_32_inv

   subroutine r_redist_42(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      real, dimension(r%from_low(1):, &
                      r%from_low(2):, &
                      r%from_low(3):, &
                      r%from_low(4):), intent(in) :: from_here

      real, dimension(r%to_low(1):, &
                      r%to_low(2):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, r%from(iproc)%nn
         to_here(r%to(iproc)%k(i), &
                 r%to(iproc)%l(i)) &
            = from_here(r%from(iproc)%k(i), &
                        r%from(iproc)%l(i), &
                        r%from(iproc)%m(i), &
                        r%from(iproc)%n(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%real_buff(i) = from_here(r%from(ipto)%k(i), &
                                             r%from(ipto)%l(i), &
                                             r%from(ipto)%m(i), &
                                             r%from(ipto)%n(i))
               end do
               call send(r%real_buff(1:r%from(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%real_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i)) &
                     = r%real_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%real_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i)) &
                     = r%real_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%real_buff(i) = from_here(r%from(ipto)%k(i), &
                                             r%from(ipto)%l(i), &
                                             r%from(ipto)%m(i), &
                                             r%from(ipto)%n(i))
               end do
               call send(r%real_buff(1:r%from(ipto)%nn), ipto, idp)
            end if
         end if
      end do

   end subroutine r_redist_42

   subroutine r_redist_42_inv(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      real, dimension(r%to_low(1):, &
                      r%to_low(2):), intent(in) :: from_here

      real, dimension(r%from_low(1):, &
                      r%from_low(2):, &
                      r%from_low(3):, &
                      r%from_low(4):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, r%to(iproc)%nn
         to_here(r%from(iproc)%k(i), &
                 r%from(iproc)%l(i), &
                 r%from(iproc)%m(i), &
                 r%from(iproc)%n(i)) &
            = from_here(r%to(iproc)%k(i), &
                        r%to(iproc)%l(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%to(ipto)%nn > 0) then
               do i = 1, r%to(ipto)%nn
                  r%real_buff(i) = from_here(r%to(ipto)%k(i), &
                                             r%to(ipto)%l(i))
               end do
               call send(r%real_buff(1:r%to(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%from(ipfrom)%nn > 0) then
               call receive(r%real_buff(1:r%from(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%from(ipfrom)%nn
                  to_here(r%from(ipfrom)%k(i), &
                          r%from(ipfrom)%l(i), &
                          r%from(ipfrom)%m(i), &
                          r%from(ipfrom)%n(i)) &
                     = r%real_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%from(ipfrom)%nn > 0) then
               call receive(r%real_buff(1:r%from(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%from(ipfrom)%nn
                  to_here(r%from(ipfrom)%k(i), &
                          r%from(ipfrom)%l(i), &
                          r%from(ipfrom)%m(i), &
                          r%from(ipfrom)%n(i)) &
                     = r%real_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%to(ipto)%nn > 0) then
               do i = 1, r%to(ipto)%nn
                  r%real_buff(i) = from_here(r%to(ipto)%k(i), &
                                             r%to(ipto)%l(i))
               end do
               call send(r%real_buff(1:r%to(ipto)%nn), ipto, idp)
            end if

         end if
      end do

   end subroutine r_redist_42_inv

   subroutine i_redist_12(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      integer, dimension(r%from_low(1):), intent(in) :: from_here

      integer, dimension(r%to_low(1):, r%to_low(2):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, r%from(iproc)%nn
         to_here(r%to(iproc)%k(i), r%to(iproc)%l(i)) &
            = from_here(r%from(iproc)%k(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%integer_buff(i) = from_here(r%from(ipto)%k(i))
               end do
               call send(r%integer_buff(1:r%from(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%integer_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i)) &
                     = r%integer_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%integer_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i)) &
                     = r%integer_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%integer_buff(i) = from_here(r%from(ipto)%k(i))
               end do
               call send(r%integer_buff(1:r%from(ipto)%nn), ipto, idp)
            end if
         end if
      end do

   end subroutine i_redist_12

   subroutine i_redist_22(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      integer, dimension(r%from_low(1):, &
                         r%from_low(2):), intent(in) :: from_here

      integer, dimension(r%to_low(1):, &
                         r%to_low(2):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, r%from(iproc)%nn
         to_here(r%to(iproc)%k(i), &
                 r%to(iproc)%l(i)) &
            = from_here(r%from(iproc)%k(i), &
                        r%from(iproc)%l(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%integer_buff(i) = from_here(r%from(ipto)%k(i), &
                                                r%from(ipto)%l(i))
               end do
               call send(r%integer_buff(1:r%from(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%integer_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i)) &
                     = r%integer_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%integer_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i)) &
                     = r%integer_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%integer_buff(i) = from_here(r%from(ipto)%k(i), &
                                                r%from(ipto)%l(i))
               end do
               call send(r%integer_buff(1:r%from(ipto)%nn), ipto, idp)
            end if
         end if
      end do

   end subroutine i_redist_22

   subroutine i_redist_22_inv(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      integer, dimension(r%to_low(1):, &
                         r%to_low(2):), intent(in) :: from_here

      integer, dimension(r%from_low(1):, &
                         r%from_low(2):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, r%to(iproc)%nn
         to_here(r%from(iproc)%k(i), &
                 r%from(iproc)%l(i)) &
            = from_here(r%to(iproc)%k(i), &
                        r%to(iproc)%l(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%to(ipto)%nn > 0) then
               do i = 1, r%to(ipto)%nn
                  r%integer_buff(i) = from_here(r%to(ipto)%k(i), &
                                                r%to(ipto)%l(i))
               end do
               call send(r%integer_buff(1:r%to(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%from(ipfrom)%nn > 0) then
               call receive(r%integer_buff(1:r%from(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%from(ipfrom)%nn
                  to_here(r%from(ipfrom)%k(i), &
                          r%from(ipfrom)%l(i)) &
                     = r%integer_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%from(ipfrom)%nn > 0) then
               call receive(r%integer_buff(1:r%from(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%from(ipfrom)%nn
                  to_here(r%from(ipfrom)%k(i), &
                          r%from(ipfrom)%l(i)) &
                     = r%integer_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%to(ipto)%nn > 0) then
               do i = 1, r%to(ipto)%nn
                  r%integer_buff(i) = from_here(r%to(ipto)%k(i), &
                                                r%to(ipto)%l(i))
               end do
               call send(r%integer_buff(1:r%to(ipto)%nn), ipto, idp)
            end if

         end if
      end do

   end subroutine i_redist_22_inv

   subroutine i_redist_32(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      integer, dimension(r%from_low(1):, &
                         r%from_low(2):, &
                         r%from_low(3):), intent(in) :: from_here

      integer, dimension(r%to_low(1):, &
                         r%to_low(2):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, r%from(iproc)%nn
         to_here(r%to(iproc)%k(i), &
                 r%to(iproc)%l(i)) &
            = from_here(r%from(iproc)%k(i), &
                        r%from(iproc)%l(i), &
                        r%from(iproc)%m(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%integer_buff(i) = from_here(r%from(ipto)%k(i), &
                                                r%from(ipto)%l(i), &
                                                r%from(ipto)%m(i))
               end do
               call send(r%integer_buff(1:r%from(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%integer_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i)) &
                     = r%integer_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%integer_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i)) &
                     = r%integer_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%integer_buff(i) = from_here(r%from(ipto)%k(i), &
                                                r%from(ipto)%l(i), &
                                                r%from(ipto)%m(i))
               end do
               call send(r%integer_buff(1:r%from(ipto)%nn), ipto, idp)
            end if
         end if
      end do

   end subroutine i_redist_32

   subroutine i_redist_32_inv(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      integer, dimension(r%to_low(1):, &
                         r%to_low(2):), intent(in) :: from_here

      integer, dimension(r%from_low(1):, &
                         r%from_low(2):, &
                         r%from_low(3):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, r%to(iproc)%nn
         to_here(r%from(iproc)%k(i), &
                 r%from(iproc)%l(i), &
                 r%from(iproc)%m(i)) &
            = from_here(r%to(iproc)%k(i), &
                        r%to(iproc)%l(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%to(ipto)%nn > 0) then
               do i = 1, r%to(ipto)%nn
                  r%integer_buff(i) = from_here(r%to(ipto)%k(i), &
                                                r%to(ipto)%l(i))
               end do
               call send(r%integer_buff(1:r%to(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%from(ipfrom)%nn > 0) then
               call receive(r%integer_buff(1:r%from(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%from(ipfrom)%nn
                  to_here(r%from(ipfrom)%k(i), &
                          r%from(ipfrom)%l(i), &
                          r%from(ipfrom)%m(i)) &
                     = r%integer_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%from(ipfrom)%nn > 0) then
               call receive(r%integer_buff(1:r%from(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%from(ipfrom)%nn
                  to_here(r%from(ipfrom)%k(i), &
                          r%from(ipfrom)%l(i), &
                          r%from(ipfrom)%m(i)) &
                     = r%integer_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%to(ipto)%nn > 0) then
               do i = 1, r%to(ipto)%nn
                  r%integer_buff(i) = from_here(r%to(ipto)%k(i), &
                                                r%to(ipto)%l(i))
               end do
               call send(r%integer_buff(1:r%to(ipto)%nn), ipto, idp)
            end if

         end if
      end do

   end subroutine i_redist_32_inv

   subroutine i_redist_42(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      integer, dimension(r%from_low(1):, &
                         r%from_low(2):, &
                         r%from_low(3):, &
                         r%from_low(4):), intent(in) :: from_here

      integer, dimension(r%to_low(1):, &
                         r%to_low(2):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, r%from(iproc)%nn
         to_here(r%to(iproc)%k(i), &
                 r%to(iproc)%l(i)) &
            = from_here(r%from(iproc)%k(i), &
                        r%from(iproc)%l(i), &
                        r%from(iproc)%m(i), &
                        r%from(iproc)%n(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%integer_buff(i) = from_here(r%from(ipto)%k(i), &
                                                r%from(ipto)%l(i), &
                                                r%from(ipto)%m(i), &
                                                r%from(ipto)%n(i))
               end do
               call send(r%integer_buff(1:r%from(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%integer_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i)) &
                     = r%integer_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%integer_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i)) &
                     = r%integer_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%integer_buff(i) = from_here(r%from(ipto)%k(i), &
                                                r%from(ipto)%l(i), &
                                                r%from(ipto)%m(i), &
                                                r%from(ipto)%n(i))
               end do
               call send(r%integer_buff(1:r%from(ipto)%nn), ipto, idp)
            end if
         end if
      end do

   end subroutine i_redist_42

   subroutine i_redist_42_inv(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      integer, dimension(r%to_low(1):, &
                         r%to_low(2):), intent(in) :: from_here

      integer, dimension(r%from_low(1):, &
                         r%from_low(2):, &
                         r%from_low(3):, &
                         r%from_low(4):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, r%to(iproc)%nn
         to_here(r%from(iproc)%k(i), &
                 r%from(iproc)%l(i), &
                 r%from(iproc)%m(i), &
                 r%from(iproc)%n(i)) &
            = from_here(r%to(iproc)%k(i), &
                        r%to(iproc)%l(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%to(ipto)%nn > 0) then
               do i = 1, r%to(ipto)%nn
                  r%integer_buff(i) = from_here(r%to(ipto)%k(i), &
                                                r%to(ipto)%l(i))
               end do
               call send(r%integer_buff(1:r%to(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%from(ipfrom)%nn > 0) then
               call receive(r%integer_buff(1:r%from(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%from(ipfrom)%nn
                  to_here(r%from(ipfrom)%k(i), &
                          r%from(ipfrom)%l(i), &
                          r%from(ipfrom)%m(i), &
                          r%from(ipfrom)%n(i)) &
                     = r%integer_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%from(ipfrom)%nn > 0) then
               call receive(r%integer_buff(1:r%from(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%from(ipfrom)%nn
                  to_here(r%from(ipfrom)%k(i), &
                          r%from(ipfrom)%l(i), &
                          r%from(ipfrom)%m(i), &
                          r%from(ipfrom)%n(i)) &
                     = r%integer_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%to(ipto)%nn > 0) then
               do i = 1, r%to(ipto)%nn
                  r%integer_buff(i) = from_here(r%to(ipto)%k(i), &
                                                r%to(ipto)%l(i))
               end do
               call send(r%integer_buff(1:r%to(ipto)%nn), ipto, idp)
            end if

         end if
      end do

   end subroutine i_redist_42_inv

   subroutine l_redist_12(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      logical, dimension(r%from_low(1):), intent(in) :: from_here

      logical, dimension(r%to_low(1):, r%to_low(2):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, r%from(iproc)%nn
         to_here(r%to(iproc)%k(i), r%to(iproc)%l(i)) &
            = from_here(r%from(iproc)%k(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%logical_buff(i) = from_here(r%from(ipto)%k(i))
               end do
               call send(r%logical_buff(1:r%from(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%logical_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i)) &
                     = r%logical_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%logical_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i)) &
                     = r%logical_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%logical_buff(i) = from_here(r%from(ipto)%k(i))
               end do
               call send(r%logical_buff(1:r%from(ipto)%nn), ipto, idp)
            end if
         end if
      end do

   end subroutine l_redist_12

   subroutine l_redist_22(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      logical, dimension(r%from_low(1):, &
                         r%from_low(2):), intent(in) :: from_here

      logical, dimension(r%to_low(1):, &
                         r%to_low(2):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, r%from(iproc)%nn
         to_here(r%to(iproc)%k(i), &
                 r%to(iproc)%l(i)) &
            = from_here(r%from(iproc)%k(i), &
                        r%from(iproc)%l(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%logical_buff(i) = from_here(r%from(ipto)%k(i), &
                                                r%from(ipto)%l(i))
               end do
               call send(r%logical_buff(1:r%from(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%logical_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i)) &
                     = r%logical_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%logical_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i)) &
                     = r%logical_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%logical_buff(i) = from_here(r%from(ipto)%k(i), &
                                                r%from(ipto)%l(i))
               end do
               call send(r%logical_buff(1:r%from(ipto)%nn), ipto, idp)
            end if
         end if
      end do

   end subroutine l_redist_22

   subroutine l_redist_22_inv(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      logical, dimension(r%to_low(1):, &
                         r%to_low(2):), intent(in) :: from_here

      logical, dimension(r%from_low(1):, &
                         r%from_low(2):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, r%to(iproc)%nn
         to_here(r%from(iproc)%k(i), &
                 r%from(iproc)%l(i)) &
            = from_here(r%to(iproc)%k(i), &
                        r%to(iproc)%l(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%to(ipto)%nn > 0) then
               do i = 1, r%to(ipto)%nn
                  r%logical_buff(i) = from_here(r%to(ipto)%k(i), &
                                                r%to(ipto)%l(i))
               end do
               call send(r%logical_buff(1:r%to(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%from(ipfrom)%nn > 0) then
               call receive(r%logical_buff(1:r%from(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%from(ipfrom)%nn
                  to_here(r%from(ipfrom)%k(i), &
                          r%from(ipfrom)%l(i)) &
                     = r%logical_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%from(ipfrom)%nn > 0) then
               call receive(r%logical_buff(1:r%from(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%from(ipfrom)%nn
                  to_here(r%from(ipfrom)%k(i), &
                          r%from(ipfrom)%l(i)) &
                     = r%logical_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%to(ipto)%nn > 0) then
               do i = 1, r%to(ipto)%nn
                  r%logical_buff(i) = from_here(r%to(ipto)%k(i), &
                                                r%to(ipto)%l(i))
               end do
               call send(r%logical_buff(1:r%to(ipto)%nn), ipto, idp)
            end if

         end if
      end do

   end subroutine l_redist_22_inv

   subroutine l_redist_32(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      logical, dimension(r%from_low(1):, &
                         r%from_low(2):, &
                         r%from_low(3):), intent(in) :: from_here

      logical, dimension(r%to_low(1):, &
                         r%to_low(2):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, r%from(iproc)%nn
         to_here(r%to(iproc)%k(i), &
                 r%to(iproc)%l(i)) &
            = from_here(r%from(iproc)%k(i), &
                        r%from(iproc)%l(i), &
                        r%from(iproc)%m(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%logical_buff(i) = from_here(r%from(ipto)%k(i), &
                                                r%from(ipto)%l(i), &
                                                r%from(ipto)%m(i))
               end do
               call send(r%logical_buff(1:r%from(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%logical_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i)) &
                     = r%logical_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%logical_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i)) &
                     = r%logical_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%logical_buff(i) = from_here(r%from(ipto)%k(i), &
                                                r%from(ipto)%l(i), &
                                                r%from(ipto)%m(i))
               end do
               call send(r%logical_buff(1:r%from(ipto)%nn), ipto, idp)
            end if
         end if
      end do

   end subroutine l_redist_32

   subroutine l_redist_32_inv(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      logical, dimension(r%to_low(1):, &
                         r%to_low(2):), intent(in) :: from_here

      logical, dimension(r%from_low(1):, &
                         r%from_low(2):, &
                         r%from_low(3):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, r%to(iproc)%nn
         to_here(r%from(iproc)%k(i), &
                 r%from(iproc)%l(i), &
                 r%from(iproc)%m(i)) &
            = from_here(r%to(iproc)%k(i), &
                        r%to(iproc)%l(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%to(ipto)%nn > 0) then
               do i = 1, r%to(ipto)%nn
                  r%logical_buff(i) = from_here(r%to(ipto)%k(i), &
                                                r%to(ipto)%l(i))
               end do
               call send(r%logical_buff(1:r%to(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%from(ipfrom)%nn > 0) then
               call receive(r%logical_buff(1:r%from(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%from(ipfrom)%nn
                  to_here(r%from(ipfrom)%k(i), &
                          r%from(ipfrom)%l(i), &
                          r%from(ipfrom)%m(i)) &
                     = r%logical_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%from(ipfrom)%nn > 0) then
               call receive(r%logical_buff(1:r%from(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%from(ipfrom)%nn
                  to_here(r%from(ipfrom)%k(i), &
                          r%from(ipfrom)%l(i), &
                          r%from(ipfrom)%m(i)) &
                     = r%logical_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%to(ipto)%nn > 0) then
               do i = 1, r%to(ipto)%nn
                  r%logical_buff(i) = from_here(r%to(ipto)%k(i), &
                                                r%to(ipto)%l(i))
               end do
               call send(r%logical_buff(1:r%to(ipto)%nn), ipto, idp)
            end if

         end if
      end do

   end subroutine l_redist_32_inv

   subroutine l_redist_42(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      logical, dimension(r%from_low(1):, &
                         r%from_low(2):, &
                         r%from_low(3):, &
                         r%from_low(4):), intent(in) :: from_here

      logical, dimension(r%to_low(1):, &
                         r%to_low(2):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, r%from(iproc)%nn
         to_here(r%to(iproc)%k(i), &
                 r%to(iproc)%l(i)) &
            = from_here(r%from(iproc)%k(i), &
                        r%from(iproc)%l(i), &
                        r%from(iproc)%m(i), &
                        r%from(iproc)%n(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%logical_buff(i) = from_here(r%from(ipto)%k(i), &
                                                r%from(ipto)%l(i), &
                                                r%from(ipto)%m(i), &
                                                r%from(ipto)%n(i))
               end do
               call send(r%logical_buff(1:r%from(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%logical_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i)) &
                     = r%logical_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%logical_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i)) &
                     = r%logical_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%logical_buff(i) = from_here(r%from(ipto)%k(i), &
                                                r%from(ipto)%l(i), &
                                                r%from(ipto)%m(i), &
                                                r%from(ipto)%n(i))
               end do
               call send(r%logical_buff(1:r%from(ipto)%nn), ipto, idp)
            end if
         end if
      end do

   end subroutine l_redist_42

   subroutine l_redist_42_inv(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      logical, dimension(r%to_low(1):, &
                         r%to_low(2):), intent(in) :: from_here

      logical, dimension(r%from_low(1):, &
                         r%from_low(2):, &
                         r%from_low(3):, &
                         r%from_low(4):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, r%to(iproc)%nn
         to_here(r%from(iproc)%k(i), &
                 r%from(iproc)%l(i), &
                 r%from(iproc)%m(i), &
                 r%from(iproc)%n(i)) &
            = from_here(r%to(iproc)%k(i), &
                        r%to(iproc)%l(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%to(ipto)%nn > 0) then
               do i = 1, r%to(ipto)%nn
                  r%logical_buff(i) = from_here(r%to(ipto)%k(i), &
                                                r%to(ipto)%l(i))
               end do
               call send(r%logical_buff(1:r%to(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%from(ipfrom)%nn > 0) then
               call receive(r%logical_buff(1:r%from(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%from(ipfrom)%nn
                  to_here(r%from(ipfrom)%k(i), &
                          r%from(ipfrom)%l(i), &
                          r%from(ipfrom)%m(i), &
                          r%from(ipfrom)%n(i)) &
                     = r%logical_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%from(ipfrom)%nn > 0) then
               call receive(r%logical_buff(1:r%from(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%from(ipfrom)%nn
                  to_here(r%from(ipfrom)%k(i), &
                          r%from(ipfrom)%l(i), &
                          r%from(ipfrom)%m(i), &
                          r%from(ipfrom)%n(i)) &
                     = r%logical_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%to(ipto)%nn > 0) then
               do i = 1, r%to(ipto)%nn
                  r%logical_buff(i) = from_here(r%to(ipto)%k(i), &
                                                r%to(ipto)%l(i))
               end do
               call send(r%logical_buff(1:r%to(ipto)%nn), ipto, idp)
            end if

         end if
      end do

   end subroutine l_redist_42_inv

! TT>
   subroutine c_redist_33(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      complex, dimension(r%from_low(1):, &
                         r%from_low(2):, &
                         r%from_low(3):), intent(in) :: from_here

      complex, dimension(r%to_low(1):, &
                         r%to_low(2):, &
                         r%to_low(3):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, r%from(iproc)%nn
         to_here(r%to(iproc)%k(i), &
                 r%to(iproc)%l(i), &
                 r%to(iproc)%m(i)) &
            = from_here(r%from(iproc)%k(i), &
                        r%from(iproc)%l(i), &
                        r%from(iproc)%m(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%complex_buff(i) = from_here(r%from(ipto)%k(i), &
                                                r%from(ipto)%l(i), &
                                                r%from(ipto)%m(i))
               end do
               call send(r%complex_buff(1:r%from(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%complex_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i), &
                          r%to(ipfrom)%m(i)) &
                     = r%complex_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%to(ipfrom)%nn > 0) then
               call receive(r%complex_buff(1:r%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%to(ipfrom)%nn
                  to_here(r%to(ipfrom)%k(i), &
                          r%to(ipfrom)%l(i), &
                          r%to(ipfrom)%m(i)) &
                     = r%complex_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%from(ipto)%nn > 0) then
               do i = 1, r%from(ipto)%nn
                  r%complex_buff(i) = from_here(r%from(ipto)%k(i), &
                                                r%from(ipto)%l(i), &
                                                r%from(ipto)%m(i))
               end do
               call send(r%complex_buff(1:r%from(ipto)%nn), ipto, idp)
            end if
         end if
      end do

   end subroutine c_redist_33

   subroutine c_redist_33_inv(r, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: r

      complex, dimension(r%to_low(1):, &
                         r%to_low(2):, &
                         r%to_low(3):), intent(in) :: from_here

      complex, dimension(r%from_low(1):, &
                         r%from_low(2):, &
                         r%from_low(3):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, r%to(iproc)%nn
         to_here(r%from(iproc)%k(i), &
                 r%from(iproc)%l(i), &
                 r%from(iproc)%m(i)) &
            = from_here(r%to(iproc)%k(i), &
                        r%to(iproc)%l(i), &
                        r%to(iproc)%m(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (r%to(ipto)%nn > 0) then
               do i = 1, r%to(ipto)%nn
                  r%complex_buff(i) = from_here(r%to(ipto)%k(i), &
                                                r%to(ipto)%l(i), &
                                                r%to(ipto)%m(i))
               end do
               call send(r%complex_buff(1:r%to(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (r%from(ipfrom)%nn > 0) then
               call receive(r%complex_buff(1:r%from(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%from(ipfrom)%nn
                  to_here(r%from(ipfrom)%k(i), &
                          r%from(ipfrom)%l(i), &
                          r%from(ipfrom)%m(i)) &
                     = r%complex_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (r%from(ipfrom)%nn > 0) then
               call receive(r%complex_buff(1:r%from(ipfrom)%nn), ipfrom, idp)
               do i = 1, r%from(ipfrom)%nn
                  to_here(r%from(ipfrom)%k(i), &
                          r%from(ipfrom)%l(i), &
                          r%from(ipfrom)%m(i)) &
                     = r%complex_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (r%to(ipto)%nn > 0) then
               do i = 1, r%to(ipto)%nn
                  r%complex_buff(i) = from_here(r%to(ipto)%k(i), &
                                                r%to(ipto)%l(i), &
                                                r%to(ipto)%m(i))
               end do
               call send(r%complex_buff(1:r%to(ipto)%nn), ipto, idp)
            end if

         end if
      end do

   end subroutine c_redist_33_inv
! <TT

   subroutine c_fill_2(f, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: f

      complex, dimension(f%from_low(1):, &
                         f%from_low(2):), intent(in) :: from_here

      complex, dimension(f%to_low(1):, &
                         f%to_low(2):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, f%from(iproc)%nn
         to_here(f%to(iproc)%k(i), &
                 f%to(iproc)%l(i)) &
            = from_here(f%from(iproc)%k(i), &
                        f%from(iproc)%l(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (f%from(ipto)%nn > 0) then
               do i = 1, f%from(ipto)%nn
                  f%complex_buff(i) = from_here(f%from(ipto)%k(i), &
                                                f%from(ipto)%l(i))
               end do
               call send(f%complex_buff(1:f%from(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (f%to(ipfrom)%nn > 0) then
               call receive(f%complex_buff(1:f%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, f%to(ipfrom)%nn
                  to_here(f%to(ipfrom)%k(i), &
                          f%to(ipfrom)%l(i)) &
                     = f%complex_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (f%to(ipfrom)%nn > 0) then
               call receive(f%complex_buff(1:f%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, f%to(ipfrom)%nn
                  to_here(f%to(ipfrom)%k(i), &
                          f%to(ipfrom)%l(i)) &
                     = f%complex_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (f%from(ipto)%nn > 0) then
               do i = 1, f%from(ipto)%nn
                  f%complex_buff(i) = from_here(f%from(ipto)%k(i), &
                                                f%from(ipto)%l(i))
               end do
               call send(f%complex_buff(1:f%from(ipto)%nn), ipto, idp)
            end if
         end if
      end do

   end subroutine c_fill_2

   subroutine c_fill_3(f, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: f

      complex, dimension(f%from_low(1):, &
                         f%from_low(2):, &
                         f%from_low(3):), intent(in) :: from_here

      complex, dimension(f%to_low(1):, &
                         f%to_low(2):, &
                         f%to_low(3):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, f%from(iproc)%nn
         to_here(f%to(iproc)%k(i), &
                 f%to(iproc)%l(i), &
                 f%to(iproc)%m(i)) &
            = from_here(f%from(iproc)%k(i), &
                        f%from(iproc)%l(i), &
                        f%from(iproc)%m(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (f%from(ipto)%nn > 0) then
               do i = 1, f%from(ipto)%nn
                  f%complex_buff(i) = from_here(f%from(ipto)%k(i), &
                                                f%from(ipto)%l(i), &
                                                f%from(ipto)%m(i))
               end do
               call send(f%complex_buff(1:f%from(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (f%to(ipfrom)%nn > 0) then
               call receive(f%complex_buff(1:f%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, f%to(ipfrom)%nn
                  to_here(f%to(ipfrom)%k(i), &
                          f%to(ipfrom)%l(i), &
                          f%to(ipfrom)%m(i)) &
                     = f%complex_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (f%to(ipfrom)%nn > 0) then
               call receive(f%complex_buff(1:f%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, f%to(ipfrom)%nn
                  to_here(f%to(ipfrom)%k(i), &
                          f%to(ipfrom)%l(i), &
                          f%to(ipfrom)%m(i)) &
                     = f%complex_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (f%from(ipto)%nn > 0) then
               do i = 1, f%from(ipto)%nn
                  f%complex_buff(i) = from_here(f%from(ipto)%k(i), &
                                                f%from(ipto)%l(i), &
                                                f%from(ipto)%m(i))
               end do
               call send(f%complex_buff(1:f%from(ipto)%nn), ipto, idp)
            end if
         end if
      end do

   end subroutine c_fill_3

   subroutine c_fill_4(f, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: f

      complex, dimension(f%from_low(1):, &
                         f%from_low(2):, &
                         f%from_low(3):, &
                         f%from_low(4):), intent(in) :: from_here

      complex, dimension(f%to_low(1):, &
                         f%to_low(2):, &
                         f%to_low(3):, &
                         f%to_low(4):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, f%from(iproc)%nn
         to_here(f%to(iproc)%k(i), &
                 f%to(iproc)%l(i), &
                 f%to(iproc)%m(i), &
                 f%to(iproc)%n(i)) &
            = from_here(f%from(iproc)%k(i), &
                        f%from(iproc)%l(i), &
                        f%from(iproc)%m(i), &
                        f%from(iproc)%n(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (f%from(ipto)%nn > 0) then
               do i = 1, f%from(ipto)%nn
                  f%complex_buff(i) = from_here(f%from(ipto)%k(i), &
                                                f%from(ipto)%l(i), &
                                                f%from(ipto)%m(i), &
                                                f%from(ipto)%n(i))
               end do
               call send(f%complex_buff(1:f%from(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (f%to(ipfrom)%nn > 0) then
               call receive(f%complex_buff(1:f%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, f%to(ipfrom)%nn
                  to_here(f%to(ipfrom)%k(i), &
                          f%to(ipfrom)%l(i), &
                          f%to(ipfrom)%m(i), &
                          f%to(ipfrom)%n(i)) &
                     = f%complex_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (f%to(ipfrom)%nn > 0) then
               call receive(f%complex_buff(1:f%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, f%to(ipfrom)%nn
                  to_here(f%to(ipfrom)%k(i), &
                          f%to(ipfrom)%l(i), &
                          f%to(ipfrom)%m(i), &
                          f%to(ipfrom)%n(i)) &
                     = f%complex_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (f%from(ipto)%nn > 0) then
               do i = 1, f%from(ipto)%nn
                  f%complex_buff(i) = from_here(f%from(ipto)%k(i), &
                                                f%from(ipto)%l(i), &
                                                f%from(ipto)%m(i), &
                                                f%from(ipto)%n(i))
               end do
               call send(f%complex_buff(1:f%from(ipto)%nn), ipto, idp)
            end if
         end if
      end do

   end subroutine c_fill_4

   subroutine r_fill_2(f, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: f

      real, dimension(f%from_low(1):, &
                      f%from_low(2):), intent(in) :: from_here

      real, dimension(f%to_low(1):, &
                      f%to_low(2):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, f%from(iproc)%nn
         to_here(f%to(iproc)%k(i), &
                 f%to(iproc)%l(i)) &
            = from_here(f%from(iproc)%k(i), &
                        f%from(iproc)%l(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (f%from(ipto)%nn > 0) then
               do i = 1, f%from(ipto)%nn
                  f%real_buff(i) = from_here(f%from(ipto)%k(i), &
                                             f%from(ipto)%l(i))
               end do
               call send(f%real_buff(1:f%from(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (f%to(ipfrom)%nn > 0) then
               call receive(f%real_buff(1:f%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, f%to(ipfrom)%nn
                  to_here(f%to(ipfrom)%k(i), &
                          f%to(ipfrom)%l(i)) &
                     = f%real_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (f%to(ipfrom)%nn > 0) then
               call receive(f%real_buff(1:f%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, f%to(ipfrom)%nn
                  to_here(f%to(ipfrom)%k(i), &
                          f%to(ipfrom)%l(i)) &
                     = f%real_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (f%from(ipto)%nn > 0) then
               do i = 1, f%from(ipto)%nn
                  f%real_buff(i) = from_here(f%from(ipto)%k(i), &
                                             f%from(ipto)%l(i))
               end do
               call send(f%real_buff(1:f%from(ipto)%nn), ipto, idp)
            end if
         end if
      end do

   end subroutine r_fill_2

   subroutine r_fill_3(f, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: f

      real, dimension(f%from_low(1):, &
                      f%from_low(2):, &
                      f%from_low(3):), intent(in) :: from_here

      real, dimension(f%to_low(1):, &
                      f%to_low(2):, &
                      f%to_low(3):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, f%from(iproc)%nn
         to_here(f%to(iproc)%k(i), &
                 f%to(iproc)%l(i), &
                 f%to(iproc)%m(i)) &
            = from_here(f%from(iproc)%k(i), &
                        f%from(iproc)%l(i), &
                        f%from(iproc)%m(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (f%from(ipto)%nn > 0) then
               do i = 1, f%from(ipto)%nn
                  f%real_buff(i) = from_here(f%from(ipto)%k(i), &
                                             f%from(ipto)%l(i), &
                                             f%from(ipto)%m(i))
               end do
               call send(f%real_buff(1:f%from(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (f%to(ipfrom)%nn > 0) then
               call receive(f%real_buff(1:f%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, f%to(ipfrom)%nn
                  to_here(f%to(ipfrom)%k(i), &
                          f%to(ipfrom)%l(i), &
                          f%to(ipfrom)%m(i)) &
                     = f%real_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (f%to(ipfrom)%nn > 0) then
               call receive(f%real_buff(1:f%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, f%to(ipfrom)%nn
                  to_here(f%to(ipfrom)%k(i), &
                          f%to(ipfrom)%l(i), &
                          f%to(ipfrom)%m(i)) &
                     = f%real_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (f%from(ipto)%nn > 0) then
               do i = 1, f%from(ipto)%nn
                  f%real_buff(i) = from_here(f%from(ipto)%k(i), &
                                             f%from(ipto)%l(i), &
                                             f%from(ipto)%m(i))
               end do
               call send(f%real_buff(1:f%from(ipto)%nn), ipto, idp)
            end if
         end if
      end do

   end subroutine r_fill_3

   subroutine r_fill_4(f, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: f

      real, dimension(f%from_low(1):, &
                      f%from_low(2):, &
                      f%from_low(3):, &
                      f%from_low(4):), intent(in) :: from_here

      real, dimension(f%to_low(1):, &
                      f%to_low(2):, &
                      f%to_low(3):, &
                      f%to_low(4):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, f%from(iproc)%nn
         to_here(f%to(iproc)%k(i), &
                 f%to(iproc)%l(i), &
                 f%to(iproc)%m(i), &
                 f%to(iproc)%n(i)) &
            = from_here(f%from(iproc)%k(i), &
                        f%from(iproc)%l(i), &
                        f%from(iproc)%m(i), &
                        f%from(iproc)%n(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (f%from(ipto)%nn > 0) then
               do i = 1, f%from(ipto)%nn
                  f%real_buff(i) = from_here(f%from(ipto)%k(i), &
                                             f%from(ipto)%l(i), &
                                             f%from(ipto)%m(i), &
                                             f%from(ipto)%n(i))
               end do
               call send(f%real_buff(1:f%from(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (f%to(ipfrom)%nn > 0) then
               call receive(f%real_buff(1:f%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, f%to(ipfrom)%nn
                  to_here(f%to(ipfrom)%k(i), &
                          f%to(ipfrom)%l(i), &
                          f%to(ipfrom)%m(i), &
                          f%to(ipfrom)%n(i)) &
                     = f%real_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (f%to(ipfrom)%nn > 0) then
               call receive(f%real_buff(1:f%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, f%to(ipfrom)%nn
                  to_here(f%to(ipfrom)%k(i), &
                          f%to(ipfrom)%l(i), &
                          f%to(ipfrom)%m(i), &
                          f%to(ipfrom)%n(i)) &
                     = f%real_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (f%from(ipto)%nn > 0) then
               do i = 1, f%from(ipto)%nn
                  f%real_buff(i) = from_here(f%from(ipto)%k(i), &
                                             f%from(ipto)%l(i), &
                                             f%from(ipto)%m(i), &
                                             f%from(ipto)%n(i))
               end do
               call send(f%real_buff(1:f%from(ipto)%nn), ipto, idp)
            end if
         end if
      end do

   end subroutine r_fill_4

   subroutine i_fill_2(f, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: f

      integer, dimension(f%from_low(1):, &
                         f%from_low(2):), intent(in) :: from_here

      integer, dimension(f%to_low(1):, &
                         f%to_low(2):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, f%from(iproc)%nn
         to_here(f%to(iproc)%k(i), &
                 f%to(iproc)%l(i)) &
            = from_here(f%from(iproc)%k(i), &
                        f%from(iproc)%l(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (f%from(ipto)%nn > 0) then
               do i = 1, f%from(ipto)%nn
                  f%integer_buff(i) = from_here(f%from(ipto)%k(i), &
                                                f%from(ipto)%l(i))
               end do
               call send(f%integer_buff(1:f%from(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (f%to(ipfrom)%nn > 0) then
               call receive(f%integer_buff(1:f%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, f%to(ipfrom)%nn
                  to_here(f%to(ipfrom)%k(i), &
                          f%to(ipfrom)%l(i)) &
                     = f%integer_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (f%to(ipfrom)%nn > 0) then
               call receive(f%integer_buff(1:f%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, f%to(ipfrom)%nn
                  to_here(f%to(ipfrom)%k(i), &
                          f%to(ipfrom)%l(i)) &
                     = f%integer_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (f%from(ipto)%nn > 0) then
               do i = 1, f%from(ipto)%nn
                  f%integer_buff(i) = from_here(f%from(ipto)%k(i), &
                                                f%from(ipto)%l(i))
               end do
               call send(f%integer_buff(1:f%from(ipto)%nn), ipto, idp)
            end if
         end if
      end do

   end subroutine i_fill_2

   subroutine i_fill_3(f, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: f

      integer, dimension(f%from_low(1):, &
                         f%from_low(2):, &
                         f%from_low(3):), intent(in) :: from_here

      integer, dimension(f%to_low(1):, &
                         f%to_low(2):, &
                         f%to_low(3):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, f%from(iproc)%nn
         to_here(f%to(iproc)%k(i), &
                 f%to(iproc)%l(i), &
                 f%to(iproc)%m(i)) &
            = from_here(f%from(iproc)%k(i), &
                        f%from(iproc)%l(i), &
                        f%from(iproc)%m(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (f%from(ipto)%nn > 0) then
               do i = 1, f%from(ipto)%nn
                  f%integer_buff(i) = from_here(f%from(ipto)%k(i), &
                                                f%from(ipto)%l(i), &
                                                f%from(ipto)%m(i))
               end do
               call send(f%integer_buff(1:f%from(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (f%to(ipfrom)%nn > 0) then
               call receive(f%integer_buff(1:f%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, f%to(ipfrom)%nn
                  to_here(f%to(ipfrom)%k(i), &
                          f%to(ipfrom)%l(i), &
                          f%to(ipfrom)%m(i)) &
                     = f%integer_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (f%to(ipfrom)%nn > 0) then
               call receive(f%integer_buff(1:f%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, f%to(ipfrom)%nn
                  to_here(f%to(ipfrom)%k(i), &
                          f%to(ipfrom)%l(i), &
                          f%to(ipfrom)%m(i)) &
                     = f%integer_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (f%from(ipto)%nn > 0) then
               do i = 1, f%from(ipto)%nn
                  f%integer_buff(i) = from_here(f%from(ipto)%k(i), &
                                                f%from(ipto)%l(i), &
                                                f%from(ipto)%m(i))
               end do
               call send(f%integer_buff(1:f%from(ipto)%nn), ipto, idp)
            end if
         end if
      end do

   end subroutine i_fill_3

   subroutine i_fill_4(f, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: f

      integer, dimension(f%from_low(1):, &
                         f%from_low(2):, &
                         f%from_low(3):, &
                         f%from_low(4):), intent(in) :: from_here

      integer, dimension(f%to_low(1):, &
                         f%to_low(2):, &
                         f%to_low(3):, &
                         f%to_low(4):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, f%from(iproc)%nn
         to_here(f%to(iproc)%k(i), &
                 f%to(iproc)%l(i), &
                 f%to(iproc)%m(i), &
                 f%to(iproc)%n(i)) &
            = from_here(f%from(iproc)%k(i), &
                        f%from(iproc)%l(i), &
                        f%from(iproc)%m(i), &
                        f%from(iproc)%n(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (f%from(ipto)%nn > 0) then
               do i = 1, f%from(ipto)%nn
                  f%integer_buff(i) = from_here(f%from(ipto)%k(i), &
                                                f%from(ipto)%l(i), &
                                                f%from(ipto)%m(i), &
                                                f%from(ipto)%n(i))
               end do
               call send(f%integer_buff(1:f%from(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (f%to(ipfrom)%nn > 0) then
               call receive(f%integer_buff(1:f%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, f%to(ipfrom)%nn
                  to_here(f%to(ipfrom)%k(i), &
                          f%to(ipfrom)%l(i), &
                          f%to(ipfrom)%m(i), &
                          f%to(ipfrom)%n(i)) &
                     = f%integer_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (f%to(ipfrom)%nn > 0) then
               call receive(f%integer_buff(1:f%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, f%to(ipfrom)%nn
                  to_here(f%to(ipfrom)%k(i), &
                          f%to(ipfrom)%l(i), &
                          f%to(ipfrom)%m(i), &
                          f%to(ipfrom)%n(i)) &
                     = f%integer_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (f%from(ipto)%nn > 0) then
               do i = 1, f%from(ipto)%nn
                  f%integer_buff(i) = from_here(f%from(ipto)%k(i), &
                                                f%from(ipto)%l(i), &
                                                f%from(ipto)%m(i), &
                                                f%from(ipto)%n(i))
               end do
               call send(f%integer_buff(1:f%from(ipto)%nn), ipto, idp)
            end if
         end if
      end do

   end subroutine i_fill_4

   subroutine l_fill_2(f, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: f

      logical, dimension(f%from_low(1):, &
                         f%from_low(2):), intent(in) :: from_here

      logical, dimension(f%to_low(1):, &
                         f%to_low(2):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, f%from(iproc)%nn
         to_here(f%to(iproc)%k(i), &
                 f%to(iproc)%l(i)) &
            = from_here(f%from(iproc)%k(i), &
                        f%from(iproc)%l(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (f%from(ipto)%nn > 0) then
               do i = 1, f%from(ipto)%nn
                  f%logical_buff(i) = from_here(f%from(ipto)%k(i), &
                                                f%from(ipto)%l(i))
               end do
               call send(f%logical_buff(1:f%from(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (f%to(ipfrom)%nn > 0) then
               call receive(f%logical_buff(1:f%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, f%to(ipfrom)%nn
                  to_here(f%to(ipfrom)%k(i), &
                          f%to(ipfrom)%l(i)) &
                     = f%logical_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (f%to(ipfrom)%nn > 0) then
               call receive(f%logical_buff(1:f%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, f%to(ipfrom)%nn
                  to_here(f%to(ipfrom)%k(i), &
                          f%to(ipfrom)%l(i)) &
                     = f%logical_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (f%from(ipto)%nn > 0) then
               do i = 1, f%from(ipto)%nn
                  f%logical_buff(i) = from_here(f%from(ipto)%k(i), &
                                                f%from(ipto)%l(i))
               end do
               call send(f%logical_buff(1:f%from(ipto)%nn), ipto, idp)
            end if
         end if
      end do

   end subroutine l_fill_2

   subroutine l_fill_3(f, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: f

      logical, dimension(f%from_low(1):, &
                         f%from_low(2):, &
                         f%from_low(3):), intent(in) :: from_here

      logical, dimension(f%to_low(1):, &
                         f%to_low(2):, &
                         f%to_low(3):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, f%from(iproc)%nn
         to_here(f%to(iproc)%k(i), &
                 f%to(iproc)%l(i), &
                 f%to(iproc)%m(i)) &
            = from_here(f%from(iproc)%k(i), &
                        f%from(iproc)%l(i), &
                        f%from(iproc)%m(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (f%from(ipto)%nn > 0) then
               do i = 1, f%from(ipto)%nn
                  f%logical_buff(i) = from_here(f%from(ipto)%k(i), &
                                                f%from(ipto)%l(i), &
                                                f%from(ipto)%m(i))
               end do
               call send(f%logical_buff(1:f%from(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (f%to(ipfrom)%nn > 0) then
               call receive(f%logical_buff(1:f%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, f%to(ipfrom)%nn
                  to_here(f%to(ipfrom)%k(i), &
                          f%to(ipfrom)%l(i), &
                          f%to(ipfrom)%m(i)) &
                     = f%logical_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (f%to(ipfrom)%nn > 0) then
               call receive(f%logical_buff(1:f%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, f%to(ipfrom)%nn
                  to_here(f%to(ipfrom)%k(i), &
                          f%to(ipfrom)%l(i), &
                          f%to(ipfrom)%m(i)) &
                     = f%logical_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (f%from(ipto)%nn > 0) then
               do i = 1, f%from(ipto)%nn
                  f%logical_buff(i) = from_here(f%from(ipto)%k(i), &
                                                f%from(ipto)%l(i), &
                                                f%from(ipto)%m(i))
               end do
               call send(f%logical_buff(1:f%from(ipto)%nn), ipto, idp)
            end if
         end if
      end do

   end subroutine l_fill_3

   subroutine l_fill_4(f, from_here, to_here)

      use mp, only: iproc, nproc, send, receive
      type(redist_type), intent(in out) :: f

      logical, dimension(f%from_low(1):, &
                         f%from_low(2):, &
                         f%from_low(3):, &
                         f%from_low(4):), intent(in) :: from_here

      logical, dimension(f%to_low(1):, &
                         f%to_low(2):, &
                         f%to_low(3):, &
                         f%to_low(4):), intent(in out) :: to_here

      integer :: i, idp, ipto, ipfrom, iadp

      ! redistribute from local processor to local processor
      do i = 1, f%from(iproc)%nn
         to_here(f%to(iproc)%k(i), &
                 f%to(iproc)%l(i), &
                 f%to(iproc)%m(i), &
                 f%to(iproc)%n(i)) &
            = from_here(f%from(iproc)%k(i), &
                        f%from(iproc)%l(i), &
                        f%from(iproc)%m(i), &
                        f%from(iproc)%n(i))
      end do

      ! redistribute to idpth next processor from idpth preceding processor
      ! or redistribute from idpth preceding processor to idpth next processor
      ! to avoid deadlocks
      do idp = 1, nproc - 1
         ipto = mod(iproc + idp, nproc)
         ipfrom = mod(iproc + nproc - idp, nproc)
         iadp = min(idp, nproc - idp)
         ! avoid deadlock AND ensure mostly parallel resolution
         if (mod(iproc / iadp, 2) == 0) then

            ! send to idpth next processor
            if (f%from(ipto)%nn > 0) then
               do i = 1, f%from(ipto)%nn
                  f%logical_buff(i) = from_here(f%from(ipto)%k(i), &
                                                f%from(ipto)%l(i), &
                                                f%from(ipto)%m(i), &
                                                f%from(ipto)%n(i))
               end do
               call send(f%logical_buff(1:f%from(ipto)%nn), ipto, idp)
            end if

            ! receive from idpth preceding processor
            if (f%to(ipfrom)%nn > 0) then
               call receive(f%logical_buff(1:f%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, f%to(ipfrom)%nn
                  to_here(f%to(ipfrom)%k(i), &
                          f%to(ipfrom)%l(i), &
                          f%to(ipfrom)%m(i), &
                          f%to(ipfrom)%n(i)) &
                     = f%logical_buff(i)
               end do
            end if
         else
            ! receive from idpth preceding processor
            if (f%to(ipfrom)%nn > 0) then
               call receive(f%logical_buff(1:f%to(ipfrom)%nn), ipfrom, idp)
               do i = 1, f%to(ipfrom)%nn
                  to_here(f%to(ipfrom)%k(i), &
                          f%to(ipfrom)%l(i), &
                          f%to(ipfrom)%m(i), &
                          f%to(ipfrom)%n(i)) &
                     = f%logical_buff(i)
               end do
            end if

            ! send to idpth next processor
            if (f%from(ipto)%nn > 0) then
               do i = 1, f%from(ipto)%nn
                  f%logical_buff(i) = from_here(f%from(ipto)%k(i), &
                                                f%from(ipto)%l(i), &
                                                f%from(ipto)%m(i), &
                                                f%from(ipto)%n(i))
               end do
               call send(f%logical_buff(1:f%from(ipto)%nn), ipto, idp)
            end if
         end if
      end do

   end subroutine l_fill_4

   subroutine report_map_property(r)

      use mp, only: iproc, nproc, proc0, sum_reduce, max_reduce
      type(redist_type), intent(in) :: r
      type :: redist_prp
         integer :: local_max, local_total
         integer :: comm_max, comm_total
         integer :: elm_max, elm_total
      end type redist_prp
      type(redist_prp) :: prp
      integer :: ip, rank_from, rank_to
      integer, dimension(:), allocatable :: lbd_from, lbd_to

      prp%comm_max = 0
      prp%comm_total = 0
      prp%elm_total = 0

      do ip = 0, nproc - 1
         if (ip == iproc) then
            prp%local_total = r%to(ip)%nn
         else
            if (r%to(ip)%nn > 0) then
               prp%comm_total = prp%comm_total + 1
               prp%elm_total = prp%elm_total + r%to(ip)%nn
            end if
         end if
      end do
      prp%local_max = prp%local_total
      prp%comm_max = prp%comm_total
      prp%elm_max = prp%elm_total

      call max_reduce(prp%local_max, 0)
      call sum_reduce(prp%local_total, 0)
      call max_reduce(prp%comm_max, 0)
      call sum_reduce(prp%comm_total, 0)
      call max_reduce(prp%elm_max, 0)
      call sum_reduce(prp%elm_total, 0)

      if (proc0) then
         rank_from = 1
         if (associated(r%from(0)%l)) rank_from = 2
         if (associated(r%from(0)%m)) rank_from = 3
         if (associated(r%from(0)%n)) rank_from = 4
         rank_to = 1
         if (associated(r%to(0)%l)) rank_to = 2
         if (associated(r%to(0)%m)) rank_to = 3
         if (associated(r%to(0)%n)) rank_to = 4
         allocate (lbd_from(rank_from), lbd_to(rank_to))
         lbd_from = r%from_low(1:rank_from)
         lbd_to = r%to_low(1:rank_to)
         print '(a,i2,a,i2)', 'From rank', rank_from, ' to rank', rank_to
         print '(a,t20,4i10)', 'From lbound (proc0)', r%from_low(1:rank_from)
         print '(a,t20,4i10)', 'To lbound (proc0)', r%to_low(1:rank_to)
         print '(a,t49,a,t64,a)', '--- Redistribution statistics ---', &
            'max', 'avg'
         print '(a,t40,i12,t55,f15.2)', 'Number of local move elements', &
            prp%local_max, real(prp%local_total) / real(nproc)
         print '(a,t40,i12,t60,f10.2)', &
            'Number of inter-processor communications', &
            prp%comm_max, real(prp%comm_total) / real(nproc)
         print '(a,t40,i12,t55,f15.2)', &
            'Number of inter-processor move elements', &
            prp%elm_max, real(prp%elm_total) / real(nproc)
         print *
      end if

   end subroutine report_map_property

   subroutine measure_gather_32(map, gin, gout)

      use job_manage, only: time_message
      use mp, only: proc0

      type(redist_type), intent(in out) :: map
      complex, dimension(:, :, :), intent(in) :: gin
      complex, dimension(:, :), intent(out) :: gout

      if (proc0) call time_message(.false., time_redist, ' Redistribution')
      call c_redist_32(map, gin, gout)
      if (proc0) call time_message(.false., time_redist, ' Redistribution')
      gather_count = gather_count + 1

   end subroutine measure_gather_32

   subroutine measure_scatter_23(map, gin, gout)

      use job_manage, only: time_message
      use mp, only: proc0

      type(redist_type), intent(in out) :: map
      complex, dimension(:, :), intent(in) :: gin
      complex, dimension(:, :, :), intent(out) :: gout

      if (proc0) call time_message(.false., time_redist, ' Redistribution')
      call c_redist_32_inv(map, gin, gout)
      if (proc0) call time_message(.false., time_redist, ' Redistribution')
      scatter_count = scatter_count + 1

   end subroutine measure_scatter_23

   subroutine measure_gather_33(map, gin, gout)

      use job_manage, only: time_message
      use mp, only: proc0

      type(redist_type), intent(in out) :: map
      complex, dimension(:, :, :), intent(in) :: gin
      complex, dimension(:, :, :), intent(out) :: gout

      if (proc0) call time_message(.false., time_redist, ' Redistribution')
      call c_redist_33(map, gin, gout)
      if (proc0) call time_message(.false., time_redist, ' Redistribution')
      gather_count = gather_count + 1

   end subroutine measure_gather_33

   subroutine measure_scatter_33(map, gin, gout)

      use job_manage, only: time_message
      use mp, only: proc0

      type(redist_type), intent(in out) :: map
      complex, dimension(:, :, :), intent(in) :: gin
      complex, dimension(:, :, :), intent(out) :: gout

      if (proc0) call time_message(.false., time_redist, ' Redistribution')
      call c_redist_33_inv(map, gin, gout)
      if (proc0) call time_message(.false., time_redist, ' Redistribution')
      scatter_count = scatter_count + 1

   end subroutine measure_scatter_33

   subroutine measure_gather_22(map, gin, gout)

      use job_manage, only: time_message
      use mp, only: proc0

      type(redist_type), intent(in out) :: map
      complex, dimension(:, :), intent(in) :: gin
      complex, dimension(:, :), intent(out) :: gout

      if (proc0) call time_message(.false., time_redist, ' Redistribution')
      call c_redist_22(map, gin, gout)
      if (proc0) call time_message(.false., time_redist, ' Redistribution')
      gather_count = gather_count + 1

   end subroutine measure_gather_22

   subroutine measure_scatter_22(map, gin, gout)

      use job_manage, only: time_message
      use mp, only: proc0

      type(redist_type), intent(in out) :: map
      complex, dimension(:, :), intent(in) :: gin
      complex, dimension(:, :), intent(out) :: gout

      if (proc0) call time_message(.false., time_redist, ' Redistribution')
      call c_redist_22_inv(map, gin, gout)
      if (proc0) call time_message(.false., time_redist, ' Redistribution')
      scatter_count = scatter_count + 1

   end subroutine measure_scatter_22

end module redistribute