blender-archive/extern/mantaflow/helper/util/rcmatrix.h

//
//  Helper matrix class, RCMatrix.h
//  Required for PD optimizations (guiding)
//  Thanks to Ryoichi Ando, and Robert Bridson
//

#ifndef RCMATRIX3_H
#define RCMATRIX3_H

#include <iterator>
#include <cassert>
#include <vector>
#include <fstream>

// index type
#define int_index long long

// link to omp & tbb for now
#if OPENMP == 1 || TBB == 1
#  define MANTA_ENABLE_PARALLEL 1
// allow the preconditioner to be computed in parallel? (can lead to slightly non-deterministic
// results)
#  define MANTA_ENABLE_PARALLEL_PC 0
#else
#  define MANTA_ENABLE_PARALLEL 0
#  define MANTA_ENABLE_PARALLEL_PC 0
#endif

#if MANTA_ENABLE_PARALLEL == 1
#  include <thread>
#  include <algorithm>

static const int manta_num_threads = std::thread::hardware_concurrency();

// For clang
#  define parallel_for(total_size) \
    { \
      int_index parallel_array_size = (total_size); \
      std::vector<std::thread> threads(manta_num_threads); \
      for (int thread_number = 0; thread_number < manta_num_threads; thread_number++) { \
  threads[thread_number] = std::thread([&](int_index parallel_array_size, int_index thread_number ) { \
		for( int_index parallel_index=thread_number; parallel_index < parallel_array_size; parallel_index += manta_num_threads ) {

#  define parallel_end \
    } \
    },parallel_array_size,thread_number); \
    } \
    for (auto &thread : threads) \
      thread.join(); \
    }

#  define parallel_block \
    { \
      std::vector<std::thread> threads; \
      {

#  define do_parallel threads.push_back( std::thread([&]() {
#  define do_end \
    } ) );

#  define block_end \
    } \
    for (auto &thread : threads) { \
      thread.join(); \
    } \
    }

#else

#  define parallel_for(size) \
    { \
      int thread_number = 0; \
      int_index parallel_index = 0; \
      for (int_index parallel_index = 0; parallel_index < (int_index)size; parallel_index++) {
#  define parallel_end \
    } \
    thread_number = parallel_index = 0; \
    }

#  define parallel_block
#  define do_parallel
#  define do_end
#  define block_end

#endif

#include "vectorbase.h"

namespace Manta {

static const unsigned default_expected_none_zeros = 7;

template<class N, class T> struct RCMatrix {
  struct RowEntry {
    std::vector<N> index;
    std::vector<T> value;
  };
  RCMatrix() : n(0), expected_none_zeros(default_expected_none_zeros)
  {
  }
  RCMatrix(N size, N expected_none_zeros = default_expected_none_zeros)
      : n(0), expected_none_zeros(expected_none_zeros)
  {
    resize(size);
  }
  RCMatrix(const RCMatrix &m) : n(0), expected_none_zeros(default_expected_none_zeros)
  {
    init(m);
  }
  RCMatrix &operator=(const RCMatrix &m)
  {
    expected_none_zeros = m.expected_none_zeros;
    init(m);
    return *this;
  }
  RCMatrix &operator=(RCMatrix &&m)
  {
    matrix = m.matrix;
    offsets = m.offsets;
    expected_none_zeros = m.expected_none_zeros;
    n = m.n;
    m.n = 0;
    m.matrix.clear();
    m.offsets.clear();
    return *this;
  }
  RCMatrix(RCMatrix &&m)
      : n(m.n), expected_none_zeros(m.expected_none_zeros), matrix(m.matrix), offsets(m.offsets)
  {
    m.n = 0;
    m.matrix.clear();
    m.offsets.clear();
  }
  void init(const RCMatrix &m)
  {
    expected_none_zeros = m.expected_none_zeros;
    resize(m.n);
    parallel_for(n)
    {
      N i = parallel_index;
      if (m.matrix[i]) {
        alloc_row(i);
        matrix[i]->index = m.matrix[i]->index;
        matrix[i]->value = m.matrix[i]->value;
      }
      else {
        dealloc_row(i);
      }
    }
    parallel_end
  }
  ~RCMatrix()
  {
    clear();
  }
  void clear()
  {
    for (N i = 0; i < n; i++) {
      dealloc_row(i);
      matrix[i] = nullptr;
      if (offsets.size())
        offsets[i] = 0;
    }
  };
  bool empty(N i) const
  {
    return matrix[i] == nullptr;
  }
  N row_nonzero_size(N i) const
  {
    return matrix[i] == nullptr ? 0 : matrix[i]->index.size();
  }
  void resize(N size, N expected_none_zeros = 0)
  {
    if (!expected_none_zeros) {
      expected_none_zeros = this->expected_none_zeros;
    }
    if (n > size) {
      // Shrinking
      for (N i = size ? size - 1 : 0; i < n; i++)
        dealloc_row(i);
      matrix.resize(size);
    }
    else if (n < size) {
      // Expanding
      matrix.resize(size);
      for (N i = n; i < size; i++) {
        matrix[i] = nullptr;
        if (offsets.size())
          offsets[i] = 0;
      }
    }
    n = size;
  }
  void alloc_row(N i)
  {
    assert(i < n);
    if (!matrix[i]) {
      matrix[i] = new RowEntry;
      matrix[i]->index.reserve(expected_none_zeros);
      matrix[i]->value.reserve(expected_none_zeros);
      if (offsets.size())
        offsets[i] = 0;
    }
  }
  void dealloc_row(N i)
  {
    assert(i < n);
    if (matrix[i]) {
      if (offsets.empty() || !offsets[i])
        delete matrix[i];
      matrix[i] = nullptr;
      if (offsets.size())
        offsets[i] = 0;
    }
  }
  T operator()(N i, N j) const
  {
    assert(i < n);
    for (Iterator it = row_begin(i); it; ++it) {
      if (it.index() == j)
        return it.value();
    }
    return T(0.0);
  }
  void add_to_element_checked(N i, N j, T val)
  {
    if ((i < 0) || (j < 0) || (i >= n) || (j >= n))
      return;
    add_to_element(i, j, val);
  }
  void add_to_element(N i, N j, T increment_value)
  {
    if (std::abs(increment_value) > VECTOR_EPSILON) {
      assert(i < n);
      assert(offsets.empty() || offsets[i] == 0);
      alloc_row(i);
      std::vector<N> &index = matrix[i]->index;
      std::vector<T> &value = matrix[i]->value;
      for (N k = 0; k < (N)index.size(); ++k) {
        if (index[k] == j) {
          value[k] += increment_value;
          return;
        }
        else if (index[k] > j) {
          index.insert(index.begin() + k, j);
          value.insert(value.begin() + k, increment_value);
          return;
        }
      }
      index.push_back(j);
      value.push_back(increment_value);
    }
  }

  void set_element(N i, N j, T v)
  {
    if (std::abs(v) > VECTOR_EPSILON) {
      assert(i < n);
      assert(offsets.empty() || offsets[i] == 0);
      alloc_row(i);
      std::vector<N> &index = matrix[i]->index;
      std::vector<T> &value = matrix[i]->value;
      for (N k = 0; k < (N)index.size(); ++k) {
        if (index[k] == j) {
          value[k] = v;
          return;
        }
        else if (index[k] > j) {
          index.insert(index.begin() + k, j);
          value.insert(value.begin() + k, v);
          return;
        }
      }
      index.push_back(j);
      value.push_back(v);
    }
  }

  // Make sure that j is the biggest column in the row, no duplication allowed
  void fix_element(N i, N j, T v)
  {
    if (std::abs(v) > VECTOR_EPSILON) {
      assert(i < n);
      assert(offsets.empty() || offsets[i] == 0);
      alloc_row(i);
      std::vector<N> &index = matrix[i]->index;
      std::vector<T> &value = matrix[i]->value;
      index.push_back(j);
      value.push_back(v);
    }
  }
  int_index trim_zero_entries(double e = VECTOR_EPSILON)
  {
    std::vector<int_index> deleted_entries(n, 0);
    parallel_for(n)
    {
      N i = parallel_index;
      if (matrix[i]) {
        std::vector<N> &index = matrix[i]->index;
        std::vector<T> &value = matrix[i]->value;
        N head = 0;
        N k = 0;
        for (k = 0; k < index.size(); ++k) {
          if (std::abs(value[k]) > e) {
            index[head] = index[k];
            value[head] = value[k];
            ++head;
          }
        }
        if (head != k) {
          index.erase(index.begin() + head, index.end());
          value.erase(value.begin() + head, value.end());
          deleted_entries[i] += k - head;
        }
        if (!offsets.size() && !head) {
          remove_row(i);
        }
      }
    }
    parallel_end
        //
        int_index sum_deleted(0);
    for (int_index i = 0; i < n; i++)
      sum_deleted += deleted_entries[i];
    return sum_deleted;
  }
  void remove_reference(N i)
  {
    if (offsets.size() && offsets[i] && matrix[i]) {
      RowEntry *save = matrix[i];
      matrix[i] = new RowEntry;
      *matrix[i] = *save;
      for (N &index : matrix[i]->index)
        index += offsets[i];
      offsets[i] = 0;
    }
  }
  void remove_row(N i)
  {
    dealloc_row(i);
  }
  bool is_symmetric(double e = VECTOR_EPSILON) const
  {
    std::vector<bool> flags(n, true);
    parallel_for(n)
    {
      N i = parallel_index;
      bool flag = true;
      for (Iterator it = row_begin(i); it; ++it) {
        N index = it.index();
        T value = it.value();
        if (std::abs(value) > e) {
          bool found_entry = false;
          for (Iterator it_i = row_begin(index); it_i; ++it_i) {
            if (it_i.index() == i) {
              found_entry = true;
              if (std::abs(value - it_i.value()) > e) {
                flag = false;
                break;
              }
            }
          }
          if (!found_entry)
            flag = false;
          if (!flag)
            break;
        }
      }
      flags[i] = flag;
    }
    parallel_end for (N i = 0; i < matrix.size(); ++i)
    {
      if (!flags[i])
        return false;
    }
    return true;
  }

  void expand()
  {
    if (offsets.empty())
      return;
    for (N i = 1; i < n; i++) {
      if (offsets[i]) {
        RowEntry *ref = matrix[i];
        matrix[i] = new RowEntry;
        *matrix[i] = *ref;
        for (N j = 0; j < (N)matrix[i]->index.size(); j++) {
          matrix[i]->index[j] += offsets[i];
        }
      }
    }
    offsets.resize(0);
  }

  N column(N i) const
  {
    return empty(i) ? 0 : row_begin(i, row_nonzero_size(i) - 1).index();
  }
  N getColumnSize() const
  {
    N max_column(0);
    auto column = [&](N i) {
      N max_column(0);
      for (Iterator it = row_begin(i); it; ++it)
        max_column = std::max(max_column, it.index());
      return max_column + 1;
    };
    for (N i = 0; i < n; i++)
      max_column = std::max(max_column, column(i));
    return max_column;
  }
  N getNonzeroSize() const
  {
    N nonzeros(0);
    for (N i = 0; i < n; ++i) {
      nonzeros += row_nonzero_size(i);
    }
    return nonzeros;
  }
  class Iterator : std::iterator<std::input_iterator_tag, T> {
   public:
    Iterator(const RowEntry *rowEntry, N k, N offset) : rowEntry(rowEntry), k(k), offset(offset)
    {
    }
    operator bool() const
    {
      return rowEntry != nullptr && k < (N)rowEntry->index.size();
    }
    Iterator &operator++()
    {
      ++k;
      return *this;
    }
    T value() const
    {
      return rowEntry->value[k];
    }
    N index() const
    {
      return rowEntry->index[k] + offset;
    }
    N index_raw() const
    {
      return rowEntry->index[k];
    }
    N size() const
    {
      return rowEntry == nullptr ? 0 : rowEntry->index.size();
    }

   protected:
    const RowEntry *rowEntry;
    N k, offset;
  };
  Iterator row_begin(N n, N k = 0) const
  {
    return Iterator(matrix[n], k, offsets.size() ? offsets[n] : 0);
  }
  class DynamicIterator : public Iterator {
   public:
    DynamicIterator(RowEntry *rowEntry, N k, N offset)
        : rowEntry(rowEntry), Iterator(rowEntry, k, offset)
    {
    }
    void setValue(T value)
    {
      rowEntry->value[Iterator::k] = value;
    }
    void setIndex(N index)
    {
      rowEntry->index[Iterator::k] = index;
    }

   protected:
    RowEntry *rowEntry;
  };
  DynamicIterator dynamic_row_begin(N n, N k = 0)
  {
    N offset = offsets.size() ? offsets[n] : 0;
    if (offset) {
      printf("---- Warning ----\n");
      printf("Dynamic iterator is not allowed for referenced rows.\n");
      printf("You should be very careful otherwise this causes some bugs.\n");
      printf(
          "We encourage you that you convert this row into a raw format, then loop over it...\n");
      printf("-----------------\n");
      exit(0);
    }
    return DynamicIterator(matrix[n], k, offset);
  }
  RCMatrix transpose(N rowsize = 0,
                     unsigned expected_none_zeros = default_expected_none_zeros) const
  {
    if (!rowsize)
      rowsize = getColumnSize();
    RCMatrix result(rowsize, expected_none_zeros);
    for (N i = 0; i < n; i++) {
      for (Iterator it = row_begin(i); it; ++it)
        result.fix_element(it.index(), i, it.value());
    }
    return result;
  }

  RCMatrix getKtK() const
  {
    RCMatrix m = transpose();
    RCMatrix result(n, expected_none_zeros);
    // Run in parallel
    parallel_for(result.n)
    {
      N i = parallel_index;
      for (Iterator it_A = m.row_begin(i); it_A; ++it_A) {
        N j = it_A.index();
        assert(j < n);
        T a = it_A.value();
        if (std::abs(a) > VECTOR_EPSILON) {
          for (Iterator it_B = row_begin(j); it_B; ++it_B) {
            // result.add_to_element(i,it_B.index(),it_B.value()*a);
            double value = it_B.value() * a;
            if (std::abs(value) > VECTOR_EPSILON)
              result.add_to_element(i, it_B.index(), value);
          }
        }
      }
    }
    parallel_end return result;
  }

  RCMatrix operator*(const RCMatrix &m) const
  {
    RCMatrix result(n, expected_none_zeros);
    // Run in parallel
    parallel_for(result.n)
    {
      N i = parallel_index;
      for (Iterator it_A = row_begin(i); it_A; ++it_A) {
        N j = it_A.index();
        assert(j < m.n);
        T a = it_A.value();
        if (std::abs(a) > VECTOR_EPSILON) {
          for (Iterator it_B = m.row_begin(j); it_B; ++it_B) {
            // result.add_to_element(i,it_B.index(),it_B.value()*a);
            double value = it_B.value() * a;
            if (std::abs(value) > VECTOR_EPSILON)
              result.add_to_element(i, it_B.index(), value);
          }
        }
      }
    }
    parallel_end return result;
  }

  RCMatrix sqrt() const
  {
    RCMatrix result(n, expected_none_zeros);
    // Run in parallel
    parallel_for(result.n)
    {
      N i = parallel_index;
      for (Iterator it_A = row_begin(i); it_A; ++it_A) {
        N j = it_A.index();
        result.set_element(i, j, std::sqrt(it_A.value()));
      }
    }
    parallel_end return result;
  }

  RCMatrix operator*(const double k) const
  {
    RCMatrix result(n, expected_none_zeros);
    // Run in parallel
    parallel_for(result.n)
    {
      N i = parallel_index;
      for (Iterator it_A = row_begin(i); it_A; ++it_A) {
        N j = it_A.index();
        result.add_to_element(i, j, it_A.value() * k);
      }
    }
    parallel_end return result;
  }

  RCMatrix applyKernel(const RCMatrix &kernel, const int nx, const int ny) const
  {
    RCMatrix result(n, expected_none_zeros);
    // find center position of kernel (half of kernel size)
    int kCols = kernel.n, kRows = kernel.n, rows = nx, cols = ny;
    int kCenterX = kCols / 2;
    int kCenterY = kRows / 2;
    // Run in parallel
    parallel_for(result.n)
    {
      N i = parallel_index;
      if (i >= rows)
        break;
      for (Iterator it_A = row_begin(i); it_A; ++it_A) {
        N j = it_A.index();
        if (j >= cols)
          break;
        for (int m = 0; m < kRows; ++m) {    // kernel rows
          int mm = kRows - 1 - m;            // row index of flipped kernel
          for (int n = 0; n < kCols; ++n) {  // kernel columns
            int nn = kCols - 1 - n;          // column index of flipped kernel
            // index of input signal, used for checking boundary
            int ii = i + (m - kCenterY);
            int jj = j + (n - kCenterX);
            // ignore input samples which are out of bound
            if (ii >= 0 && ii < rows && jj >= 0 && jj < cols)
              result.add_to_element(i, j, (*this)(ii, jj) * kernel(mm, nn));
          }
        }
      }
    }
    parallel_end return result;
  }

  RCMatrix applyHorizontalKernel(const RCMatrix &kernel, const int nx, const int ny) const
  {
    RCMatrix result(n, expected_none_zeros);
    // find center position of kernel (half of kernel size)
    int kCols = kernel.n, kRows = 1, rows = nx, cols = ny;
    int kCenterX = kCols / 2;
    int kCenterY = kRows / 2;
    // Run in parallel
    parallel_for(result.n)
    {
      N i = parallel_index;
      if (i >= rows)
        break;
      for (Iterator it_A = row_begin(i); it_A; ++it_A) {
        N j = it_A.index();
        if (j >= cols)
          break;
        for (int m = 0; m < kRows; ++m) {    // kernel rows
          int mm = kRows - 1 - m;            // row index of flipped kernel
          for (int n = 0; n < kCols; ++n) {  // kernel columns
            int nn = kCols - 1 - n;          // column index of flipped kernel
            // index of input signal, used for checking boundary
            int ii = i + (m - kCenterY);
            int jj = j + (n - kCenterX);
            // ignore input samples which are out of bound
            if (ii >= 0 && ii < rows && jj >= 0 && jj < cols)
              result.add_to_element(i, j, (*this)(ii, jj) * kernel(mm, nn));
          }
        }
      }
    }
    parallel_end return result;
  }

  RCMatrix applyVerticalKernel(const RCMatrix &kernel, const int nx, const int ny) const
  {
    RCMatrix result(n, expected_none_zeros);
    // find center position of kernel (half of kernel size)
    int kCols = 1, kRows = kernel.n, rows = nx, cols = ny;
    int kCenterX = kCols / 2;
    int kCenterY = kRows / 2;
    // Run in parallel
    parallel_for(result.n)
    {
      N i = parallel_index;
      if (i >= rows)
        break;
      for (Iterator it_A = row_begin(i); it_A; ++it_A) {
        N j = it_A.index();
        if (j >= cols)
          break;
        for (int m = 0; m < kRows; ++m) {    // kernel rows
          int mm = kRows - 1 - m;            // row index of flipped kernel
          for (int n = 0; n < kCols; ++n) {  // kernel columns
            int nn = kCols - 1 - n;          // column index of flipped kernel
            // index of input signal, used for checking boundary
            int ii = i + (m - kCenterY);
            int jj = j + (n - kCenterX);
            // ignore input samples which are out of bound
            if (ii >= 0 && ii < rows && jj >= 0 && jj < cols)
              result.add_to_element(i, j, (*this)(ii, jj) * kernel(mm, nn));
          }
        }
      }
    }
    parallel_end return result;
  }

  RCMatrix applySeparableKernel(const RCMatrix &kernelH,
                                const RCMatrix &kernelV,
                                const int nx,
                                const int ny) const
  {
    return applyHorizontalKernel(kernelH, nx, ny).applyVerticalKernel(kernelV, nx, ny);
  }

  RCMatrix applySeparableKernelTwice(const RCMatrix &kernelH,
                                     const RCMatrix &kernelV,
                                     const int nx,
                                     const int ny) const
  {
    return applySeparableKernel(kernelH, kernelV, nx, ny)
        .applySeparableKernel(kernelH, kernelV, nx, ny);
  }

  std::vector<T> operator*(const std::vector<T> &rhs) const
  {
    std::vector<T> result(n, 0.0);
    multiply(rhs, result);
    return result;
  }
  void multiply(const std::vector<T> &rhs, std::vector<T> &result) const
  {
    result.resize(n);
    for (N i = 0; i < n; i++) {
      T new_value = 0.0;
      for (Iterator it = row_begin(i); it; ++it) {
        N j_index = it.index();
        assert(j_index < rhs.size());
        new_value += rhs[j_index] * it.value();
      }
      result[i] = new_value;
    }
  }
  RCMatrix operator+(const RCMatrix &m) const
  {
    RCMatrix A(*this);
    return std::move(A.add(m));
  }
  RCMatrix &add(const RCMatrix &m)
  {
    if (m.n > n)
      resize(m.n);
    parallel_for(m.n)
    {
      N i = parallel_index;
      for (Iterator it = m.row_begin(i); it; ++it) {
        add_to_element(i, it.index(), it.value());
      }
    }
    parallel_end return *this;
  }
  RCMatrix operator-(const RCMatrix &m) const
  {
    RCMatrix A(*this);
    return std::move(A.sub(m));
  }
  RCMatrix &sub(const RCMatrix &m)
  {
    if (m.n > n)
      resize(m.n);
    parallel_for(m.n)
    {
      N i = parallel_index;
      for (Iterator it = m.row_begin(i); it; ++it) {
        add_to_element(i, it.index(), -it.value());
      }
    }
    parallel_end return *this;
  }
  RCMatrix &replace(const RCMatrix &m, int rowInd, int colInd)
  {
    if (m.n > n)
      resize(m.n);
    parallel_for(m.n)
    {
      N i = parallel_index;
      for (Iterator it = m.row_begin(i); it; ++it) {
        set_element(i + rowInd, it.index() + colInd, it.value());
      }
    }
    parallel_end return *this;
  }
  Real max_residual(const std::vector<T> &lhs, const std::vector<T> &rhs) const
  {
    std::vector<T> r = operator*(lhs);
    Real max_residual = 0.0;
    for (N i = 0; i < rhs.size(); i++) {
      if (!empty(i))
        max_residual = std::max(max_residual, std::abs(r[i] - rhs[i]));
    }
    return max_residual;
  }
  std::vector<T> residual_vector(const std::vector<T> &lhs, const std::vector<T> &rhs) const
  {
    std::vector<T> result = operator*(lhs);
    assert(result.size() == rhs.size());
    for (N i = 0; i < result.size(); i++) {
      result[i] = std::abs(result[i] - rhs[i]);
    }
    return result;
  }
  T norm() const
  {
    T result(0.0);
    for (N i = 0; i < n; ++i) {
      for (Iterator it = row_begin(i); it; ++it) {
        result = std::max(result, std::abs(it.value()));
      }
    }
    return result;
  }

  T norm_L2_sqr() const
  {
    T result(0.0);
    for (N i = 0; i < n; ++i) {
      for (Iterator it = row_begin(i); it; ++it) {
        result += (it.value()) * (it.value());
      }
    }
    return result;
  }

  void write_matlab(std::ostream &output,
                    unsigned int rows,
                    unsigned int columns,
                    const char *variable_name)
  {
    output << variable_name << "=sparse([";
    for (N i = 0; i < n; ++i) {
      if (matrix[i]) {
        const std::vector<N> &index = matrix[i]->index;
        for (N j = 0; j < (N)index.size(); ++j) {
          output << i + 1 << " ";
        }
      }
    }
    output << "],...\n  [";
    for (N i = 0; i < n; ++i) {
      if (matrix[i]) {
        const std::vector<N> &index = matrix[i]->index;
        for (N j = 0; j < (N)index.size(); ++j) {
          output << index[j] + (offsets.empty() ? 0 : offsets[i]) + 1 << " ";
        }
      }
    }
    output << "],...\n  [";
    for (N i = 0; i < n; ++i) {
      if (matrix[i]) {
        const std::vector<T> &value = matrix[i]->value;
        for (N j = 0; j < value.size(); ++j) {
          output << value[j] << " ";
        }
      }
    }
    output << "], " << rows << ", " << columns << ");" << std::endl;
  };
  void export_matlab(std::string filename, std::string name)
  {
    // Export this matrix
    std::ofstream file;
    file.open(filename.c_str());
    write_matlab(file, n, getColumnSize(), name.c_str());
    file.close();
  }
  void print_readable(std::string name, bool printNonZero = true)
  {
    std::cout << name << " \n";
    for (int i = 0; i < n; ++i) {
      for (int j = 0; j < n; ++j) {
        if (printNonZero) {
          if ((*this)(i, j) == 0) {
            std::cout << "  .";
            continue;
          }
        }
        else {
          if ((*this)(i, j) == 0) {
            continue;
          }
        }

        if ((*this)(i, j) >= 0)
          std::cout << " ";
        std::cout << " " << (*this)(i, j);
      }
      std::cout << " \n";
    }
  }
  ///
  N n;
  N expected_none_zeros;
  std::vector<RowEntry *> matrix;
  std::vector<int> offsets;
};

template<class N, class T>
static inline RCMatrix<N, T> operator*(const std::vector<T> &diagonal, const RCMatrix<N, T> &A)
{
  RCMatrix<N, T> result(A);
  parallel_for(result.n)
  {
    N row(parallel_index);
    for (auto it = result.dynamic_row_begin(row); it; ++it) {
      it.setValue(it.value() * diagonal[row]);
    }
  }
  parallel_end return std::move(result);
}

template<class N, class T> struct RCFixedMatrix {
  std::vector<N> rowstart;
  std::vector<N> index;
  std::vector<T> value;
  N n;
  N max_rowlength;
  //
  RCFixedMatrix() : n(0), max_rowlength(0)
  {
  }
  RCFixedMatrix(const RCMatrix<N, T> &matrix)
  {
    n = matrix.n;
    rowstart.resize(n + 1);
    rowstart[0] = 0;
    max_rowlength = 0;
    for (N i = 0; i < n; i++) {
      if (!matrix.empty(i)) {
        rowstart[i + 1] = rowstart[i] + matrix.row_nonzero_size(i);
        max_rowlength = std::max(max_rowlength, rowstart[i + 1] - rowstart[i]);
      }
      else {
        rowstart[i + 1] = rowstart[i];
      }
    }
    value.resize(rowstart[n]);
    index.resize(rowstart[n]);
    N j = 0;
    for (N i = 0; i < n; i++) {
      for (typename RCMatrix<N, T>::Iterator it = matrix.row_begin(i); it; ++it) {
        value[j] = it.value();
        index[j] = it.index();
        ++j;
      }
    }
  }
  class Iterator : std::iterator<std::input_iterator_tag, T> {
   public:
    Iterator(N start, N end, const std::vector<N> &index, const std::vector<T> &value)
        : index_array(index), value_array(value), k(start), start(start), end(end)
    {
    }
    operator bool() const
    {
      return k < end;
    }
    Iterator &operator++()
    {
      ++k;
      return *this;
    }
    T value() const
    {
      return value_array[k];
    }
    N index() const
    {
      return index_array[k];
    }
    N size() const
    {
      return end - start;
    }

   private:
    const std::vector<N> &index_array;
    const std::vector<T> &value_array;
    N k, start, end;
  };
  Iterator row_begin(N n) const
  {
    return Iterator(rowstart[n], rowstart[n + 1], index, value);
  }
  std::vector<T> operator*(const std::vector<T> &rhs) const
  {
    std::vector<T> result(n, 0.0);
    multiply(rhs, result);
    return result;
  }
  void multiply(const std::vector<T> &rhs, std::vector<T> &result) const
  {
    result.resize(n);
    parallel_for(n)
    {
      N i = parallel_index;
      T new_value = 0.0;
      for (Iterator it = row_begin(i); it; ++it) {
        N j_index = it.index();
        assert(j_index < rhs.size());
        new_value += rhs[j_index] * it.value();
      }
      result[i] = new_value;
    }
    parallel_end
  }
  RCMatrix<N, T> operator*(const RCFixedMatrix &m) const
  {
    RCMatrix<N, T> result(n, max_rowlength);
    // Run in parallel
    parallel_for(result.n)
    {
      N i = parallel_index;
      for (Iterator it_A = row_begin(i); it_A; ++it_A) {
        N j = it_A.index();
        assert(j < m.n);
        T a = it_A.value();
        if (std::abs(a) > VECTOR_EPSILON) {
          for (Iterator it_B = m.row_begin(j); it_B; ++it_B) {
            result.add_to_element(i, it_B.index(), it_B.value() * a);
          }
        }
      }
    }
    parallel_end return result;
  }

  RCMatrix<N, T> toRCMatrix() const
  {
    RCMatrix<N, T> result(n, 0);
    parallel_for(n)
    {
      N i = parallel_index;
      N size = rowstart[i + 1] - rowstart[i];
      result.matrix[i] = new typename RCMatrix<N, T>::RowEntry;
      result.matrix[i]->index.resize(size);
      result.matrix[i]->value.resize(size);
      for (N j = 0; j < size; j++) {
        result.matrix[i]->index[j] = index[rowstart[i] + j];
        result.matrix[i]->value[j] = value[rowstart[i] + j];
      }
    }
    parallel_end return result;
  }
};

typedef RCMatrix<int, Real> Matrix;
typedef RCFixedMatrix<int, Real> FixedMatrix;

}

#undef parallel_for
#undef parallel_end

#undef parallel_block
#undef do_parallel
#undef do_end
#undef block_end

#endif