#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN

#include <doctest.h>

#include <taskflow/taskflow.hpp>
#include <taskflow/algorithm/pipeline.hpp>

// --------------------------------------------------------
// Testcase: 1 pipe, L lines, w workers
// --------------------------------------------------------
// Exercises a pipeline made of a single pipe.
//
// L    - number of lines passed to tf::Pipeline
// w    - number of workers passed to tf::Executor
// type - type of the single pipe; only tf::PipeType::SERIAL has an active
//        branch (the PARALLEL branch below is commented out, so any other
//        type makes this function loop without running anything)
//
// For every data amount N in [0, maxN] a fresh taskflow is built in which the
// pipeline module precedes a "test" task that checks the item counter and the
// pipeline's token count.
void pipeline_1P(size_t L, unsigned w, tf::PipeType type) {

  tf::Executor executor(w);
    
  const size_t maxN = 100;

  // source[i] == i, which lets the pipe verify that items arrive in order
  std::vector<int> source(maxN);
  std::iota(source.begin(), source.end(), 0);

  // iterate over every data amount N = 0, 1, 2, ..., maxN
  for (size_t N = 0; N <= maxN; N++) {
    
    // serial direction
    if (type == tf::PipeType::SERIAL) {
      tf::Taskflow taskflow;

      // number of items consumed so far; the pipe updates it by reference
      size_t j = 0;
      tf::Pipeline pl (L, tf::Pipe{type, [L, N, &j, &source](auto& pf) mutable {
        // stop the pipeline once N items have been consumed
        if (j == N) {
          pf.stop();
          return;
        }
        REQUIRE(j == source[j]);
        // tokens are expected to be assigned to lines round-robin
        REQUIRE(pf.token() % L == pf.line());
        j++;
      }});

      //taskflow.pipeline(pl);
      auto pipeline = taskflow.composed_of(pl).name("module_of_pipeline");

      auto test = taskflow.emplace([&](){
        REQUIRE(j == N);
        REQUIRE(pl.num_tokens() == N);    
      }).name("test");

      pipeline.precede(test);

      // The predicate returns false three times and true on its fourth
      // evaluation. It deliberately does not touch `j`: `j` keeps the value N
      // after the first run, so on any later run the pipe stops on its first
      // invocation and the checks in `test` (j == N, num_tokens() == N) still
      // hold.
      // NOTE(review): if every repetition is meant to re-process N items,
      // `j` has to be reset through a by-reference capture and the
      // num_tokens() expectation adjusted to match -- confirm the intent.
      executor.run_until(taskflow, [counter=3]() mutable{
        return counter --== 0;  
      }).get();
    }
    // parallel pipe
    //else if(type == tf::PipeType::PARALLEL) {
    //  
    //  tf::Taskflow taskflow;

    //  std::atomic<size_t> j = 0;
    //  std::mutex mutex;
    //  std::vector<int> collection;

    //  tf::Pipeline pl(L, tf::Pipe{type, 
    //  [N, &j, &mutex, &collection](auto& pf) mutable {

    //    auto ticket = j.fetch_add(1);

    //    if(ticket >= N) {
    //      pf.stop();
    //      return;
    //    }
    //    std::scoped_lock<std::mutex> lock(mutex);
    //    collection.push_back(ticket);
    //  }});

    //  taskflow.composed_of(pl);
    //  executor.run(taskflow).wait();
    //  REQUIRE(collection.size() == N);
    //  std::sort(collection.begin(), collection.end());
    //  for(size_t k=0; k<N; k++) {
    //    REQUIRE(collection[k] == k);
    //  }

    //  j = 0;
    //  collection.clear();
    //  executor.run(taskflow).wait();
    //  REQUIRE(collection.size() == N);
    //  std::sort(collection.begin(), collection.end());
    //  for(size_t k=0; k<N; k++) {
    //    REQUIRE(collection[k] == k);
    //  }
    //}
  }
}

// Single serial pipe: every combination of 1-4 lines and 1-4 workers.
// Test names follow "Pipeline.1P(S).<lines>L.<workers>W"; the two numbers are
// forwarded to pipeline_1P as (L, w). Each case carries the
// doctest::timeout(300) decorator.

// serial pipe with one line
TEST_CASE("Pipeline.1P(S).1L.1W" * doctest::timeout(300)) {
  pipeline_1P(1, 1, tf::PipeType::SERIAL);
}

TEST_CASE("Pipeline.1P(S).1L.2W" * doctest::timeout(300)) {
  pipeline_1P(1, 2, tf::PipeType::SERIAL);
}

TEST_CASE("Pipeline.1P(S).1L.3W" * doctest::timeout(300)) {
  pipeline_1P(1, 3, tf::PipeType::SERIAL);
}

TEST_CASE("Pipeline.1P(S).1L.4W" * doctest::timeout(300)) {
  pipeline_1P(1, 4, tf::PipeType::SERIAL);
}

// serial pipe with two lines
TEST_CASE("Pipeline.1P(S).2L.1W" * doctest::timeout(300)) {
  pipeline_1P(2, 1, tf::PipeType::SERIAL);
}

TEST_CASE("Pipeline.1P(S).2L.2W" * doctest::timeout(300)) {
  pipeline_1P(2, 2, tf::PipeType::SERIAL);
}

TEST_CASE("Pipeline.1P(S).2L.3W" * doctest::timeout(300)) {
  pipeline_1P(2, 3, tf::PipeType::SERIAL);
}

TEST_CASE("Pipeline.1P(S).2L.4W" * doctest::timeout(300)) {
  pipeline_1P(2, 4, tf::PipeType::SERIAL);
}

// serial pipe with three lines
TEST_CASE("Pipeline.1P(S).3L.1W" * doctest::timeout(300)) {
  pipeline_1P(3, 1, tf::PipeType::SERIAL);
}

TEST_CASE("Pipeline.1P(S).3L.2W" * doctest::timeout(300)) {
  pipeline_1P(3, 2, tf::PipeType::SERIAL);
}

TEST_CASE("Pipeline.1P(S).3L.3W" * doctest::timeout(300)) {
  pipeline_1P(3, 3, tf::PipeType::SERIAL);
}

TEST_CASE("Pipeline.1P(S).3L.4W" * doctest::timeout(300)) {
  pipeline_1P(3, 4, tf::PipeType::SERIAL);
}

// serial pipe with four lines
TEST_CASE("Pipeline.1P(S).4L.1W" * doctest::timeout(300)) {
  pipeline_1P(4, 1, tf::PipeType::SERIAL);
}

TEST_CASE("Pipeline.1P(S).4L.2W" * doctest::timeout(300)) {
  pipeline_1P(4, 2, tf::PipeType::SERIAL);
}

TEST_CASE("Pipeline.1P(S).4L.3W" * doctest::timeout(300)) {
  pipeline_1P(4, 3, tf::PipeType::SERIAL);
}

TEST_CASE("Pipeline.1P(S).4L.4W" * doctest::timeout(300)) {
  pipeline_1P(4, 4, tf::PipeType::SERIAL);
}


//// ---- parallel pipe ----
//
//// parallel pipe with one line
//TEST_CASE("Pipeline.1P(P).1L.1W" * doctest::timeout(300)) {
//  pipeline_1P(1, 1, tf::PipeType::PARALLEL);
//}
//
//TEST_CASE("Pipeline.1P(P).1L.2W" * doctest::timeout(300)) {
//  pipeline_1P(1, 2, tf::PipeType::PARALLEL);
//}
//
//TEST_CASE("Pipeline.1P(P).1L.3W" * doctest::timeout(300)) {
//  pipeline_1P(1, 3, tf::PipeType::PARALLEL);
//}
//
//TEST_CASE("Pipeline.1P(P).1L.4W" * doctest::timeout(300)) {
//  pipeline_1P(1, 4, tf::PipeType::PARALLEL);
//}
//
//// parallel pipe with two lines
//TEST_CASE("Pipeline.1P(P).2L.1W" * doctest::timeout(300)) {
//  pipeline_1P(2, 1, tf::PipeType::PARALLEL);
//}
//
//TEST_CASE("Pipeline.1P(P).2L.2W" * doctest::timeout(300)) {
//  pipeline_1P(2, 2, tf::PipeType::PARALLEL);
//}
//
//TEST_CASE("Pipeline.1P(P).2L.3W" * doctest::timeout(300)) {
//  pipeline_1P(2, 3, tf::PipeType::PARALLEL);
//}
//
//TEST_CASE("Pipeline.1P(P).2L.4W" * doctest::timeout(300)) {
//  pipeline_1P(2, 4, tf::PipeType::PARALLEL);
//}
//
//// parallel pipe with three lines
//TEST_CASE("Pipeline.1P(P).3L.1W" * doctest::timeout(300)) {
//  pipeline_1P(3, 1, tf::PipeType::PARALLEL);
//}
//
//TEST_CASE("Pipeline.1P(P).3L.2W" * doctest::timeout(300)) {
//  pipeline_1P(3, 2, tf::PipeType::PARALLEL);
//}
//
//TEST_CASE("Pipeline.1P(P).3L.3W" * doctest::timeout(300)) {
//  pipeline_1P(3, 3, tf::PipeType::PARALLEL);
//}
//
//TEST_CASE("Pipeline.1P(P).3L.4W" * doctest::timeout(300)) {
//  pipeline_1P(3, 4, tf::PipeType::PARALLEL);
//}
//
//// parallel pipe with four lines
//TEST_CASE("Pipeline.1P(P).4L.1W" * doctest::timeout(300)) {
//  pipeline_1P(4, 1, tf::PipeType::PARALLEL);
//}
//
//TEST_CASE("Pipeline.1P(P).4L.2W" * doctest::timeout(300)) {
//  pipeline_1P(4, 2, tf::PipeType::PARALLEL);
//}
//
//TEST_CASE("Pipeline.1P(P).4L.3W" * doctest::timeout(300)) {
//  pipeline_1P(4, 3, tf::PipeType::PARALLEL);
//}
//
//TEST_CASE("Pipeline.1P(P).4L.4W" * doctest::timeout(300)) {
//  pipeline_1P(4, 4, tf::PipeType::PARALLEL);
//}

// ----------------------------------------------------------------------------
// two pipes (SS), L lines, W workers
// ----------------------------------------------------------------------------

// Checks a two-pipe pipeline in which both pipes are serial.
//
// L - number of lines passed to tf::Pipeline
// w - number of workers passed to tf::Executor
//
// For each data amount N in [0, maxN]:
//   * pipe 0 consumes source[0..N), stores source[j1] + 1 in the buffer slot
//     of its line, and stops the pipeline once j1 reaches N;
//   * pipe 1 reads the slot written by pipe 0 and compares it against
//     source[j2] + 1.
// A "test" task placed after the pipeline module verifies both counters and
// the pipeline's token count.
void pipeline_2P_SS(size_t L, unsigned w) {

  tf::Executor executor(w);

  const size_t maxN = 100;

  // source[i] == i
  std::vector<int> source(maxN);
  std::iota(source.begin(), source.end(), 0);

  // one row per line, one column per pipe
  std::vector<std::array<int, 2>> mybuffer(L);

  for(size_t N = 0; N <= maxN; ++N) {

    tf::Taskflow taskflow;

    size_t cnt = 1;  // multiplier for the expected token count
    size_t j1 = 0;   // items handled by pipe 0
    size_t j2 = 0;   // items handled by pipe 1

    tf::Pipeline pl(
      L,

      // pipe 0 (serial): produce
      tf::Pipe{tf::PipeType::SERIAL, [N, L, &source, &j1, &mybuffer](auto& pf) {
        if(j1 == N) {
          pf.stop();
          return;
        }
        REQUIRE(j1 == source[j1]);
        REQUIRE(pf.token() % L == pf.line());
        // hand the value to the next pipe through the per-line buffer
        // (takes the place of the former `*(pf.output()) = ...` form)
        mybuffer[pf.line()][pf.pipe()] = source[j1] + 1;
        j1++;
      }},

      // pipe 1 (serial): verify what pipe 0 left in the same line
      tf::Pipe{tf::PipeType::SERIAL, [N, L, &source, &j2, &mybuffer](auto& pf) {
        REQUIRE(j2 < N);
        REQUIRE(pf.token() % L == pf.line());
        REQUIRE(source[j2] + 1 == mybuffer[pf.line()][pf.pipe() - 1]);
        j2++;
      }}
    );

    auto pipeline = taskflow.composed_of(pl).name("module_of_pipeline");

    auto test = taskflow.emplace([&](){
      REQUIRE(j1 == N);
      REQUIRE(j2 == N);
      REQUIRE(pl.num_tokens() == cnt * N);
    }).name("test");

    pipeline.precede(test);

    // Run the taskflow three times; the callable resets the counters and the
    // buffer and bumps cnt.
    // NOTE(review): Taskflow documents the run_n callable as a completion
    // callback -- confirm how often it fires relative to the three runs.
    executor.run_n(taskflow, 3, [&]() {
      j1 = 0;
      j2 = 0;
      for(auto& row : mybuffer) {
        row.fill(0);
      }
      cnt++;
    }).get();
  }
}

// two pipes (SS)
// Every combination of 1-4 lines and 1-4 workers. Test names follow
// "Pipeline.2P(SS).<lines>L.<workers>W"; the two numbers are forwarded to
// pipeline_2P_SS as (L, w).
TEST_CASE("Pipeline.2P(SS).1L.1W" * doctest::timeout(300)) {
  pipeline_2P_SS(1, 1);
}

TEST_CASE("Pipeline.2P(SS).1L.2W" * doctest::timeout(300)) {
  pipeline_2P_SS(1, 2);
}

TEST_CASE("Pipeline.2P(SS).1L.3W" * doctest::timeout(300)) {
  pipeline_2P_SS(1, 3);
}

TEST_CASE("Pipeline.2P(SS).1L.4W" * doctest::timeout(300)) {
  pipeline_2P_SS(1, 4);
}

// two lines
TEST_CASE("Pipeline.2P(SS).2L.1W" * doctest::timeout(300)) {
  pipeline_2P_SS(2, 1);
}

TEST_CASE("Pipeline.2P(SS).2L.2W" * doctest::timeout(300)) {
  pipeline_2P_SS(2, 2);
}

TEST_CASE("Pipeline.2P(SS).2L.3W" * doctest::timeout(300)) {
  pipeline_2P_SS(2, 3);
}

TEST_CASE("Pipeline.2P(SS).2L.4W" * doctest::timeout(300)) {
  pipeline_2P_SS(2, 4);
}

// three lines
TEST_CASE("Pipeline.2P(SS).3L.1W" * doctest::timeout(300)) {
  pipeline_2P_SS(3, 1);
}

TEST_CASE("Pipeline.2P(SS).3L.2W" * doctest::timeout(300)) {
  pipeline_2P_SS(3, 2);
}

TEST_CASE("Pipeline.2P(SS).3L.3W" * doctest::timeout(300)) {
  pipeline_2P_SS(3, 3);
}

TEST_CASE("Pipeline.2P(SS).3L.4W" * doctest::timeout(300)) {
  pipeline_2P_SS(3, 4);
}

// four lines
TEST_CASE("Pipeline.2P(SS).4L.1W" * doctest::timeout(300)) {
  pipeline_2P_SS(4, 1);
}

TEST_CASE("Pipeline.2P(SS).4L.2W" * doctest::timeout(300)) {
  pipeline_2P_SS(4, 2);
}

TEST_CASE("Pipeline.2P(SS).4L.3W" * doctest::timeout(300)) {
  pipeline_2P_SS(4, 3);
}

TEST_CASE("Pipeline.2P(SS).4L.4W" * doctest::timeout(300)) {
  pipeline_2P_SS(4, 4);
}

// ----------------------------------------------------------------------------
// two pipes (SP), L lines, W workers
// ----------------------------------------------------------------------------
// Checks a two-pipe pipeline: a serial pipe followed by a parallel pipe.
//
// L - number of lines passed to tf::Pipeline
// w - number of workers passed to tf::Executor
//
// For each data amount N in [0, maxN]:
//   * pipe 0 (serial) consumes source[0..N), stores source[j1] + 1 in the
//     buffer slot of its line, and stops the pipeline once j1 reaches N;
//   * pipe 1 (parallel) counts its invocations in the atomic j2 and, under a
//     mutex, appends the value left by pipe 0 to `collection`.
// A "test" task placed after the pipeline module verifies the counters, the
// collected values (order-independent, hence the sort) and the token count.
void pipeline_2P_SP(size_t L, unsigned w) {

  tf::Executor executor(w);

  const size_t maxN = 100;

  // source[i] == i
  std::vector<int> source(maxN);
  std::iota(source.begin(), source.end(), 0);

  // one row per line, one column per pipe
  std::vector<std::array<int, 2>> mybuffer(L);

  for(size_t N = 0; N <= maxN; N++) {

    tf::Taskflow taskflow;
      
    size_t j1 = 0;                // items handled by the serial pipe
    std::atomic<size_t> j2 = 0;   // invocations of the parallel pipe
    std::mutex mutex;             // guards `collection`
    std::vector<int> collection;  // values seen by the parallel pipe
    size_t cnt = 1;               // multiplier for the expected token count

    tf::Pipeline pl(L,
      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j1, &mybuffer, L](auto& pf) mutable {
        if(j1 == N) {
          pf.stop();
          return;
        }
        REQUIRE(j1 == source[j1]);
        REQUIRE(pf.token() % L == pf.line());
        //*(pf.output()) = source[j1] + 1;
        mybuffer[pf.line()][pf.pipe()] = source[j1] + 1;
        j1++;
      }},

      tf::Pipe{tf::PipeType::PARALLEL, 
      [N, &collection, &mutex, &j2, &mybuffer, L](auto& pf) mutable {
        REQUIRE(j2++ < N);
        {
          std::scoped_lock<std::mutex> lock(mutex);
          REQUIRE(pf.token() % L == pf.line());
          collection.push_back(mybuffer[pf.line()][pf.pipe() - 1]);
        }
      }}
    );
    
    auto pipeline = taskflow.composed_of(pl).name("module_of_pipeline");
    auto test = taskflow.emplace([&](){
      REQUIRE(j1 == N);
      REQUIRE(j2 == N);
      // Guard the indexed reads below: without this check a short collection
      // would be read out of bounds instead of failing the test.
      REQUIRE(collection.size() == N);
      
      std::sort(collection.begin(), collection.end());
      for(size_t i = 0; i < N; i++) {
        REQUIRE(collection[i] == i + 1);
      }
      REQUIRE(pl.num_tokens() == cnt * N);
    }).name("test");
    
    pipeline.precede(test);
    
    // Run the taskflow three times; the callable resets the counters, the
    // collection and the buffer, and bumps cnt.
    // NOTE(review): Taskflow documents the run_n callable as a completion
    // callback -- confirm how often it fires relative to the three runs.
    executor.run_n(taskflow, 3, [&]() mutable {
      j1 = j2 = 0;
      collection.clear();
      for(auto& row : mybuffer) {
        row.fill(0);
      }
      cnt++;
    }).get();
  }
}

// two pipes (SP)
// Every combination of 1-4 lines and 1-4 workers. Test names follow
// "Pipeline.2P(SP).<lines>L.<workers>W"; the two numbers are forwarded to
// pipeline_2P_SP as (L, w).
TEST_CASE("Pipeline.2P(SP).1L.1W" * doctest::timeout(300)) {
  pipeline_2P_SP(1, 1);
}

TEST_CASE("Pipeline.2P(SP).1L.2W" * doctest::timeout(300)) {
  pipeline_2P_SP(1, 2);
}

TEST_CASE("Pipeline.2P(SP).1L.3W" * doctest::timeout(300)) {
  pipeline_2P_SP(1, 3);
}

TEST_CASE("Pipeline.2P(SP).1L.4W" * doctest::timeout(300)) {
  pipeline_2P_SP(1, 4);
}

// two lines
TEST_CASE("Pipeline.2P(SP).2L.1W" * doctest::timeout(300)) {
  pipeline_2P_SP(2, 1);
}

TEST_CASE("Pipeline.2P(SP).2L.2W" * doctest::timeout(300)) {
  pipeline_2P_SP(2, 2);
}

TEST_CASE("Pipeline.2P(SP).2L.3W" * doctest::timeout(300)) {
  pipeline_2P_SP(2, 3);
}

TEST_CASE("Pipeline.2P(SP).2L.4W" * doctest::timeout(300)) {
  pipeline_2P_SP(2, 4);
}

// three lines
TEST_CASE("Pipeline.2P(SP).3L.1W" * doctest::timeout(300)) {
  pipeline_2P_SP(3, 1);
}

TEST_CASE("Pipeline.2P(SP).3L.2W" * doctest::timeout(300)) {
  pipeline_2P_SP(3, 2);
}

TEST_CASE("Pipeline.2P(SP).3L.3W" * doctest::timeout(300)) {
  pipeline_2P_SP(3, 3);
}

TEST_CASE("Pipeline.2P(SP).3L.4W" * doctest::timeout(300)) {
  pipeline_2P_SP(3, 4);
}

// four lines
TEST_CASE("Pipeline.2P(SP).4L.1W" * doctest::timeout(300)) {
  pipeline_2P_SP(4, 1);
}

TEST_CASE("Pipeline.2P(SP).4L.2W" * doctest::timeout(300)) {
  pipeline_2P_SP(4, 2);
}

TEST_CASE("Pipeline.2P(SP).4L.3W" * doctest::timeout(300)) {
  pipeline_2P_SP(4, 3);
}

TEST_CASE("Pipeline.2P(SP).4L.4W" * doctest::timeout(300)) {
  pipeline_2P_SP(4, 4);
}

/*
// ----------------------------------------------------------------------------
// two pipes (PS), L lines, W workers
// ----------------------------------------------------------------------------

// TODO: need to discuss the interface
void pipeline_2P_PS(size_t L, unsigned w) {

  tf::Executor executor(w);

  const size_t maxN = 100;

  std::vector<int> source(maxN);
  std::iota(source.begin(), source.end(), 0);

  for(size_t N = 0; N <= maxN; N++) {

    tf::Taskflow taskflow;
      
    std::atomic<size_t> j1 = 0;
    size_t j2 = 0;
    std::mutex mutex;
    std::vector<int> collection1;
    std::vector<int> collection2;

    tf::Pipeline pl(L, 
      tf::Pipe{tf::PipeType::PARALLEL, 
      [N, &source, &j1, &mutex, &collection1](auto& pf) mutable {

        auto ticket = j1.fetch_add(1);

        if(ticket >= N) {
          pf.stop();
          return;
        }

        *(pf.output()) = source[ticket] + 1;
        {
          std::scoped_lock<std::mutex> lock(mutex);
          collection1.push_back(source[ticket]);
        }
      }},
      tf::Pipe{tf::PipeType::SERIAL, 
      [N, &collection2, &source, &j2](auto& pf) mutable {
        REQUIRE(j2 < N);
        collection2.push_back(*(pf.input()));
        j2++;
      }}
    );
    
    auto pipeline = taskflow.composed_of(pl).name("module_of_pipeline");
    auto test = taskflow.emplace([&](){
      
      REQUIRE(collection1.size() == N);
      REQUIRE(j2 == N);
      std::sort(collection1.begin(), collection1.end());
      std::sort(collection2.begin(), collection2.end());

      for(size_t i = 0; i < N; i++) {
        REQUIRE(collection1[i] == i);
        REQUIRE(collection2[i] == i+1);
      }
    }).name("test");

    pipeline.precede(test);

    executor.run_n(taskflow, 3, []() mutable {
      j1 = j2 = 0;
      collection1.clear();
      collection2.clear();
    }).get();

    //executor.run(taskflow).wait();
    //
    //REQUIRE(collection1.size() == N);
    //REQUIRE(j2 == N);

    //std::sort(collection1.begin(), collection1.end());
    //std::sort(collection2.begin(), collection2.end());

    //for(size_t i = 0; i < N; i++) {
    //  REQUIRE(collection1[i] == i);
    //  REQUIRE(collection2[i] == i+1);
    //}
    //
    //j1 = j2 = 0;
    //collection1.clear();
    //collection2.clear();
    //executor.run(taskflow).wait();
    //REQUIRE(collection1.size() == N);
    //REQUIRE(j2 == N);
    //std::sort(collection1.begin(), collection1.end());
    //std::sort(collection2.begin(), collection2.end());
    //for(size_t i = 0; i < N; i++) {
    //  REQUIRE(collection1[i] == i);
    //  REQUIRE(collection2[i] == i+1);
    //}
  }
}

// two pipes (PS)
//TEST_CASE("Pipeline.2P(PS).1L.1W" * doctest::timeout(300)) {
//  pipeline_2P_PS(1, 1);
//}
//
//TEST_CASE("Pipeline.2P(PS).1L.2W" * doctest::timeout(300)) {
//  pipeline_2P_PS(1, 2);
//}
//
//TEST_CASE("Pipeline.2P(PS).1L.3W" * doctest::timeout(300)) {
//  pipeline_2P_PS(1, 3);
//}
//
//TEST_CASE("Pipeline.2P(PS).1L.4W" * doctest::timeout(300)) {
//  pipeline_2P_PS(1, 4);
//}
//
//TEST_CASE("Pipeline.2P(PS).2L.1W" * doctest::timeout(300)) {
//  pipeline_2P_PS(2, 1);
//}
//
//TEST_CASE("Pipeline.2P(PS).2L.2W" * doctest::timeout(300)) {
//  pipeline_2P_PS(2, 2);
//}
//
//TEST_CASE("Pipeline.2P(PS).2L.3W" * doctest::timeout(300)) {
//  pipeline_2P_PS(2, 3);
//}
//
//TEST_CASE("Pipeline.2P(PS).2L.4W" * doctest::timeout(300)) {
//  pipeline_2P_PS(2, 4);
//}
//
//TEST_CASE("Pipeline.2P(PS).3L.1W" * doctest::timeout(300)) {
//  pipeline_2P_PS(3, 1);
//}
//
//TEST_CASE("Pipeline.2P(PS).3L.2W" * doctest::timeout(300)) {
//  pipeline_2P_PS(3, 2);
//}
//
//TEST_CASE("Pipeline.2P(PS).3L.3W" * doctest::timeout(300)) {
//  pipeline_2P_PS(3, 3);
//}
//
//TEST_CASE("Pipeline.2P(PS).3L.4W" * doctest::timeout(300)) {
//  pipeline_2P_PS(3, 4);
//}
//
//TEST_CASE("Pipeline.2P(PS).4L.1W" * doctest::timeout(300)) {
//  pipeline_2P_PS(4, 1);
//}
//
//TEST_CASE("Pipeline.2P(PS).4L.2W" * doctest::timeout(300)) {
//  pipeline_2P_PS(4, 2);
//}
//
//TEST_CASE("Pipeline.2P(PS).4L.3W" * doctest::timeout(300)) {
//  pipeline_2P_PS(4, 3);
//}
//
//TEST_CASE("Pipeline.2P(PS).4L.4W" * doctest::timeout(300)) {
//  pipeline_2P_PS(4, 4);
//}


// ----------------------------------------------------------------------------
// two pipes (PP), L lines, W workers
// ----------------------------------------------------------------------------
void pipeline_2P_PP(size_t L, unsigned w) {

  tf::Executor executor(w);

  const size_t maxN = 100;

  std::vector<int> source(maxN);
  std::iota(source.begin(), source.end(), 0);

  for(size_t N = 0; N <= maxN; N++) {

    tf::Taskflow taskflow;
      
    std::atomic<size_t> j1 = 0;
    std::atomic<size_t> j2 = 0;
    std::mutex mutex1;
    std::mutex mutex2;
    std::vector<int> collection1;
    std::vector<int> collection2;

    tf::Pipeline pl(L, 
      tf::Pipe{tf::PipeType::PARALLEL, 
      [N, &source, &j1, &mutex1, &collection1](auto& pf) mutable {
        auto ticket = j1.fetch_add(1);

        if(ticket >= N) {
          pf.stop();
          return;
        }

        *(pf.output()) = source[ticket] + 1;
        {
          std::scoped_lock<std::mutex> lock(mutex1);
          collection1.push_back(source[ticket]);
        }
      }},
      tf::Pipe{tf::PipeType::SERIAL, 
      [N, &collection2, &source, &j2, &mutex2](auto& pf) mutable {
        REQUIRE(j2++ < N);
        {
          std::scoped_lock<std::mutex> lock(mutex2);
          collection2.push_back(*(pf.input()));
        }
      }}
    );
    
    auto pipeline = taskflow.composed_of(pl).name("module_of_pipeline");
    auto test = taskflow.emplace([&](){
      REQUIRE(collection1.size() == N);
      REQUIRE(collection2.size() == N);

      std::sort(collection1.begin(), collection1.end());
      std::sort(collection2.begin(), collection2.end());

      for(size_t i = 0; i < N; i++) {
        REQUIRE(collection1[i] == i);
        REQUIRE(collection2[i] == i + 1);
      }
    }).name("test");
    
    pipeline.precede(test);

    executor.run_n(taskflow, 3, [&]() mutable {
      j1 = j2 = 0;

      collection1.clear();
      collection2.clear();
    }).get();
    
    //executor.run(taskflow).wait();
    //
    //REQUIRE(collection1.size() == N);
    //REQUIRE(collection2.size() == N);

    //std::sort(collection1.begin(), collection1.end());
    //std::sort(collection2.begin(), collection2.end());

    //for(size_t i = 0; i < N; i++) {
    //  REQUIRE(collection1[i] == i);
    //  REQUIRE(collection2[i] == i + 1);
    //}
    //
    //j1 = j2 = 0;
    //collection1.clear();
    //collection2.clear();
    //executor.run(taskflow).wait();
    //REQUIRE(collection1.size() == N);
    //REQUIRE(collection2.size() == N);
    //std::sort(collection1.begin(), collection1.end());
    //std::sort(collection2.begin(), collection2.end());
    //for(size_t i = 0; i < N; i++) {
    //  REQUIRE(collection1[i] == i);
    //  REQUIRE(collection2[i] == i + 1);
    //}
  }
}

// two pipes (PP)
//TEST_CASE("Pipeline.2P(PP).1L.1W" * doctest::timeout(300)) {
//  pipeline_2P_PP(1, 1);
//}
//
//TEST_CASE("Pipeline.2P(PP).1L.2W" * doctest::timeout(300)) {
//  pipeline_2P_PP(1, 2);
//}
//
//TEST_CASE("Pipeline.2P(PP).1L.3W" * doctest::timeout(300)) {
//  pipeline_2P_PP(1, 3);
//}
//
//TEST_CASE("Pipeline.2P(PP).1L.4W" * doctest::timeout(300)) {
//  pipeline_2P_PP(1, 4);
//}
//
//TEST_CASE("Pipeline.2P(PP).2L.1W" * doctest::timeout(300)) {
//  pipeline_2P_PP(2, 1);
//}
//
//TEST_CASE("Pipeline.2P(PP).2L.2W" * doctest::timeout(300)) {
//  pipeline_2P_PP(2, 2);
//}
//
//TEST_CASE("Pipeline.2P(PP).2L.3W" * doctest::timeout(300)) {
//  pipeline_2P_PP(2, 3);
//}
//
//TEST_CASE("Pipeline.2P(PP).2L.4W" * doctest::timeout(300)) {
//  pipeline_2P_PP(2, 4);
//}
//
//TEST_CASE("Pipeline.2P(PP).3L.1W" * doctest::timeout(300)) {
//  pipeline_2P_PP(3, 1);
//}
//
//TEST_CASE("Pipeline.2P(PP).3L.2W" * doctest::timeout(300)) {
//  pipeline_2P_PP(3, 2);
//}
//
//TEST_CASE("Pipeline.2P(PP).3L.3W" * doctest::timeout(300)) {
//  pipeline_2P_PP(3, 3);
//}
//
//TEST_CASE("Pipeline.2P(PP).3L.4W" * doctest::timeout(300)) {
//  pipeline_2P_PP(3, 4);
//}
//
//TEST_CASE("Pipeline.2P(PP).4L.1W" * doctest::timeout(300)) {
//  pipeline_2P_PP(4, 1);
//}
//
//TEST_CASE("Pipeline.2P(PP).4L.2W" * doctest::timeout(300)) {
//  pipeline_2P_PP(4, 2);
//}
//
//TEST_CASE("Pipeline.2P(PP).4L.3W" * doctest::timeout(300)) {
//  pipeline_2P_PP(4, 3);
//}
//
//TEST_CASE("Pipeline.2P(PP).4L.4W" * doctest::timeout(300)) {
//  pipeline_2P_PP(4, 4);
//}
*/

// ----------------------------------------------------------------------------
// three pipes (SSS), L lines, W workers
// ----------------------------------------------------------------------------
// Checks a three-pipe pipeline in which all pipes are serial.
//
// L - number of lines passed to tf::Pipeline
// w - number of workers passed to tf::Executor
//
// For each data amount N in [0, maxN]:
//   * pipe 0 consumes source[0..N), stores source[j1] + 1 in its buffer slot
//     and stops the pipeline once j1 reaches N;
//   * pipe 1 checks the slot written by pipe 0 and stores source[j2] + 1 in
//     its own slot;
//   * pipe 2 checks the slot written by pipe 1.
// A "test" task placed after the pipeline module verifies the three counters
// and the pipeline's token count.
void pipeline_3P_SSS(size_t L, unsigned w) {

  tf::Executor executor(w);

  const size_t maxN = 100;

  // source[i] == i
  std::vector<int> source(maxN);
  std::iota(source.begin(), source.end(), 0);

  // one row per line, one column per pipe
  std::vector<std::array<int, 3>> mybuffer(L);

  for(size_t N = 0; N <= maxN; ++N) {

    tf::Taskflow taskflow;

    size_t cnt = 1;  // multiplier for the expected token count
    size_t j1 = 0;   // items handled by pipe 0
    size_t j2 = 0;   // items handled by pipe 1
    size_t j3 = 0;   // items handled by pipe 2

    tf::Pipeline pl(
      L,

      // pipe 0 (serial): produce
      tf::Pipe{tf::PipeType::SERIAL, [N, L, &source, &j1, &mybuffer](auto& pf) {
        if(j1 == N) {
          pf.stop();
          return;
        }
        REQUIRE(j1 == source[j1]);
        REQUIRE(pf.token() % L == pf.line());
        // hand the value to the next pipe through the per-line buffer
        // (takes the place of the former `*(pf.output()) = ...` form)
        mybuffer[pf.line()][pf.pipe()] = source[j1] + 1;
        j1++;
      }},

      // pipe 1 (serial): verify pipe 0's value, then forward the same value
      tf::Pipe{tf::PipeType::SERIAL, [N, L, &source, &j2, &mybuffer](auto& pf) {
        REQUIRE(j2 < N);
        REQUIRE(source[j2] + 1 == mybuffer[pf.line()][pf.pipe() - 1]);
        REQUIRE(pf.token() % L == pf.line());
        mybuffer[pf.line()][pf.pipe()] = source[j2] + 1;
        j2++;
      }},

      // pipe 2 (serial): verify pipe 1's value
      tf::Pipe{tf::PipeType::SERIAL, [N, L, &source, &j3, &mybuffer](auto& pf) {
        REQUIRE(j3 < N);
        REQUIRE(source[j3] + 1 == mybuffer[pf.line()][pf.pipe() - 1]);
        REQUIRE(pf.token() % L == pf.line());
        j3++;
      }}
    );

    auto pipeline = taskflow.composed_of(pl).name("module_of_pipeline");

    auto test = taskflow.emplace([&](){
      REQUIRE(j1 == N);
      REQUIRE(j2 == N);
      REQUIRE(j3 == N);
      REQUIRE(pl.num_tokens() == cnt * N);
    }).name("test");

    pipeline.precede(test);

    // Run the taskflow three times; the callable resets the counters and the
    // buffer and bumps cnt.
    // NOTE(review): Taskflow documents the run_n callable as a completion
    // callback -- confirm how often it fires relative to the three runs.
    executor.run_n(taskflow, 3, [&]() {
      j3 = 0;
      j2 = 0;
      j1 = 0;
      for(auto& row : mybuffer) {
        row.fill(0);
      }
      cnt++;
    }).get();
  }
}

// three pipes (SSS)
// Every combination of 1-4 lines and 1-4 workers. Test names follow
// "Pipeline.3P(SSS).<lines>L.<workers>W"; the two numbers are forwarded to
// pipeline_3P_SSS as (L, w).
TEST_CASE("Pipeline.3P(SSS).1L.1W" * doctest::timeout(300)) {
  pipeline_3P_SSS(1, 1);
}

TEST_CASE("Pipeline.3P(SSS).1L.2W" * doctest::timeout(300)) {
  pipeline_3P_SSS(1, 2);
}

TEST_CASE("Pipeline.3P(SSS).1L.3W" * doctest::timeout(300)) {
  pipeline_3P_SSS(1, 3);
}

TEST_CASE("Pipeline.3P(SSS).1L.4W" * doctest::timeout(300)) {
  pipeline_3P_SSS(1, 4);
}

// two lines
TEST_CASE("Pipeline.3P(SSS).2L.1W" * doctest::timeout(300)) {
  pipeline_3P_SSS(2, 1);
}

TEST_CASE("Pipeline.3P(SSS).2L.2W" * doctest::timeout(300)) {
  pipeline_3P_SSS(2, 2);
}

TEST_CASE("Pipeline.3P(SSS).2L.3W" * doctest::timeout(300)) {
  pipeline_3P_SSS(2, 3);
}

TEST_CASE("Pipeline.3P(SSS).2L.4W" * doctest::timeout(300)) {
  pipeline_3P_SSS(2, 4);
}

// three lines
TEST_CASE("Pipeline.3P(SSS).3L.1W" * doctest::timeout(300)) {
  pipeline_3P_SSS(3, 1);
}

TEST_CASE("Pipeline.3P(SSS).3L.2W" * doctest::timeout(300)) {
  pipeline_3P_SSS(3, 2);
}

TEST_CASE("Pipeline.3P(SSS).3L.3W" * doctest::timeout(300)) {
  pipeline_3P_SSS(3, 3);
}

TEST_CASE("Pipeline.3P(SSS).3L.4W" * doctest::timeout(300)) {
  pipeline_3P_SSS(3, 4);
}

// four lines
TEST_CASE("Pipeline.3P(SSS).4L.1W" * doctest::timeout(300)) {
  pipeline_3P_SSS(4, 1);
}

TEST_CASE("Pipeline.3P(SSS).4L.2W" * doctest::timeout(300)) {
  pipeline_3P_SSS(4, 2);
}

TEST_CASE("Pipeline.3P(SSS).4L.3W" * doctest::timeout(300)) {
  pipeline_3P_SSS(4, 3);
}

TEST_CASE("Pipeline.3P(SSS).4L.4W" * doctest::timeout(300)) {
  pipeline_3P_SSS(4, 4);
}



// ----------------------------------------------------------------------------
// three pipes (SSP), L lines, W workers
// ----------------------------------------------------------------------------
// Checks a three-pipe pipeline: two serial pipes followed by a parallel pipe.
//
// L - number of lines passed to tf::Pipeline
// w - number of workers passed to tf::Executor
//
// For each data amount N in [0, maxN]:
//   * pipe 0 (serial) consumes source[0..N), stores source[j1] + 1 in its
//     buffer slot and stops the pipeline once j1 reaches N;
//   * pipe 1 (serial) checks the slot written by pipe 0 and stores
//     source[j2] + 1 in its own slot;
//   * pipe 2 (parallel) counts its invocations in the atomic j3 and, under a
//     mutex, appends the value left by pipe 1 to `collection`.
// A "test" task placed after the pipeline module verifies the counters, the
// collected values (order-independent, hence the sort) and the token count.
void pipeline_3P_SSP(size_t L, unsigned w) {

  tf::Executor executor(w);

  const size_t maxN = 100;

  // source[i] == i
  std::vector<int> source(maxN);
  std::iota(source.begin(), source.end(), 0);

  // one row per line, one column per pipe
  std::vector<std::array<int, 3>> mybuffer(L);

  for(size_t N = 0; N <= maxN; ++N) {

    tf::Taskflow taskflow;

    size_t cnt = 1;               // multiplier for the expected token count
    size_t j1 = 0;                // items handled by pipe 0
    size_t j2 = 0;                // items handled by pipe 1
    std::atomic<size_t> j3 = 0;   // invocations of the parallel pipe
    std::mutex mutex;             // guards `collection`
    std::vector<int> collection;  // values seen by the parallel pipe

    tf::Pipeline pl(
      L,

      // pipe 0 (serial): produce
      tf::Pipe{tf::PipeType::SERIAL, [N, L, &source, &j1, &mybuffer](auto& pf) {
        if(j1 == N) {
          pf.stop();
          return;
        }
        REQUIRE(j1 == source[j1]);
        REQUIRE(pf.token() % L == pf.line());
        // hand the value to the next pipe through the per-line buffer
        // (takes the place of the former `*(pf.output()) = ...` form)
        mybuffer[pf.line()][pf.pipe()] = source[j1] + 1;
        j1++;
      }},

      // pipe 1 (serial): verify pipe 0's value, then forward the same value
      tf::Pipe{tf::PipeType::SERIAL, [N, L, &source, &j2, &mybuffer](auto& pf) {
        REQUIRE(j2 < N);
        REQUIRE(source[j2] + 1 == mybuffer[pf.line()][pf.pipe() - 1]);
        REQUIRE(pf.token() % L == pf.line());
        mybuffer[pf.line()][pf.pipe()] = source[j2] + 1;
        j2++;
      }},

      // pipe 2 (parallel): record pipe 1's value
      tf::Pipe{tf::PipeType::PARALLEL, [N, L, &j3, &mutex, &collection, &mybuffer](auto& pf) {
        REQUIRE(j3++ < N);
        {
          std::scoped_lock<std::mutex> lock(mutex);
          REQUIRE(pf.token() % L == pf.line());  
          collection.push_back(mybuffer[pf.line()][pf.pipe() - 1]);
        }
      }}
    );

    auto pipeline = taskflow.composed_of(pl).name("module_of_pipeline");

    auto test = taskflow.emplace([&](){
      REQUIRE(j1 == N);
      REQUIRE(j2 == N);
      REQUIRE(j3 == N);
      REQUIRE(collection.size() == N);

      // the parallel pipe may append in any order; sort before comparing
      std::sort(collection.begin(), collection.end());
      for (size_t i = 0; i < N; ++i) {
        REQUIRE(collection[i] == i + 1);
      }
      REQUIRE(pl.num_tokens() == cnt * N);
    }).name("test");

    pipeline.precede(test);

    // Run the taskflow three times; the callable resets the counters, the
    // collection and the buffer, and bumps cnt.
    // NOTE(review): Taskflow documents the run_n callable as a completion
    // callback -- confirm how often it fires relative to the three runs.
    executor.run_n(taskflow, 3, [&](){
      j3 = 0;
      j2 = 0;
      j1 = 0;
      collection.clear();
      for(auto& row : mybuffer) {
        row.fill(0);
      }
      cnt++;
    }).get();
  }
}

// three pipes (SSP)
// Every combination of 1-4 lines and 1-4 workers. Test names follow
// "Pipeline.3P(SSP).<lines>L.<workers>W"; the two numbers are forwarded to
// pipeline_3P_SSP as (L, w).
TEST_CASE("Pipeline.3P(SSP).1L.1W" * doctest::timeout(300)) {
  pipeline_3P_SSP(1, 1);
}

TEST_CASE("Pipeline.3P(SSP).1L.2W" * doctest::timeout(300)) {
  pipeline_3P_SSP(1, 2);
}

TEST_CASE("Pipeline.3P(SSP).1L.3W" * doctest::timeout(300)) {
  pipeline_3P_SSP(1, 3);
}

TEST_CASE("Pipeline.3P(SSP).1L.4W" * doctest::timeout(300)) {
  pipeline_3P_SSP(1, 4);
}

// two lines
TEST_CASE("Pipeline.3P(SSP).2L.1W" * doctest::timeout(300)) {
  pipeline_3P_SSP(2, 1);
}

TEST_CASE("Pipeline.3P(SSP).2L.2W" * doctest::timeout(300)) {
  pipeline_3P_SSP(2, 2);
}

TEST_CASE("Pipeline.3P(SSP).2L.3W" * doctest::timeout(300)) {
  pipeline_3P_SSP(2, 3);
}

TEST_CASE("Pipeline.3P(SSP).2L.4W" * doctest::timeout(300)) {
  pipeline_3P_SSP(2, 4);
}

// three lines
TEST_CASE("Pipeline.3P(SSP).3L.1W" * doctest::timeout(300)) {
  pipeline_3P_SSP(3, 1);
}

TEST_CASE("Pipeline.3P(SSP).3L.2W" * doctest::timeout(300)) {
  pipeline_3P_SSP(3, 2);
}

TEST_CASE("Pipeline.3P(SSP).3L.3W" * doctest::timeout(300)) {
  pipeline_3P_SSP(3, 3);
}

TEST_CASE("Pipeline.3P(SSP).3L.4W" * doctest::timeout(300)) {
  pipeline_3P_SSP(3, 4);
}

// four lines
TEST_CASE("Pipeline.3P(SSP).4L.1W" * doctest::timeout(300)) {
  pipeline_3P_SSP(4, 1);
}

TEST_CASE("Pipeline.3P(SSP).4L.2W" * doctest::timeout(300)) {
  pipeline_3P_SSP(4, 2);
}

TEST_CASE("Pipeline.3P(SSP).4L.3W" * doctest::timeout(300)) {
  pipeline_3P_SSP(4, 3);
}

TEST_CASE("Pipeline.3P(SSP).4L.4W" * doctest::timeout(300)) {
  pipeline_3P_SSP(4, 4);
}



// ----------------------------------------------------------------------------
// three pipes (SPS), L lines, W workers
// ----------------------------------------------------------------------------
// Checks a three-pipe pipeline: serial, parallel, serial.
//
// L - number of lines passed to tf::Pipeline
// w - number of workers passed to tf::Executor
//
// For each data amount N in [0, maxN]:
//   * pipe 0 (serial) consumes source[0..N), stores source[j1] + 1 in its
//     buffer slot and stops the pipeline once j1 reaches N;
//   * pipe 1 (parallel) counts its invocations in the atomic j2 and, under a
//     mutex, stores pipe 0's value plus one in its own slot and appends
//     pipe 0's value to `collection`;
//   * pipe 2 (serial) checks that pipe 1's slot equals source[j3] + 2.
// A "test" task placed after the pipeline module verifies the counters, the
// collected values (order-independent, hence the sort) and the token count.
void pipeline_3P_SPS(size_t L, unsigned w) {

  tf::Executor executor(w);

  const size_t maxN = 100;

  // source[i] == i
  std::vector<int> source(maxN);
  std::iota(source.begin(), source.end(), 0);

  // one row per line, one column per pipe
  std::vector<std::array<int, 3>> mybuffer(L);

  for(size_t N = 0; N <= maxN; ++N) {

    tf::Taskflow taskflow;

    size_t cnt = 1;               // multiplier for the expected token count
    size_t j1 = 0;                // items handled by pipe 0
    std::atomic<size_t> j2 = 0;   // invocations of the parallel pipe
    size_t j3 = 0;                // items handled by pipe 2
    std::mutex mutex;             // guards `collection` and the buffer write
    std::vector<int> collection;  // values seen by the parallel pipe

    tf::Pipeline pl(
      L,

      // pipe 0 (serial): produce
      tf::Pipe{tf::PipeType::SERIAL, [N, L, &source, &j1, &mybuffer](auto& pf) {
        if(j1 == N) {
          pf.stop();
          return;
        }
        REQUIRE(j1 == source[j1]);
        REQUIRE(pf.token() % L == pf.line());
        // hand the value to the next pipe through the per-line buffer
        // (takes the place of the former `*(pf.output()) = ...` form)
        mybuffer[pf.line()][pf.pipe()] = source[j1] + 1;
        j1++;
      }},

      // pipe 1 (parallel): increment and forward, recording the input value
      tf::Pipe{tf::PipeType::PARALLEL, [N, L, &j2, &mutex, &collection, &mybuffer](auto& pf) {
        REQUIRE(j2++ < N);
        {
          std::scoped_lock<std::mutex> lock(mutex);
          auto& slots = mybuffer[pf.line()];
          const auto upstream = pf.pipe() - 1;
          slots[pf.pipe()] = slots[upstream] + 1;
          REQUIRE(pf.token() % L == pf.line());
          collection.push_back(slots[upstream]);
        }
      }},

      // pipe 2 (serial): verify pipe 1's value
      tf::Pipe{tf::PipeType::SERIAL, [N, L, &source, &j3, &mybuffer](auto& pf) {
        REQUIRE(j3 < N);
        REQUIRE(pf.token() % L == pf.line());
        REQUIRE(source[j3] + 2 == mybuffer[pf.line()][pf.pipe() - 1]);
        j3++;
      }}
    );

    auto pipeline = taskflow.composed_of(pl).name("module_of_pipeline");

    auto test = taskflow.emplace([&](){
      REQUIRE(j1 == N);
      REQUIRE(j2 == N);
      REQUIRE(j3 == N);
      REQUIRE(collection.size() == N);

      // the parallel pipe may append in any order; sort before comparing
      std::sort(collection.begin(), collection.end());
      for (size_t i = 0; i < N; ++i) {
        REQUIRE(collection[i] == i + 1);
      }
      REQUIRE(pl.num_tokens() == cnt * N);
    }).name("test");

    pipeline.precede(test);

    // Run the taskflow three times; the callable resets the counters, the
    // collection and the buffer, and bumps cnt.
    // NOTE(review): Taskflow documents the run_n callable as a completion
    // callback -- confirm how often it fires relative to the three runs.
    executor.run_n(taskflow, 3, [&]() {
      j3 = 0;
      j2 = 0;
      j1 = 0;
      collection.clear();
      for(auto& row : mybuffer) {
        row.fill(0);
      }
      cnt++;
    }).get();
  }
}

// three pipes (SPS)
//
// One doctest case per combination of 1..4 lines and 1..4 workers. Each case
// does nothing but forward its pair to pipeline_3P_SPS; the "<n>L.<m>W" suffix
// of the case name repeats the two arguments in call order (presumably line
// count, then worker count, as in the sibling helpers -- the helper's signature
// is declared earlier in the file).
// Every case is decorated with doctest::timeout(300).
TEST_CASE("Pipeline.3P(SPS).1L.1W" * doctest::timeout(300)) {
  pipeline_3P_SPS(1, 1);
}

TEST_CASE("Pipeline.3P(SPS).1L.2W" * doctest::timeout(300)) {
  pipeline_3P_SPS(1, 2);
}

TEST_CASE("Pipeline.3P(SPS).1L.3W" * doctest::timeout(300)) {
  pipeline_3P_SPS(1, 3);
}

TEST_CASE("Pipeline.3P(SPS).1L.4W" * doctest::timeout(300)) {
  pipeline_3P_SPS(1, 4);
}

TEST_CASE("Pipeline.3P(SPS).2L.1W" * doctest::timeout(300)) {
  pipeline_3P_SPS(2, 1);
}

TEST_CASE("Pipeline.3P(SPS).2L.2W" * doctest::timeout(300)) {
  pipeline_3P_SPS(2, 2);
}

TEST_CASE("Pipeline.3P(SPS).2L.3W" * doctest::timeout(300)) {
  pipeline_3P_SPS(2, 3);
}

TEST_CASE("Pipeline.3P(SPS).2L.4W" * doctest::timeout(300)) {
  pipeline_3P_SPS(2, 4);
}

TEST_CASE("Pipeline.3P(SPS).3L.1W" * doctest::timeout(300)) {
  pipeline_3P_SPS(3, 1);
}

TEST_CASE("Pipeline.3P(SPS).3L.2W" * doctest::timeout(300)) {
  pipeline_3P_SPS(3, 2);
}

TEST_CASE("Pipeline.3P(SPS).3L.3W" * doctest::timeout(300)) {
  pipeline_3P_SPS(3, 3);
}

TEST_CASE("Pipeline.3P(SPS).3L.4W" * doctest::timeout(300)) {
  pipeline_3P_SPS(3, 4);
}

TEST_CASE("Pipeline.3P(SPS).4L.1W" * doctest::timeout(300)) {
  pipeline_3P_SPS(4, 1);
}

TEST_CASE("Pipeline.3P(SPS).4L.2W" * doctest::timeout(300)) {
  pipeline_3P_SPS(4, 2);
}

TEST_CASE("Pipeline.3P(SPS).4L.3W" * doctest::timeout(300)) {
  pipeline_3P_SPS(4, 3);
}

TEST_CASE("Pipeline.3P(SPS).4L.4W" * doctest::timeout(300)) {
  pipeline_3P_SPS(4, 4);
}


// ----------------------------------------------------------------------------
// three pipes (SPP), L lines, W workers
// ----------------------------------------------------------------------------


void pipeline_3P_SPP(size_t L, unsigned w) {

  tf::Executor executor(w);

  const size_t maxN = 100;

  std::vector<int> source(maxN);
  std::iota(source.begin(), source.end(), 0);
  std::vector<std::array<int, 3>> mybuffer(L);

  for(size_t N = 0; N <= maxN; N++) {

    tf::Taskflow taskflow;
      
    size_t j1 = 0;
    std::atomic<size_t> j2 = 0;
    std::atomic<size_t> j3 = 0;
    std::mutex mutex2;
    std::mutex mutex3;
    std::vector<int> collection2;
    std::vector<int> collection3;
    size_t cnt = 1;

    tf::Pipeline pl(L, 
      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j1, &mybuffer, L](auto& pf) mutable {
        if(j1 == N) {
          pf.stop();
          return;
        }
        REQUIRE(j1 == source[j1]);
        REQUIRE(pf.token() % L == pf.line());
        //*(pf.output()) = source[j1] + 1;
        mybuffer[pf.line()][pf.pipe()] = source[j1] + 1;
        j1++;
      }},

      tf::Pipe{tf::PipeType::PARALLEL, [N, &j2, &mutex2, &collection2, &mybuffer, L](auto& pf) mutable {
        REQUIRE(j2++ < N);
        //*pf.output() = *pf.input() + 1;
        {
          std::scoped_lock<std::mutex> lock(mutex2);
          REQUIRE(pf.token() % L == pf.line());
          mybuffer[pf.line()][pf.pipe()] = mybuffer[pf.line()][pf.pipe() - 1] + 1;
          collection2.push_back(mybuffer[pf.line()][pf.pipe() - 1]);
        }
      }},

      tf::Pipe{tf::PipeType::PARALLEL, [N, &j3, &mutex3, &collection3, &mybuffer, L](auto& pf) mutable {
        REQUIRE(j3++ < N);
        {
          std::scoped_lock<std::mutex> lock(mutex3);
          REQUIRE(pf.token() % L == pf.line());
          collection3.push_back(mybuffer[pf.line()][pf.pipe() - 1]);
        }
      }}
    );
    
    auto pipeline = taskflow.composed_of(pl).name("module_of_pipeline");
    auto test = taskflow.emplace([&](){
      REQUIRE(j1 == N);
      REQUIRE(j2 == N);
      REQUIRE(j3 == N);
      REQUIRE(collection2.size() == N);
      REQUIRE(collection3.size() == N);
      
      std::sort(collection2.begin(), collection2.end());
      std::sort(collection3.begin(), collection3.end());
      for (size_t i = 0; i < N; ++i) {
        REQUIRE(collection2[i] == i + 1);
        REQUIRE(collection3[i] == i + 2);
      }
      REQUIRE(pl.num_tokens() == cnt * N);
    }).name("test");

    pipeline.precede(test);

    executor.run_n(taskflow, 3, [&]() mutable {
      j1 = j2 = j3 = 0;
      collection2.clear();
      collection3.clear();
      for(size_t i = 0; i < mybuffer.size(); ++i){
        for(size_t j = 0; j < mybuffer[0].size(); ++j){
          mybuffer[i][j] = 0;
        }
      }

      cnt++;
    }).get();
  }
}

// three pipes (SPP)
//
// One doctest case per combination of 1..4 lines and 1..4 workers. Each case
// forwards (L, w) to pipeline_3P_SPP, which builds its tf::Pipeline with L and
// its tf::Executor with w; the "<L>L.<w>W" suffix of the case name repeats the
// two arguments. Every case is decorated with doctest::timeout(300).
TEST_CASE("Pipeline.3P(SPP).1L.1W" * doctest::timeout(300)) {
  pipeline_3P_SPP(1, 1);
}

TEST_CASE("Pipeline.3P(SPP).1L.2W" * doctest::timeout(300)) {
  pipeline_3P_SPP(1, 2);
}

TEST_CASE("Pipeline.3P(SPP).1L.3W" * doctest::timeout(300)) {
  pipeline_3P_SPP(1, 3);
}

TEST_CASE("Pipeline.3P(SPP).1L.4W" * doctest::timeout(300)) {
  pipeline_3P_SPP(1, 4);
}

TEST_CASE("Pipeline.3P(SPP).2L.1W" * doctest::timeout(300)) {
  pipeline_3P_SPP(2, 1);
}

TEST_CASE("Pipeline.3P(SPP).2L.2W" * doctest::timeout(300)) {
  pipeline_3P_SPP(2, 2);
}

TEST_CASE("Pipeline.3P(SPP).2L.3W" * doctest::timeout(300)) {
  pipeline_3P_SPP(2, 3);
}

TEST_CASE("Pipeline.3P(SPP).2L.4W" * doctest::timeout(300)) {
  pipeline_3P_SPP(2, 4);
}

TEST_CASE("Pipeline.3P(SPP).3L.1W" * doctest::timeout(300)) {
  pipeline_3P_SPP(3, 1);
}

TEST_CASE("Pipeline.3P(SPP).3L.2W" * doctest::timeout(300)) {
  pipeline_3P_SPP(3, 2);
}

TEST_CASE("Pipeline.3P(SPP).3L.3W" * doctest::timeout(300)) {
  pipeline_3P_SPP(3, 3);
}

TEST_CASE("Pipeline.3P(SPP).3L.4W" * doctest::timeout(300)) {
  pipeline_3P_SPP(3, 4);
}

TEST_CASE("Pipeline.3P(SPP).4L.1W" * doctest::timeout(300)) {
  pipeline_3P_SPP(4, 1);
}

TEST_CASE("Pipeline.3P(SPP).4L.2W" * doctest::timeout(300)) {
  pipeline_3P_SPP(4, 2);
}

TEST_CASE("Pipeline.3P(SPP).4L.3W" * doctest::timeout(300)) {
  pipeline_3P_SPP(4, 3);
}

TEST_CASE("Pipeline.3P(SPP).4L.4W" * doctest::timeout(300)) {
  pipeline_3P_SPP(4, 4);
}

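/*
// NOTE(review): this opens a block comment, so everything below it is excluded
// from compilation until the matching terminator. None was found through the
// PSS, PSP, PPS and PPP sections or the pipeline_4P_SSSS helper that follow --
// confirm where the disabled region is supposed to end.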
// ----------------------------------------------------------------------------
// three pipes (PSS), L lines, W workers
// ----------------------------------------------------------------------------
// Builds a PARALLEL -> SERIAL -> SERIAL pipeline of L lines, runs it on an
// executor with w workers for every token count N in [0, maxN], and checks the
// stage counters and the tickets handed out by the first stage.
//
// NOTE(review): this helper appears to sit inside the block comment opened just
// before the PSS section banner, and its TEST_CASEs below are commented out as
// well, so it is presumably not compiled -- confirm.
// NOTE(review): data is exchanged through pf.input() and pf.output(); the
// enabled helpers in this file use an explicit mybuffer instead and keep the
// equivalent pf.output() lines only as comments, so this is presumably a
// superseded interface -- confirm before re-enabling.
void pipeline_3P_PSS(size_t L, unsigned w) {

  tf::Executor executor(w);

  const size_t maxN = 100;

  // source[k] == k
  std::vector<int> source(maxN);
  std::iota(source.begin(), source.end(), 0);

  for(size_t N=0; N<=maxN; N++) {

    tf::Taskflow taskflow;
      
    // j1 is shared by concurrent first-stage invocations; j2/j3 belong to the
    // serial stages
    std::atomic<size_t> j1 = 0;
    size_t j2 = 0, j3 = 0;
    std::mutex mutex;
    std::vector<int> collection;

    tf::Pipeline pl(L, 
      tf::Pipe{tf::PipeType::PARALLEL, 
      [N, &source, &j1, &collection, &mutex](auto& pf) mutable {
        // every invocation draws a ticket, including the ones that then stop
        auto ticket = j1.fetch_add(1);
        
        if(ticket >= N) {
          pf.stop();
          return;
        }
        {
          std::scoped_lock<std::mutex> lock(mutex);
          collection.push_back(ticket);
          // NOTE(review): the sibling first stages write source[ticket] + 1;
          // reading pf.input() in the first pipe looks suspicious -- confirm.
          *(pf.output()) = *(pf.input()) + 1;
        }
      }},
      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j2](auto& pf) mutable {
        REQUIRE(j2 < N);
        REQUIRE(source[j2] + 1 == *(pf.input()));
        *(pf.output()) = source[j2] + 1;
        j2++;
      }},
      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j3](auto& pf) mutable {
        REQUIRE(j3 < N);
        REQUIRE(source[j3] + 1 == *(pf.input()));
        j3++;
      }}
    );
    
    auto pipeline = taskflow.composed_of(pl).name("module_of_pipeline");
    // Runs after the pipeline module. j1 is not asserted: it also counts the
    // first-stage invocations that ended in pf.stop().
    auto test = taskflow.emplace([&](){
      REQUIRE(j2 == N);
      REQUIRE(j3 == N);
      REQUIRE(collection.size() == N);
      // tickets are appended in arbitrary order; compare sorted
      std::sort(collection.begin(), collection.end());
      for (size_t i = 0; i < N; i++) {
        REQUIRE(collection[i] == i);
      }
    }).name("test");
    
    pipeline.precede(test);

    // NOTE(review): the Taskflow documentation describes run_n as invoking the
    // callable once, after the last run -- confirm that a reset between runs
    // (as in the hand-unrolled version kept below) is not what was intended.
    executor.run_n(taskflow, 3, [&]() mutable {
      j1 = j2 = j3 = 0;
      collection.clear();
    }).get();
    
    // Earlier hand-unrolled version of the three runs, kept for reference.
    //executor.run(taskflow).wait();
    //REQUIRE(j2 == N);
    //REQUIRE(j3 == N);
    //REQUIRE(collection.size() == N);
    //std::sort(collection.begin(), collection.end());
    //for (size_t i = 0; i < N; i++) {
    //  REQUIRE(collection[i] == i);
    //}
   
    //
    //j1 = j2 = j3 = 0;
    //collection.clear();
    //executor.run(taskflow).wait();
    //REQUIRE(j2 == N);
    //REQUIRE(j3 == N);
    //REQUIRE(collection.size() == N);
    //std::sort(collection.begin(), collection.end());
    //for (size_t i = 0; i < N; i++) {
    //  REQUIRE(collection[i] == i);
    //}
    //
    //j1 = j2 = j3 = 0;
    //collection.clear();
    //executor.run(taskflow).wait();
    //REQUIRE(j2 == N);
    //REQUIRE(j3 == N);
    //REQUIRE(collection.size() == N);
    //std::sort(collection.begin(), collection.end());
    //for (size_t i = 0; i < N; i++) {
    //  REQUIRE(collection[i] == i);
    //}
  }
}

// three pipes (PSS)
//TEST_CASE("Pipeline.3P(PSS).1L.1W" * doctest::timeout(300)) {
//  pipeline_3P_PSS(1, 1);
//}
//
//TEST_CASE("Pipeline.3P(PSS).1L.2W" * doctest::timeout(300)) {
//  pipeline_3P_PSS(1, 2);
//}
//
//TEST_CASE("Pipeline.3P(PSS).1L.3W" * doctest::timeout(300)) {
//  pipeline_3P_PSS(1, 3);
//}
//
//TEST_CASE("Pipeline.3P(PSS).1L.4W" * doctest::timeout(300)) {
//  pipeline_3P_PSS(1, 4);
//}
//
//TEST_CASE("Pipeline.3P(PSS).2L.1W" * doctest::timeout(300)) {
//  pipeline_3P_PSS(2, 1);
//}
//
//TEST_CASE("Pipeline.3P(PSS).2L.2W" * doctest::timeout(300)) {
//  pipeline_3P_PSS(2, 2);
//}
//
//TEST_CASE("Pipeline.3P(PSS).2L.3W" * doctest::timeout(300)) {
//  pipeline_3P_PSS(2, 3);
//}
//
//TEST_CASE("Pipeline.3P(PSS).2L.4W" * doctest::timeout(300)) {
//  pipeline_3P_PSS(2, 4);
//}
//
//TEST_CASE("Pipeline.3P(PSS).3L.1W" * doctest::timeout(300)) {
//  pipeline_3P_PSS(3, 1);
//}
//
//TEST_CASE("Pipeline.3P(PSS).3L.2W" * doctest::timeout(300)) {
//  pipeline_3P_PSS(3, 2);
//}
//
//TEST_CASE("Pipeline.3P(PSS).3L.3W" * doctest::timeout(300)) {
//  pipeline_3P_PSS(3, 3);
//}
//
//TEST_CASE("Pipeline.3P(PSS).3L.4W" * doctest::timeout(300)) {
//  pipeline_3P_PSS(3, 4);
//}
//
//TEST_CASE("Pipeline.3P(PSS).4L.1W" * doctest::timeout(300)) {
//  pipeline_3P_PSS(4, 1);
//}
//
//TEST_CASE("Pipeline.3P(PSS).4L.2W" * doctest::timeout(300)) {
//  pipeline_3P_PSS(4, 2);
//}
//
//TEST_CASE("Pipeline.3P(PSS).4L.3W" * doctest::timeout(300)) {
//  pipeline_3P_PSS(4, 3);
//}
//
//TEST_CASE("Pipeline.3P(PSS).4L.4W" * doctest::timeout(300)) {
//  pipeline_3P_PSS(4, 4);
//}


// ----------------------------------------------------------------------------
// three pipes (PSP), L lines, W workers
// ----------------------------------------------------------------------------
// Builds a PARALLEL -> SERIAL -> PARALLEL pipeline of L lines, runs it on an
// executor with w workers for every token count N in [0, maxN], and checks the
// stage counters plus the values recorded by the first and third stages.
//
// NOTE(review): no terminator for the block comment opened before the PSS
// section appears between there and this helper, and the TEST_CASEs below are
// commented out, so this code is presumably not compiled -- confirm.
// NOTE(review): data is exchanged through pf.input() and pf.output(); the
// enabled helpers in this file use an explicit mybuffer instead, so this is
// presumably a superseded interface -- confirm before re-enabling.
void pipeline_3P_PSP(size_t L, unsigned w) {

  tf::Executor executor(w);

  const size_t maxN = 100;

  // source[k] == k
  std::vector<int> source(maxN);
  std::iota(source.begin(), source.end(), 0);

  for(size_t N = 0; N <= maxN; N++) {

    tf::Taskflow taskflow;
      
    // j1 and j3 are shared by concurrent invocations of the parallel stages;
    // j2 belongs to the serial stage
    std::atomic<size_t> j1 = 0;
    std::atomic<size_t> j3 = 0;
    size_t j2 = 0;
    std::mutex mutex1;
    std::mutex mutex3;
    std::vector<int> collection1;
    std::vector<int> collection3;

    tf::Pipeline pl(L, 
      tf::Pipe{tf::PipeType::PARALLEL, 
      [N, &source, &j1, &collection1, &mutex1](auto& pf) mutable {
        // every invocation draws a ticket, including the ones that then stop
        auto ticket = j1.fetch_add(1);
        
        if(ticket >= N) {
          pf.stop();
          return;
        }
        {
          std::scoped_lock<std::mutex> lock(mutex1);
          collection1.push_back(ticket);
          *(pf.output()) = source[ticket] + 1;
        }
      }},
      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j2](auto& pf) mutable {
        REQUIRE(j2 < N);
        REQUIRE(source[j2] + 1 == *(pf.input()));
        // forwards the same value it received (source[j2] + 1)
        *(pf.output()) = source[j2] + 1;
        j2++;
      }},
      tf::Pipe{tf::PipeType::PARALLEL, 
      [N, &source, &j3, &mutex3, &collection3](auto& pf) mutable {
        REQUIRE(j3++ < N);
        {
          std::scoped_lock<std::mutex> lock(mutex3);
          collection3.push_back(*(pf.input()));
        }
      }}
    );
    
    auto pipeline = taskflow.composed_of(pl).name("module_of_pipeline");
    // Runs after the pipeline module. j1 is not asserted: it also counts the
    // first-stage invocations that ended in pf.stop().
    auto test = taskflow.emplace([&](){
      REQUIRE(j2 == N);
      REQUIRE(j3 == N);
      REQUIRE(collection1.size() == N);
      REQUIRE(collection3.size() == N);
      // the parallel stages append in arbitrary order; compare sorted
      std::sort(collection1.begin(), collection1.end());
      std::sort(collection3.begin(), collection3.end());
      for (size_t i = 0; i < N; i++) {
        REQUIRE(collection1[i] == i);
        // NOTE(review): the second stage forwards source[j2] + 1, not + 2, so
        // an expectation of i + 2 looks inconsistent unless pf.output() and
        // pf.input() behave differently than their names suggest -- confirm.
        REQUIRE(collection3[i] == i + 2);
      }
    }).name("test");
    
    pipeline.precede(test);

    // NOTE(review): the Taskflow documentation describes run_n as invoking the
    // callable once, after the last run -- confirm that a reset between runs
    // (as in the hand-unrolled version kept below) is not what was intended.
    executor.run_n(taskflow, 3, [&]() mutable {
      j1 = j2 = j3 = 0;
      collection1.clear();
      collection3.clear();
    }).get();
    
    // Earlier hand-unrolled version of the three runs, kept for reference.
    //executor.run(taskflow).wait();
    //REQUIRE(j2 == N);
    //REQUIRE(j3 == N);
    //REQUIRE(collection1.size() == N);
    //REQUIRE(collection3.size() == N);
    //std::sort(collection1.begin(), collection1.end());
    //std::sort(collection3.begin(), collection3.end());
    //for (size_t i = 0; i < N; i++) {
    //  REQUIRE(collection1[i] == i);
    //  REQUIRE(collection3[i] == i + 2);
    //}
   
    //
    //j1 = j2 = j3 = 0;
    //collection1.clear();
    //collection3.clear();
    //executor.run(taskflow).wait();
    //REQUIRE(j2 == N);
    //REQUIRE(j3 == N);
    //REQUIRE(collection1.size() == N);
    //REQUIRE(collection3.size() == N);
    //std::sort(collection1.begin(), collection1.end());
    //std::sort(collection3.begin(), collection3.end());
    //for (size_t i = 0; i < N; i++) {
    //  REQUIRE(collection1[i] == i);
    //  REQUIRE(collection3[i] == i + 2);
    //}
    //
    //j1 = j2 = j3 = 0;
    //collection1.clear();
    //collection3.clear();
    //executor.run(taskflow).wait();
    //REQUIRE(j2 == N);
    //REQUIRE(j3 == N);
    //REQUIRE(collection1.size() == N);
    //REQUIRE(collection3.size() == N);
    //std::sort(collection1.begin(), collection1.end());
    //std::sort(collection3.begin(), collection3.end());
    //for (size_t i = 0; i < N; i++) {
    //  REQUIRE(collection1[i] == i);
    //  REQUIRE(collection3[i] == i + 2);
    //}
  }
}

// three pipes (PSP)
//TEST_CASE("Pipeline.3P(PSP).1L.1W" * doctest::timeout(300)) {
//  pipeline_3P_PSP(1, 1);
//}
//
//TEST_CASE("Pipeline.3P(PSP).1L.2W" * doctest::timeout(300)) {
//  pipeline_3P_PSP(1, 2);
//}
//
//TEST_CASE("Pipeline.3P(PSP).1L.3W" * doctest::timeout(300)) {
//  pipeline_3P_PSP(1, 3);
//}
//
//TEST_CASE("Pipeline.3P(PSP).1L.4W" * doctest::timeout(300)) {
//  pipeline_3P_PSP(1, 4);
//}
//
//TEST_CASE("Pipeline.3P(PSP).2L.1W" * doctest::timeout(300)) {
//  pipeline_3P_PSP(2, 1);
//}
//
//TEST_CASE("Pipeline.3P(PSP).2L.2W" * doctest::timeout(300)) {
//  pipeline_3P_PSP(2, 2);
//}
//
//TEST_CASE("Pipeline.3P(PSP).2L.3W" * doctest::timeout(300)) {
//  pipeline_3P_PSP(2, 3);
//}
//
//TEST_CASE("Pipeline.3P(PSP).2L.4W" * doctest::timeout(300)) {
//  pipeline_3P_PSP(2, 4);
//}
//
//TEST_CASE("Pipeline.3P(PSP).3L.1W" * doctest::timeout(300)) {
//  pipeline_3P_PSP(3, 1);
//}
//
//TEST_CASE("Pipeline.3P(PSP).3L.2W" * doctest::timeout(300)) {
//  pipeline_3P_PSP(3, 2);
//}
//
//TEST_CASE("Pipeline.3P(PSP).3L.3W" * doctest::timeout(300)) {
//  pipeline_3P_PSP(3, 3);
//}
//
//TEST_CASE("Pipeline.3P(PSP).3L.4W" * doctest::timeout(300)) {
//  pipeline_3P_PSP(3, 4);
//}
//
//TEST_CASE("Pipeline.3P(PSP).4L.1W" * doctest::timeout(300)) {
//  pipeline_3P_PSP(4, 1);
//}
//
//TEST_CASE("Pipeline.3P(PSP).4L.2W" * doctest::timeout(300)) {
//  pipeline_3P_PSP(4, 2);
//}
//
//TEST_CASE("Pipeline.3P(PSP).4L.3W" * doctest::timeout(300)) {
//  pipeline_3P_PSP(4, 3);
//}
//
//TEST_CASE("Pipeline.3P(PSP).4L.4W" * doctest::timeout(300)) {
//  pipeline_3P_PSP(4, 4);
//}


// ----------------------------------------------------------------------------
// three pipes (PPS), L lines, W workers
// ----------------------------------------------------------------------------


// Builds a PARALLEL -> PARALLEL -> SERIAL pipeline of L lines, runs it on an
// executor with w workers for every token count N in [0, maxN], and checks the
// stage counters plus the values recorded by the first and second stages.
//
// NOTE(review): no terminator for the block comment opened before the PSS
// section appears between there and this helper, and the TEST_CASEs below are
// commented out, so this code is presumably not compiled -- confirm.
// NOTE(review): data is exchanged through pf.input() and pf.output(); the
// enabled helpers in this file use an explicit mybuffer instead, so this is
// presumably a superseded interface -- confirm before re-enabling.
void pipeline_3P_PPS(size_t L, unsigned w) {

  tf::Executor executor(w);

  const size_t maxN = 100;

  // source[k] == k
  std::vector<int> source(maxN);
  std::iota(source.begin(), source.end(), 0);

  for(size_t N = 0; N <= maxN; N++) {

    tf::Taskflow taskflow;
      
    // j1 and j2 are shared by concurrent invocations of the parallel stages;
    // j3 belongs to the serial stage
    std::atomic<size_t> j1 = 0;
    std::atomic<size_t> j2 = 0;
    size_t j3 = 0;
    std::mutex mutex1;
    std::mutex mutex2;
    std::vector<int> collection1;
    std::vector<int> collection2;

    tf::Pipeline pl(L, 
      tf::Pipe{tf::PipeType::PARALLEL, 
      [N, &source, &j1, &collection1, &mutex1](auto& pf) mutable {
        // every invocation draws a ticket, including the ones that then stop
        auto ticket = j1.fetch_add(1);
        
        if(ticket >= N) {
          pf.stop();
          return;
        }
        {
          std::scoped_lock<std::mutex> lock(mutex1);
          collection1.push_back(ticket);
          *(pf.output()) = source[ticket] + 1;
        }
      }},
      tf::Pipe{tf::PipeType::PARALLEL, 
      [N, &source, &j2, &mutex2, &collection2](auto& pf) mutable {
        REQUIRE(j2++ < N);
        {
          std::scoped_lock<std::mutex> lock(mutex2);
          // record the incoming value first, then forward it incremented
          collection2.push_back(*(pf.input()));
          *(pf.output()) = *(pf.input()) + 1;
        }
      }},
      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j3](auto& pf) mutable {
        REQUIRE(j3 < N);
        // NOTE(review): the second stage forwards its input + 1, so a value of
        // source[j3] + 1 here looks inconsistent unless pf.output() and
        // pf.input() behave differently than their names suggest -- confirm.
        REQUIRE(source[j3] + 1 == *(pf.input()));
        j3++;
      }}
    );
    
    auto pipeline = taskflow.composed_of(pl).name("module_of_pipeline");
    // Runs after the pipeline module. j1 is not asserted: it also counts the
    // first-stage invocations that ended in pf.stop().
    auto test = taskflow.emplace([&](){
      REQUIRE(j2 == N);
      REQUIRE(j3 == N);
      REQUIRE(collection1.size() == N);
      REQUIRE(collection2.size() == N);
      // the parallel stages append in arbitrary order; compare sorted
      std::sort(collection1.begin(), collection1.end());
      std::sort(collection2.begin(), collection2.end());
      for (size_t i = 0; i < N; i++) {
        REQUIRE(collection1[i] == i);
        REQUIRE(collection2[i] == i + 1);
      }
    }).name("test");
    
    pipeline.precede(test);

    // NOTE(review): the Taskflow documentation describes run_n as invoking the
    // callable once, after the last run -- confirm that a reset between runs
    // (as in the hand-unrolled version kept below) is not what was intended.
    executor.run_n(taskflow, 3, [&]() mutable {
      j1 = j2 = j3 = 0;
      collection1.clear();
      collection2.clear();
    }).get();
    
    // Earlier hand-unrolled version of the three runs, kept for reference.
    //executor.run(taskflow).wait();
    //REQUIRE(j2 == N);
    //REQUIRE(j3 == N);
    //REQUIRE(collection1.size() == N);
    //REQUIRE(collection2.size() == N);
    //std::sort(collection1.begin(), collection1.end());
    //std::sort(collection2.begin(), collection2.end());
    //for (size_t i = 0; i < N; i++) {
    //  REQUIRE(collection1[i] == i);
    //  REQUIRE(collection2[i] == i + 1);
    //}
   
    //
    //j1 = j2 = j3 = 0;
    //collection1.clear();
    //collection2.clear();
    //executor.run(taskflow).wait();
    //REQUIRE(j2 == N);
    //REQUIRE(j3 == N);
    //REQUIRE(collection1.size() == N);
    //REQUIRE(collection2.size() == N);
    //std::sort(collection1.begin(), collection1.end());
    //std::sort(collection2.begin(), collection2.end());
    //for (size_t i = 0; i < N; i++) {
    //  REQUIRE(collection1[i] == i);
    //  REQUIRE(collection2[i] == i + 1);
    //}
    //
    //j1 = j2 = j3 = 0;
    //collection1.clear();
    //collection2.clear();
    //executor.run(taskflow).wait();
    //REQUIRE(j2 == N);
    //REQUIRE(j3 == N);
    //REQUIRE(collection1.size() == N);
    //REQUIRE(collection2.size() == N);
    //std::sort(collection1.begin(), collection1.end());
    //std::sort(collection2.begin(), collection2.end());
    //for (size_t i = 0; i < N; i++) {
    //  REQUIRE(collection1[i] == i);
    //  REQUIRE(collection2[i] == i + 1);
    //}
  }
}

// three pipes (PPS)
//TEST_CASE("Pipeline.3P(PPS).1L.1W" * doctest::timeout(300)) {
//  pipeline_3P_PPS(1, 1);
//}
//
//TEST_CASE("Pipeline.3P(PPS).1L.2W" * doctest::timeout(300)) {
//  pipeline_3P_PPS(1, 2);
//}
//
//TEST_CASE("Pipeline.3P(PPS).1L.3W" * doctest::timeout(300)) {
//  pipeline_3P_PPS(1, 3);
//}
//
//TEST_CASE("Pipeline.3P(PPS).1L.4W" * doctest::timeout(300)) {
//  pipeline_3P_PPS(1, 4);
//}
//
//TEST_CASE("Pipeline.3P(PPS).2L.1W" * doctest::timeout(300)) {
//  pipeline_3P_PPS(2, 1);
//}
//
//TEST_CASE("Pipeline.3P(PPS).2L.2W" * doctest::timeout(300)) {
//  pipeline_3P_PPS(2, 2);
//}
//
//TEST_CASE("Pipeline.3P(PPS).2L.3W" * doctest::timeout(300)) {
//  pipeline_3P_PPS(2, 3);
//}
//
//TEST_CASE("Pipeline.3P(PPS).2L.4W" * doctest::timeout(300)) {
//  pipeline_3P_PPS(2, 4);
//}
//
//TEST_CASE("Pipeline.3P(PPS).3L.1W" * doctest::timeout(300)) {
//  pipeline_3P_PPS(3, 1);
//}
//
//TEST_CASE("Pipeline.3P(PPS).3L.2W" * doctest::timeout(300)) {
//  pipeline_3P_PPS(3, 2);
//}
//
//TEST_CASE("Pipeline.3P(PPS).3L.3W" * doctest::timeout(300)) {
//  pipeline_3P_PPS(3, 3);
//}
//
//TEST_CASE("Pipeline.3P(PPS).3L.4W" * doctest::timeout(300)) {
//  pipeline_3P_PPS(3, 4);
//}
//
//TEST_CASE("Pipeline.3P(PPS).4L.1W" * doctest::timeout(300)) {
//  pipeline_3P_PPS(4, 1);
//}
//
//TEST_CASE("Pipeline.3P(PPS).4L.2W" * doctest::timeout(300)) {
//  pipeline_3P_PPS(4, 2);
//}
//
//TEST_CASE("Pipeline.3P(PPS).4L.3W" * doctest::timeout(300)) {
//  pipeline_3P_PPS(4, 3);
//}
//
//TEST_CASE("Pipeline.3P(PPS).4L.4W" * doctest::timeout(300)) {
//  pipeline_3P_PPS(4, 4);
//}


// ----------------------------------------------------------------------------
// three pipes (PPP), L lines, W workers
// ----------------------------------------------------------------------------


// Builds a PARALLEL -> PARALLEL -> PARALLEL pipeline of L lines, runs it on an
// executor with w workers for every token count N in [0, maxN], and checks the
// stage counters plus the values recorded by each of the three stages.
//
// NOTE(review): no terminator for the block comment opened before the PSS
// section appears between there and this helper, and the TEST_CASEs below are
// commented out, so this code is presumably not compiled -- confirm.
// NOTE(review): data is exchanged through pf.input() and pf.output(); the
// enabled helpers in this file use an explicit mybuffer instead, so this is
// presumably a superseded interface -- confirm before re-enabling.
void pipeline_3P_PPP(size_t L, unsigned w) {

  tf::Executor executor(w);

  const size_t maxN = 100;

  // source[k] == k
  std::vector<int> source(maxN);
  std::iota(source.begin(), source.end(), 0);

  for(size_t N = 0; N <= maxN; N++) {

    tf::Taskflow taskflow;
      
    // all three counters are shared by concurrent stage invocations
    std::atomic<size_t> j1 = 0;
    std::atomic<size_t> j2 = 0;
    std::atomic<size_t> j3 = 0;
    // one mutex per collection
    std::mutex mutex1;
    std::mutex mutex2;
    std::mutex mutex3;
    std::vector<int> collection1;
    std::vector<int> collection2;
    std::vector<int> collection3;

    tf::Pipeline pl(L, 
      tf::Pipe{tf::PipeType::PARALLEL, 
      [N, &source, &j1, &collection1, &mutex1](auto& pf) mutable {
        // every invocation draws a ticket, including the ones that then stop
        auto ticket = j1.fetch_add(1);
        
        if(ticket >= N) {
          pf.stop();
          return;
        }
        {
          std::scoped_lock<std::mutex> lock(mutex1);
          collection1.push_back(ticket);
          *(pf.output()) = source[ticket] + 1;
        }
      }},
      tf::Pipe{tf::PipeType::PARALLEL, 
      [N, &source, &j2, &mutex2, &collection2](auto& pf) mutable {
        REQUIRE(j2++ < N);
        {
          std::scoped_lock<std::mutex> lock(mutex2);
          // record the incoming value first, then forward it incremented
          collection2.push_back(*(pf.input()));
          *(pf.output()) = *(pf.input()) + 1;
        }
      }},
      tf::Pipe{tf::PipeType::PARALLEL, 
      [N, &source, &j3, &mutex3, &collection3](auto& pf) mutable {
        REQUIRE(j3++ < N);
        {
          std::scoped_lock<std::mutex> lock(mutex3);
          collection3.push_back(*(pf.input()));
        }
      }}
    );
    
    auto pipeline = taskflow.composed_of(pl).name("module_of_pipeline");
    // Runs after the pipeline module. j1 is not asserted: it also counts the
    // first-stage invocations that ended in pf.stop().
    auto test = taskflow.emplace([&](){
      REQUIRE(j2 == N);
      REQUIRE(j3 == N);
      REQUIRE(collection1.size() == N);
      REQUIRE(collection2.size() == N);
      REQUIRE(collection3.size() == N);
      // the stages append in arbitrary order; compare sorted
      std::sort(collection1.begin(), collection1.end());
      std::sort(collection2.begin(), collection2.end());
      std::sort(collection3.begin(), collection3.end());
      for (size_t i = 0; i < N; i++) {
        // tickets, tickets + 1 and tickets + 2 respectively
        REQUIRE(collection1[i] == i);
        REQUIRE(collection2[i] == i + 1);
        REQUIRE(collection3[i] == i + 2);
      }
    }).name("test");
    
    pipeline.precede(test);

    // NOTE(review): the Taskflow documentation describes run_n as invoking the
    // callable once, after the last run -- confirm that a reset between runs
    // (as in the hand-unrolled version kept below) is not what was intended.
    executor.run_n(taskflow, 3, [&](){
      j1 = j2 = j3 = 0;
      collection1.clear();
      collection2.clear();
      collection3.clear();
    }).get();
    
    // Earlier hand-unrolled version of the three runs, kept for reference.
    //executor.run(taskflow).wait();
    //REQUIRE(j2 == N);
    //REQUIRE(j3 == N);
    //REQUIRE(collection1.size() == N);
    //REQUIRE(collection2.size() == N);
    //REQUIRE(collection3.size() == N);
    //std::sort(collection1.begin(), collection1.end());
    //std::sort(collection2.begin(), collection2.end());
    //std::sort(collection3.begin(), collection3.end());
    //for (size_t i = 0; i < N; i++) {
    //  REQUIRE(collection1[i] == i);
    //  REQUIRE(collection2[i] == i + 1);
    //  REQUIRE(collection3[i] == i + 2);
    //}
   
    //
    //j1 = j2 = j3 = 0;
    //collection1.clear();
    //collection2.clear();
    //collection3.clear();
    //executor.run(taskflow).wait();
    //REQUIRE(j2 == N);
    //REQUIRE(j3 == N);
    //REQUIRE(collection1.size() == N);
    //REQUIRE(collection2.size() == N);
    //REQUIRE(collection3.size() == N);
    //std::sort(collection1.begin(), collection1.end());
    //std::sort(collection2.begin(), collection2.end());
    //std::sort(collection3.begin(), collection3.end());
    //for (size_t i = 0; i < N; i++) {
    //  REQUIRE(collection1[i] == i);
    //  REQUIRE(collection2[i] == i + 1);
    //  REQUIRE(collection3[i] == i + 2);
    //}
    //
    //j1 = j2 = j3 = 0;
    //collection1.clear();
    //collection2.clear();
    //collection3.clear();
    //executor.run(taskflow).wait();
    //REQUIRE(j2 == N);
    //REQUIRE(j3 == N);
    //REQUIRE(collection1.size() == N);
    //REQUIRE(collection2.size() == N);
    //REQUIRE(collection3.size() == N);
    //std::sort(collection1.begin(), collection1.end());
    //std::sort(collection2.begin(), collection2.end());
    //std::sort(collection3.begin(), collection3.end());
    //for (size_t i = 0; i < N; i++) {
    //  REQUIRE(collection1[i] == i);
    //  REQUIRE(collection2[i] == i + 1);
    //  REQUIRE(collection3[i] == i + 2);
    //}
  }
}

// three pipes (PPP)
//TEST_CASE("Pipeline.3P(PPP).1L.1W" * doctest::timeout(300)) {
//  pipeline_3P_PPP(1, 1);
//}
//
//TEST_CASE("Pipeline.3P(PPP).1L.2W" * doctest::timeout(300)) {
//  pipeline_3P_PPP(1, 2);
//}
//
//TEST_CASE("Pipeline.3P(PPP).1L.3W" * doctest::timeout(300)) {
//  pipeline_3P_PPP(1, 3);
//}
//
//TEST_CASE("Pipeline.3P(PPP).1L.4W" * doctest::timeout(300)) {
//  pipeline_3P_PPP(1, 4);
//}
//
//TEST_CASE("Pipeline.3P(PPP).2L.1W" * doctest::timeout(300)) {
//  pipeline_3P_PPP(2, 1);
//}
//
//TEST_CASE("Pipeline.3P(PPP).2L.2W" * doctest::timeout(300)) {
//  pipeline_3P_PPP(2, 2);
//}
//
//TEST_CASE("Pipeline.3P(PPP).2L.3W" * doctest::timeout(300)) {
//  pipeline_3P_PPP(2, 3);
//}
//
//TEST_CASE("Pipeline.3P(PPP).2L.4W" * doctest::timeout(300)) {
//  pipeline_3P_PPP(2, 4);
//}
//
//TEST_CASE("Pipeline.3P(PPP).3L.1W" * doctest::timeout(300)) {
//  pipeline_3P_PPP(3, 1);
//}
//
//TEST_CASE("Pipeline.3P(PPP).3L.2W" * doctest::timeout(300)) {
//  pipeline_3P_PPP(3, 2);
//}
//
//TEST_CASE("Pipeline.3P(PPP).3L.3W" * doctest::timeout(300)) {
//  pipeline_3P_PPP(3, 3);
//}
//
//TEST_CASE("Pipeline.3P(PPP).3L.4W" * doctest::timeout(300)) {
//  pipeline_3P_PPP(3, 4);
//}
//
//TEST_CASE("Pipeline.3P(PPP).4L.1W" * doctest::timeout(300)) {
//  pipeline_3P_PPP(4, 1);
//}
//
//TEST_CASE("Pipeline.3P(PPP).4L.2W" * doctest::timeout(300)) {
//  pipeline_3P_PPP(4, 2);
//}
//
//TEST_CASE("Pipeline.3P(PPP).4L.3W" * doctest::timeout(300)) {
//  pipeline_3P_PPP(4, 3);
//}
//
//TEST_CASE("Pipeline.3P(PPP).4L.4W" * doctest::timeout(300)) {
//  pipeline_3P_PPP(4, 4);
//}


// ----------------------------------------------------------------------------
// four pipes (SSSS), L lines, W workers
// ----------------------------------------------------------------------------


void pipeline_4P_SSSS(size_t L, unsigned w) {

  tf::Executor executor(w);

  const size_t maxN = 100;

  std::vector<int> source(maxN);
  std::iota(source.begin(), source.end(), 0);
  std::vector<std::array<int, 4>> mybuffer(L);

  for(size_t N = 0; N <= maxN; N++) {

    tf::Taskflow taskflow;
      
    size_t j1 = 0, j2 = 0, j3 = 0, j4 = 0;
    size_t cnt = 1;

    tf::Pipeline pl(L, 
      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j1, &mybuffer, L](auto& pf) mutable {
        if(j1 == N) {
          pf.stop();
          return;
        }
        REQUIRE(j1 == source[j1]);
        REQUIRE(pf.token() % L == pf.line());
        // *(pf.output()) = source[j1] + 1;
        mybuffer[pf.line()][pf.pipe()] = source[j1] + 1;
        j1++;
      }},

      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j2, &mybuffer, L](auto& pf) mutable {
        REQUIRE(j2 < N);
        REQUIRE(pf.token() % L == pf.line());
        REQUIRE(source[j2] + 1 == mybuffer[pf.line()][pf.pipe() - 1]);
        // REQUIRE(source[j2] + 1 == *(pf.input()));
        // *(pf.output()) = source[j2] + 1;
        mybuffer[pf.line()][pf.pipe()] = source[j2] + 1;
        j2++;
      }},

      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j3, &mybuffer, L](auto& pf) mutable {
        REQUIRE(j3 < N);
        REQUIRE(pf.token() % L == pf.line());
        // REQUIRE(source[j3] + 1 == *(pf.input()));
        REQUIRE(source[j3] + 1 == mybuffer[pf.line()][pf.pipe() - 1]);
        // *(pf.output()) = source[j3] + 1;
        mybuffer[pf.line()][pf.pipe()] = source[j3] + 1;
        j3++;
      }},

      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j4, &mybuffer, L](auto& pf) mutable {
        REQUIRE(j4 < N);
        REQUIRE(pf.token() % L == pf.line());
        // REQUIRE(source[j4] + 1 == *(pf.input()));
        REQUIRE(source[j4] + 1 == mybuffer[pf.line()][pf.pipe() - 1]);
        j4++;
      }}
    );
    
    tf::Task pipeline = taskflow.composed_of(pl).name("module_of_pipeline");
    tf::Task test = taskflow.emplace([&](){
      REQUIRE(j1 == N);
      REQUIRE(j2 == N);
      REQUIRE(j3 == N);
      REQUIRE(j4 == N);
      REQUIRE(pl.num_tokens() == cnt * N);
    }).name("test");
    
    pipeline.precede(test);
    
    //taskflow.dump(std::cout); 
    executor.run_n(taskflow, 3, [&]() mutable {
      j1 = j2 = j3 = j4 = 0;
      for(size_t i = 0; i < mybuffer.size(); ++i){
        for(size_t j = 0; j < mybuffer[0].size(); ++j){
          mybuffer[i][j] = 0;
        }
      }
      cnt++;
    }).get();
    
    //executor.run(taskflow).wait();
    //REQUIRE(j1 == N);
    //REQUIRE(j2 == N);
    //REQUIRE(j3 == N);
    //REQUIRE(j4 == N);
    //REQUIRE(pl.num_tokens() == N);
    //
    //j1 = j2 = j3 = j4 = 0;
    //for(size_t i = 0; i < mybuffer.size(); ++i){
    //  for(size_t j = 0; j < mybuffer[0].size(); ++j){
    //    mybuffer[i][j] = 0;
    //  }
    //}
    //executor.run(taskflow).wait();
    //REQUIRE(j1 == N);
    //REQUIRE(j2 == N);
    //REQUIRE(j3 == N);
    //REQUIRE(j4 == N);
    //REQUIRE(pl.num_tokens() == 2 * N);
    //
    //j1 = j2 = j3 = j4 = 0;
    //for(size_t i = 0; i < mybuffer.size(); ++i){
    //  for(size_t j = 0; j < mybuffer[0].size(); ++j){
    //    mybuffer[i][j] = 0;
    //  }
    //}
    //executor.run(taskflow).wait();
    //REQUIRE(j1 == N);
    //REQUIRE(j2 == N);
    //REQUIRE(j3 == N);
    //REQUIRE(j4 == N);
    //REQUIRE(pl.num_tokens() == 3 * N);
  }
}

// four pipes (SSSS)
// 4P(SSSS), 1L: cases 1W..8W call pipeline_4P_SSSS(1, W); each is decorated with doctest::timeout(300).
TEST_CASE("Pipeline.4P(SSSS).1L.1W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(1, 1);
}

TEST_CASE("Pipeline.4P(SSSS).1L.2W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(1, 2);
}

TEST_CASE("Pipeline.4P(SSSS).1L.3W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(1, 3);
}

TEST_CASE("Pipeline.4P(SSSS).1L.4W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(1, 4);
}

TEST_CASE("Pipeline.4P(SSSS).1L.5W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(1, 5);
}

TEST_CASE("Pipeline.4P(SSSS).1L.6W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(1, 6);
}

TEST_CASE("Pipeline.4P(SSSS).1L.7W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(1, 7);
}

TEST_CASE("Pipeline.4P(SSSS).1L.8W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(1, 8);
}

// 4P(SSSS), 2L: cases 1W..8W call pipeline_4P_SSSS(2, W); each is decorated with doctest::timeout(300).
TEST_CASE("Pipeline.4P(SSSS).2L.1W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(2, 1);
}

TEST_CASE("Pipeline.4P(SSSS).2L.2W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(2, 2);
}

TEST_CASE("Pipeline.4P(SSSS).2L.3W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(2, 3);
}

TEST_CASE("Pipeline.4P(SSSS).2L.4W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(2, 4);
}

TEST_CASE("Pipeline.4P(SSSS).2L.5W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(2, 5);
}

TEST_CASE("Pipeline.4P(SSSS).2L.6W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(2, 6);
}

TEST_CASE("Pipeline.4P(SSSS).2L.7W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(2, 7);
}

TEST_CASE("Pipeline.4P(SSSS).2L.8W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(2, 8);
}

// 4P(SSSS), 3L: cases 1W..8W call pipeline_4P_SSSS(3, W); each is decorated with doctest::timeout(300).
TEST_CASE("Pipeline.4P(SSSS).3L.1W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(3, 1);
}

TEST_CASE("Pipeline.4P(SSSS).3L.2W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(3, 2);
}

TEST_CASE("Pipeline.4P(SSSS).3L.3W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(3, 3);
}

TEST_CASE("Pipeline.4P(SSSS).3L.4W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(3, 4);
}

TEST_CASE("Pipeline.4P(SSSS).3L.5W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(3, 5);
}

TEST_CASE("Pipeline.4P(SSSS).3L.6W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(3, 6);
}

TEST_CASE("Pipeline.4P(SSSS).3L.7W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(3, 7);
}

TEST_CASE("Pipeline.4P(SSSS).3L.8W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(3, 8);
}

// 4P(SSSS), 4L: cases 1W..8W call pipeline_4P_SSSS(4, W); each is decorated with doctest::timeout(300).
TEST_CASE("Pipeline.4P(SSSS).4L.1W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(4, 1);
}

TEST_CASE("Pipeline.4P(SSSS).4L.2W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(4, 2);
}

TEST_CASE("Pipeline.4P(SSSS).4L.3W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(4, 3);
}

TEST_CASE("Pipeline.4P(SSSS).4L.4W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(4, 4);
}

TEST_CASE("Pipeline.4P(SSSS).4L.5W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(4, 5);
}

TEST_CASE("Pipeline.4P(SSSS).4L.6W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(4, 6);
}

TEST_CASE("Pipeline.4P(SSSS).4L.7W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(4, 7);
}

TEST_CASE("Pipeline.4P(SSSS).4L.8W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(4, 8);
}

// 4P(SSSS), 5L: cases 1W..8W call pipeline_4P_SSSS(5, W); each is decorated with doctest::timeout(300).
TEST_CASE("Pipeline.4P(SSSS).5L.1W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(5, 1);
}

TEST_CASE("Pipeline.4P(SSSS).5L.2W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(5, 2);
}

TEST_CASE("Pipeline.4P(SSSS).5L.3W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(5, 3);
}

TEST_CASE("Pipeline.4P(SSSS).5L.4W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(5, 4);
}

TEST_CASE("Pipeline.4P(SSSS).5L.5W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(5, 5);
}

TEST_CASE("Pipeline.4P(SSSS).5L.6W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(5, 6);
}

TEST_CASE("Pipeline.4P(SSSS).5L.7W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(5, 7);
}

TEST_CASE("Pipeline.4P(SSSS).5L.8W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(5, 8);
}

// 4P(SSSS), 6L: cases 1W..8W call pipeline_4P_SSSS(6, W); each is decorated with doctest::timeout(300).
TEST_CASE("Pipeline.4P(SSSS).6L.1W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(6, 1);
}

TEST_CASE("Pipeline.4P(SSSS).6L.2W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(6, 2);
}

TEST_CASE("Pipeline.4P(SSSS).6L.3W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(6, 3);
}

TEST_CASE("Pipeline.4P(SSSS).6L.4W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(6, 4);
}

TEST_CASE("Pipeline.4P(SSSS).6L.5W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(6, 5);
}

TEST_CASE("Pipeline.4P(SSSS).6L.6W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(6, 6);
}

TEST_CASE("Pipeline.4P(SSSS).6L.7W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(6, 7);
}

TEST_CASE("Pipeline.4P(SSSS).6L.8W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(6, 8);
}

// 4P(SSSS), 7L: cases 1W..8W call pipeline_4P_SSSS(7, W); each is decorated with doctest::timeout(300).
TEST_CASE("Pipeline.4P(SSSS).7L.1W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(7, 1);
}

TEST_CASE("Pipeline.4P(SSSS).7L.2W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(7, 2);
}

TEST_CASE("Pipeline.4P(SSSS).7L.3W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(7, 3);
}

TEST_CASE("Pipeline.4P(SSSS).7L.4W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(7, 4);
}

TEST_CASE("Pipeline.4P(SSSS).7L.5W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(7, 5);
}

TEST_CASE("Pipeline.4P(SSSS).7L.6W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(7, 6);
}

TEST_CASE("Pipeline.4P(SSSS).7L.7W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(7, 7);
}

TEST_CASE("Pipeline.4P(SSSS).7L.8W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(7, 8);
}

// 4P(SSSS), 8L: cases 1W..8W call pipeline_4P_SSSS(8, W); each is decorated with doctest::timeout(300).
TEST_CASE("Pipeline.4P(SSSS).8L.1W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(8, 1);
}

TEST_CASE("Pipeline.4P(SSSS).8L.2W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(8, 2);
}

TEST_CASE("Pipeline.4P(SSSS).8L.3W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(8, 3);
}

TEST_CASE("Pipeline.4P(SSSS).8L.4W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(8, 4);
}

TEST_CASE("Pipeline.4P(SSSS).8L.5W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(8, 5);
}

TEST_CASE("Pipeline.4P(SSSS).8L.6W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(8, 6);
}

TEST_CASE("Pipeline.4P(SSSS).8L.7W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(8, 7);
}

TEST_CASE("Pipeline.4P(SSSS).8L.8W" * doctest::timeout(300)) {
  pipeline_4P_SSSS(8, 8);
}

// ----------------------------------------------------------------------------
// four pipes (SSSP), L lines, W workers
// ----------------------------------------------------------------------------


// Drives a four-pipe pipeline (SERIAL, SERIAL, SERIAL, PARALLEL) with L lines
// on an executor with w workers, once for every token count N in [0, maxN].
//
// Pipes hand data to each other through mybuffer[line][pipe]: pipe p reads
// column p - 1 and (except for the last pipe) writes column p. The three serial
// pipes forward source[j] + 1; the parallel pipe appends what it receives to
// `collection`, which the downstream "test" task sorts and compares to 1..N.
//
// @param L  number of pipeline lines (also the number of rows in mybuffer)
// @param w  number of workers given to the executor
void pipeline_4P_SSSP(size_t L, unsigned w) {

  tf::Executor executor(w);

  const size_t maxN = 100;

  // source[i] == i
  std::vector<int> source(maxN);
  std::iota(source.begin(), source.end(), 0);

  // one row per line, one column per pipe
  std::vector<std::array<int, 4>> mybuffer(L);

  for(size_t N = 0; N <= maxN; N++) {

    tf::Taskflow taskflow;

    // per-pipe counters of processed tokens; j4 is atomic because it is
    // incremented from the PARALLEL pipe
    size_t j1 = 0, j2 = 0, j3 = 0;
    std::atomic<size_t> j4 = 0;

    // guards `collection`, which is appended to from the PARALLEL pipe
    std::mutex mutex;
    std::vector<int> collection;

    tf::Pipeline pl(L,
      // pipe 0: emits N tokens, then stops the pipeline
      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j1, &mybuffer, L](auto& pf) mutable {
        if(j1 == N) {
          pf.stop();
          return;
        }
        REQUIRE(j1 == static_cast<size_t>(source[j1]));
        REQUIRE(pf.token() % L == pf.line());
        mybuffer[pf.line()][pf.pipe()] = source[j1] + 1;
        j1++;
      }},

      // pipe 1: checks the value left by pipe 0 and forwards it
      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j2, &mybuffer, L](auto& pf) mutable {
        REQUIRE(j2 < N);
        REQUIRE(pf.token() % L == pf.line());
        REQUIRE(source[j2] + 1 == mybuffer[pf.line()][pf.pipe() - 1]);
        mybuffer[pf.line()][pf.pipe()] = source[j2] + 1;
        j2++;
      }},

      // pipe 2: checks the value left by pipe 1 and forwards it
      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j3, &mybuffer, L](auto& pf) mutable {
        REQUIRE(j3 < N);
        REQUIRE(pf.token() % L == pf.line());
        REQUIRE(source[j3] + 1 == mybuffer[pf.line()][pf.pipe() - 1]);
        mybuffer[pf.line()][pf.pipe()] = source[j3] + 1;
        j3++;
      }},

      // pipe 3: records the value left by pipe 2; arrival order is not checked
      // here, the "test" task sorts before comparing
      tf::Pipe{tf::PipeType::PARALLEL, [N, &j4, &mutex, &collection, &mybuffer, L](auto& pf) mutable {
        REQUIRE(j4++ < N);
        {
          std::scoped_lock<std::mutex> lock(mutex);
          REQUIRE(pf.token() % L == pf.line());
          collection.push_back(mybuffer[pf.line()][pf.pipe() - 1]);
        }
      }}
    );

    auto pipeline = taskflow.composed_of(pl).name("module_of_pipeline");

    // runs after the pipeline module and validates counters and collected data
    auto test = taskflow.emplace([&](){
      REQUIRE(j1 == N);
      REQUIRE(j2 == N);
      REQUIRE(j3 == N);
      REQUIRE(j4 == N);
      REQUIRE(collection.size() == N);
      std::sort(collection.begin(), collection.end());
      for (size_t i = 0; i < N; ++i) {
        REQUIRE(static_cast<size_t>(collection[i]) == i + 1);
      }
      REQUIRE(pl.num_tokens() == N);
    }).name("test");

    pipeline.precede(test);

    // NOTE(review): the Taskflow documentation describes the run_n callable as
    // being invoked after the runs complete, so this reset presumably does not
    // happen between the 3 runs - confirm against Executor::run_n.
    executor.run_n(taskflow, 3, [&]() mutable {
      j1 = 0;
      j2 = 0;
      j3 = 0;
      j4 = 0;
      collection.clear();
      for(auto& row : mybuffer) {
        row.fill(0);
      }
    }).get();
  }
}

// four pipes (SSSP)
// 4P(SSSP), 1L: cases 1W..8W call pipeline_4P_SSSP(1, W); each is decorated with doctest::timeout(300).
TEST_CASE("Pipeline.4P(SSSP).1L.1W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(1, 1);
}

TEST_CASE("Pipeline.4P(SSSP).1L.2W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(1, 2);
}

TEST_CASE("Pipeline.4P(SSSP).1L.3W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(1, 3);
}

TEST_CASE("Pipeline.4P(SSSP).1L.4W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(1, 4);
}

TEST_CASE("Pipeline.4P(SSSP).1L.5W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(1, 5);
}

TEST_CASE("Pipeline.4P(SSSP).1L.6W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(1, 6);
}

TEST_CASE("Pipeline.4P(SSSP).1L.7W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(1, 7);
}

TEST_CASE("Pipeline.4P(SSSP).1L.8W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(1, 8);
}

// 4P(SSSP), 2L: cases 1W..8W call pipeline_4P_SSSP(2, W); each is decorated with doctest::timeout(300).
TEST_CASE("Pipeline.4P(SSSP).2L.1W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(2, 1);
}

TEST_CASE("Pipeline.4P(SSSP).2L.2W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(2, 2);
}

TEST_CASE("Pipeline.4P(SSSP).2L.3W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(2, 3);
}

TEST_CASE("Pipeline.4P(SSSP).2L.4W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(2, 4);
}

TEST_CASE("Pipeline.4P(SSSP).2L.5W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(2, 5);
}

TEST_CASE("Pipeline.4P(SSSP).2L.6W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(2, 6);
}

TEST_CASE("Pipeline.4P(SSSP).2L.7W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(2, 7);
}

TEST_CASE("Pipeline.4P(SSSP).2L.8W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(2, 8);
}

// 4P(SSSP), 3L: cases 1W..8W call pipeline_4P_SSSP(3, W); each is decorated with doctest::timeout(300).
TEST_CASE("Pipeline.4P(SSSP).3L.1W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(3, 1);
}

TEST_CASE("Pipeline.4P(SSSP).3L.2W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(3, 2);
}

TEST_CASE("Pipeline.4P(SSSP).3L.3W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(3, 3);
}

TEST_CASE("Pipeline.4P(SSSP).3L.4W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(3, 4);
}

TEST_CASE("Pipeline.4P(SSSP).3L.5W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(3, 5);
}

TEST_CASE("Pipeline.4P(SSSP).3L.6W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(3, 6);
}

TEST_CASE("Pipeline.4P(SSSP).3L.7W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(3, 7);
}

TEST_CASE("Pipeline.4P(SSSP).3L.8W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(3, 8);
}

// 4P(SSSP), 4L: cases 1W..8W call pipeline_4P_SSSP(4, W); each is decorated with doctest::timeout(300).
TEST_CASE("Pipeline.4P(SSSP).4L.1W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(4, 1);
}

TEST_CASE("Pipeline.4P(SSSP).4L.2W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(4, 2);
}

TEST_CASE("Pipeline.4P(SSSP).4L.3W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(4, 3);
}

TEST_CASE("Pipeline.4P(SSSP).4L.4W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(4, 4);
}

TEST_CASE("Pipeline.4P(SSSP).4L.5W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(4, 5);
}

TEST_CASE("Pipeline.4P(SSSP).4L.6W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(4, 6);
}

TEST_CASE("Pipeline.4P(SSSP).4L.7W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(4, 7);
}

TEST_CASE("Pipeline.4P(SSSP).4L.8W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(4, 8);
}

// 4P(SSSP), 5L: cases 1W..8W call pipeline_4P_SSSP(5, W); each is decorated with doctest::timeout(300).
TEST_CASE("Pipeline.4P(SSSP).5L.1W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(5, 1);
}

TEST_CASE("Pipeline.4P(SSSP).5L.2W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(5, 2);
}

TEST_CASE("Pipeline.4P(SSSP).5L.3W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(5, 3);
}

TEST_CASE("Pipeline.4P(SSSP).5L.4W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(5, 4);
}

TEST_CASE("Pipeline.4P(SSSP).5L.5W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(5, 5);
}

TEST_CASE("Pipeline.4P(SSSP).5L.6W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(5, 6);
}

TEST_CASE("Pipeline.4P(SSSP).5L.7W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(5, 7);
}

TEST_CASE("Pipeline.4P(SSSP).5L.8W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(5, 8);
}

// 4P(SSSP), 6L: cases 1W..8W call pipeline_4P_SSSP(6, W); each is decorated with doctest::timeout(300).
TEST_CASE("Pipeline.4P(SSSP).6L.1W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(6, 1);
}

TEST_CASE("Pipeline.4P(SSSP).6L.2W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(6, 2);
}

TEST_CASE("Pipeline.4P(SSSP).6L.3W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(6, 3);
}

TEST_CASE("Pipeline.4P(SSSP).6L.4W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(6, 4);
}

TEST_CASE("Pipeline.4P(SSSP).6L.5W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(6, 5);
}

TEST_CASE("Pipeline.4P(SSSP).6L.6W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(6, 6);
}

TEST_CASE("Pipeline.4P(SSSP).6L.7W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(6, 7);
}

TEST_CASE("Pipeline.4P(SSSP).6L.8W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(6, 8);
}

// 4P(SSSP), 7L: cases 1W..8W call pipeline_4P_SSSP(7, W); each is decorated with doctest::timeout(300).
TEST_CASE("Pipeline.4P(SSSP).7L.1W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(7, 1);
}

TEST_CASE("Pipeline.4P(SSSP).7L.2W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(7, 2);
}

TEST_CASE("Pipeline.4P(SSSP).7L.3W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(7, 3);
}

TEST_CASE("Pipeline.4P(SSSP).7L.4W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(7, 4);
}

TEST_CASE("Pipeline.4P(SSSP).7L.5W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(7, 5);
}

TEST_CASE("Pipeline.4P(SSSP).7L.6W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(7, 6);
}

TEST_CASE("Pipeline.4P(SSSP).7L.7W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(7, 7);
}

TEST_CASE("Pipeline.4P(SSSP).7L.8W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(7, 8);
}

// 4P(SSSP), 8L: cases 1W..8W call pipeline_4P_SSSP(8, W); each is decorated with doctest::timeout(300).
TEST_CASE("Pipeline.4P(SSSP).8L.1W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(8, 1);
}

TEST_CASE("Pipeline.4P(SSSP).8L.2W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(8, 2);
}

TEST_CASE("Pipeline.4P(SSSP).8L.3W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(8, 3);
}

TEST_CASE("Pipeline.4P(SSSP).8L.4W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(8, 4);
}

TEST_CASE("Pipeline.4P(SSSP).8L.5W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(8, 5);
}

TEST_CASE("Pipeline.4P(SSSP).8L.6W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(8, 6);
}

TEST_CASE("Pipeline.4P(SSSP).8L.7W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(8, 7);
}

TEST_CASE("Pipeline.4P(SSSP).8L.8W" * doctest::timeout(300)) {
  pipeline_4P_SSSP(8, 8);
}

// ----------------------------------------------------------------------------
// four pipes (SSPS), L lines, W workers
// ----------------------------------------------------------------------------

// Drives a four-pipe pipeline (SERIAL, SERIAL, PARALLEL, SERIAL) with L lines
// on an executor with w workers, once for every token count N in [0, maxN].
//
// Pipes hand data to each other through mybuffer[line][pipe]: pipe p reads
// column p - 1 and (except for the last pipe) writes column p. The two leading
// serial pipes forward source[j] + 1; the parallel pipe records its input in
// `collection` and forwards input + 1; the trailing serial pipe therefore
// expects source[j] + 2.
//
// @param L  number of pipeline lines (also the number of rows in mybuffer)
// @param w  number of workers given to the executor
void pipeline_4P_SSPS(size_t L, unsigned w) {

  tf::Executor executor(w);

  const size_t maxN = 100;

  // source[i] == i
  std::vector<int> source(maxN);
  std::iota(source.begin(), source.end(), 0);

  // one row per line, one column per pipe
  std::vector<std::array<int, 4>> mybuffer(L);

  for(size_t N = 0; N <= maxN; N++) {

    tf::Taskflow taskflow;

    // per-pipe counters of processed tokens; j3 is atomic because it is
    // incremented from the PARALLEL pipe
    size_t j1 = 0, j2 = 0, j4 = 0;
    std::atomic<size_t> j3 = 0;

    // guards `collection`, which is appended to from the PARALLEL pipe
    std::mutex mutex;
    std::vector<int> collection;

    tf::Pipeline pl(L,
      // pipe 0: emits N tokens, then stops the pipeline
      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j1, &mybuffer, L](auto& pf) mutable {
        if(j1 == N) {
          pf.stop();
          return;
        }
        REQUIRE(j1 == static_cast<size_t>(source[j1]));
        REQUIRE(pf.token() % L == pf.line());
        mybuffer[pf.line()][pf.pipe()] = source[j1] + 1;
        j1++;
      }},

      // pipe 1: checks the value left by pipe 0 and forwards it
      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j2, &mybuffer, L](auto& pf) mutable {
        REQUIRE(j2 < N);
        REQUIRE(pf.token() % L == pf.line());
        REQUIRE(source[j2] + 1 == mybuffer[pf.line()][pf.pipe() - 1]);
        mybuffer[pf.line()][pf.pipe()] = source[j2] + 1;
        j2++;
      }},

      // pipe 2: records its input and forwards input + 1
      tf::Pipe{tf::PipeType::PARALLEL, [N, &j3, &mutex, &collection, &mybuffer, L](auto& pf) mutable {
        REQUIRE(j3++ < N);
        {
          std::scoped_lock<std::mutex> lock(mutex);
          REQUIRE(pf.token() % L == pf.line());
          collection.push_back(mybuffer[pf.line()][pf.pipe() - 1]);
          mybuffer[pf.line()][pf.pipe()] = mybuffer[pf.line()][pf.pipe() - 1] + 1;
        }
      }},

      // pipe 3: checks the incremented value left by pipe 2
      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j4, &mybuffer, L](auto& pf) mutable {
        REQUIRE(j4 < N);
        REQUIRE(pf.token() % L == pf.line());
        REQUIRE(source[j4] + 2 == mybuffer[pf.line()][pf.pipe() - 1]);
        j4++;
      }}
    );

    auto pipeline = taskflow.composed_of(pl).name("module_of_pipeline");

    // runs after the pipeline module and validates counters and collected data
    auto test = taskflow.emplace([&](){
      REQUIRE(j1 == N);
      REQUIRE(j2 == N);
      REQUIRE(j3 == N);
      REQUIRE(j4 == N);
      REQUIRE(collection.size() == N);
      std::sort(collection.begin(), collection.end());
      for (size_t i = 0; i < N; ++i) {
        REQUIRE(static_cast<size_t>(collection[i]) == i + 1);
      }
      REQUIRE(pl.num_tokens() == N);
    }).name("test");

    pipeline.precede(test);

    // NOTE(review): the Taskflow documentation describes the run_n callable as
    // being invoked after the runs complete, so this reset presumably does not
    // happen between the 3 runs - confirm against Executor::run_n.
    executor.run_n(taskflow, 3, [&]() mutable {
      j1 = 0;
      j2 = 0;
      j3 = 0;
      j4 = 0;
      for(auto& row : mybuffer) {
        row.fill(0);
      }
      collection.clear();
    }).get();
  }
}

// four pipes (SSPS)
// 4P(SSPS), 1L: cases 1W..8W call pipeline_4P_SSPS(1, W); each is decorated with doctest::timeout(300).
TEST_CASE("Pipeline.4P(SSPS).1L.1W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(1, 1);
}

TEST_CASE("Pipeline.4P(SSPS).1L.2W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(1, 2);
}

TEST_CASE("Pipeline.4P(SSPS).1L.3W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(1, 3);
}

TEST_CASE("Pipeline.4P(SSPS).1L.4W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(1, 4);
}

TEST_CASE("Pipeline.4P(SSPS).1L.5W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(1, 5);
}

TEST_CASE("Pipeline.4P(SSPS).1L.6W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(1, 6);
}

TEST_CASE("Pipeline.4P(SSPS).1L.7W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(1, 7);
}

TEST_CASE("Pipeline.4P(SSPS).1L.8W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(1, 8);
}

// 4P(SSPS), 2L: cases 1W..8W call pipeline_4P_SSPS(2, W); each is decorated with doctest::timeout(300).
TEST_CASE("Pipeline.4P(SSPS).2L.1W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(2, 1);
}

TEST_CASE("Pipeline.4P(SSPS).2L.2W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(2, 2);
}

TEST_CASE("Pipeline.4P(SSPS).2L.3W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(2, 3);
}

TEST_CASE("Pipeline.4P(SSPS).2L.4W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(2, 4);
}

TEST_CASE("Pipeline.4P(SSPS).2L.5W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(2, 5);
}

TEST_CASE("Pipeline.4P(SSPS).2L.6W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(2, 6);
}

TEST_CASE("Pipeline.4P(SSPS).2L.7W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(2, 7);
}

TEST_CASE("Pipeline.4P(SSPS).2L.8W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(2, 8);
}

// 4P(SSPS), 3L: cases 1W..8W call pipeline_4P_SSPS(3, W); each is decorated with doctest::timeout(300).
TEST_CASE("Pipeline.4P(SSPS).3L.1W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(3, 1);
}

TEST_CASE("Pipeline.4P(SSPS).3L.2W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(3, 2);
}

TEST_CASE("Pipeline.4P(SSPS).3L.3W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(3, 3);
}

TEST_CASE("Pipeline.4P(SSPS).3L.4W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(3, 4);
}

TEST_CASE("Pipeline.4P(SSPS).3L.5W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(3, 5);
}

TEST_CASE("Pipeline.4P(SSPS).3L.6W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(3, 6);
}

TEST_CASE("Pipeline.4P(SSPS).3L.7W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(3, 7);
}

TEST_CASE("Pipeline.4P(SSPS).3L.8W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(3, 8);
}

// 4P(SSPS), 4L: cases 1W..8W call pipeline_4P_SSPS(4, W); each is decorated with doctest::timeout(300).
TEST_CASE("Pipeline.4P(SSPS).4L.1W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(4, 1);
}

TEST_CASE("Pipeline.4P(SSPS).4L.2W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(4, 2);
}

TEST_CASE("Pipeline.4P(SSPS).4L.3W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(4, 3);
}

TEST_CASE("Pipeline.4P(SSPS).4L.4W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(4, 4);
}

TEST_CASE("Pipeline.4P(SSPS).4L.5W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(4, 5);
}

TEST_CASE("Pipeline.4P(SSPS).4L.6W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(4, 6);
}

TEST_CASE("Pipeline.4P(SSPS).4L.7W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(4, 7);
}

TEST_CASE("Pipeline.4P(SSPS).4L.8W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(4, 8);
}

// 4P(SSPS), 5L: cases 1W..8W call pipeline_4P_SSPS(5, W); each is decorated with doctest::timeout(300).
TEST_CASE("Pipeline.4P(SSPS).5L.1W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(5, 1);
}

TEST_CASE("Pipeline.4P(SSPS).5L.2W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(5, 2);
}

TEST_CASE("Pipeline.4P(SSPS).5L.3W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(5, 3);
}

TEST_CASE("Pipeline.4P(SSPS).5L.4W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(5, 4);
}

TEST_CASE("Pipeline.4P(SSPS).5L.5W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(5, 5);
}

TEST_CASE("Pipeline.4P(SSPS).5L.6W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(5, 6);
}

TEST_CASE("Pipeline.4P(SSPS).5L.7W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(5, 7);
}

TEST_CASE("Pipeline.4P(SSPS).5L.8W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(5, 8);
}

// 4P(SSPS), 6L: cases 1W..8W call pipeline_4P_SSPS(6, W); each is decorated with doctest::timeout(300).
TEST_CASE("Pipeline.4P(SSPS).6L.1W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(6, 1);
}

TEST_CASE("Pipeline.4P(SSPS).6L.2W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(6, 2);
}

TEST_CASE("Pipeline.4P(SSPS).6L.3W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(6, 3);
}

TEST_CASE("Pipeline.4P(SSPS).6L.4W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(6, 4);
}

TEST_CASE("Pipeline.4P(SSPS).6L.5W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(6, 5);
}

TEST_CASE("Pipeline.4P(SSPS).6L.6W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(6, 6);
}

TEST_CASE("Pipeline.4P(SSPS).6L.7W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(6, 7);
}

TEST_CASE("Pipeline.4P(SSPS).6L.8W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(6, 8);
}

// 4P(SSPS), 7L: cases 1W..8W call pipeline_4P_SSPS(7, W); each is decorated with doctest::timeout(300).
TEST_CASE("Pipeline.4P(SSPS).7L.1W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(7, 1);
}

TEST_CASE("Pipeline.4P(SSPS).7L.2W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(7, 2);
}

TEST_CASE("Pipeline.4P(SSPS).7L.3W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(7, 3);
}

TEST_CASE("Pipeline.4P(SSPS).7L.4W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(7, 4);
}

TEST_CASE("Pipeline.4P(SSPS).7L.5W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(7, 5);
}

TEST_CASE("Pipeline.4P(SSPS).7L.6W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(7, 6);
}

TEST_CASE("Pipeline.4P(SSPS).7L.7W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(7, 7);
}

TEST_CASE("Pipeline.4P(SSPS).7L.8W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(7, 8);
}

// 4P(SSPS), 8L: cases 1W..8W call pipeline_4P_SSPS(8, W); each is decorated with doctest::timeout(300).
TEST_CASE("Pipeline.4P(SSPS).8L.1W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(8, 1);
}

TEST_CASE("Pipeline.4P(SSPS).8L.2W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(8, 2);
}

TEST_CASE("Pipeline.4P(SSPS).8L.3W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(8, 3);
}

TEST_CASE("Pipeline.4P(SSPS).8L.4W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(8, 4);
}

TEST_CASE("Pipeline.4P(SSPS).8L.5W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(8, 5);
}

TEST_CASE("Pipeline.4P(SSPS).8L.6W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(8, 6);
}

TEST_CASE("Pipeline.4P(SSPS).8L.7W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(8, 7);
}

TEST_CASE("Pipeline.4P(SSPS).8L.8W" * doctest::timeout(300)) {
  pipeline_4P_SSPS(8, 8);
}

// ----------------------------------------------------------------------------
// four pipes (SSPP), L lines, W workers
// ----------------------------------------------------------------------------

// Drives a four-pipe pipeline (SERIAL, SERIAL, PARALLEL, PARALLEL) with L lines
// on an executor with w workers, once for every token count N in [0, maxN].
//
// Pipes hand data to each other through mybuffer[line][pipe]: pipe p reads
// column p - 1 and (except for the last pipe) writes column p. The two serial
// pipes forward source[j] + 1; the first parallel pipe records its input in
// `collection3` and forwards input + 1; the second parallel pipe records its
// input in `collection4`. The downstream "test" task sorts both collections and
// expects 1..N and 2..N+1 respectively.
//
// @param L  number of pipeline lines (also the number of rows in mybuffer)
// @param w  number of workers given to the executor
void pipeline_4P_SSPP(size_t L, unsigned w) {

  tf::Executor executor(w);

  const size_t maxN = 100;

  // source[i] == i
  std::vector<int> source(maxN);
  std::iota(source.begin(), source.end(), 0);

  // one row per line, one column per pipe
  std::vector<std::array<int, 4>> mybuffer(L);

  for(size_t N = 0; N <= maxN; N++) {

    tf::Taskflow taskflow;

    // per-pipe counters of processed tokens; j3 and j4 are atomic because they
    // are incremented from the PARALLEL pipes
    size_t j1 = 0, j2 = 0;
    std::atomic<size_t> j3 = 0;
    std::atomic<size_t> j4 = 0;

    // each PARALLEL pipe has its own mutex and its own collection
    std::mutex mutex3;
    std::mutex mutex4;
    std::vector<int> collection3;
    std::vector<int> collection4;

    tf::Pipeline pl(L,
      // pipe 0: emits N tokens, then stops the pipeline
      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j1, &mybuffer, L](auto& pf) mutable {
        if(j1 == N) {
          pf.stop();
          return;
        }
        REQUIRE(j1 == static_cast<size_t>(source[j1]));
        REQUIRE(pf.token() % L == pf.line());
        mybuffer[pf.line()][pf.pipe()] = source[j1] + 1;
        j1++;
      }},

      // pipe 1: checks the value left by pipe 0 and forwards it
      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j2, &mybuffer, L](auto& pf) mutable {
        REQUIRE(j2 < N);
        REQUIRE(pf.token() % L == pf.line());
        REQUIRE(source[j2] + 1 == mybuffer[pf.line()][pf.pipe() - 1]);
        mybuffer[pf.line()][pf.pipe()] = source[j2] + 1;
        j2++;
      }},

      // pipe 2: forwards input + 1 and records its input
      tf::Pipe{tf::PipeType::PARALLEL, [N, &j3, &mutex3, &collection3, &mybuffer, L](auto& pf) mutable {
        REQUIRE(j3++ < N);
        {
          std::scoped_lock<std::mutex> lock(mutex3);
          REQUIRE(pf.token() % L == pf.line());
          mybuffer[pf.line()][pf.pipe()] = mybuffer[pf.line()][pf.pipe() - 1] + 1;
          collection3.push_back(mybuffer[pf.line()][pf.pipe() - 1]);
        }
      }},

      // pipe 3: records the incremented value left by pipe 2
      tf::Pipe{tf::PipeType::PARALLEL, [N, &j4, &mutex4, &collection4, &mybuffer, L](auto& pf) mutable {
        REQUIRE(j4++ < N);
        {
          std::scoped_lock<std::mutex> lock(mutex4);
          REQUIRE(pf.token() % L == pf.line());
          collection4.push_back(mybuffer[pf.line()][pf.pipe() - 1]);
        }
      }}
    );

    auto pipeline = taskflow.composed_of(pl).name("module_of_pipeline");

    // runs after the pipeline module and validates counters and collected data
    auto test = taskflow.emplace([&](){
      REQUIRE(j1 == N);
      REQUIRE(j2 == N);
      REQUIRE(j3 == N);
      REQUIRE(j4 == N);
      REQUIRE(collection3.size() == N);
      REQUIRE(collection4.size() == N);
      std::sort(collection3.begin(), collection3.end());
      std::sort(collection4.begin(), collection4.end());
      for (size_t i = 0; i < N; ++i) {
        REQUIRE(static_cast<size_t>(collection3[i]) == i + 1);
        REQUIRE(static_cast<size_t>(collection4[i]) == i + 2);
      }
      REQUIRE(pl.num_tokens() == N);
    }).name("test");

    pipeline.precede(test);

    // NOTE(review): the Taskflow documentation describes the run_n callable as
    // being invoked after the runs complete, so this reset presumably does not
    // happen between the 3 runs - confirm against Executor::run_n.
    executor.run_n(taskflow, 3, [&]() mutable {
      j1 = 0;
      j2 = 0;
      j3 = 0;
      j4 = 0;
      for(auto& row : mybuffer) {
        row.fill(0);
      }
      collection3.clear();
      collection4.clear();
    }).get();
  }
}

// four pipes (SSPP)
// Registers one doctest case named "Pipeline.4P(SSPP).<lines>L.<workers>W"
// that calls pipeline_4P_SSPP(lines, workers) with the doctest::timeout(300)
// decorator. The adjacent string literals are concatenated by the compiler,
// so every generated name is spelled exactly like a hand-written one.
// Keep one invocation per source line so each case gets a distinct __LINE__.
#define PIPELINE_4P_SSPP_CASE(lines, workers)               \
  TEST_CASE("Pipeline.4P(SSPP)." #lines "L." #workers "W"   \
            * doctest::timeout(300)) {                      \
    pipeline_4P_SSPP(lines, workers);                       \
  }

// 1 line
PIPELINE_4P_SSPP_CASE(1, 1)
PIPELINE_4P_SSPP_CASE(1, 2)
PIPELINE_4P_SSPP_CASE(1, 3)
PIPELINE_4P_SSPP_CASE(1, 4)
PIPELINE_4P_SSPP_CASE(1, 5)
PIPELINE_4P_SSPP_CASE(1, 6)
PIPELINE_4P_SSPP_CASE(1, 7)
PIPELINE_4P_SSPP_CASE(1, 8)

// 2 lines
PIPELINE_4P_SSPP_CASE(2, 1)
PIPELINE_4P_SSPP_CASE(2, 2)
PIPELINE_4P_SSPP_CASE(2, 3)
PIPELINE_4P_SSPP_CASE(2, 4)
PIPELINE_4P_SSPP_CASE(2, 5)
PIPELINE_4P_SSPP_CASE(2, 6)
PIPELINE_4P_SSPP_CASE(2, 7)
PIPELINE_4P_SSPP_CASE(2, 8)

// 3 lines
PIPELINE_4P_SSPP_CASE(3, 1)
PIPELINE_4P_SSPP_CASE(3, 2)
PIPELINE_4P_SSPP_CASE(3, 3)
PIPELINE_4P_SSPP_CASE(3, 4)
PIPELINE_4P_SSPP_CASE(3, 5)
PIPELINE_4P_SSPP_CASE(3, 6)
PIPELINE_4P_SSPP_CASE(3, 7)
PIPELINE_4P_SSPP_CASE(3, 8)

// 4 lines
PIPELINE_4P_SSPP_CASE(4, 1)
PIPELINE_4P_SSPP_CASE(4, 2)
PIPELINE_4P_SSPP_CASE(4, 3)
PIPELINE_4P_SSPP_CASE(4, 4)
PIPELINE_4P_SSPP_CASE(4, 5)
PIPELINE_4P_SSPP_CASE(4, 6)
PIPELINE_4P_SSPP_CASE(4, 7)
PIPELINE_4P_SSPP_CASE(4, 8)

// 5 lines
PIPELINE_4P_SSPP_CASE(5, 1)
PIPELINE_4P_SSPP_CASE(5, 2)
PIPELINE_4P_SSPP_CASE(5, 3)
PIPELINE_4P_SSPP_CASE(5, 4)
PIPELINE_4P_SSPP_CASE(5, 5)
PIPELINE_4P_SSPP_CASE(5, 6)
PIPELINE_4P_SSPP_CASE(5, 7)
PIPELINE_4P_SSPP_CASE(5, 8)

// 6 lines
PIPELINE_4P_SSPP_CASE(6, 1)
PIPELINE_4P_SSPP_CASE(6, 2)
PIPELINE_4P_SSPP_CASE(6, 3)
PIPELINE_4P_SSPP_CASE(6, 4)
PIPELINE_4P_SSPP_CASE(6, 5)
PIPELINE_4P_SSPP_CASE(6, 6)
PIPELINE_4P_SSPP_CASE(6, 7)
PIPELINE_4P_SSPP_CASE(6, 8)

// 7 lines
PIPELINE_4P_SSPP_CASE(7, 1)
PIPELINE_4P_SSPP_CASE(7, 2)
PIPELINE_4P_SSPP_CASE(7, 3)
PIPELINE_4P_SSPP_CASE(7, 4)
PIPELINE_4P_SSPP_CASE(7, 5)
PIPELINE_4P_SSPP_CASE(7, 6)
PIPELINE_4P_SSPP_CASE(7, 7)
PIPELINE_4P_SSPP_CASE(7, 8)

// 8 lines
PIPELINE_4P_SSPP_CASE(8, 1)
PIPELINE_4P_SSPP_CASE(8, 2)
PIPELINE_4P_SSPP_CASE(8, 3)
PIPELINE_4P_SSPP_CASE(8, 4)
PIPELINE_4P_SSPP_CASE(8, 5)
PIPELINE_4P_SSPP_CASE(8, 6)
PIPELINE_4P_SSPP_CASE(8, 7)
PIPELINE_4P_SSPP_CASE(8, 8)

#undef PIPELINE_4P_SSPP_CASE

// ----------------------------------------------------------------------------
// four pipes (SPSS), L lines, W workers
// ----------------------------------------------------------------------------

// Drives a four-stage pipeline (SERIAL, PARALLEL, SERIAL, SERIAL) with L lines
// on an executor of w workers, once for every token count N in [0, 100].
//
// Stages communicate through mybuffer[line][pipe]: stage 1 stores
// source[j1] + 1 and each later stage reads the slot written by the stage
// before it, so the value grows by one per stage. The parallel stage records
// its input in `collection` (guarded by `mutex`) because its tokens may arrive
// in any order; the serial stages compare against `source` directly.
//
// For each N the taskflow is executed three times; pl.num_tokens() is expected
// to keep accumulating across passes (cnt * N after pass number cnt).
void pipeline_4P_SPSS(size_t L, unsigned w) {

  tf::Executor executor(w);

  const size_t maxN = 100;

  std::vector<int> source(maxN);
  std::iota(source.begin(), source.end(), 0);
  std::vector<std::array<int, 4>> mybuffer(L);

  for(size_t N = 0; N <= maxN; N++) {

    tf::Taskflow taskflow;
      
    size_t j1 = 0, j3 = 0, j4 = 0;
    std::atomic<size_t> j2 = 0;   // bumped concurrently by the parallel stage
    std::mutex mutex;             // guards collection and the stage-2 buffer write
    std::vector<int> collection;  // inputs seen by the parallel stage
    size_t cnt = 1;               // 1-based index of the pass being executed

    tf::Pipeline pl(L, 
      // stage 1 (serial): emits N tokens, then stops the pipeline
      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j1, &mybuffer, L](auto& pf) mutable {
        if(j1 == N) {
          pf.stop();
          return;
        }
        REQUIRE(j1 == source[j1]);
        REQUIRE(pf.token() % L == pf.line());
        mybuffer[pf.line()][pf.pipe()] = source[j1] + 1;
        j1++;
      }},

      // stage 2 (parallel): forwards input + 1 and records the input
      tf::Pipe{tf::PipeType::PARALLEL, [N, &j2, &mutex, &collection, &mybuffer, L](auto& pf) mutable {
        REQUIRE(j2++ < N);
        {
          std::scoped_lock<std::mutex> lock(mutex);
          REQUIRE(pf.token() % L == pf.line());
          mybuffer[pf.line()][pf.pipe()] = mybuffer[pf.line()][pf.pipe() - 1] + 1;
          collection.push_back(mybuffer[pf.line()][pf.pipe() - 1]);
        }
      }},

      // stage 3 (serial): input must be source[j3] + 2; forwards input + 1
      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j3, &mybuffer, L](auto& pf) mutable {
        REQUIRE(j3 < N);
        REQUIRE(pf.token() % L == pf.line());
        REQUIRE(source[j3] + 2 == mybuffer[pf.line()][pf.pipe() - 1]);
        mybuffer[pf.line()][pf.pipe()] = mybuffer[pf.line()][pf.pipe() - 1] + 1;
        j3++;
      }},

      // stage 4 (serial): input must be source[j4] + 3
      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j4, &mybuffer, L](auto& pf) mutable {
        REQUIRE(j4 < N);
        REQUIRE(pf.token() % L == pf.line());
        REQUIRE(source[j4] + 3 == mybuffer[pf.line()][pf.pipe() - 1]);
        j4++;
      }}
    );
    
    auto pipeline = taskflow.composed_of(pl).name("module_of_pipeline");

    // runs after the pipeline module in every pass and validates that pass
    auto test = taskflow.emplace([&](){
      REQUIRE(j1 == N);
      REQUIRE(j2 == N);
      REQUIRE(j3 == N);
      REQUIRE(j4 == N);
      REQUIRE(collection.size() == N);
      std::sort(collection.begin(), collection.end());
      for (size_t i = 0; i < N; ++i) {
        REQUIRE(collection[i] == i + 1);
      }
      REQUIRE(pl.num_tokens() == cnt * N);
    }).name("test");

    pipeline.precede(test);

    // Execute one pass at a time and reset the per-pass state in between.
    // The completion callback of run_n fires only once, after the last
    // repetition, so resetting there left passes 2 and 3 with j1 == N: the
    // first pipe stopped immediately and nothing new was verified.
    for(size_t round = 0; round < 3; ++round) {
      executor.run(taskflow).get();

      j1 = j3 = j4 = 0;
      j2 = 0;
      for(auto& slots : mybuffer) {
        slots.fill(0);
      }
      collection.clear();
      cnt++;
    }
  }
}

// four pipes (SPSS)
// Registers one doctest case named "Pipeline.4P(SPSS).<lines>L.<workers>W"
// that calls pipeline_4P_SPSS(lines, workers) with the doctest::timeout(300)
// decorator. The adjacent string literals are concatenated by the compiler,
// so every generated name is spelled exactly like a hand-written one.
// Keep one invocation per source line so each case gets a distinct __LINE__.
#define PIPELINE_4P_SPSS_CASE(lines, workers)               \
  TEST_CASE("Pipeline.4P(SPSS)." #lines "L." #workers "W"   \
            * doctest::timeout(300)) {                      \
    pipeline_4P_SPSS(lines, workers);                       \
  }

// 1 line
PIPELINE_4P_SPSS_CASE(1, 1)
PIPELINE_4P_SPSS_CASE(1, 2)
PIPELINE_4P_SPSS_CASE(1, 3)
PIPELINE_4P_SPSS_CASE(1, 4)
PIPELINE_4P_SPSS_CASE(1, 5)
PIPELINE_4P_SPSS_CASE(1, 6)
PIPELINE_4P_SPSS_CASE(1, 7)
PIPELINE_4P_SPSS_CASE(1, 8)

// 2 lines
PIPELINE_4P_SPSS_CASE(2, 1)
PIPELINE_4P_SPSS_CASE(2, 2)
PIPELINE_4P_SPSS_CASE(2, 3)
PIPELINE_4P_SPSS_CASE(2, 4)
PIPELINE_4P_SPSS_CASE(2, 5)
PIPELINE_4P_SPSS_CASE(2, 6)
PIPELINE_4P_SPSS_CASE(2, 7)
PIPELINE_4P_SPSS_CASE(2, 8)

// 3 lines
PIPELINE_4P_SPSS_CASE(3, 1)
PIPELINE_4P_SPSS_CASE(3, 2)
PIPELINE_4P_SPSS_CASE(3, 3)
PIPELINE_4P_SPSS_CASE(3, 4)
PIPELINE_4P_SPSS_CASE(3, 5)
PIPELINE_4P_SPSS_CASE(3, 6)
PIPELINE_4P_SPSS_CASE(3, 7)
PIPELINE_4P_SPSS_CASE(3, 8)

// 4 lines
PIPELINE_4P_SPSS_CASE(4, 1)
PIPELINE_4P_SPSS_CASE(4, 2)
PIPELINE_4P_SPSS_CASE(4, 3)
PIPELINE_4P_SPSS_CASE(4, 4)
PIPELINE_4P_SPSS_CASE(4, 5)
PIPELINE_4P_SPSS_CASE(4, 6)
PIPELINE_4P_SPSS_CASE(4, 7)
PIPELINE_4P_SPSS_CASE(4, 8)

// 5 lines
PIPELINE_4P_SPSS_CASE(5, 1)
PIPELINE_4P_SPSS_CASE(5, 2)
PIPELINE_4P_SPSS_CASE(5, 3)
PIPELINE_4P_SPSS_CASE(5, 4)
PIPELINE_4P_SPSS_CASE(5, 5)
PIPELINE_4P_SPSS_CASE(5, 6)
PIPELINE_4P_SPSS_CASE(5, 7)
PIPELINE_4P_SPSS_CASE(5, 8)

// 6 lines
PIPELINE_4P_SPSS_CASE(6, 1)
PIPELINE_4P_SPSS_CASE(6, 2)
PIPELINE_4P_SPSS_CASE(6, 3)
PIPELINE_4P_SPSS_CASE(6, 4)
PIPELINE_4P_SPSS_CASE(6, 5)
PIPELINE_4P_SPSS_CASE(6, 6)
PIPELINE_4P_SPSS_CASE(6, 7)
PIPELINE_4P_SPSS_CASE(6, 8)

// 7 lines
PIPELINE_4P_SPSS_CASE(7, 1)
PIPELINE_4P_SPSS_CASE(7, 2)
PIPELINE_4P_SPSS_CASE(7, 3)
PIPELINE_4P_SPSS_CASE(7, 4)
PIPELINE_4P_SPSS_CASE(7, 5)
PIPELINE_4P_SPSS_CASE(7, 6)
PIPELINE_4P_SPSS_CASE(7, 7)
PIPELINE_4P_SPSS_CASE(7, 8)

// 8 lines
PIPELINE_4P_SPSS_CASE(8, 1)
PIPELINE_4P_SPSS_CASE(8, 2)
PIPELINE_4P_SPSS_CASE(8, 3)
PIPELINE_4P_SPSS_CASE(8, 4)
PIPELINE_4P_SPSS_CASE(8, 5)
PIPELINE_4P_SPSS_CASE(8, 6)
PIPELINE_4P_SPSS_CASE(8, 7)
PIPELINE_4P_SPSS_CASE(8, 8)

#undef PIPELINE_4P_SPSS_CASE

// ----------------------------------------------------------------------------
// four pipes (SPSP), L lines, W workers
// ----------------------------------------------------------------------------

// Drives a four-stage pipeline (SERIAL, PARALLEL, SERIAL, PARALLEL) with L
// lines on an executor of w workers, once for every token count N in [0, 100].
//
// Stages communicate through mybuffer[line][pipe]: stage 1 stores
// source[j1] + 1 and stages 2 and 3 each forward their input + 1. The two
// parallel stages record their inputs in collection2 / collection4 (each
// guarded by its own mutex) because their tokens may arrive in any order; the
// serial stage 3 compares against `source` directly.
//
// For each N the taskflow is executed three times; pl.num_tokens() is expected
// to keep accumulating across passes (cnt * N after pass number cnt).
void pipeline_4P_SPSP(size_t L, unsigned w) {

  tf::Executor executor(w);

  const size_t maxN = 100;

  std::vector<int> source(maxN);
  std::iota(source.begin(), source.end(), 0);
  std::vector<std::array<int, 4>> mybuffer(L);

  for(size_t N = 0; N <= maxN; N++) {

    tf::Taskflow taskflow;
      
    size_t j1 = 0, j3 = 0;
    std::atomic<size_t> j2 = 0;    // bumped concurrently by parallel stage 2
    std::atomic<size_t> j4 = 0;    // bumped concurrently by parallel stage 4
    std::mutex mutex2;             // guards collection2 and the stage-2 buffer write
    std::mutex mutex4;             // guards collection4
    std::vector<int> collection2;  // inputs seen by stage 2
    std::vector<int> collection4;  // inputs seen by stage 4
    size_t cnt = 1;                // 1-based index of the pass being executed

    tf::Pipeline pl(L, 
      // stage 1 (serial): emits N tokens, then stops the pipeline
      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j1, &mybuffer, L](auto& pf) mutable {
        if(j1 == N) {
          pf.stop();
          return;
        }
        REQUIRE(j1 == source[j1]);
        REQUIRE(pf.token() % L == pf.line());
        mybuffer[pf.line()][pf.pipe()] = source[j1] + 1;
        j1++;
      }},

      // stage 2 (parallel): forwards input + 1 and records the input
      tf::Pipe{tf::PipeType::PARALLEL, [N, &j2, &mutex2, &collection2, &mybuffer, L](auto& pf) mutable {
        REQUIRE(j2++ < N);
        {
          std::scoped_lock<std::mutex> lock(mutex2);
          REQUIRE(pf.token() % L == pf.line());
          mybuffer[pf.line()][pf.pipe()] = mybuffer[pf.line()][pf.pipe() - 1] + 1;
          collection2.push_back(mybuffer[pf.line()][pf.pipe() - 1]);
        }
      }},

      // stage 3 (serial): input must be source[j3] + 2; forwards input + 1
      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j3, &mybuffer, L](auto& pf) mutable {
        REQUIRE(j3 < N);
        REQUIRE(pf.token() % L == pf.line());
        REQUIRE(source[j3] + 2 == mybuffer[pf.line()][pf.pipe() - 1]);
        mybuffer[pf.line()][pf.pipe()] = mybuffer[pf.line()][pf.pipe() - 1] + 1;
        j3++;
      }},

      // stage 4 (parallel): records the input produced by stage 3
      tf::Pipe{tf::PipeType::PARALLEL, [N, &j4, &mutex4, &collection4, &mybuffer, L](auto& pf) mutable {
        REQUIRE(j4++ < N);
        {
          std::scoped_lock<std::mutex> lock(mutex4);
          REQUIRE(pf.token() % L == pf.line());
          collection4.push_back(mybuffer[pf.line()][pf.pipe() - 1]);
        }
      }}
    );
    
    auto pipeline = taskflow.composed_of(pl).name("module_of_pipeline");

    // runs after the pipeline module in every pass and validates that pass
    auto test = taskflow.emplace([&](){
      REQUIRE(j1 == N);
      REQUIRE(j2 == N);
      REQUIRE(j3 == N);
      REQUIRE(j4 == N);
      REQUIRE(collection2.size() == N);
      REQUIRE(collection4.size() == N);
      std::sort(collection2.begin(), collection2.end());
      std::sort(collection4.begin(), collection4.end());
      for (size_t i = 0; i < N; ++i) {
        REQUIRE(collection2[i] == i + 1);
        REQUIRE(collection4[i] == i + 3);
      }
      REQUIRE(pl.num_tokens() == cnt * N);
    }).name("test");
   
    pipeline.precede(test);

    // Execute one pass at a time and reset the per-pass state in between.
    // The completion callback of run_n fires only once, after the last
    // repetition, so resetting there left passes 2 and 3 with j1 == N: the
    // first pipe stopped immediately and nothing new was verified.
    for(size_t round = 0; round < 3; ++round) {
      executor.run(taskflow).get();

      j1 = j3 = 0;
      j2 = 0;
      j4 = 0;
      for(auto& slots : mybuffer) {
        slots.fill(0);
      }
      collection2.clear();
      collection4.clear();
      cnt++;
    }
  }
}

// four pipes (SPSP)
// Registers one doctest case named "Pipeline.4P(SPSP).<lines>L.<workers>W"
// that calls pipeline_4P_SPSP(lines, workers) with the doctest::timeout(300)
// decorator. The adjacent string literals are concatenated by the compiler,
// so every generated name is spelled exactly like a hand-written one.
// Keep one invocation per source line so each case gets a distinct __LINE__.
#define PIPELINE_4P_SPSP_CASE(lines, workers)               \
  TEST_CASE("Pipeline.4P(SPSP)." #lines "L." #workers "W"   \
            * doctest::timeout(300)) {                      \
    pipeline_4P_SPSP(lines, workers);                       \
  }

// 1 line
PIPELINE_4P_SPSP_CASE(1, 1)
PIPELINE_4P_SPSP_CASE(1, 2)
PIPELINE_4P_SPSP_CASE(1, 3)
PIPELINE_4P_SPSP_CASE(1, 4)
PIPELINE_4P_SPSP_CASE(1, 5)
PIPELINE_4P_SPSP_CASE(1, 6)
PIPELINE_4P_SPSP_CASE(1, 7)
PIPELINE_4P_SPSP_CASE(1, 8)

// 2 lines
PIPELINE_4P_SPSP_CASE(2, 1)
PIPELINE_4P_SPSP_CASE(2, 2)
PIPELINE_4P_SPSP_CASE(2, 3)
PIPELINE_4P_SPSP_CASE(2, 4)
PIPELINE_4P_SPSP_CASE(2, 5)
PIPELINE_4P_SPSP_CASE(2, 6)
PIPELINE_4P_SPSP_CASE(2, 7)
PIPELINE_4P_SPSP_CASE(2, 8)

// 3 lines
PIPELINE_4P_SPSP_CASE(3, 1)
PIPELINE_4P_SPSP_CASE(3, 2)
PIPELINE_4P_SPSP_CASE(3, 3)
PIPELINE_4P_SPSP_CASE(3, 4)
PIPELINE_4P_SPSP_CASE(3, 5)
PIPELINE_4P_SPSP_CASE(3, 6)
PIPELINE_4P_SPSP_CASE(3, 7)
PIPELINE_4P_SPSP_CASE(3, 8)

// 4 lines
PIPELINE_4P_SPSP_CASE(4, 1)
PIPELINE_4P_SPSP_CASE(4, 2)
PIPELINE_4P_SPSP_CASE(4, 3)
PIPELINE_4P_SPSP_CASE(4, 4)
PIPELINE_4P_SPSP_CASE(4, 5)
PIPELINE_4P_SPSP_CASE(4, 6)
PIPELINE_4P_SPSP_CASE(4, 7)
PIPELINE_4P_SPSP_CASE(4, 8)

// 5 lines
PIPELINE_4P_SPSP_CASE(5, 1)
PIPELINE_4P_SPSP_CASE(5, 2)
PIPELINE_4P_SPSP_CASE(5, 3)
PIPELINE_4P_SPSP_CASE(5, 4)
PIPELINE_4P_SPSP_CASE(5, 5)
PIPELINE_4P_SPSP_CASE(5, 6)
PIPELINE_4P_SPSP_CASE(5, 7)
PIPELINE_4P_SPSP_CASE(5, 8)

// 6 lines
PIPELINE_4P_SPSP_CASE(6, 1)
PIPELINE_4P_SPSP_CASE(6, 2)
PIPELINE_4P_SPSP_CASE(6, 3)
PIPELINE_4P_SPSP_CASE(6, 4)
PIPELINE_4P_SPSP_CASE(6, 5)
PIPELINE_4P_SPSP_CASE(6, 6)
PIPELINE_4P_SPSP_CASE(6, 7)
PIPELINE_4P_SPSP_CASE(6, 8)

// 7 lines
PIPELINE_4P_SPSP_CASE(7, 1)
PIPELINE_4P_SPSP_CASE(7, 2)
PIPELINE_4P_SPSP_CASE(7, 3)
PIPELINE_4P_SPSP_CASE(7, 4)
PIPELINE_4P_SPSP_CASE(7, 5)
PIPELINE_4P_SPSP_CASE(7, 6)
PIPELINE_4P_SPSP_CASE(7, 7)
PIPELINE_4P_SPSP_CASE(7, 8)

// 8 lines
PIPELINE_4P_SPSP_CASE(8, 1)
PIPELINE_4P_SPSP_CASE(8, 2)
PIPELINE_4P_SPSP_CASE(8, 3)
PIPELINE_4P_SPSP_CASE(8, 4)
PIPELINE_4P_SPSP_CASE(8, 5)
PIPELINE_4P_SPSP_CASE(8, 6)
PIPELINE_4P_SPSP_CASE(8, 7)
PIPELINE_4P_SPSP_CASE(8, 8)

#undef PIPELINE_4P_SPSP_CASE

// ----------------------------------------------------------------------------
// four pipes (SPPS), L lines, W workers
// ----------------------------------------------------------------------------
// Drives a four-stage pipeline (SERIAL, PARALLEL, PARALLEL, SERIAL) with L
// lines on an executor of w workers, once for every token count N in [0, 100].
//
// Stages communicate through mybuffer[line][pipe]: stage 1 stores
// source[j1] + 1 and stages 2 and 3 each forward their input + 1. The two
// parallel stages record their inputs in collection2 / collection3 (each
// guarded by its own mutex) because their tokens may arrive in any order; the
// serial stage 4 compares against `source` directly.
//
// For each N the taskflow is executed three times; pl.num_tokens() is expected
// to keep accumulating across passes (cnt * N after pass number cnt).
void pipeline_4P_SPPS(size_t L, unsigned w) {

  tf::Executor executor(w);

  const size_t maxN = 100;

  std::vector<int> source(maxN);
  std::iota(source.begin(), source.end(), 0);
  std::vector<std::array<int, 4>> mybuffer(L);

  for(size_t N = 0; N <= maxN; N++) {

    tf::Taskflow taskflow;
      
    size_t j1 = 0, j4 = 0;
    std::atomic<size_t> j2 = 0;    // bumped concurrently by parallel stage 2
    std::atomic<size_t> j3 = 0;    // bumped concurrently by parallel stage 3
    std::mutex mutex2;             // guards collection2 and the stage-2 buffer write
    std::mutex mutex3;             // guards collection3 and the stage-3 buffer write
    std::vector<int> collection2;  // inputs seen by stage 2
    std::vector<int> collection3;  // inputs seen by stage 3
    size_t cnt = 1;                // 1-based index of the pass being executed

    tf::Pipeline pl(L, 
      // stage 1 (serial): emits N tokens, then stops the pipeline
      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j1, &mybuffer, L](auto& pf) mutable {
        if(j1 == N) {
          pf.stop();
          return;
        }
        REQUIRE(j1 == source[j1]);
        REQUIRE(pf.token() % L == pf.line());
        mybuffer[pf.line()][pf.pipe()] = source[j1] + 1;
        j1++;
      }},

      // stage 2 (parallel): forwards input + 1 and records the input
      tf::Pipe{tf::PipeType::PARALLEL, [N, &j2, &mutex2, &collection2, &mybuffer, L](auto& pf) mutable {
        REQUIRE(j2++ < N);
        {
          std::scoped_lock<std::mutex> lock(mutex2);
          REQUIRE(pf.token() % L == pf.line());
          mybuffer[pf.line()][pf.pipe()] = mybuffer[pf.line()][pf.pipe() - 1] + 1;
          collection2.push_back(mybuffer[pf.line()][pf.pipe() - 1]);
        }
      }},

      // stage 3 (parallel): forwards input + 1 and records the input
      tf::Pipe{tf::PipeType::PARALLEL, [N, &j3, &mutex3, &collection3, &mybuffer, L](auto& pf) mutable {
        REQUIRE(j3++ < N);
        {
          std::scoped_lock<std::mutex> lock(mutex3);
          REQUIRE(pf.token() % L == pf.line());
          mybuffer[pf.line()][pf.pipe()] = mybuffer[pf.line()][pf.pipe() - 1] + 1;
          collection3.push_back(mybuffer[pf.line()][pf.pipe() - 1]);
        }
      }},

      // stage 4 (serial): input must be source[j4] + 3
      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j4, &mybuffer, L](auto& pf) mutable {
        REQUIRE(j4 < N);
        REQUIRE(pf.token() % L == pf.line());
        REQUIRE(source[j4] + 3 == mybuffer[pf.line()][pf.pipe() - 1]);
        j4++;
      }}
    );
    
    auto pipeline = taskflow.composed_of(pl).name("module_of_pipeline");

    // runs after the pipeline module in every pass and validates that pass
    auto test = taskflow.emplace([&](){
      REQUIRE(j1 == N);
      REQUIRE(j2 == N);
      REQUIRE(j3 == N);
      REQUIRE(j4 == N);
      REQUIRE(collection2.size() == N);
      REQUIRE(collection3.size() == N);
      std::sort(collection2.begin(), collection2.end());
      std::sort(collection3.begin(), collection3.end());
      for (size_t i = 0; i < N; ++i) {
        REQUIRE(collection2[i] == i + 1);
        REQUIRE(collection3[i] == i + 2);
      }
      REQUIRE(pl.num_tokens() == cnt * N);
    }).name("test");
    
    pipeline.precede(test);

    // Execute one pass at a time and reset the per-pass state in between.
    // The completion callback of run_n fires only once, after the last
    // repetition, so resetting there left passes 2 and 3 with j1 == N: the
    // first pipe stopped immediately and nothing new was verified.
    for(size_t round = 0; round < 3; ++round) {
      executor.run(taskflow).get();

      j1 = j4 = 0;
      j2 = 0;
      j3 = 0;
      for(auto& slots : mybuffer) {
        slots.fill(0);
      }
      collection2.clear();
      collection3.clear();
      cnt++;
    }
  }
}

// four pipes (SPPS)
TEST_CASE("Pipeline.4P(SPPS).1L.1W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(1, 1);
}

TEST_CASE("Pipeline.4P(SPPS).1L.2W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(1, 2);
}

TEST_CASE("Pipeline.4P(SPPS).1L.3W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(1, 3);
}

TEST_CASE("Pipeline.4P(SPPS).1L.4W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(1, 4);
}

TEST_CASE("Pipeline.4P(SPPS).1L.5W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(1, 5);
}

TEST_CASE("Pipeline.4P(SPPS).1L.6W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(1, 6);
}

TEST_CASE("Pipeline.4P(SPPS).1L.7W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(1, 7);
}

TEST_CASE("Pipeline.4P(SPPS).1L.8W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(1, 8);
}

TEST_CASE("Pipeline.4P(SPPS).2L.1W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(2, 1);
}

TEST_CASE("Pipeline.4P(SPPS).2L.2W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(2, 2);
}

TEST_CASE("Pipeline.4P(SPPS).2L.3W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(2, 3);
}

TEST_CASE("Pipeline.4P(SPPS).2L.4W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(2, 4);
}

TEST_CASE("Pipeline.4P(SPPS).2L.5W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(2, 5);
}

TEST_CASE("Pipeline.4P(SPPS).2L.6W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(2, 6);
}

TEST_CASE("Pipeline.4P(SPPS).2L.7W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(2, 7);
}

TEST_CASE("Pipeline.4P(SPPS).2L.8W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(2, 8);
}

TEST_CASE("Pipeline.4P(SPPS).3L.1W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(3, 1);
}

TEST_CASE("Pipeline.4P(SPPS).3L.2W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(3, 2);
}

TEST_CASE("Pipeline.4P(SPPS).3L.3W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(3, 3);
}

TEST_CASE("Pipeline.4P(SPPS).3L.4W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(3, 4);
}

TEST_CASE("Pipeline.4P(SPPS).3L.5W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(3, 5);
}

TEST_CASE("Pipeline.4P(SPPS).3L.6W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(3, 6);
}

TEST_CASE("Pipeline.4P(SPPS).3L.7W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(3, 7);
}

TEST_CASE("Pipeline.4P(SPPS).3L.8W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(3, 8);
}

TEST_CASE("Pipeline.4P(SPPS).4L.1W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(4, 1);
}

TEST_CASE("Pipeline.4P(SPPS).4L.2W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(4, 2);
}

TEST_CASE("Pipeline.4P(SPPS).4L.3W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(4, 3);
}

TEST_CASE("Pipeline.4P(SPPS).4L.4W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(4, 4);
}

TEST_CASE("Pipeline.4P(SPPS).4L.5W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(4, 5);
}

TEST_CASE("Pipeline.4P(SPPS).4L.6W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(4, 6);
}

TEST_CASE("Pipeline.4P(SPPS).4L.7W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(4, 7);
}

TEST_CASE("Pipeline.4P(SPPS).4L.8W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(4, 8);
}

TEST_CASE("Pipeline.4P(SPPS).5L.1W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(5, 1);
}

TEST_CASE("Pipeline.4P(SPPS).5L.2W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(5, 2);
}

TEST_CASE("Pipeline.4P(SPPS).5L.3W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(5, 3);
}

TEST_CASE("Pipeline.4P(SPPS).5L.4W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(5, 4);
}

TEST_CASE("Pipeline.4P(SPPS).5L.5W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(5, 5);
}

TEST_CASE("Pipeline.4P(SPPS).5L.6W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(5, 6);
}

TEST_CASE("Pipeline.4P(SPPS).5L.7W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(5, 7);
}

TEST_CASE("Pipeline.4P(SPPS).5L.8W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(5, 8);
}

TEST_CASE("Pipeline.4P(SPPS).6L.1W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(6, 1);
}

TEST_CASE("Pipeline.4P(SPPS).6L.2W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(6, 2);
}

TEST_CASE("Pipeline.4P(SPPS).6L.3W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(6, 3);
}

TEST_CASE("Pipeline.4P(SPPS).6L.4W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(6, 4);
}

TEST_CASE("Pipeline.4P(SPPS).6L.5W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(6, 5);
}

TEST_CASE("Pipeline.4P(SPPS).6L.6W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(6, 6);
}

TEST_CASE("Pipeline.4P(SPPS).6L.7W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(6, 7);
}

TEST_CASE("Pipeline.4P(SPPS).6L.8W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(6, 8);
}

TEST_CASE("Pipeline.4P(SPPS).7L.1W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(7, 1);
}

TEST_CASE("Pipeline.4P(SPPS).7L.2W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(7, 2);
}

TEST_CASE("Pipeline.4P(SPPS).7L.3W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(7, 3);
}

TEST_CASE("Pipeline.4P(SPPS).7L.4W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(7, 4);
}

TEST_CASE("Pipeline.4P(SPPS).7L.5W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(7, 5);
}

TEST_CASE("Pipeline.4P(SPPS).7L.6W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(7, 6);
}

TEST_CASE("Pipeline.4P(SPPS).7L.7W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(7, 7);
}

TEST_CASE("Pipeline.4P(SPPS).7L.8W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(7, 8);
}

TEST_CASE("Pipeline.4P(SPPS).8L.1W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(8, 1);
}

TEST_CASE("Pipeline.4P(SPPS).8L.2W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(8, 2);
}

TEST_CASE("Pipeline.4P(SPPS).8L.3W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(8, 3);
}

TEST_CASE("Pipeline.4P(SPPS).8L.4W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(8, 4);
}

TEST_CASE("Pipeline.4P(SPPS).8L.5W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(8, 5);
}

TEST_CASE("Pipeline.4P(SPPS).8L.6W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(8, 6);
}

TEST_CASE("Pipeline.4P(SPPS).8L.7W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(8, 7);
}

TEST_CASE("Pipeline.4P(SPPS).8L.8W" * doctest::timeout(300)) {
  pipeline_4P_SPPS(8, 8);
}

// ----------------------------------------------------------------------------
// four pipes (SPPP), L lines, W workers
// ----------------------------------------------------------------------------

void pipeline_4P_SPPP(size_t L, unsigned w) {

  tf::Executor executor(w);

  const size_t maxN = 100;

  std::vector<int> source(maxN);
  std::iota(source.begin(), source.end(), 0);
  std::vector<std::array<int, 4>> mybuffer(L);

  for(size_t N = 0; N <= maxN; N++) {

    tf::Taskflow taskflow;
      
    size_t j1 = 0;
    std::atomic<size_t> j2 = 0;
    std::atomic<size_t> j3 = 0;
    std::atomic<size_t> j4 = 0;
    std::mutex mutex2;
    std::mutex mutex3;
    std::mutex mutex4;
    std::vector<int> collection2;
    std::vector<int> collection3;
    std::vector<int> collection4;
    size_t cnt = 1;

    tf::Pipeline pl(L, 
      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j1, &mybuffer, L](auto& pf) mutable {
        if(j1 == N) {
          pf.stop();
          return;
        }
        REQUIRE(j1 == source[j1]);
        REQUIRE(pf.token() % L == pf.line());
        mybuffer[pf.line()][pf.pipe()] = source[j1] + 1;
        // *(pf.output()) = source[j1] + 1;
        j1++;
      }},

      tf::Pipe{tf::PipeType::PARALLEL, [N, &j2, &mutex2, &collection2, &mybuffer, L](auto& pf) mutable {
        REQUIRE(j2++ < N);
        // *pf.output() = *pf.input() + 1;
        {
          std::scoped_lock<std::mutex> lock(mutex2);
          REQUIRE(pf.token() % L == pf.line());
          mybuffer[pf.line()][pf.pipe()] = mybuffer[pf.line()][pf.pipe() - 1] + 1;
          collection2.push_back(mybuffer[pf.line()][pf.pipe() - 1]);
          //collection2.push_back(*pf.input());
        }
      }},

      tf::Pipe{tf::PipeType::PARALLEL, [N, &j3, &mutex3, &collection3, &mybuffer, L](auto& pf) mutable {
        REQUIRE(j3++ < N);
        // *pf.output() = *pf.input() + 1;
        {
          std::scoped_lock<std::mutex> lock(mutex3);
          REQUIRE(pf.token() % L == pf.line());
          mybuffer[pf.line()][pf.pipe()] = mybuffer[pf.line()][pf.pipe() - 1] + 1;
          collection3.push_back(mybuffer[pf.line()][pf.pipe() - 1]);
          // collection3.push_back(*pf.input());
        }
      }},

      tf::Pipe{tf::PipeType::PARALLEL, [N, &j4, &mutex4, &collection4, &mybuffer, L](auto& pf) mutable {
        REQUIRE(j4++ < N);
        {
          std::scoped_lock<std::mutex> lock(mutex4);
          REQUIRE(pf.token() % L == pf.line());
          collection4.push_back(mybuffer[pf.line()][pf.pipe() - 1]);
          // collection4.push_back(*pf.input());
        }
      }}
    );
    
    auto pipeline = taskflow.composed_of(pl).name("module_of_pipeline");
    auto test = taskflow.emplace([&](){
      REQUIRE(j1 == N);
      REQUIRE(j2 == N);
      REQUIRE(j3 == N);
      REQUIRE(j4 == N);
      REQUIRE(collection2.size() == N);
      REQUIRE(collection3.size() == N);
      REQUIRE(collection4.size() == N);
      std::sort(collection2.begin(), collection2.end());
      std::sort(collection3.begin(), collection3.end());
      std::sort(collection4.begin(), collection4.end());
      for (size_t i = 0; i < N; ++i) {
        REQUIRE(collection2[i] == i + 1);
        REQUIRE(collection3[i] == i + 2);
        REQUIRE(collection4[i] == i + 3);
      }
      REQUIRE(pl.num_tokens() == N);
    }).name("test");
    
    pipeline.precede(test);

    executor.run_n(taskflow, 3, [&]() mutable {
      j1 = j2 = j3 = j4 = 0;
      for(size_t i = 0; i < mybuffer.size(); ++i){
        for(size_t j = 0; j < mybuffer[0].size(); ++j){
          mybuffer[i][j] = 0;
        }
      }
      collection2.clear();
      collection3.clear();
      collection4.clear();
      cnt++;
    }).get();
    
    //executor.run(taskflow).wait();
    //REQUIRE(j1 == N);
    //REQUIRE(j2 == N);
    //REQUIRE(j3 == N);
    //REQUIRE(j4 == N);
    //REQUIRE(collection2.size() == N);
    //REQUIRE(collection3.size() == N);
    //REQUIRE(collection4.size() == N);
    //std::sort(collection2.begin(), collection2.end());
    //std::sort(collection3.begin(), collection3.end());
    //std::sort(collection4.begin(), collection4.end());
    //for (size_t i = 0; i < N; ++i) {
    //  REQUIRE(collection2[i] == i + 1);
    //  REQUIRE(collection3[i] == i + 2);
    //  REQUIRE(collection4[i] == i + 3);
    //}
    //REQUIRE(pl.num_tokens() == N);
    //
    //j1 = j2 = j3 = j4 = 0;
    //for(size_t i = 0; i < mybuffer.size(); ++i){
    //  for(size_t j = 0; j < mybuffer[0].size(); ++j){
    //    mybuffer[i][j] = 0;
    //  }
    //}
    //collection2.clear();
    //collection3.clear();
    //collection4.clear();
    //executor.run(taskflow).wait();
    //REQUIRE(j1 == N);
    //REQUIRE(j2 == N);
    //REQUIRE(j3 == N);
    //REQUIRE(j4 == N);
    //REQUIRE(collection2.size() == N);
    //REQUIRE(collection3.size() == N);
    //REQUIRE(collection4.size() == N);
    //std::sort(collection2.begin(), collection2.end());
    //std::sort(collection3.begin(), collection3.end());
    //std::sort(collection4.begin(), collection4.end());
    //for (size_t i = 0; i < N; ++i) {
    //  REQUIRE(collection2[i] == i + 1);
    //  REQUIRE(collection3[i] == i + 2);
    //  REQUIRE(collection4[i] == i + 3);
    //}
    //REQUIRE(pl.num_tokens() == 2 * N);
    //
    //j1 = j2 = j3 = j4 = 0;
    //for(size_t i = 0; i < mybuffer.size(); ++i){
    //  for(size_t j = 0; j < mybuffer[0].size(); ++j){
    //    mybuffer[i][j] = 0;
    //  }
    //}
    //collection2.clear();
    //collection3.clear();
    //collection4.clear();
    //executor.run(taskflow).wait();
    //REQUIRE(j1 == N);
    //REQUIRE(j2 == N);
    //REQUIRE(j3 == N);
    //REQUIRE(j4 == N);
    //REQUIRE(collection2.size() == N);
    //REQUIRE(collection3.size() == N);
    //REQUIRE(collection4.size() == N);
    //std::sort(collection2.begin(), collection2.end());
    //std::sort(collection3.begin(), collection3.end());
    //std::sort(collection4.begin(), collection4.end());
    //for (size_t i = 0; i < N; ++i) {
    //  REQUIRE(collection2[i] == i + 1);
    //  REQUIRE(collection3[i] == i + 2);
    //  REQUIRE(collection4[i] == i + 3);
    //}
    //REQUIRE(pl.num_tokens() == 3 * N);
  }
}

// four pipes (SPPP)
TEST_CASE("Pipeline.4P(SPPP).1L.1W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(1, 1);
}

TEST_CASE("Pipeline.4P(SPPP).1L.2W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(1, 2);
}

TEST_CASE("Pipeline.4P(SPPP).1L.3W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(1, 3);
}

TEST_CASE("Pipeline.4P(SPPP).1L.4W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(1, 4);
}

TEST_CASE("Pipeline.4P(SPPP).1L.5W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(1, 5);
}

TEST_CASE("Pipeline.4P(SPPP).1L.6W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(1, 6);
}

TEST_CASE("Pipeline.4P(SPPP).1L.7W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(1, 7);
}

TEST_CASE("Pipeline.4P(SPPP).1L.8W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(1, 8);
}

TEST_CASE("Pipeline.4P(SPPP).2L.1W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(2, 1);
}

TEST_CASE("Pipeline.4P(SPPP).2L.2W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(2, 2);
}

TEST_CASE("Pipeline.4P(SPPP).2L.3W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(2, 3);
}

TEST_CASE("Pipeline.4P(SPPP).2L.4W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(2, 4);
}

TEST_CASE("Pipeline.4P(SPPP).2L.5W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(2, 5);
}

TEST_CASE("Pipeline.4P(SPPP).2L.6W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(2, 6);
}

TEST_CASE("Pipeline.4P(SPPP).2L.7W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(2, 7);
}

TEST_CASE("Pipeline.4P(SPPP).2L.8W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(2, 8);
}

TEST_CASE("Pipeline.4P(SPPP).3L.1W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(3, 1);
}

TEST_CASE("Pipeline.4P(SPPP).3L.2W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(3, 2);
}

TEST_CASE("Pipeline.4P(SPPP).3L.3W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(3, 3);
}

TEST_CASE("Pipeline.4P(SPPP).3L.4W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(3, 4);
}

TEST_CASE("Pipeline.4P(SPPP).3L.5W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(3, 5);
}

TEST_CASE("Pipeline.4P(SPPP).3L.6W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(3, 6);
}

TEST_CASE("Pipeline.4P(SPPP).3L.7W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(3, 7);
}

TEST_CASE("Pipeline.4P(SPPP).3L.8W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(3, 8);
}

TEST_CASE("Pipeline.4P(SPPP).4L.1W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(4, 1);
}

TEST_CASE("Pipeline.4P(SPPP).4L.2W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(4, 2);
}

TEST_CASE("Pipeline.4P(SPPP).4L.3W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(4, 3);
}

TEST_CASE("Pipeline.4P(SPPP).4L.4W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(4, 4);
}

TEST_CASE("Pipeline.4P(SPPP).4L.5W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(4, 5);
}

TEST_CASE("Pipeline.4P(SPPP).4L.6W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(4, 6);
}

TEST_CASE("Pipeline.4P(SPPP).4L.7W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(4, 7);
}

TEST_CASE("Pipeline.4P(SPPP).4L.8W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(4, 8);
}

TEST_CASE("Pipeline.4P(SPPP).5L.1W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(5, 1);
}

TEST_CASE("Pipeline.4P(SPPP).5L.2W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(5, 2);
}

TEST_CASE("Pipeline.4P(SPPP).5L.3W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(5, 3);
}

TEST_CASE("Pipeline.4P(SPPP).5L.4W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(5, 4);
}

TEST_CASE("Pipeline.4P(SPPP).5L.5W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(5, 5);
}

TEST_CASE("Pipeline.4P(SPPP).5L.6W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(5, 6);
}

TEST_CASE("Pipeline.4P(SPPP).5L.7W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(5, 7);
}

TEST_CASE("Pipeline.4P(SPPP).5L.8W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(5, 8);
}

TEST_CASE("Pipeline.4P(SPPP).6L.1W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(6, 1);
}

TEST_CASE("Pipeline.4P(SPPP).6L.2W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(6, 2);
}

TEST_CASE("Pipeline.4P(SPPP).6L.3W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(6, 3);
}

TEST_CASE("Pipeline.4P(SPPP).6L.4W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(6, 4);
}

TEST_CASE("Pipeline.4P(SPPP).6L.5W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(6, 5);
}

TEST_CASE("Pipeline.4P(SPPP).6L.6W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(6, 6);
}

TEST_CASE("Pipeline.4P(SPPP).6L.7W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(6, 7);
}

TEST_CASE("Pipeline.4P(SPPP).6L.8W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(6, 8);
}

TEST_CASE("Pipeline.4P(SPPP).7L.1W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(7, 1);
}

TEST_CASE("Pipeline.4P(SPPP).7L.2W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(7, 2);
}

TEST_CASE("Pipeline.4P(SPPP).7L.3W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(7, 3);
}

TEST_CASE("Pipeline.4P(SPPP).7L.4W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(7, 4);
}

TEST_CASE("Pipeline.4P(SPPP).7L.5W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(7, 5);
}

TEST_CASE("Pipeline.4P(SPPP).7L.6W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(7, 6);
}

TEST_CASE("Pipeline.4P(SPPP).7L.7W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(7, 7);
}

TEST_CASE("Pipeline.4P(SPPP).7L.8W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(7, 8);
}

TEST_CASE("Pipeline.4P(SPPP).8L.1W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(8, 1);
}

TEST_CASE("Pipeline.4P(SPPP).8L.2W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(8, 2);
}

TEST_CASE("Pipeline.4P(SPPP).8L.3W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(8, 3);
}

TEST_CASE("Pipeline.4P(SPPP).8L.4W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(8, 4);
}

TEST_CASE("Pipeline.4P(SPPP).8L.5W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(8, 5);
}

TEST_CASE("Pipeline.4P(SPPP).8L.6W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(8, 6);
}

TEST_CASE("Pipeline.4P(SPPP).8L.7W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(8, 7);
}

TEST_CASE("Pipeline.4P(SPPP).8L.8W" * doctest::timeout(300)) {
  pipeline_4P_SPPP(8, 8);
}  
*/


// ----------------------------------------------------------------------------
// three parallel pipelines. each pipeline with L lines.
// one with four pipes (SSSS), one with three pipes (SSP),
// one with two pipes (SP)
//
//      --> SSSS --> O --
//     |                 |
// O -> --> SSP  --> O -- --> O
//     |                 |
//      --> SP   --> O --
//
// ----------------------------------------------------------------------------

// Exercises three pipelines that sit side by side inside a single taskflow:
// pl1 (SSSS), pl2 (SSP) and pl3 (SP), each constructed with L lines, on an
// executor constructed with w workers.
//
// For every token count N in [0, maxN] a fresh taskflow is built in which
// `initial` precedes the three pipeline modules, each module precedes its own
// check task (test1/test2/test3), and `terminal` succeeds the three checks.
// The taskflow is then submitted with run_n(taskflow, 3, callback).
void three_parallel_pipelines(size_t L, unsigned w) {

  tf::Executor executor(w);

  const size_t maxN = 100;

  // source[i] == i; every first pipe takes its token values from here
  std::vector<int> source(maxN);
  std::iota(source.begin(), source.end(), 0);

  // per-pipeline scratch storage indexed as [pf.line()][pf.pipe()]: a pipe
  // writes its own slot and the following pipe reads slot pf.pipe() - 1
  std::vector<std::array<int, 4>> mybuffer1(L);
  std::vector<std::array<int, 3>> mybuffer2(L);
  std::vector<std::array<int, 2>> mybuffer3(L);

  for(size_t N = 0; N <= maxN; N++) {

    tf::Taskflow taskflow;

    // one counter per pipe of pipeline 1; all pipes are serial, so plain
    // size_t counters are used
    size_t j1_1 = 0, j1_2 = 0, j1_3 = 0, j1_4 = 0;
    // multiplier for the token count expected by test1; only the run_n
    // callback at the bottom increments it
    size_t cnt1 = 1;

    // pipeline 1 is SSSS
    tf::Pipeline pl1(L,
      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j1_1, &mybuffer1, L](auto& pf) mutable {
        // stop producing tokens once N of them have been issued
        if(j1_1 == N) {
          pf.stop();
          return;
        }
        REQUIRE(j1_1 == source[j1_1]);
        REQUIRE(pf.token() % L == pf.line());
        mybuffer1[pf.line()][pf.pipe()] = source[j1_1] + 1;
        j1_1++;
      }},

      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j1_2, &mybuffer1, L](auto& pf) mutable {
        REQUIRE(j1_2 < N);
        REQUIRE(pf.token() % L == pf.line());
        // the previous pipe must have left source[j] + 1 in its slot
        REQUIRE(source[j1_2] + 1 == mybuffer1[pf.line()][pf.pipe() - 1]);
        mybuffer1[pf.line()][pf.pipe()] = source[j1_2] + 1;
        j1_2++;
      }},

      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j1_3, &mybuffer1, L](auto& pf) mutable {
        REQUIRE(j1_3 < N);
        REQUIRE(pf.token() % L == pf.line());
        REQUIRE(source[j1_3] + 1 == mybuffer1[pf.line()][pf.pipe() - 1]);
        mybuffer1[pf.line()][pf.pipe()] = source[j1_3] + 1;
        j1_3++;
      }},

      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j1_4, &mybuffer1, L](auto& pf) mutable {
        REQUIRE(j1_4 < N);
        REQUIRE(pf.token() % L == pf.line());
        REQUIRE(source[j1_4] + 1 == mybuffer1[pf.line()][pf.pipe() - 1]);
        j1_4++;
      }}
    );

    auto pipeline1 = taskflow.composed_of(pl1).name("module_of_pipeline1");
    auto test1 = taskflow.emplace([&](){
      REQUIRE(j1_1 == N);
      REQUIRE(j1_2 == N);
      REQUIRE(j1_3 == N);
      REQUIRE(j1_4 == N);
      REQUIRE(pl1.num_tokens() == cnt1 * N);
    }).name("test1");

    pipeline1.precede(test1);

    // the following are the definitions for pipeline 2
    size_t j2_1 = 0, j2_2 = 0;
    // the last pipe is parallel, so its counter is atomic and its collection
    // is guarded by a mutex
    std::atomic<size_t> j2_3 = 0;
    std::mutex mutex2_3;
    std::vector<int> collection2_3;
    size_t cnt2 = 1;

    // pipeline 2 is SSP
    tf::Pipeline pl2(L,
      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j2_1, &mybuffer2, L](auto& pf) mutable {
        if(j2_1 == N) {
          pf.stop();
          return;
        }
        REQUIRE(j2_1 == source[j2_1]);
        REQUIRE(pf.token() % L == pf.line());
        mybuffer2[pf.line()][pf.pipe()] = source[j2_1] + 1;
        j2_1++;
      }},

      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j2_2, &mybuffer2, L](auto& pf) mutable {
        REQUIRE(j2_2 < N);
        REQUIRE(source[j2_2] + 1 == mybuffer2[pf.line()][pf.pipe() - 1]);
        REQUIRE(pf.token() % L == pf.line());
        mybuffer2[pf.line()][pf.pipe()] = source[j2_2] + 1;
        j2_2++;
      }},

      tf::Pipe{tf::PipeType::PARALLEL, [N, &j2_3, &mutex2_3, &collection2_3, &mybuffer2, L](auto& pf) mutable {
        REQUIRE(j2_3++ < N);
        {
          std::scoped_lock<std::mutex> lock(mutex2_3);
          REQUIRE(pf.token() % L == pf.line());
          collection2_3.push_back(mybuffer2[pf.line()][pf.pipe() - 1]);
        }
      }}
    );

    auto pipeline2 = taskflow.composed_of(pl2).name("module_of_pipeline2");
    auto test2 = taskflow.emplace([&](){
      REQUIRE(j2_1 == N);
      REQUIRE(j2_2 == N);
      REQUIRE(j2_3 == N);
      REQUIRE(collection2_3.size() == N);

      // values were pushed in whatever order the parallel pipe ran, so sort
      // before comparing against 1..N
      std::sort(collection2_3.begin(), collection2_3.end());
      for(size_t i = 0; i < N; ++i) {
        REQUIRE(collection2_3[i] == i + 1);
      }
      REQUIRE(pl2.num_tokens() == cnt2 * N);
    }).name("test2");

    pipeline2.precede(test2);

    // the following are the definitions for pipeline 3
    size_t j3_1 = 0;
    std::atomic<size_t> j3_2 = 0;
    std::mutex mutex3_2;
    std::vector<int> collection3_2;
    size_t cnt3 = 1;

    // pipeline 3 is SP
    tf::Pipeline pl3(L,
      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j3_1, &mybuffer3, L](auto& pf) mutable {
        if(j3_1 == N) {
          pf.stop();
          return;
        }
        REQUIRE(j3_1 == source[j3_1]);
        REQUIRE(pf.token() % L == pf.line());
        mybuffer3[pf.line()][pf.pipe()] = source[j3_1] + 1;
        j3_1++;
      }},

      tf::Pipe{tf::PipeType::PARALLEL,
      [N, &collection3_2, &mutex3_2, &j3_2, &mybuffer3, L](auto& pf) mutable {
        REQUIRE(j3_2++ < N);
        {
          std::scoped_lock<std::mutex> lock(mutex3_2);
          REQUIRE(pf.token() % L == pf.line());
          collection3_2.push_back(mybuffer3[pf.line()][pf.pipe() - 1]);
        }
      }}
    );

    auto pipeline3 = taskflow.composed_of(pl3).name("module_of_pipeline3");
    auto test3 = taskflow.emplace([&](){
      REQUIRE(j3_1 == N);
      REQUIRE(j3_2 == N);
      // guard the indexed reads below, mirroring the size check in test2
      REQUIRE(collection3_2.size() == N);

      std::sort(collection3_2.begin(), collection3_2.end());
      for(size_t i = 0; i < N; ++i) {
        REQUIRE(collection3_2[i] == i + 1);
      }
      REQUIRE(pl3.num_tokens() == cnt3 * N);
    }).name("test3");

    pipeline3.precede(test3);

    // empty fork/join tasks around the three pipeline -> check chains
    auto initial  = taskflow.emplace([](){}).name("initial");
    auto terminal = taskflow.emplace([](){}).name("terminal");

    initial.precede(pipeline1, pipeline2, pipeline3);
    terminal.succeed(test1, test2, test3);

    //taskflow.dump(std::cout);

    // The callback zeroes every counter and buffer, empties the collections
    // and bumps the cnt* multipliers.
    // NOTE(review): when the executor invokes this callback (after every run
    // or only once after the last of the 3 runs) is not visible here - confirm
    // against Executor::run_n; it decides whether runs 2 and 3 start from a
    // clean state.
    executor.run_n(taskflow, 3, [&]() {
      // reset variables for pipeline 1
      j1_1 = j1_2 = j1_3 = j1_4 = 0;
      for(auto& row : mybuffer1) {
        row.fill(0);
      }
      cnt1++;

      // reset variables for pipeline 2
      j2_1 = j2_2 = j2_3 = 0;
      collection2_3.clear();
      for(auto& row : mybuffer2) {
        row.fill(0);
      }
      cnt2++;

      // reset variables for pipeline 3
      j3_1 = j3_2 = 0;
      collection3_2.clear();
      for(auto& row : mybuffer3) {
        row.fill(0);
      }
      cnt3++;
    }).get();
  }
}

// three parallel pipelines
// Three.Parallel.Pipelines: 1 line, 1 through 8 workers
TEST_CASE("Three.Parallel.Pipelines.1L.1W" * doctest::timeout(300)) { three_parallel_pipelines(1, 1); }
TEST_CASE("Three.Parallel.Pipelines.1L.2W" * doctest::timeout(300)) { three_parallel_pipelines(1, 2); }
TEST_CASE("Three.Parallel.Pipelines.1L.3W" * doctest::timeout(300)) { three_parallel_pipelines(1, 3); }
TEST_CASE("Three.Parallel.Pipelines.1L.4W" * doctest::timeout(300)) { three_parallel_pipelines(1, 4); }
TEST_CASE("Three.Parallel.Pipelines.1L.5W" * doctest::timeout(300)) { three_parallel_pipelines(1, 5); }
TEST_CASE("Three.Parallel.Pipelines.1L.6W" * doctest::timeout(300)) { three_parallel_pipelines(1, 6); }
TEST_CASE("Three.Parallel.Pipelines.1L.7W" * doctest::timeout(300)) { three_parallel_pipelines(1, 7); }
TEST_CASE("Three.Parallel.Pipelines.1L.8W" * doctest::timeout(300)) { three_parallel_pipelines(1, 8); }

// Three.Parallel.Pipelines: 2 lines, 1 through 8 workers
TEST_CASE("Three.Parallel.Pipelines.2L.1W" * doctest::timeout(300)) { three_parallel_pipelines(2, 1); }
TEST_CASE("Three.Parallel.Pipelines.2L.2W" * doctest::timeout(300)) { three_parallel_pipelines(2, 2); }
TEST_CASE("Three.Parallel.Pipelines.2L.3W" * doctest::timeout(300)) { three_parallel_pipelines(2, 3); }
TEST_CASE("Three.Parallel.Pipelines.2L.4W" * doctest::timeout(300)) { three_parallel_pipelines(2, 4); }
TEST_CASE("Three.Parallel.Pipelines.2L.5W" * doctest::timeout(300)) { three_parallel_pipelines(2, 5); }
TEST_CASE("Three.Parallel.Pipelines.2L.6W" * doctest::timeout(300)) { three_parallel_pipelines(2, 6); }
TEST_CASE("Three.Parallel.Pipelines.2L.7W" * doctest::timeout(300)) { three_parallel_pipelines(2, 7); }
TEST_CASE("Three.Parallel.Pipelines.2L.8W" * doctest::timeout(300)) { three_parallel_pipelines(2, 8); }

// Three.Parallel.Pipelines: 3 lines, 1 through 8 workers
TEST_CASE("Three.Parallel.Pipelines.3L.1W" * doctest::timeout(300)) { three_parallel_pipelines(3, 1); }
TEST_CASE("Three.Parallel.Pipelines.3L.2W" * doctest::timeout(300)) { three_parallel_pipelines(3, 2); }
TEST_CASE("Three.Parallel.Pipelines.3L.3W" * doctest::timeout(300)) { three_parallel_pipelines(3, 3); }
TEST_CASE("Three.Parallel.Pipelines.3L.4W" * doctest::timeout(300)) { three_parallel_pipelines(3, 4); }
TEST_CASE("Three.Parallel.Pipelines.3L.5W" * doctest::timeout(300)) { three_parallel_pipelines(3, 5); }
TEST_CASE("Three.Parallel.Pipelines.3L.6W" * doctest::timeout(300)) { three_parallel_pipelines(3, 6); }
TEST_CASE("Three.Parallel.Pipelines.3L.7W" * doctest::timeout(300)) { three_parallel_pipelines(3, 7); }
TEST_CASE("Three.Parallel.Pipelines.3L.8W" * doctest::timeout(300)) { three_parallel_pipelines(3, 8); }

// Three.Parallel.Pipelines: 4 lines, 1 through 8 workers
TEST_CASE("Three.Parallel.Pipelines.4L.1W" * doctest::timeout(300)) { three_parallel_pipelines(4, 1); }
TEST_CASE("Three.Parallel.Pipelines.4L.2W" * doctest::timeout(300)) { three_parallel_pipelines(4, 2); }
TEST_CASE("Three.Parallel.Pipelines.4L.3W" * doctest::timeout(300)) { three_parallel_pipelines(4, 3); }
TEST_CASE("Three.Parallel.Pipelines.4L.4W" * doctest::timeout(300)) { three_parallel_pipelines(4, 4); }
TEST_CASE("Three.Parallel.Pipelines.4L.5W" * doctest::timeout(300)) { three_parallel_pipelines(4, 5); }
TEST_CASE("Three.Parallel.Pipelines.4L.6W" * doctest::timeout(300)) { three_parallel_pipelines(4, 6); }
TEST_CASE("Three.Parallel.Pipelines.4L.7W" * doctest::timeout(300)) { three_parallel_pipelines(4, 7); }
TEST_CASE("Three.Parallel.Pipelines.4L.8W" * doctest::timeout(300)) { three_parallel_pipelines(4, 8); }

// Three.Parallel.Pipelines: 5 lines, 1 through 8 workers
TEST_CASE("Three.Parallel.Pipelines.5L.1W" * doctest::timeout(300)) { three_parallel_pipelines(5, 1); }
TEST_CASE("Three.Parallel.Pipelines.5L.2W" * doctest::timeout(300)) { three_parallel_pipelines(5, 2); }
TEST_CASE("Three.Parallel.Pipelines.5L.3W" * doctest::timeout(300)) { three_parallel_pipelines(5, 3); }
TEST_CASE("Three.Parallel.Pipelines.5L.4W" * doctest::timeout(300)) { three_parallel_pipelines(5, 4); }
TEST_CASE("Three.Parallel.Pipelines.5L.5W" * doctest::timeout(300)) { three_parallel_pipelines(5, 5); }
TEST_CASE("Three.Parallel.Pipelines.5L.6W" * doctest::timeout(300)) { three_parallel_pipelines(5, 6); }
TEST_CASE("Three.Parallel.Pipelines.5L.7W" * doctest::timeout(300)) { three_parallel_pipelines(5, 7); }
TEST_CASE("Three.Parallel.Pipelines.5L.8W" * doctest::timeout(300)) { three_parallel_pipelines(5, 8); }

// Three.Parallel.Pipelines: 6 lines, 1 through 8 workers
TEST_CASE("Three.Parallel.Pipelines.6L.1W" * doctest::timeout(300)) { three_parallel_pipelines(6, 1); }
TEST_CASE("Three.Parallel.Pipelines.6L.2W" * doctest::timeout(300)) { three_parallel_pipelines(6, 2); }
TEST_CASE("Three.Parallel.Pipelines.6L.3W" * doctest::timeout(300)) { three_parallel_pipelines(6, 3); }
TEST_CASE("Three.Parallel.Pipelines.6L.4W" * doctest::timeout(300)) { three_parallel_pipelines(6, 4); }
TEST_CASE("Three.Parallel.Pipelines.6L.5W" * doctest::timeout(300)) { three_parallel_pipelines(6, 5); }
TEST_CASE("Three.Parallel.Pipelines.6L.6W" * doctest::timeout(300)) { three_parallel_pipelines(6, 6); }
TEST_CASE("Three.Parallel.Pipelines.6L.7W" * doctest::timeout(300)) { three_parallel_pipelines(6, 7); }
TEST_CASE("Three.Parallel.Pipelines.6L.8W" * doctest::timeout(300)) { three_parallel_pipelines(6, 8); }

// Three.Parallel.Pipelines: 7 lines, 1 through 8 workers
TEST_CASE("Three.Parallel.Pipelines.7L.1W" * doctest::timeout(300)) { three_parallel_pipelines(7, 1); }
TEST_CASE("Three.Parallel.Pipelines.7L.2W" * doctest::timeout(300)) { three_parallel_pipelines(7, 2); }
TEST_CASE("Three.Parallel.Pipelines.7L.3W" * doctest::timeout(300)) { three_parallel_pipelines(7, 3); }
TEST_CASE("Three.Parallel.Pipelines.7L.4W" * doctest::timeout(300)) { three_parallel_pipelines(7, 4); }
TEST_CASE("Three.Parallel.Pipelines.7L.5W" * doctest::timeout(300)) { three_parallel_pipelines(7, 5); }
TEST_CASE("Three.Parallel.Pipelines.7L.6W" * doctest::timeout(300)) { three_parallel_pipelines(7, 6); }
TEST_CASE("Three.Parallel.Pipelines.7L.7W" * doctest::timeout(300)) { three_parallel_pipelines(7, 7); }
TEST_CASE("Three.Parallel.Pipelines.7L.8W" * doctest::timeout(300)) { three_parallel_pipelines(7, 8); }

// Three.Parallel.Pipelines: 8 lines, 1 through 8 workers
TEST_CASE("Three.Parallel.Pipelines.8L.1W" * doctest::timeout(300)) { three_parallel_pipelines(8, 1); }
TEST_CASE("Three.Parallel.Pipelines.8L.2W" * doctest::timeout(300)) { three_parallel_pipelines(8, 2); }
TEST_CASE("Three.Parallel.Pipelines.8L.3W" * doctest::timeout(300)) { three_parallel_pipelines(8, 3); }
TEST_CASE("Three.Parallel.Pipelines.8L.4W" * doctest::timeout(300)) { three_parallel_pipelines(8, 4); }
TEST_CASE("Three.Parallel.Pipelines.8L.5W" * doctest::timeout(300)) { three_parallel_pipelines(8, 5); }
TEST_CASE("Three.Parallel.Pipelines.8L.6W" * doctest::timeout(300)) { three_parallel_pipelines(8, 6); }
TEST_CASE("Three.Parallel.Pipelines.8L.7W" * doctest::timeout(300)) { three_parallel_pipelines(8, 7); }
TEST_CASE("Three.Parallel.Pipelines.8L.8W" * doctest::timeout(300)) { three_parallel_pipelines(8, 8); }

// ----------------------------------------------------------------------------
// three concatenated pipelines. each pipeline with L lines.
// one with four pipes (SSSS), one with three pipes (SSP),
// one with two pipes (SP)
//
// O -> SSSS -> O -> SSP -> O -> SP -> O
//
// ----------------------------------------------------------------------------

// Chains three pipelines, each with L lines, inside one taskflow and runs it
// on an executor with w workers for every N in [0, maxN]:
//
//   initial -> pl1 -> test1 -> pl2 -> test2 -> pl3 -> test3 -> terminal
//
//   pl1: serial, serial, serial, serial
//   pl2: serial, serial, parallel
//   pl3: serial, parallel
//
// The first pipe of each pipeline emits source[j] + 1 into the per-line buffer
// and stops once N items have been emitted; later pipes check the value left
// by the previous pipe on the same line. Each testX task verifies the counters
// (and, for pl2/pl3, the collected values) of the pipeline that precedes it.
//
// @param L number of lines given to every pipeline
// @param w number of workers given to the executor
void three_concatenated_pipelines(size_t L, unsigned w) {

  tf::Executor executor(w);

  const size_t maxN = 100;

  // source[i] == i; the serial pipes read it in order.
  std::vector<int> source(maxN);
  std::iota(source.begin(), source.end(), 0);

  // One row per line, one column per pipe of the respective pipeline.
  std::vector<std::array<int, 4>> mybuffer1(L);
  std::vector<std::array<int, 3>> mybuffer2(L);
  std::vector<std::array<int, 2>> mybuffer3(L);

  for(size_t N = 0; N <= maxN; N++) {

    tf::Taskflow taskflow;

    // Per-pipe invocation counters of pipeline 1 (all pipes are serial).
    size_t j1_1 = 0, j1_2 = 0, j1_3 = 0, j1_4 = 0;
    size_t cnt1 = 1;

    // pipeline 1 is SSSS
    tf::Pipeline pl1(L,
      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j1_1, &mybuffer1, L](auto& pf) mutable {
        if(j1_1 == N) {
          pf.stop();
          return;
        }
        REQUIRE(j1_1 == source[j1_1]);
        REQUIRE(pf.token() % L == pf.line());
        mybuffer1[pf.line()][pf.pipe()] = source[j1_1] + 1;
        j1_1++;
      }},

      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j1_2, &mybuffer1, L](auto& pf) mutable {
        REQUIRE(j1_2 < N);
        REQUIRE(pf.token() % L == pf.line());
        REQUIRE(source[j1_2] + 1 == mybuffer1[pf.line()][pf.pipe() - 1]);
        mybuffer1[pf.line()][pf.pipe()] = source[j1_2] + 1;
        j1_2++;
      }},

      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j1_3, &mybuffer1, L](auto& pf) mutable {
        REQUIRE(j1_3 < N);
        REQUIRE(pf.token() % L == pf.line());
        REQUIRE(source[j1_3] + 1 == mybuffer1[pf.line()][pf.pipe() - 1]);
        mybuffer1[pf.line()][pf.pipe()] = source[j1_3] + 1;
        j1_3++;
      }},

      // Last pipe only verifies; it writes nothing back to the buffer.
      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j1_4, &mybuffer1, L](auto& pf) mutable {
        REQUIRE(j1_4 < N);
        REQUIRE(pf.token() % L == pf.line());
        REQUIRE(source[j1_4] + 1 == mybuffer1[pf.line()][pf.pipe() - 1]);
        j1_4++;
      }}
    );

    auto pipeline1 = taskflow.composed_of(pl1).name("module_of_pipeline1");
    auto test1 = taskflow.emplace([&](){
      REQUIRE(j1_1 == N);
      REQUIRE(j1_2 == N);
      REQUIRE(j1_3 == N);
      REQUIRE(j1_4 == N);
      REQUIRE(pl1.num_tokens() == cnt1 * N);
    }).name("test1");


    // State for pipeline 2. The third pipe is parallel, so its counter is
    // atomic and the collection it appends to is guarded by a mutex.
    size_t j2_1 = 0, j2_2 = 0;
    std::atomic<size_t> j2_3 = 0;
    std::mutex mutex2_3;
    std::vector<int> collection2_3;
    size_t cnt2 = 1;

    // pipeline 2 is SSP
    tf::Pipeline pl2(L,
      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j2_1, &mybuffer2, L](auto& pf) mutable {
        if(j2_1 == N) {
          pf.stop();
          return;
        }
        REQUIRE(j2_1 == source[j2_1]);
        REQUIRE(pf.token() % L == pf.line());
        mybuffer2[pf.line()][pf.pipe()] = source[j2_1] + 1;
        j2_1++;
      }},

      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j2_2, &mybuffer2, L](auto& pf) mutable {
        REQUIRE(j2_2 < N);
        REQUIRE(source[j2_2] + 1 == mybuffer2[pf.line()][pf.pipe() - 1]);
        REQUIRE(pf.token() % L == pf.line());
        mybuffer2[pf.line()][pf.pipe()] = source[j2_2] + 1;
        j2_2++;
      }},

      tf::Pipe{tf::PipeType::PARALLEL, [N, &j2_3, &mutex2_3, &collection2_3, &mybuffer2, L](auto& pf) mutable {
        REQUIRE(j2_3++ < N);
        {
          std::scoped_lock<std::mutex> lock(mutex2_3);
          REQUIRE(pf.token() % L == pf.line());
          collection2_3.push_back(mybuffer2[pf.line()][pf.pipe() - 1]);
        }
      }}
    );

    auto pipeline2 = taskflow.composed_of(pl2).name("module_of_pipeline2");
    auto test2 = taskflow.emplace([&](){
      REQUIRE(j2_1 == N);
      REQUIRE(j2_2 == N);
      REQUIRE(j2_3 == N);
      REQUIRE(collection2_3.size() == N);

      // Arrival order in the parallel pipe is not fixed; sort before comparing.
      std::sort(collection2_3.begin(), collection2_3.end());
      for (size_t i = 0; i < N; ++i) {
        REQUIRE(collection2_3[i] == i + 1);
      }
      REQUIRE(pl2.num_tokens() == cnt2 * N);
    }).name("test2");


    // State for pipeline 3. The second pipe is parallel, so its counter is
    // atomic and the collection it appends to is guarded by a mutex.
    size_t j3_1 = 0;
    std::atomic<size_t> j3_2 = 0;
    std::mutex mutex3_2;
    std::vector<int> collection3_2;
    size_t cnt3 = 1;

    // pipeline 3 is SP
    tf::Pipeline pl3(L,
      tf::Pipe{tf::PipeType::SERIAL, [N, &source, &j3_1, &mybuffer3, L](auto& pf) mutable {
        if(j3_1 == N) {
          pf.stop();
          return;
        }
        REQUIRE(j3_1 == source[j3_1]);
        REQUIRE(pf.token() % L == pf.line());
        mybuffer3[pf.line()][pf.pipe()] = source[j3_1] + 1;
        j3_1++;
      }},

      tf::Pipe{tf::PipeType::PARALLEL,
      [N, &collection3_2, &mutex3_2, &j3_2, &mybuffer3, L](auto& pf) mutable {
        REQUIRE(j3_2++ < N);
        {
          std::scoped_lock<std::mutex> lock(mutex3_2);
          REQUIRE(pf.token() % L == pf.line());
          collection3_2.push_back(mybuffer3[pf.line()][pf.pipe() - 1]);
        }
      }}
    );

    auto pipeline3 = taskflow.composed_of(pl3).name("module_of_pipeline3");
    auto test3 = taskflow.emplace([&](){
      REQUIRE(j3_1 == N);
      REQUIRE(j3_2 == N);
      // Same guard as test2: the loop below indexes [0, N), so fail cleanly
      // instead of reading past the end if fewer values were collected.
      REQUIRE(collection3_2.size() == N);

      // Arrival order in the parallel pipe is not fixed; sort before comparing.
      std::sort(collection3_2.begin(), collection3_2.end());
      for(size_t i = 0; i < N; i++) {
        REQUIRE(collection3_2[i] == i + 1);
      }
      REQUIRE(pl3.num_tokens() == cnt3 * N);
    }).name("test3");


    auto initial  = taskflow.emplace([](){}).name("initial");
    auto terminal = taskflow.emplace([](){}).name("terminal");

    initial.precede(pipeline1);
    pipeline1.precede(test1);
    test1.precede(pipeline2);
    pipeline2.precede(test2);
    test2.precede(pipeline3);
    pipeline3.precede(test3);
    test3.precede(terminal);

    //taskflow.dump(std::cout);

    // NOTE(review): the Taskflow docs describe run_n as running the taskflow
    // N times and *then* invoking the callable. If that holds here, the resets
    // below happen once, after the third run, rather than between runs -
    // confirm whether that is intended.
    executor.run_n(taskflow, 3, [&]() mutable {
      // reset variables for pipeline 1
      j1_1 = j1_2 = j1_3 = j1_4 = 0;
      for(auto& row : mybuffer1) {
        row.fill(0);
      }
      cnt1++;

      // reset variables for pipeline 2
      j2_1 = j2_2 = j2_3 = 0;
      collection2_3.clear();
      for(auto& row : mybuffer2) {
        row.fill(0);
      }
      cnt2++;

      // reset variables for pipeline 3
      j3_1 = j3_2 = 0;
      collection3_2.clear();
      for(auto& row : mybuffer3) {
        row.fill(0);
      }
      cnt3++;
    }).get();

  }
}

// three concatenated pipelines
// 1 line per pipeline; executor size sweeps 1..8 workers.
TEST_CASE("Three.Concatenated.Pipelines.1L.1W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{1}, 1u);
}

TEST_CASE("Three.Concatenated.Pipelines.1L.2W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{1}, 2u);
}

TEST_CASE("Three.Concatenated.Pipelines.1L.3W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{1}, 3u);
}

TEST_CASE("Three.Concatenated.Pipelines.1L.4W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{1}, 4u);
}

TEST_CASE("Three.Concatenated.Pipelines.1L.5W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{1}, 5u);
}

TEST_CASE("Three.Concatenated.Pipelines.1L.6W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{1}, 6u);
}

TEST_CASE("Three.Concatenated.Pipelines.1L.7W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{1}, 7u);
}

TEST_CASE("Three.Concatenated.Pipelines.1L.8W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{1}, 8u);
}

// 2 lines per pipeline; executor size sweeps 1..8 workers.
TEST_CASE("Three.Concatenated.Pipelines.2L.1W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{2}, 1u);
}

TEST_CASE("Three.Concatenated.Pipelines.2L.2W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{2}, 2u);
}

TEST_CASE("Three.Concatenated.Pipelines.2L.3W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{2}, 3u);
}

TEST_CASE("Three.Concatenated.Pipelines.2L.4W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{2}, 4u);
}

TEST_CASE("Three.Concatenated.Pipelines.2L.5W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{2}, 5u);
}

TEST_CASE("Three.Concatenated.Pipelines.2L.6W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{2}, 6u);
}

TEST_CASE("Three.Concatenated.Pipelines.2L.7W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{2}, 7u);
}

TEST_CASE("Three.Concatenated.Pipelines.2L.8W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{2}, 8u);
}

// 3 lines per pipeline; executor size sweeps 1..8 workers.
TEST_CASE("Three.Concatenated.Pipelines.3L.1W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{3}, 1u);
}

TEST_CASE("Three.Concatenated.Pipelines.3L.2W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{3}, 2u);
}

TEST_CASE("Three.Concatenated.Pipelines.3L.3W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{3}, 3u);
}

TEST_CASE("Three.Concatenated.Pipelines.3L.4W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{3}, 4u);
}

TEST_CASE("Three.Concatenated.Pipelines.3L.5W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{3}, 5u);
}

TEST_CASE("Three.Concatenated.Pipelines.3L.6W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{3}, 6u);
}

TEST_CASE("Three.Concatenated.Pipelines.3L.7W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{3}, 7u);
}

TEST_CASE("Three.Concatenated.Pipelines.3L.8W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{3}, 8u);
}

// 4 lines per pipeline; executor size sweeps 1..8 workers.
TEST_CASE("Three.Concatenated.Pipelines.4L.1W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{4}, 1u);
}

TEST_CASE("Three.Concatenated.Pipelines.4L.2W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{4}, 2u);
}

TEST_CASE("Three.Concatenated.Pipelines.4L.3W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{4}, 3u);
}

TEST_CASE("Three.Concatenated.Pipelines.4L.4W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{4}, 4u);
}

TEST_CASE("Three.Concatenated.Pipelines.4L.5W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{4}, 5u);
}

TEST_CASE("Three.Concatenated.Pipelines.4L.6W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{4}, 6u);
}

TEST_CASE("Three.Concatenated.Pipelines.4L.7W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{4}, 7u);
}

TEST_CASE("Three.Concatenated.Pipelines.4L.8W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{4}, 8u);
}

// 5 lines per pipeline; executor size sweeps 1..8 workers.
TEST_CASE("Three.Concatenated.Pipelines.5L.1W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{5}, 1u);
}

TEST_CASE("Three.Concatenated.Pipelines.5L.2W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{5}, 2u);
}

TEST_CASE("Three.Concatenated.Pipelines.5L.3W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{5}, 3u);
}

TEST_CASE("Three.Concatenated.Pipelines.5L.4W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{5}, 4u);
}

TEST_CASE("Three.Concatenated.Pipelines.5L.5W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{5}, 5u);
}

TEST_CASE("Three.Concatenated.Pipelines.5L.6W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{5}, 6u);
}

TEST_CASE("Three.Concatenated.Pipelines.5L.7W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{5}, 7u);
}

TEST_CASE("Three.Concatenated.Pipelines.5L.8W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{5}, 8u);
}

// 6 lines per pipeline; executor size sweeps 1..8 workers.
TEST_CASE("Three.Concatenated.Pipelines.6L.1W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{6}, 1u);
}

TEST_CASE("Three.Concatenated.Pipelines.6L.2W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{6}, 2u);
}

TEST_CASE("Three.Concatenated.Pipelines.6L.3W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{6}, 3u);
}

TEST_CASE("Three.Concatenated.Pipelines.6L.4W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{6}, 4u);
}

TEST_CASE("Three.Concatenated.Pipelines.6L.5W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{6}, 5u);
}

TEST_CASE("Three.Concatenated.Pipelines.6L.6W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{6}, 6u);
}

TEST_CASE("Three.Concatenated.Pipelines.6L.7W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{6}, 7u);
}

TEST_CASE("Three.Concatenated.Pipelines.6L.8W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{6}, 8u);
}

// 7 lines per pipeline; executor size sweeps 1..8 workers.
TEST_CASE("Three.Concatenated.Pipelines.7L.1W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{7}, 1u);
}

TEST_CASE("Three.Concatenated.Pipelines.7L.2W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{7}, 2u);
}

TEST_CASE("Three.Concatenated.Pipelines.7L.3W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{7}, 3u);
}

TEST_CASE("Three.Concatenated.Pipelines.7L.4W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{7}, 4u);
}

TEST_CASE("Three.Concatenated.Pipelines.7L.5W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{7}, 5u);
}

TEST_CASE("Three.Concatenated.Pipelines.7L.6W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{7}, 6u);
}

TEST_CASE("Three.Concatenated.Pipelines.7L.7W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{7}, 7u);
}

TEST_CASE("Three.Concatenated.Pipelines.7L.8W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{7}, 8u);
}

// 8 lines per pipeline; executor size sweeps 1..8 workers.
TEST_CASE("Three.Concatenated.Pipelines.8L.1W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{8}, 1u);
}

TEST_CASE("Three.Concatenated.Pipelines.8L.2W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{8}, 2u);
}

TEST_CASE("Three.Concatenated.Pipelines.8L.3W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{8}, 3u);
}

TEST_CASE("Three.Concatenated.Pipelines.8L.4W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{8}, 4u);
}

TEST_CASE("Three.Concatenated.Pipelines.8L.5W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{8}, 5u);
}

TEST_CASE("Three.Concatenated.Pipelines.8L.6W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{8}, 6u);
}

TEST_CASE("Three.Concatenated.Pipelines.8L.7W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{8}, 7u);
}

TEST_CASE("Three.Concatenated.Pipelines.8L.8W" * doctest::timeout(300)) {
  three_concatenated_pipelines(size_t{8}, 8u);
}

// ----------------------------------------------------------------------------
// pipeline (SPSP) and conditional task.  pipeline has L lines, W workers
//
// O -> SPSP -> conditional_task
//        ^            |
//        |____________|
// ----------------------------------------------------------------------------

// Runs one four-pipe pipeline (serial, parallel, serial, parallel) with L
// lines on an executor with w workers, looping back into it through a
// condition task:
//
//   initial -> pipeline -> conditional -> (0) pipeline | (1) terminal
//
// Each pass feeds N items through the pipeline. The condition task verifies
// the pass, resets the shared state, increments N, and returns 0 (first
// successor, the pipeline) while N < maxN, otherwise 1 (terminal).
//
// @param L number of lines given to the pipeline
// @param w number of workers given to the executor
void looping_pipelines(size_t L, unsigned w) {

  tf::Executor executor(w);

  const size_t maxN = 100;

  // source[i] == i
  std::vector<int> source(maxN);
  std::iota(source.begin(), source.end(), 0);

  // One row per line, one column per pipe.
  std::vector<std::array<int, 4>> mybuffer(L);

  tf::Taskflow taskflow;

  // Per-pipe invocation counters; the parallel pipes (2 and 4) use atomics.
  size_t j1 = 0;
  size_t j3 = 0;
  std::atomic<size_t> j2{0};
  std::atomic<size_t> j4{0};

  // Values gathered by the parallel pipes, each guarded by its own mutex.
  std::mutex guard2;
  std::mutex guard4;
  std::vector<int> collection2;
  std::vector<int> collection4;

  // Running total of items fed so far, compared against pl.num_tokens().
  size_t cnt = 0;

  // Number of items fed through the pipeline in the current pass.
  size_t N = 0;

  tf::Pipeline pl(L,
    // pipe 1 (serial): emit source[j1] + 1, stop after N items
    tf::Pipe{tf::PipeType::SERIAL, [&, L](auto& pf) {
      if (j1 == N) {
        pf.stop();
        return;
      }
      REQUIRE(j1 == source[j1]);
      REQUIRE(pf.token() % L == pf.line());
      auto& slot = mybuffer[pf.line()][pf.pipe()];
      slot = source[j1] + 1;
      ++j1;
    }},

    // pipe 2 (parallel): record the upstream value and pass on value + 1
    tf::Pipe{tf::PipeType::PARALLEL, [&, L](auto& pf) {
      REQUIRE(j2++ < N);
      std::lock_guard<std::mutex> hold(guard2);
      REQUIRE(pf.token() % L == pf.line());
      auto& row = mybuffer[pf.line()];
      const int upstream = row[pf.pipe() - 1];
      row[pf.pipe()] = upstream + 1;
      collection2.push_back(upstream);
    }},

    // pipe 3 (serial): check the upstream value in order and pass on value + 1
    tf::Pipe{tf::PipeType::SERIAL, [&, L](auto& pf) {
      REQUIRE(j3 < N);
      REQUIRE(pf.token() % L == pf.line());
      REQUIRE(source[j3] + 2 == mybuffer[pf.line()][pf.pipe() - 1]);
      auto& row = mybuffer[pf.line()];
      row[pf.pipe()] = row[pf.pipe() - 1] + 1;
      ++j3;
    }},

    // pipe 4 (parallel): record the upstream value
    tf::Pipe{tf::PipeType::PARALLEL, [&, L](auto& pf) {
      REQUIRE(j4++ < N);
      std::lock_guard<std::mutex> hold(guard4);
      REQUIRE(pf.token() % L == pf.line());
      const auto& row = mybuffer[pf.line()];
      collection4.push_back(row[pf.pipe() - 1]);
    }}
  );

  auto pipeline_task = taskflow.composed_of(pl).name("module_of_pipeline");
  auto entry_task = taskflow.emplace([](){}).name("initial");

  auto loop_task = taskflow.emplace([&](){
    // verify the pass that just finished
    REQUIRE(j1 == N);
    REQUIRE(j2 == N);
    REQUIRE(j3 == N);
    REQUIRE(j4 == N);
    REQUIRE(collection2.size() == N);
    REQUIRE(collection4.size() == N);

    // the parallel pipes append in no fixed order; sort before comparing
    std::sort(collection2.begin(), collection2.end());
    std::sort(collection4.begin(), collection4.end());
    for (size_t i = 0; i < N; ++i) {
      REQUIRE(collection2[i] == i + 1);
      REQUIRE(collection4[i] == i + 3);
    }
    REQUIRE(pl.num_tokens() == cnt);

    // reset the shared state for the next pass
    j4 = 0;
    j3 = 0;
    j2 = 0;
    j1 = 0;
    for (auto& row : mybuffer) {
      row.fill(0);
    }
    collection2.clear();
    collection4.clear();

    // the next pass feeds one more item; keep the expected token total in step
    ++N;
    cnt += N;

    // 0 selects the first successor (pipeline), 1 the second (terminal)
    if (N < maxN) {
      return 0;
    }
    return 1;
  }).name("conditional");

  auto exit_task = taskflow.emplace([](){}).name("terminal");

  entry_task.precede(pipeline_task);
  pipeline_task.precede(loop_task);
  loop_task.precede(pipeline_task, exit_task);

  executor.run(taskflow).wait();
}

// looping pipelines
// 1-line pipeline; executor size sweeps 1..8 workers.
TEST_CASE("Looping.Pipelines.1L.1W" * doctest::timeout(300)) {
  looping_pipelines(size_t{1}, 1u);
}

TEST_CASE("Looping.Pipelines.1L.2W" * doctest::timeout(300)) {
  looping_pipelines(size_t{1}, 2u);
}

TEST_CASE("Looping.Pipelines.1L.3W" * doctest::timeout(300)) {
  looping_pipelines(size_t{1}, 3u);
}

TEST_CASE("Looping.Pipelines.1L.4W" * doctest::timeout(300)) {
  looping_pipelines(size_t{1}, 4u);
}

TEST_CASE("Looping.Pipelines.1L.5W" * doctest::timeout(300)) {
  looping_pipelines(size_t{1}, 5u);
}

TEST_CASE("Looping.Pipelines.1L.6W" * doctest::timeout(300)) {
  looping_pipelines(size_t{1}, 6u);
}

TEST_CASE("Looping.Pipelines.1L.7W" * doctest::timeout(300)) {
  looping_pipelines(size_t{1}, 7u);
}

TEST_CASE("Looping.Pipelines.1L.8W" * doctest::timeout(300)) {
  looping_pipelines(size_t{1}, 8u);
}

// 2-line pipeline; executor size sweeps 1..8 workers.
TEST_CASE("Looping.Pipelines.2L.1W" * doctest::timeout(300)) {
  looping_pipelines(size_t{2}, 1u);
}

TEST_CASE("Looping.Pipelines.2L.2W" * doctest::timeout(300)) {
  looping_pipelines(size_t{2}, 2u);
}

TEST_CASE("Looping.Pipelines.2L.3W" * doctest::timeout(300)) {
  looping_pipelines(size_t{2}, 3u);
}

TEST_CASE("Looping.Pipelines.2L.4W" * doctest::timeout(300)) {
  looping_pipelines(size_t{2}, 4u);
}

TEST_CASE("Looping.Pipelines.2L.5W" * doctest::timeout(300)) {
  looping_pipelines(size_t{2}, 5u);
}

TEST_CASE("Looping.Pipelines.2L.6W" * doctest::timeout(300)) {
  looping_pipelines(size_t{2}, 6u);
}

TEST_CASE("Looping.Pipelines.2L.7W" * doctest::timeout(300)) {
  looping_pipelines(size_t{2}, 7u);
}

TEST_CASE("Looping.Pipelines.2L.8W" * doctest::timeout(300)) {
  looping_pipelines(size_t{2}, 8u);
}

// 3-line pipeline; executor size sweeps 1..8 workers.
TEST_CASE("Looping.Pipelines.3L.1W" * doctest::timeout(300)) {
  looping_pipelines(size_t{3}, 1u);
}

TEST_CASE("Looping.Pipelines.3L.2W" * doctest::timeout(300)) {
  looping_pipelines(size_t{3}, 2u);
}

TEST_CASE("Looping.Pipelines.3L.3W" * doctest::timeout(300)) {
  looping_pipelines(size_t{3}, 3u);
}

TEST_CASE("Looping.Pipelines.3L.4W" * doctest::timeout(300)) {
  looping_pipelines(size_t{3}, 4u);
}

TEST_CASE("Looping.Pipelines.3L.5W" * doctest::timeout(300)) {
  looping_pipelines(size_t{3}, 5u);
}

TEST_CASE("Looping.Pipelines.3L.6W" * doctest::timeout(300)) {
  looping_pipelines(size_t{3}, 6u);
}

TEST_CASE("Looping.Pipelines.3L.7W" * doctest::timeout(300)) {
  looping_pipelines(size_t{3}, 7u);
}

TEST_CASE("Looping.Pipelines.3L.8W" * doctest::timeout(300)) {
  looping_pipelines(size_t{3}, 8u);
}

// 4-line pipeline; executor size sweeps 1..8 workers.
TEST_CASE("Looping.Pipelines.4L.1W" * doctest::timeout(300)) {
  looping_pipelines(size_t{4}, 1u);
}

TEST_CASE("Looping.Pipelines.4L.2W" * doctest::timeout(300)) {
  looping_pipelines(size_t{4}, 2u);
}

TEST_CASE("Looping.Pipelines.4L.3W" * doctest::timeout(300)) {
  looping_pipelines(size_t{4}, 3u);
}

TEST_CASE("Looping.Pipelines.4L.4W" * doctest::timeout(300)) {
  looping_pipelines(size_t{4}, 4u);
}

TEST_CASE("Looping.Pipelines.4L.5W" * doctest::timeout(300)) {
  looping_pipelines(size_t{4}, 5u);
}

TEST_CASE("Looping.Pipelines.4L.6W" * doctest::timeout(300)) {
  looping_pipelines(size_t{4}, 6u);
}

TEST_CASE("Looping.Pipelines.4L.7W" * doctest::timeout(300)) {
  looping_pipelines(size_t{4}, 7u);
}

TEST_CASE("Looping.Pipelines.4L.8W" * doctest::timeout(300)) {
  looping_pipelines(size_t{4}, 8u);
}

// 5-line pipeline; executor size sweeps 1..8 workers.
TEST_CASE("Looping.Pipelines.5L.1W" * doctest::timeout(300)) {
  looping_pipelines(size_t{5}, 1u);
}

TEST_CASE("Looping.Pipelines.5L.2W" * doctest::timeout(300)) {
  looping_pipelines(size_t{5}, 2u);
}

TEST_CASE("Looping.Pipelines.5L.3W" * doctest::timeout(300)) {
  looping_pipelines(size_t{5}, 3u);
}

TEST_CASE("Looping.Pipelines.5L.4W" * doctest::timeout(300)) {
  looping_pipelines(size_t{5}, 4u);
}

TEST_CASE("Looping.Pipelines.5L.5W" * doctest::timeout(300)) {
  looping_pipelines(size_t{5}, 5u);
}

TEST_CASE("Looping.Pipelines.5L.6W" * doctest::timeout(300)) {
  looping_pipelines(size_t{5}, 6u);
}

TEST_CASE("Looping.Pipelines.5L.7W" * doctest::timeout(300)) {
  looping_pipelines(size_t{5}, 7u);
}

TEST_CASE("Looping.Pipelines.5L.8W" * doctest::timeout(300)) {
  looping_pipelines(size_t{5}, 8u);
}

// 6-line pipeline; executor size sweeps 1..8 workers.
TEST_CASE("Looping.Pipelines.6L.1W" * doctest::timeout(300)) {
  looping_pipelines(size_t{6}, 1u);
}

TEST_CASE("Looping.Pipelines.6L.2W" * doctest::timeout(300)) {
  looping_pipelines(size_t{6}, 2u);
}

TEST_CASE("Looping.Pipelines.6L.3W" * doctest::timeout(300)) {
  looping_pipelines(size_t{6}, 3u);
}

TEST_CASE("Looping.Pipelines.6L.4W" * doctest::timeout(300)) {
  looping_pipelines(size_t{6}, 4u);
}

TEST_CASE("Looping.Pipelines.6L.5W" * doctest::timeout(300)) {
  looping_pipelines(size_t{6}, 5u);
}

TEST_CASE("Looping.Pipelines.6L.6W" * doctest::timeout(300)) {
  looping_pipelines(size_t{6}, 6u);
}

TEST_CASE("Looping.Pipelines.6L.7W" * doctest::timeout(300)) {
  looping_pipelines(size_t{6}, 7u);
}

TEST_CASE("Looping.Pipelines.6L.8W" * doctest::timeout(300)) {
  looping_pipelines(size_t{6}, 8u);
}

// 7-line pipeline; executor size sweeps 1..8 workers.
TEST_CASE("Looping.Pipelines.7L.1W" * doctest::timeout(300)) {
  looping_pipelines(size_t{7}, 1u);
}

TEST_CASE("Looping.Pipelines.7L.2W" * doctest::timeout(300)) {
  looping_pipelines(size_t{7}, 2u);
}

TEST_CASE("Looping.Pipelines.7L.3W" * doctest::timeout(300)) {
  looping_pipelines(size_t{7}, 3u);
}

TEST_CASE("Looping.Pipelines.7L.4W" * doctest::timeout(300)) {
  looping_pipelines(size_t{7}, 4u);
}

TEST_CASE("Looping.Pipelines.7L.5W" * doctest::timeout(300)) {
  looping_pipelines(size_t{7}, 5u);
}

TEST_CASE("Looping.Pipelines.7L.6W" * doctest::timeout(300)) {
  looping_pipelines(size_t{7}, 6u);
}

TEST_CASE("Looping.Pipelines.7L.7W" * doctest::timeout(300)) {
  looping_pipelines(size_t{7}, 7u);
}

TEST_CASE("Looping.Pipelines.7L.8W" * doctest::timeout(300)) {
  looping_pipelines(size_t{7}, 8u);
}

// 8-line pipeline; executor size sweeps 1..8 workers.
TEST_CASE("Looping.Pipelines.8L.1W" * doctest::timeout(300)) {
  looping_pipelines(size_t{8}, 1u);
}

TEST_CASE("Looping.Pipelines.8L.2W" * doctest::timeout(300)) {
  looping_pipelines(size_t{8}, 2u);
}

TEST_CASE("Looping.Pipelines.8L.3W" * doctest::timeout(300)) {
  looping_pipelines(size_t{8}, 3u);
}

TEST_CASE("Looping.Pipelines.8L.4W" * doctest::timeout(300)) {
  looping_pipelines(size_t{8}, 4u);
}

TEST_CASE("Looping.Pipelines.8L.5W" * doctest::timeout(300)) {
  looping_pipelines(size_t{8}, 5u);
}

TEST_CASE("Looping.Pipelines.8L.6W" * doctest::timeout(300)) {
  looping_pipelines(size_t{8}, 6u);
}

TEST_CASE("Looping.Pipelines.8L.7W" * doctest::timeout(300)) {
  looping_pipelines(size_t{8}, 7u);
}

TEST_CASE("Looping.Pipelines.8L.8W" * doctest::timeout(300)) {
  looping_pipelines(size_t{8}, 8u);
}
