Lecture 8

(continuing with the multithreading program from last time: finding the max of a large array)

#include <algorithm>
#include <chrono>
#include <functional>
#include <iostream>
#include <limits>
#include <thread>
#include <vector>

constexpr int VECTOR_SIZE = 10'000'000;
constexpr int NUM_THREADS = 4;

// Scans nums over the half-open index range [from, upto) and stores the
// largest element seen into `result`. When the range is empty (from >= upto),
// `result` receives std::numeric_limits<int>::min().
// The running maximum is kept in a local and written to `result` once at the end.
void FindMax(const std::vector<int>& nums, int from, int upto, int& result) {
    int largest = std::numeric_limits<int>::min();
    int idx = from;
    while (idx < upto) {
        const int candidate = nums[idx];
        largest = (candidate > largest) ? candidate : largest;
        ++idx;
    }
    result = largest;
}

// Finds the maximum of a VECTOR_SIZE-element array twice - once with a single
// linear scan and once split across NUM_THREADS worker threads - and prints
// each result followed by the elapsed time in milliseconds.
int main() {
    std::vector<int> nums(VECTOR_SIZE);

    // pretend this is unsorted
    for (int i = 0; i < VECTOR_SIZE; i++)
        nums[i] = i + 1;

    // linear scan
    std::chrono::steady_clock::time_point start, finish;

    int result = 0;
    start = std::chrono::steady_clock::now();
    FindMax(nums, 0, VECTOR_SIZE, result);
    finish = std::chrono::steady_clock::now();

    std::cout << result << '\n';

    std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(
        finish - start
    ).count() << '\n';

    std::vector<std::thread> threads;
    threads.reserve(NUM_THREADS);

    const int threadPortion = VECTOR_SIZE / NUM_THREADS;
    std::vector<int> results(NUM_THREADS, 0);

    start = std::chrono::steady_clock::now();
    for (int i = 0; i < NUM_THREADS; i++) {
        const int from = threadPortion * i;
        // the last thread also takes the leftover elements when
        // VECTOR_SIZE is not evenly divisible by NUM_THREADS
        const int upto = (i == NUM_THREADS - 1)
            ? VECTOR_SIZE
            : threadPortion * (i + 1);

        // std::thread copies (or moves) every argument it is given;
        // it never silently binds a reference, because that is bug-prone.
        // - results[i] is an output parameter, so it must be wrapped in
        //   std::ref for the thread to write into our vector element
        // - nums must be wrapped in std::cref, otherwise every thread
        //   would make its own full copy of the whole array
        threads.emplace_back(
            FindMax, std::cref(nums), from, upto, std::ref(results[i])
        );
    }

    // need to wait for the other threads to finish their work:
    // join() blocks until the given thread is done
    for (std::thread& worker : threads) {
        worker.join();
    }

    // if we read results before joining, threads may not be done yet,
    // so we had to wait above using the join function
    result = *std::max_element(results.begin(), results.end());
    finish = std::chrono::steady_clock::now();

    std::cout << result << '\n';

    std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(
        finish - start
    ).count() << '\n';

    return 0;
}

reminder - std::thread objects can be moved, but not copied.

notes on above program:

on our computers, this program is actually slower with multithreading:

other thing that may affect program performance: debug vs. release build

note - for home project, don't just reproduce code from class 1:1 - but can use as a reference

consideration: what if, instead of defining the local `best` variable, we modify `result` directly?

// create `best` var
int best = std::numeric_limits<int>::min();
// use `result` in place
result = std::numeric_limits<int>::min();