//Description: openmp
//Create Date: 2021-05-15 12:31:38
//Author: channy
Visual C++ supports the OpenMP 2.0 standard.
Qt:
QMAKE_CXXFLAGS += -fopenmp
LIBS += -fopenmp
parallel:用在一个代码段之前,表示这段代码将被多个线程并行执行。
// Every thread of the team executes this block once and prints its own
// thread number; the digits appear in whatever order the threads get there.
#pragma omp parallel
{
    const int tid = omp_get_thread_num();
    std::cout << tid;
}
//output
//1320
for:用于for循环之前,将循环的各次迭代分配到多个线程中并行执行,必须保证各次迭代之间无相关性(无数据依赖)。
#pragma omp for
for (int i = 0; i < 20; i++) {
std::cout << i << " " << omp_get_thread_num() << std::endl;
}
//invalid
// Demonstrates `master`: the whole team fills and updates the array in two
// phases, while only the master thread prints the intermediate results.
int a[5];
#pragma omp parallel
{
    // Perform some computation. The work-sharing loop ends with an implicit
    // barrier, so every element is written before any thread moves on.
    #pragma omp for
    for (int i = 0; i < 5; i++)
        a[i] = i * i;

    // Print intermediate results on the master thread only. `master` implies
    // no barrier, so the other threads skip straight past this loop.
    // Plain printf is used (printf_s is Microsoft-specific) so the snippet
    // also builds with the -fopenmp toolchains mentioned at the top.
    #pragma omp master
    for (int i = 0; i < 5; i++)
        printf("a[%d] = %d\n", i, a[i]);

    // Wait: keep the other threads from starting the second phase, which
    // modifies a[], while the master thread is still printing.
    #pragma omp barrier

    // Continue with the computation.
    #pragma omp for
    for (int i = 0; i < 5; i++)
        a[i] += i;
}
parallel for:parallel和for两条指令的结合,也是用在一个for循环之前,表示当前层for循环的代码将被多个线程并行执行。
#pragma omp parallel for
for (int i = 0; i < 10; i++) {
std::cout << i << " " << omp_get_thread_num() <<std::endl;
}
多层for循环
// collapse(2) merges the two nested loops into a single 10 * 11 iteration
// space that is then divided among the threads.
int sum = 0;
#pragma omp parallel for collapse(2)
for (int i = 0; i < 10; i++) {
    for (int j = 0; j < 11; j++) {
        // `sum` is shared by all threads, so the update has to happen inside
        // the critical section; an unprotected `sum += ...` is a data race
        // and can lose updates. The final value is 45 * 55 = 2475.
        #pragma omp critical
        {
            sum += i * j;
            std::cout << i << " " << j << " " << omp_get_thread_num() << std::endl;
        }
    }
}
其中sum是共享的,采用reduction之后,每个线程根据reduction(+: sum)的声明算出自己的sum,然后再将每个线程的sum加起来。
int sum = 0;
#pragma omp parallel for num_threads(4) reduction(+:sum)
for (int i = 0; i < 100; i++) {
sum += i;
}
std::cout << sum << std::endl;
critical:用在一段代码临界区之前,表示同一时间内该段代码只能被一个线程执行。
single:表示该段代码只被单个线程执行一次。
// Contrasts `single` with `critical` inside one parallel region.
int a = 0, b = 0;
#pragma omp parallel
{
    // single: exactly one thread of the team executes the increment.
    #pragma omp single
    {
        ++a;
    }
    // critical: every thread executes the increment, one thread at a time.
    #pragma omp critical
    {
        ++b;
    }
}
std::cout << a << " " << b << std::endl;
atomic:原子操作,多用于 ++、-- 等简单更新,仅对紧随其后的一条语句有效。
# pragma omp atomic [read | write | update | capture]
master:用于指定一段代码块由主线程执行。
sections:用在可能会被并行执行的代码段之前。
parallel sections:parallel和sections两个语句的结合。
同步
barrier:用于并行区内代码的线程同步,所有线程执行到barrier时都要停下等待,直到所有线程都执行到barrier后才继续往下执行。
// Ten threads each add 10 to the shared counter atomically, then meet at the
// barrier, so every thread prints the counter only after all additions.
int sum = 0;
#pragma omp parallel num_threads(10)
{
    #pragma omp atomic
    sum += 10;

    #pragma omp barrier

    const int seen = sum;
    std::cout << seen << std::endl;
}
ordered:用于指定并行区域内的循环按顺序执行。
private:用于指定一个变量是线程私有的。firstprivate会用原变量的值初始化各线程的私有副本,lastprivate会在循环结束后把最后一次迭代的私有值写回原变量。
// Runs the same accumulation loop three times, once per privatization clause,
// and prints the outer `tmp` after each loop so the clauses can be compared.
int tmp = 0;
// private(tmp): every thread works on its own copy of tmp. Per the OpenMP
// specification that copy is NOT initialized from the outer tmp, so the
// values printed inside this loop are indeterminate.
#pragma omp parallel for private(tmp)
for (int i = 0; i < 10; i++) {
tmp += i;
printf("%d %d %d\n", i, tmp, omp_get_thread_num());
}
// Prints the outer tmp; the private copies are not written back to it.
std::cout << "------------------" << tmp << std::endl;
// firstprivate(tmp): every thread's copy starts from the value the outer tmp
// had on entry, so each thread accumulates only the iterations it executed.
#pragma omp parallel for firstprivate(tmp)
for (int i = 0; i < 10; i++) {
tmp += i;
printf("%d %d %d\n", i, tmp, omp_get_thread_num());
}
// The outer tmp is again not updated by the threads' copies.
std::cout << "------------------" << tmp << std::endl;
// lastprivate(tmp): the copies are uninitialized as with private, but after
// the loop the outer tmp receives the copy belonging to the sequentially
// last iteration (i == 9).
#pragma omp parallel for lastprivate(tmp)
for (int i = 0; i < 10; i++) {
tmp += i;
printf("%d %d %d\n", i, tmp, omp_get_thread_num());
}
// Prints the value copied out of the last iteration's private tmp.
std::cout << "------------------" << tmp << std::endl;
int value[10] = {0};
omp_lock_t lock[10];
#pragma omp parallel for
for (int i = 0; i < 10; i++) {
omp_init_lock(&lock[i]);
}
#pragma omp parallel for
for (int i = 0; i < 10; i++) {
int index = rand() % 10;
omp_set_lock(&lock[index]);
value[index]++;
omp_unset_lock(&lock[index]);
}
#pragma omp parallel for
for (int i = 0; i < 10; i++) {
omp_destroy_lock(&lock[i]);
}
for (int i = 0; i < 10; i++) {
std::cout << value[i] << " ";
}
for指令的循环结束处默认带有隐式barrier,可以用nowait子句取消。
#pragma omp parallel
{
    // nowait drops the implicit barrier at the end of this loop, so a thread
    // that finishes its share may start on the next loop immediately.
    #pragma omp for nowait
    for (int k = 0; k < 10; ++k) {
        printf("+");
    }

    // Without nowait, all threads wait at the end of this loop.
    #pragma omp for
    for (int k = 0; k < 10; ++k) {
        printf("-");
    }

    // Not a work-sharing loop: every thread runs all ten iterations itself.
    int remaining = 10;
    while (remaining-- > 0) {
        printf("*");
    }
}
for和sections指令的“缺陷”:无法根据运行时的环境动态地进行任务划分,必须是预先能知道的任务划分的情况。
task主要适用于不规则的循环迭代(如下面的循环)和递归的函数调用
// Unit of work handed to each OpenMP task in the example: increments the
// referenced integer by one, in place.
void mytask(int& a) {
    a += 1;
}
// Task example: one thread walks the array with a data-dependent stride and
// spawns a task per visited element; the team's threads execute the tasks.
// Prints every element of the array afterwards and returns 0.
int main(int argc, char *argv[])
{
    // A constant size makes a[N] a standard fixed-size array rather than a
    // variable-length array, which is a compiler extension in C++.
    const int N = 10;
    int a[N];
    for (int i = 0; i < N; i++) {
        a[i] = 1;
    }
    #pragma omp parallel num_threads(2)
    {
        // Only one thread generates the tasks; the other thread picks them
        // up while waiting at the barrier that ends the single construct.
        #pragma omp single
        {
            int i = 0;
            while (i < N) {
                // Read the stride BEFORE creating the task: the task
                // increments a[i], possibly on another thread, so reading
                // a[i] afterwards would race with it and make the stride
                // nondeterministic.
                const int step = a[i];
                #pragma omp task
                mytask(a[i]);
                i += step;
            }
        }
    }
    // All tasks have completed by the end of the parallel region.
    for (int i = 0; i < N; i++) {
        std::cout << a[i] << std::endl;
    }
    return 0;
}
向量化