diff --git a/lab_2/12/Makefile b/lab_2/12/Makefile index 0ef13b8..6040f26 100644 --- a/lab_2/12/Makefile +++ b/lab_2/12/Makefile @@ -16,3 +16,6 @@ clear: clean zip: clean zip $(folder) Makefile *.cpp *.h + +profile: + g++ -pg *.cpp -o $@ $^ \ No newline at end of file diff --git a/lab_2/12/chain-random-walk.cpp b/lab_2/12/chain-random-walk.cpp index 780b048..4d92e35 100644 --- a/lab_2/12/chain-random-walk.cpp +++ b/lab_2/12/chain-random-walk.cpp @@ -19,7 +19,9 @@ namespace { { double sum = 0.0; for(long i = 0; i < size; i++) sum += config[i]; - for(long i = 0; i < size; i++) config[i] -= sum/size; + + const float ratio = sum/size; + for(long i = 0; i < size; i++) config[i] -= ratio; } const int el_out_max = 9; // show at most the first nine elements @@ -37,6 +39,7 @@ float crw::elongation(long size, float config[]) return max - min; } + float* crw::step(long size, float previous[]) { // allocate the next configuration @@ -44,8 +47,7 @@ float* crw::step(long size, float previous[]) // first, let the chain contract: each element is attracted by its neighbours for(long i = 0; i < size; i++) - config[i] = 0.5*previous[i] + 0.25*previous[(i-1) % size] - + 0.25*previous[(i+1) % size]; + config[i] = 0.5*previous[i] + 0.25*previous[(i-1) % size]*2; stochastic_unit_step(size, config); // actual random walk step shift_centre_to_origin(size, config); // shift such that the average remains zero diff --git a/lab_2/12/const_Nm_timings.csv b/lab_2/12/const_Nm_timings.csv new file mode 100644 index 0000000..2e7871a --- /dev/null +++ b/lab_2/12/const_Nm_timings.csv @@ -0,0 +1,10 @@ +1024, 36.21 +512, 36.88 +256, 43.77 +128, 40.27 +64, 40.47 +32, 43.17 +16, 47.71 +8, 50.55 +4, 49.56 +2, 54.86 diff --git a/lab_2/12/const_m_timings.csv b/lab_2/12/const_m_timings.csv new file mode 100644 index 0000000..7b2583e --- /dev/null +++ b/lab_2/12/const_m_timings.csv @@ -0,0 +1,10 @@ +1024, 36.24 +512, 18.53 +256, 9.72 +128, 4.94 +64, 2.56 +32, 1.36 +16, 0.75 +8, 0.40 +4, 0.19 +2, 0.11 \ No 
newline at end of file diff --git a/lab_2/12/memleak.cpp b/lab_2/12/memleak.cpp index 5833225..08c9583 100644 --- a/lab_2/12/memleak.cpp +++ b/lab_2/12/memleak.cpp @@ -49,7 +49,7 @@ int main(int argc, char** argv) // Simple fix to memleak: float* old_configuration = present_configuration; present_configuration = crw::step(size, present_configuration); - delete old_configuration; + delete[] old_configuration; float present_elongation = crw::elongation(size, present_configuration); if(present_elongation > extreme_elongation) diff --git a/lab_2/12/plot_speedup.py b/lab_2/12/plot_speedup.py new file mode 100644 index 0000000..c3dd99b --- /dev/null +++ b/lab_2/12/plot_speedup.py @@ -0,0 +1,38 @@ +import matplotlib.pyplot as plt +import matplotlib.cbook as cbook + +import numpy as np +import pandas as pd + +f1 = open('const_m_timings.csv', 'r') +f2 = open('../11/const_m_timings.csv', 'r') +x1 = [] +y1 = [] +for l,p in zip(f1.readlines(), f2.readlines()): + l = l.split(',') + p = p.split(',') + x1.append(p[0]) + y1.append(float(p[1])/float(l[1])) + +f1 = open('const_Nm_timings.csv', 'r') +f2 = open('../11/const_Nm_timings.csv', 'r') +x2 = [] +y2 = [] +for l,p in zip(f1.readlines(), f2.readlines()): + l = l.split(',') + p = p.split(',') + x2.append(p[0]) + y2.append(float(p[1])/float(l[1])) + +plt.plot(x1, y1, label = 'constant m') +plt.plot(x2, y2, label = 'constant Nm') +for i in range(len(x1)): + plt.text(i,y1[i],f'{y1[i]:.2f}', color="blue") +for i in range(len(x2)): + plt.text(i,y2[i],f'{y2[i]:.2f}', color="orange") +plt.title('Speedup after optimization') +plt.xlabel('N') +plt.ylabel('xspeedup') +plt.grid() +plt.legend() +plt.show() diff --git a/lab_2/12_answer.md b/lab_2/12_answer.md new file mode 100644 index 0000000..3b3a23e --- /dev/null +++ b/lab_2/12_answer.md @@ -0,0 +1,24 @@ +# What did work: +## Removing a double calculation when step +I changed `config[i] = 0.5*previous[i] + 0.25*previous[(i-1) % size]+ 0.25*previous[(i+1) % size];` to `config[i] = 
0.5*previous[i] + 0.25*previous[(i-1) % size]*2;` in step +This accounted for all of the improvement, as far as I can tell + + +## In shift_centre_to_origin compute the ratio before the for loop +``` +const float ratio = sum/size; +for(long i = 0; i < size; i++) config[i] -= ratio; +``` +This did not make a noticeable difference + +# What did not work and actually made it slower: +- Trying to find a fancy way to get a random bool in stochastic_unit_step +- Using the std::minmax_element function in elongation +- Changing the step function to be a void function that modified the array in place made it about 8 seconds slower + +# Compiler optimizations +I tried these flags: -Ofast -march=native -flto -fno-signed-zeros -fno-trapping-math +The improvements were within the margin of error + +# Speedup +I got at best a 1.1 times speedup. See 12_speedup.png \ No newline at end of file diff --git a/lab_2/12_speedup.png b/lab_2/12_speedup.png new file mode 100644 index 0000000..e52cc27 Binary files /dev/null and b/lab_2/12_speedup.png differ