3 April 10, 2007
CPU “
0.01
0.1
1
10
100
1000
10000
100000
1000000
10000000
1970 1980 1990 2000 2010 2020
MIPS
Pentium® 4
Pentium®
486 386
286 8086
Pentium® Pro
4 April 10, 2007
65nm
2006
30nm 20nm
45nm
2008
32nm 2010
15nm
22nm 2012
10nm
Intel
32 16 8 4 2 TR
22 32 45 65 90 nm
…
2
6 April 10, 2007
i486
0
5
10
15
20
25
30
0 2 4 6 8
= ^ 1.74
i486 Pentium
Pentium Pro
Pentium 4 (Wmt)
Pentium 4 (Psc)
7 April 10, 2007
0
2
4
6
8
10
1 2 3 4 5 6 7 8 9 10
0
2
4
6
8
10
1 2 3 4 5 6 7 8 9 10( )
0
2
4
6
8
10
1 2 3 4 5 6 7 8 9 10
8 April 10, 2007
0
5
10
15
20
25
30
0 2 4 6 8
= ^ 1.74
Pentium M
i486 Pentium
Pentium Pro
Pentium 4 (Wmt)
Pentium 4 (Psc)
CPU
12 April 10, 2007
80
4.27GHz
* 1.37 SP TFLOPS
PDE
* 1 SP TFLOPS
* 0.51 SP TFLOPS
* 2 FMAC
–
2 FLOPS
Source: An 80-Tile 1.28 TFLOPS Network-on-Chip in 65 nm CMOS, ISSCC’07, Sriram Vangal, Jason Howard, Gregory Ruhl, Saurabh Dighe, Howard Wilson, James Tschanz, David Finan, Priya Iyer, Arvind Singh, Riju Jacob, Shailendra Jain, Sriram Venkataraman, Yatin Hoskote and Nitin Borkar.
14 April 10, 2007
// Pseudocode listing from the slide (not compilable C++).
Program SPMD_Emb_Par ()
{
    TYPE *tmp, *func();
    global_array Data(TYPE);
    global_array Res(TYPE);
    int Num = get_num_procs();
    int id = get_proc_id();
    // NOTE(review): N is not declared in this listing -- presumably the
    // problem size; confirm against the original slide.
    if (id==0) setup_problem(N, Data);   // only the process whose id is 0 calls setup
    // This process takes iterations id, id+Num, id+2*Num, ... below N.
    for (int I = id; I < N; I = I + Num) {
        tmp = func(I, Data);
        Res.accumulate(tmp);
    }
}
// Pseudocode listing from the slide (not compilable C++).
Program SPMD_Emb_Par ()
{
    TYPE *tmp, *func();
    global_array Data(TYPE);
    global_array Res(TYPE);
    int Num = get_num_procs();
    int id = get_proc_id();
    // NOTE(review): N is not declared in this listing -- presumably the
    // problem size; confirm against the original slide.
    if (id==0) setup_problem(N, Data);   // only the process whose id is 0 calls setup
    // This process takes iterations id, id+Num, id+2*Num, ... below N.
    for (int I = id; I < N; I = I + Num) {
        tmp = func(I, Data);
        Res.accumulate(tmp);
    }
}
// Pseudocode listing from the slide (not compilable C++).
Program SPMD_Emb_Par ()
{
    TYPE *tmp, *func();
    global_array Data(TYPE);
    global_array Res(TYPE);
    int Num = get_num_procs();
    int id = get_proc_id();
    // NOTE(review): N is not declared in this listing -- presumably the
    // problem size; confirm against the original slide.
    if (id==0) setup_problem(N, Data);   // only the process whose id is 0 calls setup
    // This process takes iterations id, id+Num, id+2*Num, ... below N.
    for (int I = id; I < N; I = I + Num) {
        tmp = func(I, Data);
        Res.accumulate(tmp);
    }
}
// Pseudocode listing from the slide (not compilable C++).
Program SPMD_Emb_Par ()
{
    TYPE *tmp, *func();
    global_array Data(TYPE);
    global_array Res(TYPE);
    int Num = get_num_procs();
    int id = get_proc_id();
    // NOTE(review): N is not declared in this listing -- presumably the
    // problem size; confirm against the original slide.
    if (id==0) setup_problem(N, Data);   // only the process whose id is 0 calls setup
    // The loop starts at `id` (the declared variable; the listing had `ID`),
    // so this process takes iterations id, id+Num, id+2*Num, ... below N.
    for (int I = id; I < N; I = I + Num) {
        tmp = func(I, Data);
        Res.accumulate(tmp);
    }
}
+
20 April 10, 2007
Joe Wolf ® C++ Fortran
10.0 June 19
Vasanth Tovinkere 3
– June 5
Victoria Gromova 3
– ® May 15
Dr. David Mackay 3
– May 1
Gary Carleton CPU Windows Vista*
April 17
Dr. Tim Mattson April 3
25 April 10, 2007
®
C++
•
•
•
•
• 32 64
• Windows* Linux* Mac OS X* Microsoft* GNU*
Maya
3D
®
Gerry Hawkins Maya Autodesk
33 April 10, 2007
Thread Setup and InitializationCRITICAL_SECTION MyMutex, MyMutex2, MyMutex3;int get_num_cpus (void) {
SYSTEM_INFO si;GetSystemInfo(&si);return (int)si.dwNumberOfProcessors;}
int nthreads = get_num_cpus ();HANDLE *threads = (HANDLE *) alloca (nthreads * sizeof (HANDLE));InitializeCriticalSection (&MyMutex);InitializeCriticalSection (&MyMutex2);InitializeCriticalSection (&MyMutex3);for (int i = 0; i < nthreads; i++) {
DWORD id;&threads[i] = CreateThread (NULL, 0, parallel_thread, i, 0, &id);}
for (int i = 0; i < nthreads; i++) {WaitForSingleObject (&threads[i], INFINITE);
}
Parallel Task Scheduling and Executionconst int MINPATCH = 150;const int DIVFACTOR = 2;typedef struct work_queue_entry_s {
patch pch;struct work_queue_entry_s *next;
} work_queue_entry_t;work_queue_entry_t *work_queue_head = NULL;work_queue_entry_t *work_queue_tail = NULL;void generate_work (patch* pchin){ int startx, stopx, starty, stopy;
int xs,ys;startx=pchin->startx; stopx= pchin->stopx;starty=pchin->starty; stopy= pchin->stopy;if(((stopx-startx) >= MINPATCH) || ((stopy-starty) >= MINPATCH)) {
int xpatchsize = (stopx-startx)/DIVFACTOR + 1;int ypatchsize = (stopy-starty)/DIVFACTOR + 1;for (ys=starty; ys<=stopy; ys+=ypatchsize)for (xs=startx; xs<=stopx; xs+=xpatchsize) {
patch pch;pch.startx = xs;pch.starty = ys;pch.stopx = MIN(xs+xpatchsize-1,stopx);pch.stopy = MIN(ys+ypatchsize-1,stopy);generate_work (&pch);}
} else {/* just trace this patch */work_queue_entry_t *q = (work_queue_entry_t *) malloc (sizeof
(work_queue_entry_t));q->pch.starty = starty; q->pch.stopy = stopy;q->pch.startx = startx; q->pch.stopx = stopx;q->next = NULL;
Thread Setup and Initialization#include "tbb/task_scheduler_init.h" #include "tbb/spin_mutex.h"tbb::task_scheduler_init init;tbb::spin_mutex MyMutex, MyMutex2;
Parallel Task Scheduling and Execution#include "tbb/parallel_for.h"#include "tbb/blocked_range2d.h"class parallel_task {public:
void operator() (const tbb::blocked_range2d<int> &r) const {for (int y = r.rows().begin(); y != r.rows().end(); ++y) {
for (int x = r.cols().begin(); x != r.cols().end(); x++) {render_one_pixel (x, y);
}} if (scene.displaymode == RT_DISPLAY_ENABLED) {
tbb::spin_mutex::scoped_lock lock (MyMutex2);for (int y = r.rows().begin(); y != r.rows().end(); ++y) {
GraphicsDrawRow(startx-1, y-1, totalx, (unsigned char *) &global_buffer[(y-starty)*totalx*3]);
}}
}parallel_task () {}
};parallel_for (tbb::blocked_range2d<int> (starty, stopy + 1, grain_size, startx, stopx + 1, grain_size), parallel_task ());
Windows
®
API Windows*
Linux* Mac OS*
2D
(Tacheon)
if (work_queue_head == NULL) {work_queue_head = q;
} else {work_queue_tail->next = q;
}work_queue_tail = q;
}}void generate_worklist (void){
patch pch;pch.startx = startx;pch.stopx = stopx;pch.starty = starty;pch.stopy = stopy;generate_work (&pch);
}bool schedule_thread_work (patch &pch){
EnterCriticalSection (&MyMutex3);work_queue_entry_t *q = work_queue_head;if (q != NULL) {
pch = q->pch;work_queue_head = work_queue_head->next;
}LeaveCriticalSection (&MyMutex3);return (q != NULL);
}generate_worklist ();
void parallel_thread (void *arg){
patch pch;while (schedule_thread_work (pch)) {
for (int y = pch.starty; y <= pch.stopy; y++) {for (int x=pch.startx; x<=pch.stopx; x++) {
render_one_pixel (x, y);}} if (scene.displaymode == RT_DISPLAY_ENABLED) {
EnterCriticalSection (&MyMutex3);for (int y = pch.starty; y <= pch.stopy; y++) {
GraphicsDrawRow(pch.startx-1, y-1, pch.stopx-pch.startx+1, (unsigned char *) &global_buffer[((y-starty)*totalx+(pch.startx-startx))*3]);
}LeaveCriticalSection (&MyMutex3);
}}
}
This example includes software developed by John E. Stone.
®
35 April 10, 2007
intel.com/software/mcdeveloper go-parallel.com
Joe Wolf ® C++ Fortran
10.0 June 19
Vasanth Tovinkere 3
– June 5
Victoria Gromova 3
– ® May 15
Dr. David Mackay 3
– May 1
Gary Carleton CPU Windows Vista*
April 17
Dr. Tim Mattson April 3
intel.com/software/products
3 … …
http://on24.com/event/36/88/3/rt/1/?eventid=36883
37 April 10, 2007
intel.com/software/mcdeveloper go-parallel.com
intel.com/software/products
3 … …
http://on24.com/event/36/88/3/rt/1/?eventid=36883
Joe Wolf ® C++ Fortran
10.0 June 19
Vasanth Tovinkere
3 – June 5
Victoria Gromova 3
– ® May 15
Dr. David Mackay
3 –
May 1
Gary Carleton CPU Windows Vista*
April 17
Dr. Tim Mattson April 3