1
Machine-Level
Representation
of Programs (x86-64)
2
Outline
• x86-64 Machine-Level Programming– Procedures and Stack– Alignment– Byte Ordering
• Suggested reading– Chap 3.13
IA32
Example: swap()
void swap(int *xp, int *yp){
int t0 = *xp;int t1 = *yp;*xp = t1;*yp = t0;
}
swap:pushl %ebp
movl %esp,%ebp
pushl %ebx
movl 12(%ebp),%ecx
movl 8(%ebp),%edx
movl (%ecx),%eax
movl (%edx),%ebx
movl %eax,(%edx)
movl %ebx,(%ecx)
movl -4(%ebp),%ebx
movl %ebp,%esp
popl %ebp
ret
Setup
Body
Finish
x86-64
Example: swap()
void swap(int *xp, int *yp){
int t0 = *xp;int t1 = *yp;*xp = t1;*yp = t0;
}
swap:movl (%rdi), %edx
movl (%rsi), %eax
movl %eax, (%rdi)
movl %edx, (%rsi)
retq
Operands passed in registers: first (xp) in %rdi, second (yp) in %rsi; 64-bit pointers
No stack operation required
32-bit data: held in registers %eax and %edx; movl operation
Example: swap()
swap:movl (%rdi), %edx
movl (%rsi), %eax
movl %eax, (%rdi)
movl %edx, (%rsi)
retq
Swap long int in 64-bit
Example: swap()
void swap(long int *xp, long int *yp){
long int t0 = *xp;long int t1 = *yp;*xp = t1;*yp = t0;
}
swap:movq (%rdi), %rdx
movq (%rsi), %rax
movq %rax, (%rdi)
movq %rdx, (%rsi)
retq
64-bit data: held in registers %rax and %rdx; movq operation (“q” stands for quad-word)
• IA32/Linux Stack Frame
– Caller Stack Frame• Arguments for this call• Return Address (pushed by “call”)
– Callee Stack Frame• Old %ebp (saved by “push %ebp”)• Saved registers• Local variables• Arguments for next call
7
Procedures - Stack
Ret Addr
Arguments
Old %ebp
Saved
registers
Local
variables
Arguments
%ebp
%esp
frame pointer
stack pointer
• IA32/Linux Register Usage– %eax, %edx, %ecx
• Caller saves prior to the call if the values are used later
– %eax• Return integer value
– %ebx, %esi, %edi• Callee saves if it wants to use them
– %esp, %ebp• special 8
Procedures - Register
%eax
%edx
%ecx
%ebx
%esi
%edi
%esp
%ebp
Caller-SaveCaller-Save
Callee-Save Callee-Save
SpecialSpecial
%rbp
%rsp
%rdi
%rsi
%rdx
%rcx
%rbx
%rax
%r15
%r14
%r13
%r12
%r11
%r10
%r9
%r8
• X86-64/Linux Register Usage– Caller-Save
• %rax %rcx %rdx %rsi %rdi %r8 %r9 %r10 %r11
– Callee-Save• %rbx %rbp• %r12 %r13 %r14 %r15
– Special• %rsp (stack pointer); %r11 is a caller-saved scratch register that is also used for linking
9
Procedures - Register
%rbp
%rsp
%rdi
%rsi
%rdx
%rcx
%rbx
%rax
%r15
%r14
%r13
%r12
%r11
%r10
%r9
%r8
• X86-64/Linux Register Usage– Arguments passed via regs
• %rdi %rsi %rdx %rcx %r8 %r9 (in this order)
• If more than 6 integer parameters, then pass the rest on the stack
– Return value by %rax– No frame pointer– Special
• %rsp stack pointer• %r11 used for linking
10
Procedures - Register
%rbp
%rsp
%rdi
%rsi
%rdx
%rcx
%rbx
%rax
%r15
%r14
%r13
%r12
%r11
%r10
%r9
%r8
%rdi arg#1
%rsi arg#2
%rdx arg#3
%rcx arg#4
%r9 arg#6
%r8 arg#5
%rax ret
%rsp stack
%r11 link
• x86-64/Linux Stack Frame
– Caller Stack Frame• Arguments passed via registers• Return Address (pushed by “call”)
– Callee Stack Frame• Saved registers• Local variables
11
Procedures - Stack
Ret Addr
Saved
registers
Local
variables%rspstack pointer
12
X86-64 Swap
• Operands passed in registers– First (xp) in %rdi, second (yp) in %rsi
• No stack operations required (except ret)• Avoid stack
– Can hold all local information in registers
void swap(long *xp, long *yp){
long t0 = *xp;long t1 = *yp;*xp = t1;*yp = t0;
}
swap:movq (%rdi), %rdx
movq (%rsi), %rax
movq %rax, (%rdi)
movq %rdx, (%rsi)
ret
Local Variables in Stack
• Avoid Stack Pointer change– Can hold all information within small
windows beyond stack pointer
void swap_a(long *xp, long *yp){ volatile long loc[2];
loc[0] = *xp;loc[1] = *yp;*xp = loc[1];*yp = loc[0];
}
swap_a:movq (%rdi), %rax
movq %rax, -24(%rsp)
movq (%rsi), %rax
movq %rax, -16(%rsp)
movq -16(%rsp), %rax
movq %rax, (%rdi)
movq -24(%rsp), %rax
movq %rax, (%rsi)
ret
ret ptr
unused
loc[1]
loc[0]
%rsp
-8
-16
-24
Without Stack Frame
• No values need to be held while swap is being invoked
• No callee save registers needed
long scount = 0;
void swap_b(long a[], int i){ swap(&a[i], &a[i+1]); scount++;}
swap_b:movslq %esi,%rsi # sign extend
leaq (%rdi,%rsi,8), %rdi # &a[i]
leaq 8(%rdi), %rsi # &a[i+1] (%rdi already holds &a[i])
call swap # swap()
incq scount(%rip) # scount++;
ret
ret ptr1
ret ptr2
. . .
%rsp
execute in swap
14
Call using Jump
• Directly return from swap
• Possible since swap is a “tail call”
long scount = 0;
void swap_c(long a[], int i){ swap(&a[i], &a[i+1]);}
swap_c:movslq %esi,%rsi # Sign extend
leaq (%rdi,%rsi,8), %rdi # &a[i]
leaq 8(%rdi), %rsi # &a[i+1] (%rdi already holds &a[i])
jmp swap # swap()
ret ptr1
. . .
%rsp
execute in swap
15
Stack Frame Example
• Keep values of a and i in callee save registers
• Must set up stack frame to save these registers
long sum = 0;
void swap_d(long a[], int i){
swap(&a[i], &a[i+1]);sum += a[i];
}
swap_d:movq %rbx, -16(%rsp)
movslq %esi,%rbx
movq %r12, -8(%rsp)
movq %rdi, %r12
leaq (%rdi,%rbx,8), %rdi
subq $16, %rsp
leaq 8(%rdi), %rsi
call swap
movq (%r12,%rbx,8), %rax
addq %rax, sum(%rip)
movq (%rsp), %rbx
movq 8(%rsp), %r12
addq $16, %rsp
ret
Understanding x86-64 Stack Frame
swap_d:movq %rbx, -16(%rsp)
movslq %esi,%rbx
movq %r12, -8(%rsp)
movq %rdi, %r12
leaq (%rdi,%rbx,8), %rdi
subq $16, %rsp
leaq 8(%rdi), %rsi
. . .
addq %rax, sum(%rip)
movq (%rsp), %rbx
movq 8(%rsp), %r12
addq $16, %rsp
ret
ret ptr
%r12
%rbx
%rsp
-8
-16
# save %rbx
# save %r12
17
Understanding x86-64 Stack Frame
swap_d:movq %rbx, -16(%rsp)
movslq %esi,%rbx
movq %r12, -8(%rsp)
movq %rdi, %r12
leaq (%rdi,%rbx,8), %rdi
subq $16, %rsp
leaq 8(%rdi), %rsi
. . .
addq %rax, sum(%rip)
movq (%rsp), %rbx
movq 8(%rsp), %r12
addq $16, %rsp
ret
ret ptr
%r12
%rbx %rsp
+8
# save %rbx
# save %r12
# allocate stack frame (move %rsp down by 16)
# restore %rbx
# restore %r12
18
Understanding x86-64 Stack Frame
swap_d:movq %rbx, -16(%rsp)
movslq %esi,%rbx
movq %r12, -8(%rsp)
movq %rdi, %r12
leaq (%rdi,%rbx,8), %rdi
subq $16, %rsp
leaq 8(%rdi), %rsi
. . .
addq %rax, sum(%rip)
movq (%rsp), %rbx
movq 8(%rsp), %r12
addq $16, %rsp
ret
ret ptr
%r12
%rbx
%rsp
-8
# save %rbx
# save %r12
# allocate stack frame (move %rsp down by 16)
# restore %rbx
# restore %r12
# deallocate stack frame (move %rsp back up by 16)
-16
19
20
Features of Stack Frame
• Allocate entire frame at once– All stack accesses can be relative to %rsp– Do by decrementing stack pointer– Can delay allocation
• Simple deallocation– Increment stack pointer– No base/frame pointer needed
21
Alignment
| Bytes | IA32 Type           | IA32 Alignment          | x86-64 Type     | x86-64 Alignment |
|-------|---------------------|-------------------------|-----------------|------------------|
| 1     | char                | No restriction          | char            | No restriction   |
| 2     | short               | 0₂                      | short           | 0₂               |
| 4     | int, float, pointer | 00₂                     | int, float      | 00₂              |
| 8     | double              | 000₂ (Win), 00₂ (Lin)   | double, pointer | 000₂             |
| 12/16 | long double         | 00₂                     | long double     | 0000₂            |
(Alignment column: required low-order bits of the address, in binary.)
Example
• IA32 Linux– K = 4; double treated like a 4-byte data type
X86-64 or IA32 Windows:– K = 8; due to double element
struct s1 {char c;int i[2];double d;
} *p;
c | 3 bytes | i[0] | i[1] | 4 bytes | d      (K = 8)
p+0 p+4 p+8 p+16 p+24
c | 3 bytes | i[0] | i[1] | d                (K = 4)
p+0 p+4 p+8 p+12 p+20
22
23
Byte Ordering
IA32 (Little Endian)
Output on IA32 Characters 0-7 = [0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7]
Shorts 0-3 = [0xf1f0,0xf3f2,0xf5f4,0xf7f6]
Ints 0-1 = [0xf3f2f1f0,0xf7f6f5f4]
Long 0 = [0xf3f2f1f0]
C[0]
S[0]
I[0] I[1]
S[1] S[2] S[3]
C[1] C[2] C[3] C[4] C[5] C[6] C[7]
0xf0 0xf1 0xf2 0xf3 0xf4 0xf5 0xf6 0xf7
LSB MSB
LSB MSB LSB MSB
L[0]
24
Byte Ordering
X86-64 (Little Endian)
Output on x86-64 Characters 0-7 = [0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7]
Shorts 0-3 = [0xf1f0,0xf3f2,0xf5f4,0xf7f6]
Ints 0-1 = [0xf3f2f1f0,0xf7f6f5f4]
Long 0 = [0xf7f6f5f4f3f2f1f0]
C[0]
S[0]
I[0]
L[0]
I[1]
S[1] S[2] S[3]
C[1] C[2] C[3] C[4] C[5] C[6] C[7]
0xf0 0xf1 0xf2 0xf3 0xf4 0xf5 0xf6 0xf7
LSB MSB
LSB MSB LSB MSB