<- previous index next ->
See www.csee.umbc.edu/help/nasm/nasm_64.shtml for notes on using debugger.
A program that prints where its sections are allocated
(in virtual memory) is where_64.asm
My output, yours should be different, is
where_64.out
; where_64.asm print addresses of sections
; Assemble: nasm -g -f elf64 -l where_64.lst where_64.asm
; Link: gcc -g3 -m64 -o where_64 where_64.o
; Run: ./where_64 > where_64.out
; Output: you need to run it, on my computer
; data a: at 601034
; bss b: at 60108C
; rodata c: at 400640
; code main: at 400530
;
; to debug, typically after segfault
; gdb where_64
; run
; break main
; disassemble main
; backtrace
; hopefully this will point to where the problem is in source
extern printf ; the C function, to be called
section .data ; Data section, initialized variables
a: db 0,1,2,3,4,5,6,7 ; eight bytes; only the address of a is printed
fmt: db "data a: at %lX",10 ; fmt is ONE string spread over four db lines:
db "bss b: at %lX",10 ; each line prints one address in hex (%lX)
db "rodata c: at %lX",10 ; and ends with a newline (10)
db "code main: at %lX",10,0 ; the final 0 terminates the whole string
section .bss ; reserved storage, uninitialized
b: resq 8 ; room for 8 quadwords; only the address of b is printed
section .rodata ; read only initialized storage
c: db 7,6,5,4,3,2,1,0 ; eight bytes; only the address of c is printed
section .text ; Code section.
global main ; the standard gcc entry point
; main: print the addresses of a, b, c and main twice with printf.
; The first call obtains each address with lea reg,[label],
; the second with mov reg,label; both calls print the same values.
; printf arguments: rdi = format, rsi, rdx, rcx, r8 = the four values,
; rax = 0 because no floating point values are passed.
; Returns 0 in rax.
; NOTE(review): rbx is pushed and popped but never used in this routine.
; NOTE(review): if main is entered with rsp = 8 mod 16 (System V AMD64),
; the two pushes leave rsp at 8 mod 16 at both calls, while the ABI asks
; for 16-byte alignment at a call -- confirm before reusing this pattern.
main: ; the program label for the entry point
push rbp ; save caller's frame pointer
mov rbp,rsp ; set up our own frame pointer
push rbx ; save callers registers
; first printf: addresses computed with lea
mov rdi,fmt ; pass address of fmt to printf
lea rsi,[a] ; using load effective address
lea rdx,[b] ; using load effective address
lea rcx,[c] ; using load effective address
lea r8,[main] ; using load effective address
mov rax,0 ; no float
call printf ; Call C function
; second printf: the same addresses loaded as immediate values with mov
mov rdi,fmt ; pass address of fmt to printf
mov rsi,a ; just loading address
mov rdx,b ; just loading address
mov rcx,c ; just loading address
mov r8,main ; just loading address
mov rax,0 ; no float
call printf ; Call C function
pop rbx ; restore callers registers
mov rsp,rbp ; discard anything left on our frame
pop rbp ; restore caller's frame pointer
mov rax,0 ; normal, no error, return value
ret ; return
gdb disassemble main produces:
(gdb) disassemble main
Dump of assembler code for function main:
0x0000000000400530 <+0>:push %rbp
0x0000000000400531 <+1>:mov %rsp,%rbp
0x0000000000400534 <+4>:push %rbx
0x0000000000400535 <+5>:movabs $0x60103c,%rdi
0x000000000040053f <+15>:lea 0x601034,%rsi
0x0000000000400547 <+23>:lea 0x60108c,%rdx
0x000000000040054f <+31>:lea 0x400640,%rcx
0x0000000000400557 <+39>:lea 0x400530,%r8
0x000000000040055f <+47>:mov $0x0,%eax
0x0000000000400564 <+52>:callq 0x400410
0x0000000000400569 <+57>:movabs $0x60103c,%rdi
0x0000000000400573 <+67>:movabs $0x601034,%rsi
0x000000000040057d <+77>:movabs $0x60108c,%rdx
0x0000000000400587 <+87>:movabs $0x400640,%rcx
0x0000000000400591 <+97>:movabs $0x400530,%r8
0x000000000040059b <+107>:mov $0x0,%eax
0x00000000004005a0 <+112>:callq 0x400410
0x00000000004005a5 <+117>:pop %rbx
0x00000000004005a6 <+118>:mov %rbp,%rsp
0x00000000004005a9 <+121>:pop %rbp
0x00000000004005aa <+122>:mov $0x0,%eax
0x00000000004005af <+127>:retq
End of assembler dump.
part of where_64.lst
Note that the addresses within each section start at zero
.data
18 00000000 0001020304050607 a: db 0,1,2,3,4,5,6,7
19 00000008 646174612020202061- fmt: db "data a: at %lX",10
20 00000011 3A20617420256C580A
21 0000001A 627373202020202062- db "bss b: at %lX",10
22 00000023 3A20617420256C580A
23 0000002C 726F64617461202063- db "rodata c: at %lX",10
24 00000035 3A20617420256C580A
25 0000003E 636F6465206D61696E- db "code main: at %lX",10,0
26 00000047 3A20617420256C580A-
27 00000050 00
28
.bss
30 00000000 b: resq 8
.rodata
33 00000000 0706050403020100 c: db 7,6,5,4,3,2,1,0
.text
38 00000000 55 push rbp
39 00000001 4889E5 mov rbp,rsp
40 00000004 53 push rbx
42 00000005 48BF- mov rdi,fmt
43 00000007 [0800000000000000]
44 0000000F 488D3425[00000000] lea rsi,[a]
45 00000017 488D1425[00000000] lea rdx,[b]
46 0000001F 488D0C25[00000000] lea rcx,[c]
Options that may allow you to debug
Typical assembly language programs may use only registers,
or may keep most of their variables in registers.
Storing variables in memory may be needed for debugging.
This example starts with a small C program, fib.c,
then codes it in efficient assembly language, fib_64l.asm.
The output, which shows overflow, is fib_64l.out.
A second version keeps the variables in memory: fib_64m.asm
// fib.c  same computation as fib_64l.asm and fib_64m.asm
// Prints a title line, then 95 successive values of the sequence
// 1, 2, 3, 5, 8, ... one per line, right-justified in 21 columns.
// The run is deliberately long: the lecture's output shows overflow
// in the last values.
#include <stdio.h>
int main(int argc, char *argv[])
{
  long int c = 95; // loop counter, counts down to 0
  long int a = 1;  // current number, becomes next
  long int b = 2;  // next number, becomes sum a+b
  long int d;      // temp, holds the old a while a is replaced

  (void)argc;      // command line is not used
  (void)argv;

  printf("fibinachi numbers\n");
  for( ; c!=0; c--) // c is already initialized above
  {
    printf("%21ld\n",a);
    d = a;
    a = b;
    b = d+b;
  }
  return 0;        // normal exit, as the assembly versions do with rax = 0
}
implement fib.c using registers
; fib_64l.asm using 64 bit registers to implement fib.c
;
; int main(void)   -- System V AMD64 calling convention, NASM syntax
; Prints a title line, then 95 successive values of 1, 2, 3, 5, 8, ...
; Register roles inside the loop:
;   rax = current number   (volatile, saved on the stack around printf)
;   rbx = next number      (callee-saved, so it survives printf)
;   rcx = loop counter     (volatile, saved on the stack around printf)
;   rdx = temp, old current number
; Returns 0 in rax.
        global  main
        extern  printf

        section .data
format: db      '%21ld', 10, 0
title:  db      'fibinachi numbers', 10, 0

        section .text
main:
        push    rbp                     ; set up stack
        push    rbx                     ; rbx is callee-saved and is used below,
                                        ; so it must be preserved for our caller
        sub     rsp, 8                  ; padding: keeps rsp 16-byte aligned at
                                        ; every call after the two pushes above

        mov     rdi, title              ; arg 1 is a pointer
        mov     rax, 0                  ; no vector registers in use
        call    printf

        mov     rcx, 95                 ; rcx will count down from 95 to 0
        mov     rax, 1                  ; rax will hold the current number
        mov     rbx, 2                  ; rbx will hold the next number
print:
        ; printf may destroy rax and rcx, so save them before the call and
        ; restore them afterwards.  Two pushes keep the stack 16-byte aligned.
        push    rax                     ; 32-bit stack operands are not encodable
        push    rcx                     ; in 64-bit mode, so we use the "r" names
        mov     rdi, format             ; arg 1 is a pointer
        mov     rsi, rax                ; arg 2 is the current number
        mov     rax, 0                  ; no vector registers in use
        call    printf
        pop     rcx
        pop     rax

        mov     rdx, rax                ; save the current number
        mov     rax, rbx                ; next number is now current
        add     rbx, rdx                ; get the new next number
        dec     rcx                     ; count down
        jnz     print                   ; if not done counting, do some more

        add     rsp, 8                  ; drop the alignment padding
        pop     rbx                     ; restore caller's rbx
        pop     rbp                     ; restore stack
        mov     rax, 0                  ; normal exit
        ret
implement fib.c using memory
; fib_64m.asm using 64 bit memory more like C code
; // fib.c same computation as fib_64m.asm
; #include <stdio.h>
; int main(int argc, char *argv[])
; {
; long int c = 95; // loop counter
; long int a = 1; // current number, becomes next
; long int b = 2; // next number, becomes sum a+b
; long int d; // temp
; printf("fibinachi numbers\n");
; for(c=c; c!=0; c--)
; {
; printf("%21ld\n",a);
; d = a;
; a = b;
; b = d+b;
; }
; }
;
; int main(void)   -- System V AMD64 calling convention, NASM syntax
; The variables a, b and c live in memory and are reloaded after each
; printf, so no register has to survive the call; only volatile
; registers (rax, rcx, rdx, rsi, rdi) are used.
; Returns 0 in rax.
        global  main
        extern  printf

        section .bss
d:      resq    1                       ; temp unused, kept in register rdx

        section .data
c:      dq      95                      ; loop counter
a:      dq      1                       ; current number, becomes next
b:      dq      2                       ; next number, becomes sum a+b
format: db      '%21ld', 10, 0
title:  db      'fibinachi numbers', 10, 0

        section .text
main:
        push    rbp                     ; set up stack (also aligns rsp for calls)
        mov     rdi, title              ; arg 1 is a pointer
        mov     rax, 0                  ; no vector registers in use
        call    printf
print:
        mov     rdi, format             ; arg 1 is a pointer
        mov     rsi, [a]                ; arg 2 is the current number
        mov     rax, 0                  ; no vector registers in use
        call    printf

        mov     rdx, [a]                ; d = a, kept in a register
        mov     rax, [b]                ; rax is scratch here; a callee-saved
                                        ; register such as rbx is not needed
        mov     [a], rax                ; a = b, next number is now current, in ram
        add     rax, rdx                ; get the new next number, d+b
        mov     [b], rax                ; b = d+b, store in ram

        mov     rcx, [c]                ; get loop count
        dec     rcx                     ; count down, sets the zero flag
        mov     [c], rcx                ; save in ram (mov leaves flags unchanged)
        jnz     print                   ; if not done counting, do some more

        pop     rbp                     ; restore stack
        mov     rax, 0                  ; normal exit
        ret                             ; return to operating system
Operating Systems use pages
Not a joke.
Operating systems run many processes. See Windows Task Manager,
or on Linux run top. Typically there are more than 50 processes,
using lots of RAM.
Hard drives (disks) are read and written by sector, not by byte or word.
Operating systems allocate memory to processes by page, not by byte or word.
Consider subset of three processes, P1, P2, P3 running in the OS.
Even if only one byte were needed by a .data or .bss segment,
the OS would allocate a whole page. Large segments may take many pages.
A possible RAM layout, not sequential, scattered
Page user
0-56 OS
57 P3 .data
58 P2 .bss
90-91 P3 .text
92 P1 .bss
93 P3 .bss
94-96 P2 .data
8250 P2 .text
9470 P1 .data
9480 P1 .text
Remember all addresses may start with zero in every segment of
every process. They may be relocated during linking to very
large addresses, yet many processes may get linked to
the same address. OH! How can these processes run at the
same time in the same RAM?
Yes, virtual memory! It has been around for many years.
Efficient virtual memory uses a TLB, Translation Lookaside Buffer.
In the OS there is a Page Table for every process. The OS
keeps a free page list: the pages that are available to be assigned
to a process when it starts. We saw a cache in the previous
lecture; technically, the TLB is a cache that the OS and hardware
use for the page tables.
The addresses you see in a load map or debugger are virtual
addresses, not the real RAM address.
The virtual address and physical address do not necessarily
have to be the same number of bits. The operation of virtual
memory is to convert a virtual address to a physical address:
Programmers Virtual Address
+----------------------------+-------------+
| Virtual Page Number VPN | page offset |
+----------------------------+-------------+
| |
v |
TLB |
| |
v v
+--------------------------+-------------+
| Physical Page Number PPN | page offset |
+--------------------------+-------------+
RAM Physical Address
Follow the virtual address to ultimately a physical address.
One obvious fact: the size of a page must be a power of two bytes,
e.g. 4KB. The same holds for a sector, which may be a different size,
as small as 256 bytes.
This is a very simplified example. Actual hardware is much
more complicated. Note the TLB in our first lecture.
Lecture 1 architecture
<- previous index next ->