<- previous    index    next ->

Lecture 6 Branching and loops

Note: the characters < and > are interpreted by HTML, so source code that is
physically included in this page contains &lt; and &gt; rather than the symbols.
Be sure to download each file from its link, not by copying it from the HTML.

The basic integer compare instruction is  "cmp"
Following this instruction is typically one of:
  JL  label  ; jump on less than  "<"
  JLE label  ; jump on less than or equal "<="
  JG  label  ; jump on greater than ">"
  JGE label  ; jump on greater than or equal ">="
  JE  label  ; jump on equal "=="
  JNE label  ; jump on not equal "!="

After many integer arithmetic instructions
  JZ  label  ; jump on zero
  JNZ label  ; jump on non zero
  JS  label  ; jump on sign set (result negative)
  JNS label  ; jump on sign not set (result zero or positive)

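Note: Use 'cmp' rather than 'sub' followed by a sign test for comparison.
Overflow can occur on subtraction, inverting the sign of the result; the
conditional jumps that follow 'cmp' (JL, JLE, JG, JGE) take overflow into account.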

if-then-else in assembly language

Convert a "C" 'if' statement to nasm assembly ifint_64.asm
The significant features are:
1) use a compare instruction for the test
2) put a label on the start of the false branch (e.g. false1:)
3) put a label after the end of the 'if' statement (e.g. exit1:)
4) choose a conditional jump that goes to the false part
5) put an unconditional jump to the end label (e.g. exit1:) at the end of the true part

; ifint_64.asm  code ifint_64.c for nasm
; /* ifint_64.c an 'if' statement that will be coded for nasm */
; #include <stdio.h>
; int main()
; {
;   long int a=1;
;   long int b=2;
;   long int c=3;
;   if(a<b)
;     printf("true a < b \n");
;   else
;     printf("wrong on a < b \n");
;   if(b>c)
;     printf("wrong on b > c \n");
;   else
;     printf("false b > c \n");
;   return 0;
;}
; result of executing both "C" and assembly is:
; true a < b
; false b > c

        global main             ; define for linker
        extern printf           ; tell linker we need this C function

        section .data           ; Data section, initialized variables
a:      dq 1
b:      dq 2
c:      dq 3
fmt1:   db "true a < b ",10,0
fmt2:   db "wrong on a < b ",10,0
fmt3:   db "wrong on b > c ",10,0
fmt4:   db "false b > c ",10,0

        section .text
main:   push    rbp             ; set up stack

; ---- if(a<b) ... else ... ----
        mov     rax,[a]         ; a
        cmp     rax,[b]         ; compare a to b
        jge     false1          ; inverse of "<": jump to false part when a >= b
                                ; fall through: a < b is true
        mov     rdi, fmt1       ; printf("true a < b \n");
        mov     rax,0           ; no xmm used (rax still held a; printf is variadic)
        call    printf
        jmp     exit1           ; jump over false part
false1:                         ; a < b is false
        mov     rdi, fmt2       ; printf("wrong on a < b \n");
        mov     rax,0           ; no xmm used
        call    printf
exit1:                          ; finished 'if' statement

; ---- if(b>c) ... else ... ----
        mov     rax,[b]         ; b
        cmp     rax,[c]         ; compare b to c
        jle     false2          ; inverse of ">": jump to false part when b <= c
                                ; fall through: b > c is true
        mov     rdi, fmt3       ; printf("wrong on b > c \n");
        mov     rax,0           ; no xmm used (rax still held b; printf is variadic)
        call    printf
        jmp     exit2           ; jump over false part
false2:                         ; b > c is false
        mov     rdi, fmt4       ; printf("false b > c \n");
        mov     rax,0           ; no xmm used
        call    printf
exit2:                          ; finished 'if' statement

        pop     rbp             ; restore stack
        mov     rax,0           ; normal, no error, return value
        ret                     ; return 0;

loop in assembly language

Convert a "C" loop to nasm assembly loopint_64.asm
The significant features are:
1) "C" long int is 8-bytes, thus dd1[1] becomes qword [dd1+8]
   and dd1[99] becomes qword [dd1+8*99]
2) "C" long int is 8-bytes, thus dd1[i]; i++; becomes add rdi,8
   since "i" is never stored, the register rdi holds "i" (as a byte offset, i*8)
3) the 'cmp' instruction sets flags that control the jump instruction.
   cmp rdi,8*99 is like i<99 in "C"
   jne loop1 jumps if register rdi is not 8*99

; loopint_64.asm  code loopint_64.c for nasm
; /* loopint_64.c a very simple loop that will be coded for nasm */
; #include <stdio.h>
; int main()
; {
;   long int dd1[100]; // 100 could be 3 gigabytes
;   long int i;        // must be long for more than 2 gigabytes
;   dd1[0]=5;          /* be sure loop stays 1..98 */
;   dd1[99]=9;
;   for(i=1; i<99; i++) dd1[i]=7;
;   printf("dd1[0]=%ld, dd1[1]=%ld, dd1[98]=%ld, dd1[99]=%ld\n",
;          dd1[0], dd1[1], dd1[98],dd1[99]);
;   return 0;
;}
; execution output is dd1[0]=5, dd1[1]=7, dd1[98]=7, dd1[99]=9

        section .bss
dd1:    resq    100             ; reserve 100 long int
i:      resq    1               ; actually unused, kept in register

        section .data           ; Data section, initialized variables
fmt:    db "dd1[0]=%ld, dd1[1]=%ld, dd1[98]=%ld, dd1[99]=%ld",10,0

        extern printf           ; the C function, to be called

        section .text
        global main
main:   push    rbp             ; set up stack
        mov     qword [dd1],5   ; dd1[0]=5; immediate stored to memory
        mov     qword [dd1+99*8],9 ; dd1[99]=9; indexed 99 qword
        mov     rdi, 1*8        ; i=1; rdi is the byte offset, moves by 8 bytes
loop1:  mov     qword [dd1+rdi],7 ; dd1[i]=7;
        add     rdi, 8          ; i++; 8 bytes
        cmp     rdi, 8*99       ; i<99, tested as offset != 99*8
        jne     loop1           ; loop until incremented i=99

        mov     rdi, fmt        ; pass address of format (rdi no longer needed as index)
        mov     rsi, qword [dd1] ; dd1[0] first list parameter
        mov     rdx, qword [dd1+1*8] ; dd1[1] second list parameter
        mov     rcx, qword [dd1+98*8] ; dd1[98] third list parameter
        mov     r8, qword [dd1+99*8] ; dd1[99] fourth list parameter
        mov     rax, 0          ; no xmm used
        call    printf          ; Call C function

        pop     rbp             ; restore stack
        mov     rax,0           ; normal, no error, return value
        ret                     ; return 0;

logic operations in assembly language

Previously, integer arithmetic in "C" was converted to NASM assembly language. The following is very similar (cut and paste) of intarith_64.asm to intlogic_64.asm that shows the "C" operators "&" and, "|" or, "^" xor, "~" not.
intlogic_64.asm

; intlogic_64.asm  show some simple C code and corresponding nasm code
;                  the nasm code is one sample, not unique
;
; compile:  nasm -f elf64 -l intlogic_64.lst  intlogic_64.asm
; link:     gcc -m64 -o intlogic_64  intlogic_64.o
; run:      ./intlogic_64 > intlogic_64.out
;
; the output from running intlogic_64.asm and intlogic.c is
; c=15 , a=3, b=5, c=15
; c=a&b, a=3, b=5, c=1
; c=a|b, a=3, b=5, c=7
; c=a^b, a=3, b=5, c=6
; c=~a , a=3, b=5, c=-4
;
;The file  intlogic.c  is:
;  #include <stdio.h>
;  int main()
;  {
;    long int a=3, b=5, c;
;
;    c=15;
;    printf("%s, a=%ld, b=%ld, c=%ld\n","c=15 ", a, b, c);
;    c=a&b; /* and */
;    printf("%s, a=%ld, b=%ld, c=%ld\n","c=a&b", a, b, c);
;    c=a|b; /* or */
;    printf("%s, a=%ld, b=%ld, c=%ld\n","c=a|b", a, b, c);
;    c=a^b; /* xor */
;    printf("%s, a=%ld, b=%ld, c=%ld\n","c=a^b", a, b, c);
;    c=~a;  /* not */
;    printf("%s, a=%ld, b=%ld, c=%ld\n","c=~a ", a, b, c);
;    return 0;
; }

        extern printf           ; the C function to be called

; pabc "tag" : print the tag string followed by the current a, b and c.
; .str is a NASM local label, attached to the most recent non-local label,
; so every invocation must come after its own label (lit15:, andb:, ...);
; two invocations under the same label would define .str twice.
%macro  pabc 1                  ; a "simple" print macro
        section .data
.str    db      %1,0            ; %1 is first actual in macro call
        section .text
        mov     rdi, fmt        ; address of format string
        mov     rsi, .str       ; users string
        mov     rdx, [a]        ; long int a
        mov     rcx, [b]        ; long int b
        mov     r8, [c]         ; long int c
        mov     rax, 0          ; no xmm used
        call    printf          ; Call C function
%endmacro

        section .data           ; preset constants, writeable
a:      dq      3               ; 64-bit variable a initialized to 3
b:      dq      5               ; 64-bit variable b initialized to 5
fmt:    db "%s, a=%ld, b=%ld, c=%ld",10,0 ; format string for printf

        section .bss            ; uninitialized space
c:      resq    1               ; reserve a 64-bit word

        section .text           ; instructions, code segment
        global  main            ; for gcc standard linking
main:                           ; label
        push    rbp             ; set up stack

lit15:                          ; c=15;
        mov     rax,15          ; 15 is a literal constant
        mov     [c],rax         ; store into c
        pabc    "c=15 "         ; invoke the print macro

andb:                           ; c=a&b;
        mov     rax,[a]         ; load a
        and     rax,[b]         ; and with b
        mov     [c],rax         ; store into c
        pabc    "c=a&b"         ; invoke the print macro

orw:                            ; c=a|b;
        mov     rax,[a]         ; load a
        or      rax,[b]         ; logical or with b
        mov     [c],rax         ; store into c
        pabc    "c=a|b"         ; invoke the print macro

xorw:                           ; c=a^b;
        mov     rax,[a]         ; load a
        xor     rax,[b]         ; exclusive or with b
        mov     [c],rax         ; store result in c
        pabc    "c=a^b"         ; invoke the print macro

notw:                           ; c=~a;
        mov     rax,[a]         ; load a
        not     rax             ; not, complement
        mov     [c],rax         ; store result into c
        pabc    "c=~a "         ; invoke the print macro

        pop     rbp             ; restore stack
        mov     rax,0           ; exit code, 0=normal
        ret                     ; main returns to operating system

loops in assembly language

One significant use of loops is to evaluate polynomials and convert numbers from one base to another. (Yes, this is related to project 1 for CMPE 310) The following program has three loops. Loop3 (h3loop) uses Horner's method to evaluate a polynomial, using 'rdi' as an index, 'rcx' and 'loop' to do the loop. a_4 (a_n) is first in the array, n=4. Loop4 (h4loop) uses Horner's method, with data order optimized, using 'rcx' as both index and loop counter, to get a three instruction loop. a_0 is first in the array, n=4. Loop5 (h5loop) uses Horner's method to evaluate a polynomial using double precision floating point. Note the 8 byte increment and the quad word moved to xmm0 to pass the result to printf.

Horners method to evaluate polynomials in assembly language

Study horner_64.asm to understand the NASM coding of the loops.

; horner_64.asm  Horner's method of evaluating polynomials
;
; given a polynomial  Y = a_n X^n + a_n-1 X^n-1 + ... a_1 X + a_0
; a_n is the coefficient 'a' with subscript n. X^n is X to nth power
; compute y_1 = a_n * X + a_n-1
; compute y_2 = y_1 * X + a_n-2
; compute y_i = y_i-1 * X + a_n-i   i=3..n
; thus    y_n = Y = value of polynomial
;
; in assembly language:
;   load some register with a_n, multiply by X
;   add a_n-1, multiply by X, add a_n-2, multiply by X, ...
;   finishing with the add  a_0
;
; output from execution:
; a  6319
; aa 6319
; af 6.319000e+03

        extern  printf
        global  main

        section .data
fmta:   db      "a  %ld",10,0
fmtaa:  db      "aa %ld",10,0
fmtflt: db      "af %e",10,0

        section .text
main:   push    rbp             ; set up stack

; evaluate an integer polynomial, X=7, using a count

        section .data
a:      dq      2,5,-7,22,-9    ; coefficients of polynomial, a_n first
X:      dq      7               ; X = 7
                                ; n=4, 8 bytes per coefficient
        section .text
        mov     rax,[a]         ; accumulate value here, get coefficient a_n
        mov     rdi,1           ; subscript initialization
        mov     rcx,4           ; loop iteration count initialization, n
h3loop: imul    rax,[X]         ; * X   (two-operand imul, rdx not used)
        add     rax,[a+8*rdi]   ; + a_n-i
        inc     rdi             ; increment subscript
        loop    h3loop          ; decrement rcx, jump on non zero

        mov     rsi, rax        ; print rax
        mov     rdi, fmta       ; format
        mov     rax, 0          ; no float
        call    printf

; evaluate an integer polynomial, X=7, using a count as index
; optimal organization of data allows a three instruction loop

        section .data
aa:     dq      -9,22,-7,5,2    ; coefficients of polynomial, a_0 first
n:      dq      4               ; n=4, 8 bytes per coefficient
        section .text
        mov     rax,[aa+4*8]    ; accumulate value here, get coefficient a_n
        mov     rcx,[n]         ; loop iteration count initialization, n
h4loop: imul    rax,[X]         ; * X   (two-operand imul, rdx not used)
        add     rax,[aa+8*rcx-8]; + aa_n-i  (rcx counts n..1, so index is rcx-1)
        loop    h4loop          ; decrement rcx, jump on non zero

        mov     rsi, rax        ; print rax
        mov     rdi, fmtaa      ; format
        mov     rax, 0          ; no float
        call    printf

; evaluate a double floating polynomial, X=7.0, using a count as index
; optimal organization of data allows a three instruction loop

        section .data
af:     dq      -9.0,22.0,-7.0,5.0,2.0 ; coefficients of polynomial, a_0 first
XF:     dq      7.0
Y:      dq      0.0
N:      dq      4               ; dq, not dd: it is loaded below with a 64-bit mov into rcx
        section .text
        mov     rcx,[N]         ; loop iteration count initialization, n
        fld     qword [af+8*rcx]; accumulate value here, get coefficient af_n
h5loop: fmul    qword [XF]      ; * XF
        fadd    qword [af+8*rcx-8] ; + af_n-i
        loop    h5loop          ; decrement rcx, jump on non zero

        fstp    qword [Y]       ; store Y in order to print Y
        movq    xmm0, qword [Y] ; well, may just mov reg
        mov     rdi, fmtflt     ; format
        mov     rax, 1          ; one float
        call    printf

        pop     rbp             ; restore stack
        mov     rax,0           ; normal return
        ret                     ; return

A "C" version with same data, slightly different code sequence.

// horner_64.c  long integer and double Horner's method of evaluating polynomials
//              everything 64-bit
// given a polynomial  Y = a_n X^n + a_n-1 X^n-1 + ... a_1 X + a_0
// a_n is the coefficient 'a' with subscript n. X^n is X to nth power
// compute y_1 = a_n * X + a_n-1
// compute y_2 = y_1 * X + a_n-2
// compute y_i = y_i-1 * X + a_n-i   i=3..n
// thus    y_n = Y = value of polynomial

#include <stdio.h>

int main(int argc, char *argv[])
{
  long int a[]  = {2, 5, -7, 22, -9};           // a_n first
  long int aa[] = {-9, 22, -7, 5, 2};           // aa_0 first
  double   af[] = {-9.0, 22.0, -7.0, 5.0, 2.0}; // af_0 first
  long int n = 4;
  long int X, Y;
  double   XF, YF;
  long int i;

  // evaluate an integer polynomial a, X=7, using a_n first, count n
  X = 7;
  Y = a[0]*X + a[1];
  for(i=2; i<=n; i++) Y = Y*X + a[i];
  printf("a  %ld\n", Y);

  // evaluate an integer polynomial aa , X=7, using a_0 first, count n
  X = 7;
  Y = aa[n]*X + aa[n-1];
  for(i=n-2; i>=0; i--) Y = Y*X + aa[i];
  printf("aa %ld\n", Y);

  // evaluate a double floating polynomial, X=7.0, using af_0 first, n
  XF = 7.0;
  YF = af[n]*XF + af[n-1];  // use the double XF, not the integer X
  for(i=n-2; i>=0; i--) YF = YF*XF + af[i];
  printf("af %e\n", YF);

  return 0;
}

Same output:
a  6319
aa 6319
af 6.319000e+03

serial vs parallel, slow vs fast

Multiply hardware, serial Multiply hardware, parallel Then for wiring ground and power Possibly many mask layers Many complete chips are baked on a wafer
    <- previous    index    next ->

Other links

Go to top