<- previous index next ->
UGH! Note that < and > are interpreted by HTML,
thus source code, physically included, has & gt ; rather than symbol.
Be sure to download from link, not from HTML.
The basic integer compare instruction is "cmp"
Following this instruction is typically one of:
JL label ; jump on less than "<"
JLE label ; jump on less than or equal "<="
JG label ; jump on greater than ">"
JGE label ; jump on greater than or equal ">="
JE label ; jump on equal "=="
JNE label ; jump on not equal "!="
After many integer arithmetic instructions
JZ label ; jump on zero
JNZ label ; jump on non zero
JS label ; jump on sign set (result is negative)
JNS label ; jump on sign not set (result is zero or positive)
Note: Use 'cmp' rather than 'sub' for comparison.
Overflow can occur on subtraction resulting in sign inversion.
if-then-else in assembly language
Convert a "C" 'if' statement to nasm assembly ifint_64.asm
The significant features are:
1) use a compare instruction for the test
2) put a label on the start of the false branch (e.g. false1:)
3) put a label after the end of the 'if' statement (e.g. exit1:)
4) choose a conditional jump that goes to the false part
5) put an unconditional jump to (e.g. exit1:) at the end of the true part
; ifint_64.asm code ifint_64.c for nasm
;
; Shows how two "C" if/else statements map onto cmp + conditional jump.
; printf is called with its first argument in rdi; because printf is
; variadic, rax is loaded with 0 before every call (al = number of
; xmm registers used for arguments, none here).
;
; /* ifint_64.c an 'if' statement that will be coded for nasm */
; #include <stdio.h>
; int main()
; {
;   long int a=1;
;   long int b=2;
;   long int c=3;
;   if(a<b)
;     printf("true a < b \n");
;   else
;     printf("wrong on a < b \n");
;   if(b>c)
;     printf("wrong on b > c \n");
;   else
;     printf("false b > c \n");
;   return 0;
; }
; result of executing both "C" and assembly is:
; true a < b
; false b > c

        global  main            ; define for linker
        extern  printf          ; tell linker we need this C function

        section .data           ; Data section, initialized variables
a:      dq      1
b:      dq      2
c:      dq      3
fmt1:   db      "true a < b ",10,0
fmt2:   db      "wrong on a < b ",10,0
fmt3:   db      "wrong on b > c ",10,0
fmt4:   db      "false b > c ",10,0

        section .text
main:   push    rbp             ; set up stack (one push keeps rsp 16-byte
                                ; aligned at the printf calls below)

; ---- if(a<b) ... else ... ------------------------------------------
        mov     rax,[a]         ; a
        cmp     rax,[b]         ; compare a to b (signed)
        jge     false1          ; a >= b : go to the false part
                                ; fall through: a < b
        mov     rdi, fmt1       ; printf("true a < b \n");
        mov     rax, 0          ; no xmm used (rax still held a here)
        call    printf
        jmp     exit1           ; jump over false part
false1:                         ; a < b is false
        mov     rdi, fmt2       ; printf("wrong on a < b \n");
        mov     rax, 0          ; no xmm used
        call    printf
exit1:                          ; finished first 'if' statement

; ---- if(b>c) ... else ... ------------------------------------------
        mov     rax,[b]         ; b
        cmp     rax,[c]         ; compare b to c (signed)
        jle     false2          ; b <= c : go to the false part
                                ; fall through: b > c
        mov     rdi, fmt3       ; printf("wrong on b > c \n");
        mov     rax, 0          ; no xmm used (rax still held b here)
        call    printf
        jmp     exit2           ; jump over false part
false2:                         ; b > c is false
        mov     rdi, fmt4       ; printf("false b > c \n");
        mov     rax, 0          ; no xmm used
        call    printf
exit2:                          ; finished second 'if' statement

        pop     rbp             ; restore stack
        mov     rax,0           ; normal, no error, return value
        ret                     ; return 0;
loop in assembly language
Convert a "C" loop to nasm assembly loopint_64.asm
The significant features are:
1) "C" long int is 8-bytes, thus dd1[1] becomes qword [dd1+8]
dd1[99] becomes qword [dd1+8*99]
2) "C" long int is 8-bytes, thus dd1[i]; i++; becomes add rdi,8
since "i" is never stored, the register rdi holds "i" (as the byte offset 8*i)
3) the 'cmp' instruction sets flags that control the jump instruction.
cmp rdi,8*99 is like i<99 in "C"
jne loop1 jumps if register rdi is not 8*99
; loopint_64.asm code loopint.c for nasm
;
; Fills dd1[1..98] with 7, leaving the preset guard values in dd1[0]
; and dd1[99], then prints four elements with printf.
;
; /* loopint_64.c a very simple loop that will be coded for nasm */
; #include <stdio.h>
; int main()
; {
;   long int dd1[100];  // 100 could be 3 gigabytes
;   long int i;         // must be long for more than 2 gigabytes
;   dd1[0]=5;           /* be sure loop stays 1..98 */
;   dd1[99]=9;
;   for(i=1; i<99; i++) dd1[i]=7;
;   printf("dd1[0]=%ld, dd1[1]=%ld, dd1[98]=%ld, dd1[99]=%ld\n",
;          dd1[0], dd1[1], dd1[98],dd1[99]);
;   return 0;
; }
; execution output is dd1[0]=5, dd1[1]=7, dd1[98]=7, dd1[99]=9

COUNT   equ     100                     ; elements in dd1
CELL    equ     8                       ; bytes per long int
LAST    equ     COUNT-1                 ; subscript of the final element

        extern  printf                  ; the C function, to be called
        global  main

        section .bss
dd1:    resq    COUNT                   ; the array, uninitialized
i:      resq    1                       ; never touched; the subscript lives in rdi

        section .data
fmt:    db      "dd1[0]=%ld, dd1[1]=%ld, dd1[98]=%ld, dd1[99]=%ld",10,0

        section .text
main:   push    rbp                     ; set up stack

        ; guard values at both ends of the array
        mov     qword [dd1+LAST*CELL],9 ; dd1[99]=9;
        mov     qword [dd1],5           ; dd1[0]=5;

        ; rdi = byte offset of dd1[i], starting at i=1
        mov     rdi, 1*CELL
loop1:  mov     qword [dd1+rdi],7       ; dd1[i]=7;
        add     rdi, CELL               ; i++ (advance one element)
        cmp     rdi, LAST*CELL          ; reached dd1[99] yet?
        jne     loop1                   ; no: store the next element

        ; printf(fmt, dd1[0], dd1[1], dd1[98], dd1[99])
        mov     r8,  qword [dd1+LAST*CELL]      ; fourth value, dd1[99]
        mov     rcx, qword [dd1+(LAST-1)*CELL]  ; third value,  dd1[98]
        mov     rdx, qword [dd1+1*CELL]         ; second value, dd1[1]
        mov     rsi, qword [dd1]                ; first value,  dd1[0]
        mov     rdi, fmt                        ; format string address
        xor     eax, eax                        ; rax=0: no xmm used
        call    printf

        pop     rbp                     ; restore stack
        xor     eax, eax                ; return value 0, no error
        ret                             ; return 0;
logic operations in assembly language
Previously, integer arithmetic in "C" was converted to
NASM assembly language. The following is very similar
(cut and paste) of intarith_64.asm to intlogic_64.asm that
shows the "C" operators "&" and, "|" or, "^" xor, "~" not.
intlogic_64.asm
; intlogic_64.asm show some simple C code and corresponding nasm code
; the nasm code is one sample, not unique
;
; compile: nasm -f elf64 -l intlogic_64.lst intlogic_64.asm
; link: gcc -m64 -o intlogic_64 intlogic_64.o
; run: ./intlogic_64 > intlogic_64.out
;
; the output from running intlogic_64.asm and intlogic.c is
; c=5 , a=3, b=5, c=15
; c=a&b, a=3, b=5, c=1
; c=a|b, a=3, b=5, c=7
; c=a^b, a=3, b=5, c=6
; c=~a , a=3, b=5, c=-4
;
;The file intlogic.c is:
; #include <stdio.h>
; int main()
; {
;   long int a=3, b=5, c;
;
;   c=15;
;   printf("%s, a=%d, b=%d, c=%d\n","c=5 ", a, b, c);
;   c=a&b; /* and */
;   printf("%s, a=%d, b=%d, c=%d\n","c=a&b", a, b, c);
;   c=a|b; /* or */
;   printf("%s, a=%d, b=%d, c=%d\n","c=a|b", a, b, c);
;   c=a^b; /* xor */
;   printf("%s, a=%d, b=%d, c=%d\n","c=a^b", a, b, c);
;   c=~a; /* not */
;   printf("%s, a=%d, b=%d, c=%d\n","c=~a", a, b, c);
;   return 0;
; }

        extern  printf          ; the C function to be called

; pabc "text"
;   Prints "text" followed by the current values of a, b and c using fmt.
;   The string is emitted into .data under a macro-local label (%%str),
;   so the macro may be invoked any number of times, in any position.
;   Overwrites rdi, rsi, rdx, rcx, r8, rax and calls printf.
;   Leaves the assembler in section .text.
%macro  pabc 1                  ; a "simple" print macro
        section .data
%%str:  db      %1,0            ; %1 is first actual in macro call
        section .text
        mov     rdi, fmt        ; address of format string
        mov     rsi, %%str      ; users string
        mov     rdx, [a]        ; long int a
        mov     rcx, [b]        ; long int b
        mov     r8, [c]         ; long int c
        mov     rax, 0          ; no xmm used
        call    printf          ; Call C function
%endmacro

        section .data           ; preset constants, writeable
a:      dq      3               ; 64-bit variable a initialized to 3
b:      dq      5               ; 64-bit variable b initialized to 5
fmt:    db      "%s, a=%ld, b=%ld, c=%ld",10,0 ; format string for printf

        section .bss            ; uninitialized space
c:      resq    1               ; reserve a 64-bit word

        section .text           ; instructions, code segment
        global  main            ; for gcc standard linking
main:                           ; label
        push    rbp             ; set up stack

lit5:                           ; c=15;
        mov     rax,15          ; 15 is a literal constant
        mov     [c],rax         ; store into c
        pabc    "c=5 "          ; printed tag is "c=5 ", as in intlogic.c

andb:                           ; c=a&b;
        mov     rax,[a]         ; load a
        and     rax,[b]         ; and with b
        mov     [c],rax         ; store into c
        pabc    "c=a&b"         ; invoke the print macro

orw:                            ; c=a|b;
        mov     rax,[a]         ; load a
        or      rax,[b]         ; logical or with b
        mov     [c],rax         ; store into c
        pabc    "c=a|b"         ; invoke the print macro

xorw:                           ; c=a^b;
        mov     rax,[a]         ; load a
        xor     rax,[b]         ; exclusive or with b
        mov     [c],rax         ; store result in c
        pabc    "c=a^b"         ; invoke the print macro

notw:                           ; c=~a;
        mov     rax,[a]         ; load a
        not     rax             ; not, complement
        mov     [c],rax         ; store result into c
        pabc    "c=~a "         ; invoke the print macro

        pop     rbp             ; restore stack
        mov     rax,0           ; exit code, 0=normal
        ret                     ; main returns to operating system
loops in assembly language
One significant use of loops is to evaluate polynomials and
convert numbers from one base to another.
(Yes, this is related to project 1 for CMPE 310)
The following program has three loops.
Loop3 (h3loop) uses Horner's method to evaluate a polynomial,
using 'rdi' as an index, 'rcx' and 'loop' to do the loop.
a_4 is first in the array, n=4.
Loop4 (h4loop) uses Horner's method, with data order optimized,
using 'rcx' as both index and loop counter, to get a
three instruction loop.
a_0 is first in the array, n=4.
Loop5 (h5loop) uses Horners method to evaluate a polynomial
using double precision floating point. Note 8 byte
increment and quad word to xmm0, to printf.
Horners method to evaluate polynomials in assembly language
Study horner_64.asm to understand
the NASM coding of the loops.
; horner_64.asm Horner's method of evaluating polynomials
;
; given a polynomial Y = a_n X^n + a_n-1 X^n-1 + ... a_1 X + a_0
; a_n is the coefficient 'a' with subscript n. X^n is X to nth power
; compute y_1 = a_n * X + a_n-1
; compute y_2 = y_1 * X + a_n-2
; compute y_i = y_i-1 * X + a_n-i i=3..n
; thus y_n = Y = value of polynomial
;
; in assembly language:
; load some register with a_n, multiply by X
; add a_n-1, multiply by X, add a_n-2, multiply by X, ...
; finishing with the add a_0
;
; Three variants are evaluated and printed with printf:
;   h3loop  integer, coefficients stored a_n first, separate subscript
;   h4loop  integer, coefficients stored a_0 first, rcx is count and index
;   h5loop  double (x87), coefficients stored a_0 first, rcx as above
;
; output from execution:
; a 6319
; aa 6319
; af 6.319000e+03

        extern  printf
        global  main

        section .data
fmta:   db      "a %ld",10,0
fmtaa:  db      "aa %ld",10,0
fmtflt: db      "af %e",10,0

        section .text
main:   push    rbp             ; set up stack

; evaluate an integer polynomial, X=7, using a count
        section .data
a:      dq      2,5,-7,22,-9    ; coefficients of polynomial, a_n first
X:      dq      7               ; X = 7
                                ; n=4, 8 bytes per coefficient
        section .text
        mov     rax,[a]         ; accumulate value here, get coefficient a_n
        mov     rdi,1           ; subscript initialization
        mov     rcx,4           ; loop iteration count initialization, n
h3loop: imul    rax,[X]         ; * X (two-operand imul: result in rax only)
        add     rax,[a+8*rdi]   ; + a_n-i
        inc     rdi             ; increment subscript
        loop    h3loop          ; decrement rcx, jump on non zero

        mov     rsi, rax        ; print rax
        mov     rdi, fmta       ; format
        mov     rax, 0          ; no float
        call    printf

; evaluate an integer polynomial, X=7, using a count as index
; optimal organization of data allows a three instruction loop
        section .data
aa:     dq      -9,22,-7,5,2    ; coefficients of polynomial, a_0 first
n:      dq      4               ; n=4, 8 bytes per coefficient
        section .text
        mov     rax,[aa+4*8]    ; accumulate value here, get coefficient a_n
        mov     rcx,[n]         ; loop iteration count initialization, n
h4loop: imul    rax,[X]         ; * X (two-operand imul: result in rax only)
        add     rax,[aa+8*rcx-8]; + aa_n-i  (rcx runs n..1, so index n-1..0)
        loop    h4loop          ; decrement rcx, jump on non zero

        mov     rsi, rax        ; print rax
        mov     rdi, fmtaa      ; format
        mov     rax, 0          ; no float
        call    printf

; evaluate a double floating polynomial, X=7.0, using a count as index
; optimal organization of data allows a three instruction loop
        section .data
af:     dq      -9.0,22.0,-7.0,5.0,2.0 ; coefficients of polynomial, a_0 first
XF:     dq      7.0
Y:      dq      0.0
N:      dq      4               ; 64-bit, because it is loaded into rcx below
        section .text
        mov     rcx,[N]         ; loop iteration count initialization, n
        fld     qword [af+8*rcx]; accumulate value here, get coefficient a_n
h5loop: fmul    qword [XF]      ; * XF
        fadd    qword [af+8*rcx-8] ; + af_n-i
        loop    h5loop          ; decrement rcx, jump on non zero

        fstp    qword [Y]       ; store Y in order to print Y
        movq    xmm0, qword [Y] ; the double argument goes to printf in xmm0
        mov     rdi, fmtflt     ; format
        mov     rax, 1          ; one float
        call    printf

        pop     rbp             ; restore stack
        mov     rax,0           ; normal return
        ret                     ; return
A "C" version with same data, slightly different code sequence.
// horner_64.c long integer and double Horner's method of evaluating polynomials
// everything 64-bit
// given a polynomial Y = a_n X^n + a_n-1 X^n-1 + ... a_1 X + a_0
// a_n is the coefficient 'a' with subscript n. X^n is X to nth power
// compute y_1 = a_n * X + a_n-1
// compute y_2 = y_1 * X + a_n-2
// compute y_i = y_i-1 * X + a_n-i i=3..n
// thus y_n = Y = value of polynomial
#include <stdio.h>

// Evaluates the same degree-4 polynomial three ways (integer with a_n
// first, integer with a_0 first, double with a_0 first) and prints each
// result. Command-line arguments are not used. Returns 0.
int main(int argc, char *argv[])
{
  long int a[] = {2, 5, -7, 22, -9}; // a_n first
  long int aa[] = {-9, 22, -7, 5, 2}; // aa_0 first
  double af[] = {-9.0, 22.0, -7.0, 5.0, 2.0}; // af_0 first
  long int n = 4;
  long int X, Y;
  double XF, YF;
  long int i;

  // evaluate an integer polynomial a, X=7, using a_n first, count n
  X = 7;
  Y = a[0]*X + a[1];
  for(i=2; i<=n; i++) Y = Y*X + a[i];
  printf("a %ld\n", Y);

  // evaluate an integer polynomial aa , X=7, using a_0 first, count n
  X = 7;
  Y = aa[n]*X + aa[n-1];
  for(i=n-2; i>=0; i--) Y = Y*X + aa[i];
  printf("aa %ld\n", Y);

  // evaluate a double floating polynomial, X=7.0, using af_0 first, n
  XF = 7.0;
  YF = af[n]*XF + af[n-1]; // first step uses the double XF, not the integer X
  for(i=n-2; i>=0; i--) YF = YF*XF + af[i];
  printf("af %e\n", YF);

  return 0;
}
Same output:
a 6319
aa 6319
af 6.319000e+03
serial vs parallel, slow vs fast
Multiply hardware, serial
Multiply hardware, parallel
Then for wiring ground and power
Possibly many mask layers
Many complete chips are baked on a wafer
<- previous index next ->