斐波那契数列64位示例
// Recursive Fibonacci demo for AArch64 Linux (GNU as syntax, AAPCS64).
//
// Prompts for a 32-bit integer n, reads it with scanf and prints
// fibonacci(n) with printf.  printf, scanf and exit come from libc, so the
// object file must be linked against libc.

        .data
msg_input:      .asciz  "Please type a number: "
scanf_fmt:      .asciz  "%d"
msg_output:     .asciz  "Fibonacci number %d is %ld\n"

        .text
        .global _start

//----------------------------------------------------------------------
// _start: program entry point.
//
// _start is never returned from (the program always ends by calling
// exit), so no registers are saved here.  16 bytes of stack are reserved
// so that sp keeps its 16-byte alignment; only the first 4 bytes are used:
//
//      Contents   Address
//      | var  |   [sp]        32-bit integer written by scanf
//      |      |   [sp, #8]    unused padding
//----------------------------------------------------------------------
_start:
        sub     sp, sp, #16             // reserve the slot for var

        // printf("Please type a number: ");
        ldr     x0, addr_msg_input      // x0 = &msg_input
        bl      printf

        // scanf("%d", &var);
        ldr     x0, addr_scanf_fmt      // x0 = &scanf_fmt
        mov     x1, sp                  // x1 = &var
        bl      scanf
        cmp     w0, #1                  // scanf returns the number of items converted
        b.ne    .Lbad_input             // no integer read: var is not valid

        // res = fibonacci(var);
        ldr     w0, [sp]                // w0 = var (32-bit)
        bl      fibonacci

        // printf("Fibonacci number %d is %ld\n", var, res);
        mov     x2, x0                  // x2 = res (64-bit)
        ldr     w1, [sp]                // w1 = var (32-bit)
        ldr     x0, addr_msg_output     // x0 = &msg_output
        bl      printf

        // Terminate through libc's exit() rather than the raw exit system
        // call: exit() flushes the stdio buffers first, so the printf output
        // is not lost when stdout is redirected to a pipe or a file.
        mov     w0, #0                  // exit status 0
        bl      exit                    // does not return

.Lbad_input:
        mov     w0, #1                  // exit status 1: input was not an integer
        bl      exit                    // does not return

//----------------------------------------------------------------------
// fibonacci(n) -> result
//
// In:    w0 = n (signed 32-bit)
// Out:   x0 = F(n) (64-bit); for n <= 1 the result is n sign-extended
// Saved: x19, x20, x29, x30 are preserved via the stack frame
//
// Frame layout (32 bytes):
//      | x29 |   [sp]
//      | x30 |   [sp, #8]
//      | x19 |   [sp, #16]
//      | x20 |   [sp, #24]
//----------------------------------------------------------------------
fibonacci:
        stp     x29, x30, [sp, #-32]!   // push frame record (fp, lr)
        mov     x29, sp
        stp     x19, x20, [sp, #16]     // callee-saved registers used below

        cmp     w0, #1
        b.le    .Lfib_base              // n <= 1: F(n) = n

        mov     w19, w0                 // w19 = n, must survive the first call

        sub     w0, w19, #1
        bl      fibonacci               // x0 = F(n - 1)
        mov     x20, x0                 // keep F(n - 1) across the second call

        sub     w0, w19, #2
        bl      fibonacci               // x0 = F(n - 2)

        add     x0, x20, x0             // F(n) = F(n - 1) + F(n - 2)
        b       .Lfib_done

.Lfib_base:
        sxtw    x0, w0                  // widen n to the 64-bit return value

.Lfib_done:
        ldp     x19, x20, [sp, #16]
        ldp     x29, x30, [sp], #32     // pop frame record
        ret

// Absolute addresses of the strings, loaded with PC-relative ldr literals.
        .balign 8                       // keep the 64-bit literals naturally aligned
addr_msg_input:         .dword  msg_input
addr_msg_output:        .dword  msg_output
addr_scanf_fmt:         .dword  scanf_fmt
as -g -o fabo.o fabo.s
ld -g -o fabo fabo.o -lc -I /lib64/ld-linux-aarch64.so.1
斐波那契数列优化单循环64位示例
本例在上个例子的基础上,对斐波那契数列函数进行优化。
- F0 = 0
- F1 = 1
- Fn = Fn-1 + Fn-2, where n > 1
要计算当前元素,只需要前面的两个元素,并不需要严格遵循数学定义来进行递归函数调用。
可以通过重复计算前面的 n-1 个斐波那契数,来得到任意的斐波那契数 Fn。
// Iterative (single-loop) Fibonacci demo for AArch64 Linux
// (GNU as syntax, AAPCS64).
//
// Prompts for a 32-bit integer n, reads it with scanf and prints
// fibonacci(n) with printf.  printf, scanf and exit come from libc, so the
// object file must be linked against libc.

        .data
msg_input:      .asciz  "Please type a number: "
scanf_fmt:      .asciz  "%d"
msg_output:     .asciz  "Fibonacci number %d is %ld\n"

        .text
        .global _start

//----------------------------------------------------------------------
// _start: program entry point.
//
// _start is never returned from (the program always ends by calling
// exit), so no registers are saved here.  16 bytes of stack are reserved
// so that sp keeps its 16-byte alignment; only the first 4 bytes are used:
//
//      Contents   Address
//      | var  |   [sp]        32-bit integer written by scanf
//      |      |   [sp, #8]    unused padding
//----------------------------------------------------------------------
_start:
        sub     sp, sp, #16             // reserve the slot for var

        // printf("Please type a number: ");
        ldr     x0, addr_msg_input      // x0 = &msg_input
        bl      printf

        // scanf("%d", &var);
        ldr     x0, addr_scanf_fmt      // x0 = &scanf_fmt
        mov     x1, sp                  // x1 = &var
        bl      scanf
        cmp     w0, #1                  // scanf returns the number of items converted
        b.ne    .Lbad_input             // no integer read: var is not valid

        // res = fibonacci(var);
        ldr     w0, [sp]                // w0 = var (32-bit)
        bl      fibonacci

        // printf("Fibonacci number %d is %ld\n", var, res);
        mov     x2, x0                  // x2 = res (64-bit)
        ldr     w1, [sp]                // w1 = var (32-bit)
        ldr     x0, addr_msg_output     // x0 = &msg_output
        bl      printf

        // Terminate through libc's exit() rather than the raw exit system
        // call: exit() flushes the stdio buffers first, so the printf output
        // is not lost when stdout is redirected to a pipe or a file.
        mov     w0, #0                  // exit status 0
        bl      exit                    // does not return

.Lbad_input:
        mov     w0, #1                  // exit status 1: input was not an integer
        bl      exit                    // does not return

//----------------------------------------------------------------------
// fibonacci(n) -> result
//
// In:    w0 = n (signed 32-bit)
// Out:   x0 = F(n) (64-bit); for n <= 1 the result is n sign-extended
// Clobb: x1, x2, x3, flags
//
// Leaf function: makes no calls and uses no stack.
// Register roles inside the loop:
//      x1 = F(i - 1)   x2 = F(i)   w3 = additions still to perform
//----------------------------------------------------------------------
fibonacci:
        cmp     w0, #1
        b.le    .Lfib_small             // n <= 1: F(n) = n

        sub     w3, w0, #1              // n - 1 additions are needed (>= 1 here)
        mov     x1, #0                  // x1 = F(0)
        mov     x2, #1                  // x2 = F(1)

.Lfib_loop:
        add     x0, x1, x2              // x0 = F(i - 1) + F(i) = F(i + 1)
        mov     x1, x2                  // slide the window forward
        mov     x2, x0
        subs    w3, w3, #1              // one addition done
        b.ne    .Lfib_loop              // repeat until none remain
        ret                             // x0 holds F(n)

.Lfib_small:
        sxtw    x0, w0                  // widen n to the 64-bit return value
        ret

// Absolute addresses of the strings, loaded with PC-relative ldr literals.
        .balign 8                       // keep the 64-bit literals naturally aligned
addr_msg_input:         .dword  msg_input
addr_msg_output:        .dword  msg_output
addr_scanf_fmt:         .dword  scanf_fmt
as -g -o fabo.o fabo.s
ld -g -o fabo fabo.o -lc -I /lib64/ld-linux-aarch64.so.1
优化后的 fibonacci 函数不再递归调用自身,也不需要在栈上保存寄存器,从而减少了对栈的使用。