第20部分- Linux ARM汇编 函数调用斐波那契数列实现

斐波那契数列64位示例

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
.data

// NUL-terminated C strings passed to printf/scanf by _start.
msg_input:  .string "Please type a number: "       // prompt (no trailing newline)
scanf_fmt:  .string "%d"                           // reads one signed 32-bit int
msg_output: .string "Fibonacci number %d is %ld\n" // %d = n (32-bit), %ld = result (64-bit)
 
.text
.global _start
// _start: process entry point (GNU as, AArch64 Linux, AAPCS64).
//
//   Equivalent C:
//       int var;
//       printf("Please type a number: ");
//       if (scanf("%d", &var) != 1) exit(1);
//       printf("Fibonacci number %d is %ld\n", var, fibonacci(var));
//       exit(0);
//
//   _start has no caller and never returns, so no registers are saved here.
//   The process is terminated through libc exit() rather than a raw
//   exit syscall: exit() flushes the stdio buffers, so the printf output is
//   not lost when stdout is a pipe or a file (fully buffered).
_start:
    sub     sp, sp, #16              // Reserve a 16-byte aligned slot for the local.
    /*
      Stack layout:
      Contents  Address
      | var |    [sp]       first 4 bytes are written by scanf
      |     |    [sp, #8]   padding, keeps sp 16-byte aligned
     */

    // printf("Please type a number: ");
    ldr     x0, addr_msg_input       // x0 <- &msg_input
    bl      printf

    // scanf("%d", &var);
    mov     x1, sp                   // x1 <- &var
    ldr     x0, addr_scanf_fmt       // x0 <- &scanf_fmt
    bl      scanf
    cmp     w0, #1                   // scanf returns the number of converted items
    b.ne    .Lstart_bad_input        // no integer read: var is uninitialised, bail out

    // res = fibonacci(var);
    ldr     w0, [sp]                 // w0 <- var (32-bit)
    bl      fibonacci                // x0 <- result (64-bit)

    // printf("Fibonacci number %d is %ld\n", var, res);
    mov     x2, x0                   // x2 <- res
    ldr     w1, [sp]                 // w1 <- var
    ldr     x0, addr_msg_output      // x0 <- &msg_output
    bl      printf

    mov     w0, #0                   // exit status 0
    bl      exit                     // flushes stdio, does not return

.Lstart_bad_input:
    mov     w0, #1                   // exit status 1: input was not a number
    bl      exit                     // does not return


fibonacci:
    // long fibonacci(int n)  -- recursive definition, AAPCS64
    //   In:   w0 = n (signed 32-bit)
    //   Out:  x0 = n sign-extended when n <= 1,
    //              otherwise fibonacci(n-1) + fibonacci(n-2) (64-bit)
    //   Uses: x19 (n) and x20 (fibonacci(n-1)) across the recursive calls;
    //         both are callee-saved and restored before returning.
    cmp     w0, #1                   // base case needs no stack frame
    b.gt    .Lfib_recurse
    sxtw    x0, w0                   // n <= 1: result is n itself, widened to 64 bits
    ret

.Lfib_recurse:
    /*
      Stack layout after the prologue (32 bytes, sp stays 16-byte aligned):
      | x19 |    [sp]
      | x30 |    [sp, #8]
      | x20 |    [sp, #16]
      |     |    [sp, #24]  padding
     */
    stp     x19, x30, [sp, #-32]!    // save x19 and the link register
    str     x20, [sp, #16]           // save x20

    mov     w19, w0                  // keep n across the calls

    sub     w0, w19, #1              // first = fibonacci(n - 1)
    bl      fibonacci
    mov     x20, x0

    sub     w0, w19, #2              // second = fibonacci(n - 2), left in x0
    bl      fibonacci

    add     x0, x20, x0              // result = first + second

    ldr     x20, [sp, #16]           // restore x20
    ldp     x19, x30, [sp], #32      // restore x19 and the link register, pop frame
    ret

// 64-bit absolute addresses of the strings in .data; _start fetches them
// with PC-relative "ldr x0, <label>" literal loads.
addr_msg_input:  .xword msg_input
addr_msg_output: .xword msg_output
addr_scanf_fmt:  .xword scanf_fmt

as -g -o fabo.o fabo.s

ld -g -o fabo fabo.o -lc -I /lib64/ld-linux-aarch64.so.1

斐波那契数列优化单循环64位示例

下面在上一个例子的基础上,对 fibonacci 函数进行优化。

  • F0 = 0
  • F1 = 1
  • Fn = Fn-1 + Fn-2, where n > 1

要计算当前元素,只需要前面的两个元素,并不需要严格遵循数学定义来进行递归函数调用。

因此可以从 F0、F1 出发,用循环重复 n-1 次加法,依次求出前面的各项,从而计算出任意的斐波那契数 F(n)。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
.data

// NUL-terminated C strings passed to printf/scanf by _start.
msg_input:  .string "Please type a number: "       // prompt (no trailing newline)
scanf_fmt:  .string "%d"                           // reads one signed 32-bit int
msg_output: .string "Fibonacci number %d is %ld\n" // %d = n (32-bit), %ld = result (64-bit)
 
.text
.global _start
// _start: process entry point (GNU as, AArch64 Linux, AAPCS64).
//
//   Equivalent C:
//       int var;
//       printf("Please type a number: ");
//       if (scanf("%d", &var) != 1) exit(1);
//       printf("Fibonacci number %d is %ld\n", var, fibonacci(var));
//       exit(0);
//
//   _start has no caller and never returns, so no registers are saved here.
//   The process is terminated through libc exit() rather than a raw
//   exit syscall: exit() flushes the stdio buffers, so the printf output is
//   not lost when stdout is a pipe or a file (fully buffered).
_start:
    sub     sp, sp, #16              // Reserve a 16-byte aligned slot for the local.
    /*
      Stack layout:
      Contents  Address
      | var |    [sp]       first 4 bytes are written by scanf
      |     |    [sp, #8]   padding, keeps sp 16-byte aligned
     */

    // printf("Please type a number: ");
    ldr     x0, addr_msg_input       // x0 <- &msg_input
    bl      printf

    // scanf("%d", &var);
    mov     x1, sp                   // x1 <- &var
    ldr     x0, addr_scanf_fmt       // x0 <- &scanf_fmt
    bl      scanf
    cmp     w0, #1                   // scanf returns the number of converted items
    b.ne    .Lstart_bad_input        // no integer read: var is uninitialised, bail out

    // res = fibonacci(var);
    ldr     w0, [sp]                 // w0 <- var (32-bit)
    bl      fibonacci                // x0 <- result (64-bit)

    // printf("Fibonacci number %d is %ld\n", var, res);
    mov     x2, x0                   // x2 <- res
    ldr     w1, [sp]                 // w1 <- var
    ldr     x0, addr_msg_output      // x0 <- &msg_output
    bl      printf

    mov     w0, #0                   // exit status 0
    bl      exit                     // flushes stdio, does not return

.Lstart_bad_input:
    mov     w0, #1                   // exit status 1: input was not a number
    bl      exit                     // does not return

fibonacci:
    // long fibonacci(int n)  -- iterative, leaf function, AAPCS64
    //   In:    w0 = n (signed 32-bit)
    //   Out:   x0 = n sign-extended when n <= 1, otherwise F(n) (64-bit)
    //   Uses:  x1 = previous term, x2 = next term, w3 = additions left;
    //          all caller-saved, no stack is touched.
    cmp     w0, #1
    b.gt    .Lfib_iterate
    sxtw    x0, w0                   // n <= 1: result is n itself, widened to 64 bits
    ret

.Lfib_iterate:
    sub     w3, w0, #1               // n >= 2 here, so n - 1 >= 1 additions are needed
    mov     x1, xzr                  // previous = F(0) = 0
    mov     x0, #1                   // current  = F(1) = 1
.Lfib_step:
    add     x2, x1, x0               // next = previous + current
    mov     x1, x0                   // previous = current
    mov     x0, x2                   // current  = next
    subs    w3, w3, #1               // one addition done
    b.ne    .Lfib_step               // repeat until none are left
    ret                              // x0 = F(n)

// 64-bit absolute addresses of the strings in .data; _start fetches them
// with PC-relative "ldr x0, <label>" literal loads.
addr_msg_input:  .xword msg_input
addr_msg_output: .xword msg_output
addr_scanf_fmt:  .xword scanf_fmt

as -g -o fabo.o fabo.s

ld -g -o fabo fabo.o -lc -I /lib64/ld-linux-aarch64.so.1

优化后的 fibonacci 是一个叶子函数:它不再调用其他函数,只使用临时寄存器,因此不需要在栈上保存任何寄存器,减少了对栈的使用。