// file kernel/n/x86/gcd_n2.S: O(n^2) greatest common divisor
/*-----------------------------------------------------------------------+
 |  Copyright 2005-2006, Michel Quercia (michel.quercia@prepas.org)      |
 |                                                                       |
 |  This file is part of Numerix. Numerix is free software; you can      |
 |  redistribute it and/or modify it under the terms of the GNU Lesser   |
 |  General Public License as published by the Free Software Foundation; |
 |  either version 2.1 of the License, or (at your option) any later     |
 |  version.                                                             |
 |                                                                       |
 |  The Numerix Library is distributed in the hope that it will be       |
 |  useful, but WITHOUT ANY WARRANTY; without even the implied warranty  |
 |  of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU  |
 |  Lesser General Public License for more details.                      |
 |                                                                       |
 |  You should have received a copy of the GNU Lesser General Public     |
 |  License along with the GNU MP Library; see the file COPYING. If not, |
 |  write to the Free Software Foundation, Inc., 59 Temple Place -       |
 |  Suite 330, Boston, MA 02111-1307, USA.                               |
 +-----------------------------------------------------------------------+
 |                                                                       |
 |                           PGCD quadratique                            |
 |                                                                       |
 +-----------------------------------------------------------------------*/

                        # +------------------------+
                        # |      Two-digit GCD     |
                        # +------------------------+

# void xn(gcd_2)(chiffre *x)
#
# Input:
#   x = array of 8 digits [a0,a1,b0,b1,p,s,q,r]; each digit is accessed
#       below as one 32-bit word (byte offsets 0,4,...,28 from x)
#   a = a0 + BASE*a1, b = b0 + BASE*b1
#
# Constraint: 0 < b < a < BASE*b
#
# Expands the fraction a/b as a continued fraction for as long as the
# coefficients fit in one digit.
#
# Output:
#   [a0,a1,b0,b1] <- unspecified (this routine never stores to these words)
#   [p,s,q,r]     <- coefficients of the combinations performed
#
# Method, as implemented below: p,s start at 1 and q,r at 0.  The code
# alternates two shift-and-subtract divisions.  Phase 1 reduces a by
# multiples k*b and records each one as q += k*p, s += k*r.  Phase 2
# reduces b by multiples k*a and records each one as r += k*s, p += k*q.
# The routine returns as soon as a shifted or accumulated coefficient
# carries out of 32 bits (the offending addition is undone first), when a
# remainder becomes zero, or when the subtraction that opens the next
# phase borrows.
#
# Register use: esi = x, eax:ebx = a (low word:high word),
# ecx:edx = b (low word:high word), edi/ebp = the two coefficients
# currently being shifted.
# NOTE(review): ENTER, RETURN_WITH_SP, arg1 and ALIGN_4 are macros defined
# outside this file section; presumably they provide the prologue/epilogue
# (including saving ebx, esi, edi, ebp), the first stack argument and code
# alignment.  Confirm against their definitions.

#ifdef assembly_sn_gcd_2
#undef L
#define L(x) .Lsn_gcd_2_##x
ENTER(sn_gcd_2)

#undef  _p_
#undef  _q_
#undef  _r_
#undef  _s_
#define _p_ 16(%esi)
#define _q_ 24(%esi)
#define _r_ 28(%esi)
#define _s_ 20(%esi)

        movl   arg1,   %esi             # esi <- &x
        movl   (%esi), %eax             # eax <- a0
        movl  4(%esi), %ebx             # ebx <- a1
        movl  8(%esi), %ecx             # ecx <- b0
        movl 12(%esi), %edx             # edx <- b1

        # [p,q,r,s] <- identity: p = s = 1, q = r = 0
        movl   $1,     %edi             # edi <- 1 (p)
        movl   $0,     %ebp             # ebp <- 0 (r)
        movl   %edi,   _p_
        movl   %edi,   _s_
        movl   %ebp,   _q_
        movl   %ebp,   _r_

        subl   %ecx,    %eax            # eax:ebx <- a - b
        sbbl   %edx,    %ebx

        # Here eax:ebx holds a - b
        #      ecx:edx holds b
        #      edi     holds p
        #      ebp     holds r
        # and a >= b.
        # Shift b,p,r left while a >= 2b.
        jmp    L(shift_1)
        ALIGN_4
1:
        shll   $1,      %ecx            # ecx:edx <- 2^i*b
        rcll   $1,      %edx
        shll   $1,      %edi            # edi <- 2^i*p; carry means >= BASE: stop
        jc     L(done_1)
        shll   $1,      %ebp            # ebp <- 2^i*r; carry means >= BASE: stop
        jc     L(done_1)
L(shift_1):
        subl   %ecx,    %eax            # eax:ebx <- eax:ebx - 2^i*b
        sbbl   %edx,    %ebx
        jnb    1b                       # no borrow: keep shifting

        # Here eax:ebx holds a - 2^(i+1)*b (the subtraction above borrowed)
        #      ecx:edx holds 2^i*b
        #      edi     holds 2^i*p
        #      ebp     holds 2^i*r
        # and 2^i*b <= a < 2^(i+1)*b.
        # Compute a/b by subtractions and shifts.
        addl   %ecx,    %eax            # add 2^i*b back: eax:ebx <- a - 2^i*b
        adcl   %edx,    %ebx
        jmp    L(inc_qs)                # record that subtraction in q and s
        ALIGN_4
L(add_1):
        addl   %ecx,    %eax            # trial subtraction borrowed: add 2^i*b back
        adcl   %edx,    %ebx

        # Continue the division.
L(loop_1):
        cmpl   _p_,     %edi            # i = 0 ? (edi is back to p)
        jz     L(next_1)
        shrl   $1,      %edx            # ecx:edx <- 2^i*b (one bit lower)
        rcrl   $1,      %ecx
        shrl   $1,      %edi            # edi <- 2^i*p
        shrl   $1,      %ebp            # ebp <- 2^i*r
        subl   %ecx,    %eax            # eax:ebx <- a - 2^i*b
        sbbl   %edx,    %ebx
        jb     L(add_1)                 # borrow: this quotient bit is 0
L(inc_qs):
        addl   %edi,    _q_             # q += 2^i*p; carry means >= BASE: stop
        jc     1f
        addl   %ebp,    _s_             # s += 2^i*r; carry means >= BASE: stop
        jnc    L(loop_1)
        subl   %ebp,    _s_             # undo the additions (s first, then q)
1:
        subl   %edi,    _q_
L(done_1):
        RETURN_WITH_SP
        ALIGN_4

        # End of the division of a by b.
L(next_1):
        testl  %eax,    %eax            # stop when a = 0 (both words zero)
        jne    1f
        testl  %ebx,    %ebx
        jz     L(done_1)
1:
        movl   _s_,     %edi
        movl   _q_,     %ebp
        subl   %eax,    %ecx            # ecx:edx <- b - a
        sbbl   %ebx,    %edx
        jb     L(done_1)                # negative (borrow): stop

        # Here ecx:edx holds b - a
        #      eax:ebx holds a
        #      edi     holds s
        #      ebp     holds q
        # and b >= a.
        # Shift a,s,q left while b >= 2a.
        jmp    L(shift_2)
        ALIGN_4
1:
        shll   $1,      %eax            # eax:ebx <- 2^i*a
        rcll   $1,      %ebx
        shll   $1,      %edi            # edi <- 2^i*s; carry means >= BASE: stop
        jc     L(done_2)
        shll   $1,      %ebp            # ebp <- 2^i*q; carry means >= BASE: stop
        jc     L(done_2)
L(shift_2):
        subl   %eax,    %ecx            # ecx:edx <- ecx:edx - 2^i*a
        sbbl   %ebx,    %edx
        jnb    1b                       # no borrow: keep shifting

        # Here ecx:edx holds b - 2^(i+1)*a (the subtraction above borrowed)
        #      eax:ebx holds 2^i*a
        #      edi     holds 2^i*s
        #      ebp     holds 2^i*q
        # and 2^i*a <= b < 2^(i+1)*a.
        addl   %eax,    %ecx            # add 2^i*a back: ecx:edx <- b - 2^i*a
        adcl   %ebx,    %edx
        jmp    L(inc_pr)                # record that subtraction in r and p
        ALIGN_4
L(add_2):
        addl   %eax,    %ecx            # trial subtraction borrowed: add 2^i*a back
        adcl   %ebx,    %edx

        # Continue the division.
L(loop_2):
        cmpl   _s_,     %edi            # i = 0 ? (edi is back to s)
        jz     L(next_2)
        shrl   $1,      %ebx            # eax:ebx <- 2^i*a (one bit lower)
        rcrl   $1,      %eax
        shrl   $1,      %edi            # edi <- 2^i*s
        shrl   $1,      %ebp            # ebp <- 2^i*q
        subl   %eax,    %ecx            # ecx:edx <- b - 2^i*a
        sbbl   %ebx,    %edx
        jb     L(add_2)                 # borrow: this quotient bit is 0
L(inc_pr):
        addl   %edi,    _r_             # r += 2^i*s; carry means >= BASE: stop
        jc     1f
        addl   %ebp,    _p_             # p += 2^i*q; carry means >= BASE: stop
        jnc    L(loop_2)
        subl   %ebp,    _p_             # undo the additions (p first, then r)
1:
        subl   %edi,    _r_
L(done_2):
        RETURN_WITH_SP
        ALIGN_4

        # End of the division of b by a.
L(next_2):
        testl  %ecx,    %ecx            # stop when b = 0 (both words zero)
        jne    1f
        testl  %edx,    %edx
        jz     L(done_2)
1:
        movl   _p_,     %edi
        movl   _r_,     %ebp
        subl   %ecx,    %eax            # eax:ebx <- a - b
        sbbl   %edx,    %ebx
        jb     L(done_2)                # negative (borrow): stop
        jmp    L(shift_1)               # back to phase 1 with edi = p, ebp = r

#endif /* assembly_sn_gcd_2 */


                      # +-----------------------------+
                      # |     Two-digit half-GCD      |
                      # +-----------------------------+

# void xn(hgcd_2)(chiffre *x)
#
# Input:
#   x = array of 8 digits [a0,a1,b0,b1,p,s,q,r]; each digit is accessed
#       below as one 32-bit word (byte offsets 0,4,...,28 from x)
#   a = a0 + BASE*a1, b = b0 + BASE*b1
#
# Constraint: 0 < b < a
#
# Expands the fractions a/(b+1) and (a+1)/b as continued fractions for as
# long as the quotients coincide and the coefficients fit in one digit.
#
# Output:
#   [a0,a1,b0,b1] <- unspecified (this routine never stores to these words)
#   [p,s,q,r]     <- coefficients of the combinations performed
#
# Method, as implemented below: p,s start at 1 and q,r at 0.  Phase 1
# divides a-q by b+p with a shift-and-subtract loop and records each
# subtracted multiple k as q += k*p, s += k*r.  Phase 2 divides b-r by
# a+s and records each multiple as r += k*s, p += k*q.  Between phases
# the register pairs are re-biased by the current coefficients.  The
# routine returns as soon as a shifted or accumulated coefficient carries
# out of 32 bits (the offending addition is undone first) or when the
# subtraction that opens the next phase borrows.
#
# Register use: esi = x, eax:ebx = the a-side value (low word:high word),
# ecx:edx = the b-side value (low word:high word), edi/ebp = the two
# coefficients currently being shifted.
# NOTE(review): ENTER, RETURN_WITH_SP, arg1 and ALIGN_4 are macros defined
# outside this file section; presumably they provide the prologue/epilogue
# (including saving ebx, esi, edi, ebp), the first stack argument and code
# alignment.  Confirm against their definitions.

#ifdef assembly_sn_hgcd_2
#undef L
#define L(x) .Lsn_hgcd_2_##x
ENTER(sn_hgcd_2)

#undef  _p_
#undef  _q_
#undef  _r_
#undef  _s_
#define _p_ 16(%esi)
#define _q_ 24(%esi)
#define _r_ 28(%esi)
#define _s_ 20(%esi)

        movl   arg1,   %esi             # esi <- &x
        movl   (%esi), %eax             # eax <- a0
        movl  4(%esi), %ebx             # ebx <- a1
        movl  8(%esi), %ecx             # ecx <- b0
        movl 12(%esi), %edx             # edx <- b1

        # [p,q,r,s] <- identity: p = s = 1, q = r = 0
        movl   $1,     %edi             # edi <- 1 (p)
        movl   $0,     %ebp             # ebp <- 0 (r)
        movl   %edi,   _p_
        movl   %edi,   _s_
        movl   %ebp,   _q_
        movl   %ebp,   _r_

        # b <- b+p
        addl   %edi,    %ecx
        adcl   $0,      %edx
#if 0
        /* unnecessary */
        jnc    1f
        movl   %edi,    _q_             # carry out of b+p: q <- 1 and stop
        RETURN_WITH_SP
        ALIGN_4
1:
#endif
        subl   %ecx,    %eax            # eax:ebx <- (a-q) - (b+p)
        sbbl   %edx,    %ebx

        # Here eax:ebx holds (a-q) - (b+p)
        #      ecx:edx holds b+p
        #      edi     holds p
        #      ebp     holds r
        # and a-q >= b+p.
        # Shift b,p,r left while a-q >= 2(b+p).
        jmp    L(shift_1)
        ALIGN_4
1:
        shll   $1,      %ecx            # ecx:edx <- 2^i*(b+p)
        rcll   $1,      %edx
        shll   $1,      %edi            # edi <- 2^i*p; carry means >= BASE: stop
        jc     L(done_1)
        shll   $1,      %ebp            # ebp <- 2^i*r; carry means >= BASE: stop
        jc     L(done_1)
L(shift_1):
        subl   %ecx,    %eax            # eax:ebx <- eax:ebx - 2^i*(b+p)
        sbbl   %edx,    %ebx
        jnb    1b                       # no borrow: keep shifting

        # Here eax:ebx holds (a-q) - 2^(i+1)*(b+p) (the subtraction borrowed)
        #      ecx:edx holds 2^i*(b+p)
        #      edi     holds 2^i*p
        #      ebp     holds 2^i*r
        # and 2^i*(b+p) <= a-q < 2^(i+1)*(b+p).
        # Compute (a-q)/(b+p) by subtractions and shifts.
        addl   %ecx,    %eax            # add 2^i*(b+p) back: (a-q) - 2^i*(b+p)
        adcl   %edx,    %ebx
        jmp    L(inc_qs)                # record that subtraction in q and s
        ALIGN_4
L(add_1):
        addl   %ecx,    %eax            # trial subtraction borrowed: add it back
        adcl   %edx,    %ebx

        # Continue the division.
L(loop_1):
        cmpl   _p_,     %edi            # i = 0 ? (edi is back to p)
        jz     L(next_1)
        shrl   $1,      %edx            # ecx:edx <- 2^i*(b+p) (one bit lower)
        rcrl   $1,      %ecx
        shrl   $1,      %edi            # edi <- 2^i*p
        shrl   $1,      %ebp            # ebp <- 2^i*r
        subl   %ecx,    %eax            # eax:ebx <- (a-q) - 2^i*(b+p)
        sbbl   %edx,    %ebx
        jb     L(add_1)                 # borrow: this quotient bit is 0
L(inc_qs):
        addl   %edi,    _q_             # q += 2^i*p; carry means >= BASE: stop
        jc     1f
        addl   %ebp,    _s_             # s += 2^i*r; carry means >= BASE: stop
        jnc    L(loop_1)
        subl   %ebp,    _s_             # undo the additions (s first, then q)
1:
        subl   %edi,    _q_
L(done_1):
        RETURN_WITH_SP
        ALIGN_4

        # End of the division of a-q by b+p.
        # Here edi = p and ebp = r (i = 0), so removing both turns b+p into b-r.
L(next_1):
        subl   %edi,    %ecx            # ecx:edx <- b-r
        sbbl   $0,      %edx
        subl   %ebp,    %ecx
        sbbl   $0,      %edx
        movl   _s_,     %edi
        movl   _q_,     %ebp
        addl   %edi,    %eax            # eax:ebx <- a+s (adds s and q to a-q)
        adcl   $0,      %ebx
        addl   %ebp,    %eax
        adcl   $0,      %ebx
        subl   %eax,    %ecx            # ecx:edx <- (b-r) - (a+s)
        sbbl   %ebx,    %edx
        jb     L(done_1)                # negative (borrow): stop

        # Here ecx:edx holds (b-r) - (a+s)
        #      eax:ebx holds a+s
        #      edi     holds s
        #      ebp     holds q
        # and b-r >= a+s.
        # Shift a,s,q left while b-r >= 2(a+s).
        jmp    L(shift_2)
        ALIGN_4
1:
        shll   $1,      %eax            # eax:ebx <- 2^i*(a+s)
        rcll   $1,      %ebx
        shll   $1,      %edi            # edi <- 2^i*s; carry means >= BASE: stop
        jc     L(done_2)
        shll   $1,      %ebp            # ebp <- 2^i*q; carry means >= BASE: stop
        jc     L(done_2)
L(shift_2):
        subl   %eax,    %ecx            # ecx:edx <- ecx:edx - 2^i*(a+s)
        sbbl   %ebx,    %edx
        jnb    1b                       # no borrow: keep shifting

        # Here ecx:edx holds (b-r) - 2^(i+1)*(a+s) (the subtraction borrowed)
        #      eax:ebx holds 2^i*(a+s)
        #      edi     holds 2^i*s
        #      ebp     holds 2^i*q
        # and 2^i*(a+s) <= b-r < 2^(i+1)*(a+s).
        addl   %eax,    %ecx            # add 2^i*(a+s) back: (b-r) - 2^i*(a+s)
        adcl   %ebx,    %edx
        jmp    L(inc_pr)                # record that subtraction in r and p
        ALIGN_4
L(add_2):
        addl   %eax,    %ecx            # trial subtraction borrowed: add it back
        adcl   %ebx,    %edx

        # Continue the division.
L(loop_2):
        cmpl   _s_,     %edi            # i = 0 ? (edi is back to s)
        jz     L(next_2)
        shrl   $1,      %ebx            # eax:ebx <- 2^i*(a+s) (one bit lower)
        rcrl   $1,      %eax
        shrl   $1,      %edi            # edi <- 2^i*s
        shrl   $1,      %ebp            # ebp <- 2^i*q
        subl   %eax,    %ecx            # ecx:edx <- (b-r) - 2^i*(a+s)
        sbbl   %ebx,    %edx
        jb     L(add_2)                 # borrow: this quotient bit is 0
L(inc_pr):
        addl   %edi,    _r_             # r += 2^i*s; carry means >= BASE: stop
        jc     1f
        addl   %ebp,    _p_             # p += 2^i*q; carry means >= BASE: stop
        jnc    L(loop_2)
        subl   %ebp,    _p_             # undo the additions (p first, then r)
1:
        subl   %edi,    _r_
L(done_2):
        RETURN_WITH_SP
        ALIGN_4

        # End of the division of b-r by a+s.
        # Here edi = s and ebp = q (i = 0), so removing both turns a+s into a-q.
L(next_2):
        subl   %edi,    %eax            # eax:ebx <- a-q
        sbbl   $0,      %ebx
        subl   %ebp,    %eax
        sbbl   $0,      %ebx
        movl   _p_,     %edi
        movl   _r_,     %ebp
        addl   %edi,    %ecx            # ecx:edx <- b+p (adds p and r to b-r)
        adcl   $0,      %edx
        addl   %ebp,    %ecx
        adcl   $0,      %edx
        subl   %ecx,    %eax            # eax:ebx <- (a-q) - (b+p)
        sbbl   %edx,    %ebx
        jb     L(done_2)                # negative (borrow): stop
        jmp    L(shift_1)               # back to phase 1 with edi = p, ebp = r

#endif /* assembly_sn_hgcd_2 */

