/*=============================================================================

    This file is part of FLINT.

    FLINT is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    FLINT is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with FLINT; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA

=============================================================================*/
/******************************************************************************

    Copyright (C) 2008, 2009 William Hart
    Copyright (C) 2010 Sebastian Pancratz
    Copyright (C) 2011 Fredrik Johansson

******************************************************************************/

*******************************************************************************

    Memory management

*******************************************************************************

void fmpz_poly_init(fmpz_poly_t poly)

    Initialises \code{poly} for use, setting its length to zero.  
    A corresponding call to\\ \code{fmpz_poly_clear()} must be made after 
    finishing with the \code{fmpz_poly_t} to free the memory used by 
    the polynomial.

void fmpz_poly_init2(fmpz_poly_t poly, slong alloc)

    Initialises \code{poly} with space for at least \code{alloc} coefficients 
    and sets the length to zero.  The allocated coefficients are all set to 
    zero.

void fmpz_poly_realloc(fmpz_poly_t poly, slong alloc)

    Reallocates the given polynomial to have space for \code{alloc} 
    coefficients.  If \code{alloc} is zero the polynomial is cleared 
    and then reinitialised.  If the current length is greater than 
    \code{alloc} the polynomial is first truncated to length \code{alloc}.

void fmpz_poly_fit_length(fmpz_poly_t poly, slong len)

    If \code{len} is greater than the number of coefficients currently 
    allocated, then the polynomial is reallocated to have space for at 
    least \code{len} coefficients.  No data is lost when calling this 
    function.

    The function efficiently deals with the case where \code{fit_length} is 
    called many times in small increments by at least doubling the number 
    of allocated coefficients when length is larger than the number of 
    coefficients currently allocated.

void fmpz_poly_clear(fmpz_poly_t poly)

    Clears the given polynomial, releasing any memory used.  It must 
    be reinitialised in order to be used again.

void _fmpz_poly_normalise(fmpz_poly_t poly)

    Sets the length of \code{poly} so that the top coefficient is non-zero. 
    If all coefficients are zero, the length is set to zero.  This function 
    is mainly used internally, as all functions guarantee normalisation.

void _fmpz_poly_set_length(fmpz_poly_t poly, slong newlen)

    Demotes the coefficients of \code{poly} beyond \code{newlen} and sets 
    the length of \code{poly} to \code{newlen}.

*******************************************************************************

    Polynomial parameters

*******************************************************************************

slong fmpz_poly_length(const fmpz_poly_t poly)

    Returns the length of \code{poly}.  The zero polynomial has length zero.

slong fmpz_poly_degree(const fmpz_poly_t poly)

    Returns the degree of \code{poly}, which is one less than its length.

*******************************************************************************

    Assignment and basic manipulation

*******************************************************************************

void fmpz_poly_set(fmpz_poly_t poly1, const fmpz_poly_t poly2)

    Sets \code{poly1} to equal \code{poly2}.

void fmpz_poly_set_si(fmpz_poly_t poly, slong c)

    Sets \code{poly} to the signed integer \code{c}.

void fmpz_poly_set_ui(fmpz_poly_t poly, ulong c)

    Sets \code{poly} to the unsigned integer \code{c}.

void fmpz_poly_set_fmpz(fmpz_poly_t poly, const fmpz_t c)

    Sets \code{poly} to the integer \code{c}.

void fmpz_poly_set_mpz(fmpz_poly_t poly, const mpz_t c)

    Sets \code{poly} to the integer \code{c}.

int _fmpz_poly_set_str(fmpz * poly, const char * str)

    Sets \code{poly} to the polynomial encoded in the null-terminated 
    string \code{str}.  Assumes that \code{poly} is allocated as a 
    sufficiently large array suitable for the number of coefficients 
    present in \code{str}.
    
    Returns $0$ if no error occurred.  Otherwise, returns a non-zero 
    value, in which case the resulting value of \code{poly} is undefined.  
    If \code{str} is not null-terminated, calling this method might result 
    in a segmentation fault.

int fmpz_poly_set_str(fmpz_poly_t poly, const char * str)

    Imports a polynomial from a null-terminated string.  If the string 
    \code{str} represents a valid polynomial returns $1$, otherwise 
    returns $0$.
    
    Returns $0$ if no error occurred.  Otherwise, returns a non-zero value, 
    in which case the resulting value of \code{poly} is undefined.  If 
    \code{str} is not null-terminated, calling this method might result in 
    a segmentation fault.

char * _fmpz_poly_get_str(const fmpz * poly, slong len)

    Returns the plain FLINT string representation of the polynomial 
    \code{(poly, len)}.

char * fmpz_poly_get_str(const fmpz_poly_t poly)

    Returns the plain FLINT string representation of the polynomial 
    \code{poly}.

char * _fmpz_poly_get_str_pretty(const fmpz * poly, slong len, const char * x)

    Returns a pretty representation of the polynomial 
    \code{(poly, len)} using the null-terminated string~\code{x} as the 
    variable name.

char * fmpz_poly_get_str_pretty(const fmpz_poly_t poly, const char * x)

    Returns a pretty representation of the polynomial~\code{poly} using the 
    null-terminated string \code{x} as the variable name.

void fmpz_poly_zero(fmpz_poly_t poly)

    Sets \code{poly} to the zero polynomial.

void fmpz_poly_one(fmpz_poly_t poly)

    Sets \code{poly} to the constant polynomial one.

void fmpz_poly_zero_coeffs(fmpz_poly_t poly, slong i, slong j)

    Sets the coefficients of $x^i, \dotsc, x^{j-1}$ to zero.

void fmpz_poly_swap(fmpz_poly_t poly1, fmpz_poly_t poly2)

    Swaps \code{poly1} and \code{poly2}.  This is done efficiently without 
    copying data by swapping pointers, etc.

void _fmpz_poly_reverse(fmpz * res, const fmpz * poly, slong len, slong n)

    Sets \code{(res, n)} to the reverse of \code{(poly, n)}, where 
    \code{poly} is in fact an array of length \code{len}.  Assumes that 
    \code{0 < len <= n}.  Supports aliasing of \code{res} and \code{poly}, 
    but the behaviour is undefined in case of partial overlap.

void fmpz_poly_reverse(fmpz_poly_t res, const fmpz_poly_t poly, slong n)

    This function considers the polynomial \code{poly} to be of length $n$, 
    notionally truncating and zero padding if required, and reverses 
    the result.  Since the function normalises its result \code{res} may be 
    of length less than $n$.

void fmpz_poly_truncate(fmpz_poly_t poly, slong newlen)

    If the current length of \code{poly} is greater than \code{newlen}, it 
    is truncated to have the given length.  Discarded coefficients are not 
    necessarily set to zero.

*******************************************************************************

    Randomisation

*******************************************************************************

void fmpz_poly_randtest(fmpz_poly_t f, flint_rand_t state, 
                                                slong len, mp_bitcnt_t bits)

    Sets $f$ to a random polynomial with up to the given length and where 
    each coefficient has up to the given number of bits. The coefficients 
    are signed randomly. One must call \code{flint_randinit()} before 
    calling this function.

void fmpz_poly_randtest_unsigned(fmpz_poly_t f, flint_rand_t state, 
                                                slong len, mp_bitcnt_t bits)

    Sets $f$ to a random polynomial with up to the given length and where
    each coefficient has up to the given number of bits. One must call 
    \code{flint_randinit()} before calling this function.

void fmpz_poly_randtest_not_zero(fmpz_poly_t f, flint_rand_t state, 
                                                slong len, mp_bitcnt_t bits)

    As for \code{fmpz_poly_randtest()} except that \code{len} and bits may 
    not be zero and the polynomial generated is guaranteed not to be the 
    zero polynomial.  One must call \code{flint_randinit()} before 
    calling this function.

*******************************************************************************

    Getting and setting coefficients

*******************************************************************************

void fmpz_poly_get_coeff_fmpz(fmpz_t x, const fmpz_poly_t poly, slong n)

    Sets $x$ to the $n$th coefficient of \code{poly}.  Coefficient 
    numbering is from zero and if $n$ is set to a value beyond the end of 
    the polynomial, zero is returned.

slong fmpz_poly_get_coeff_si(const fmpz_poly_t poly, slong n)

    Returns coefficient $n$ of \code{poly} as a \code{slong}. The result is 
    undefined if the value does not fit into a \code{slong}. Coefficient 
    numbering is from zero and if $n$ is set to a value beyond the end of 
    the polynomial, zero is returned.

ulong fmpz_poly_get_coeff_ui(const fmpz_poly_t poly, slong n)

    Returns coefficient $n$ of \code{poly} as a \code{ulong}.  The result is 
    undefined if the value does not fit into a \code{ulong}.  Coefficient 
    numbering is from zero and if $n$ is set to a value beyond the end of the 
    polynomial, zero is returned.

fmpz * fmpz_poly_get_coeff_ptr(const fmpz_poly_t poly, slong n)

    Returns a reference to the coefficient of $x^n$ in the polynomial, 
    as an \code{fmpz *}.  This function is provided so that individual 
    coefficients can be accessed and operated on by functions in the 
    \code{fmpz} module.  This function does not make a copy of the 
    data, but returns a reference to the actual coefficient.

    Returns \code{NULL} when $n$ exceeds the degree of the polynomial.

    This function is implemented as a macro.

fmpz * fmpz_poly_lead(const fmpz_poly_t poly)

    Returns a reference to the leading coefficient of the polynomial, 
    as an \code{fmpz *}.  This function is provided so that the leading 
    coefficient can be easily accessed and operated on by functions in 
    the \code{fmpz} module.  This function does not make a copy of the 
    data, but returns a reference to the actual coefficient.

    Returns \code{NULL} when the polynomial is zero.

    This function is implemented as a macro.

void fmpz_poly_set_coeff_fmpz(fmpz_poly_t poly, slong n, const fmpz_t x)

    Sets coefficient $n$ of \code{poly} to the \code{fmpz} value \code{x}.  
    Coefficient numbering starts from zero and if $n$ is beyond the current 
    length of \code{poly} then the polynomial is extended and zero 
    coefficients inserted if necessary.

void fmpz_poly_set_coeff_si(fmpz_poly_t poly, slong n, slong x)

    Sets coefficient $n$ of \code{poly} to the \code{slong} value \code{x}. 
    Coefficient numbering starts from zero and if $n$ is beyond the current 
    length of \code{poly} then the polynomial is extended and zero 
    coefficients inserted if necessary.

void fmpz_poly_set_coeff_ui(fmpz_poly_t poly, slong n, ulong x)

    Sets coefficient $n$ of \code{poly} to the \code{ulong} value 
    \code{x}.  Coefficient numbering starts from zero and if $n$ is beyond 
    the current length of \code{poly} then the polynomial is extended and 
    zero coefficients inserted if necessary.

*******************************************************************************

    Comparison

*******************************************************************************

int fmpz_poly_equal(const fmpz_poly_t poly1, const fmpz_poly_t poly2)

    Returns $1$ if \code{poly1} is equal to \code{poly2}, otherwise 
    returns $0$.  The polynomials are assumed to be normalised.

int fmpz_poly_is_zero(const fmpz_poly_t poly)

    Returns $1$ if the polynomial is zero and $0$ otherwise.

    This function is implemented as a macro.

int fmpz_poly_is_one(const fmpz_poly_t poly)

    Returns $1$ if the polynomial is one and $0$ otherwise.

int fmpz_poly_is_unit(const fmpz_poly_t poly)

    Returns $1$ is the polynomial is the constant polynomial $\pm 1$, 
    and $0$ otherwise.

*******************************************************************************

    Addition and subtraction

*******************************************************************************

void _fmpz_poly_add(fmpz * res, const fmpz * poly1, slong len1, 
                                               const fmpz * poly2, slong len2)

    Sets \code{res} to the sum of \code{(poly1, len1)} and 
    \code{(poly2, len2)}.  It is assumed that \code{res} has 
    sufficient space for the longer of the two polynomials.

void fmpz_poly_add(fmpz_poly_t res, const fmpz_poly_t poly1, 
                                                       const fmpz_poly_t poly2)

    Sets \code{res} to the sum of \code{poly1} and \code{poly2}.

void _fmpz_poly_sub(fmpz * res, const fmpz * poly1, slong len1, 
                                                const fmpz * poly2, slong len2)

    Sets \code{res} to \code{(poly1, len1)} minus \code{(poly2, len2)}.  It 
    is assumed that \code{res} has sufficient space for the longer of the 
    two polynomials.

void fmpz_poly_sub(fmpz_poly_t res, const fmpz_poly_t poly1, 
                                                       const fmpz_poly_t poly2)

    Sets \code{res} to \code{poly1} minus \code{poly2}.

void fmpz_poly_neg(fmpz_poly_t res, const fmpz_poly_t poly)

    Sets \code{res} to \code{-poly}.

*******************************************************************************

    Scalar multiplication and division

*******************************************************************************

void fmpz_poly_scalar_mul_fmpz(fmpz_poly_t poly1, 
                                       const fmpz_poly_t poly2, const fmpz_t x)

    Sets \code{poly1} to \code{poly2} times $x$.

void fmpz_poly_scalar_mul_mpz(fmpz_poly_t poly1, 
                                       const fmpz_poly_t poly2, const mpz_t x)

    Sets \code{poly1} to \code{poly2} times the \code{mpz_t} $x$.

void fmpz_poly_scalar_mul_si(fmpz_poly_t poly1, fmpz_poly_t poly2, slong x)

    Sets \code{poly1} to \code{poly2} times the signed \code{slong x}.

void fmpz_poly_scalar_mul_ui(fmpz_poly_t poly1, fmpz_poly_t poly2, ulong x)

    Sets \code{poly1} to \code{poly2} times the \code{ulong x}.

void fmpz_poly_scalar_mul_2exp(fmpz_poly_t poly1, fmpz_poly_t poly2, ulong exp)

    Sets \code{poly1} to \code{poly2} times \code{2^exp}.

void fmpz_poly_scalar_addmul_fmpz(fmpz_poly_t poly1, const fmpz_poly_t poly2, 
                                                                const fmpz_t x)

    Sets \code{poly} to \code{poly1 + x * poly2}.

void fmpz_poly_scalar_submul_fmpz(fmpz_poly_t poly1, const fmpz_poly_t poly2, 
                                                                const fmpz_t x)

    Sets \code{poly} to \code{poly1 - x * poly2}.

void fmpz_poly_scalar_fdiv_fmpz(fmpz_poly_t poly1, 
                                       const fmpz_poly_t poly2, const fmpz_t x)

    Sets \code{poly1} to \code{poly2} divided by the \code{fmpz_t x}, 
    rounding coefficients down toward~$- \infty$.

void fmpz_poly_scalar_fdiv_mpz(fmpz_poly_t poly1, 
                                       const fmpz_poly_t poly2, const mpz_t x)

    Sets \code{poly1} to \code{poly2} divided by the \code{mpz_t x}, 
    rounding coefficients down toward~$- \infty$.

void fmpz_poly_scalar_fdiv_si(fmpz_poly_t poly1, fmpz_poly_t poly2, slong x)

    Sets \code{poly1} to \code{poly2} divided by the \code{slong x}, 
    rounding coefficients down toward~$- \infty$.

void fmpz_poly_scalar_fdiv_ui(fmpz_poly_t poly1, fmpz_poly_t poly2, ulong x)

    Sets \code{poly1} to \code{poly2} divided by the \code{ulong x}, 
    rounding coefficients down toward~$- \infty$.

void fmpz_poly_scalar_fdiv_2exp(fmpz_poly_t poly1, fmpz_poly_t poly2, ulong x)

    Sets \code{poly1} to \code{poly2} divided by \code{2^x}, 
    rounding coefficients down toward~$- \infty$.

void fmpz_poly_scalar_tdiv_fmpz(fmpz_poly_t poly1, 
                                       const fmpz_poly_t poly2, const fmpz_t x)

    Sets \code{poly1} to \code{poly2} divided by the \code{fmpz_t x}, 
    rounding coefficients toward~$0$.

void fmpz_poly_scalar_tdiv_si(fmpz_poly_t poly1, fmpz_poly_t poly2, slong x)

    Sets \code{poly1} to \code{poly2} divided by the \code{slong x}, 
    rounding coefficients toward~$0$.

void fmpz_poly_scalar_tdiv_ui(fmpz_poly_t poly1, fmpz_poly_t poly2, ulong x)

    Sets \code{poly1} to \code{poly2} divided by the \code{ulong x}, 
    rounding coefficients toward~$0$.

void fmpz_poly_scalar_tdiv_2exp(fmpz_poly_t poly1, fmpz_poly_t poly2, ulong x)

    Sets \code{poly1} to \code{poly2} divided by \code{2^x}, 
    rounding coefficients toward~$0$.

void fmpz_poly_scalar_divexact_fmpz(fmpz_poly_t poly1, 
                                       const fmpz_poly_t poly2, const fmpz_t x)

    Sets \code{poly1} to \code{poly2} divided by the \code{fmpz_t x}, 
    assuming the division is exact for every coefficient.

void fmpz_poly_scalar_divexact_mpz(fmpz_poly_t poly1, 
                                       const fmpz_poly_t poly2, const mpz_t x)

    Sets \code{poly1} to \code{poly2} divided by the \code{mpz_t x}, 
    assuming the coefficient is exact for every coefficient.

id fmpz_poly_scalar_divexact_si(fmpz_poly_t poly1, fmpz_poly_t poly2, slong x)

    Sets \code{poly1} to \code{poly2} divided by the \code{slong x}, 
    assuming the coefficient is exact for every coefficient.

void fmpz_poly_scalar_divexact_ui(fmpz_poly_t poly1, 
                                  fmpz_poly_t poly2, ulong x)

    Sets \code{poly1} to \code{poly2} divided by the \code{ulong x}, 
    assuming the coefficient is exact for every coefficient.

void fmpz_poly_scalar_mod_fmpz(fmpz_poly_t poly1, 
                               const fmpz_poly_t poly2, const fmpz_t p)

    Sets \code{poly1} to \code{poly2}, reducing each coefficient 
    modulo $p > 0$.

void fmpz_poly_scalar_smod_fmpz(fmpz_poly_t poly1, 
                                const fmpz_poly_t poly2, const fmpz_t p)

    Sets \code{poly1} to \code{poly2}, symmetrically reducing 
    each coefficient modulo $p > 0$, that is, choosing the unique 
    representative in the interval $(-p/2, p/2]$.

*******************************************************************************

    Bit packing

*******************************************************************************

void _fmpz_poly_bit_pack(mp_ptr arr, const fmpz * poly,
                                  slong len, mp_bitcnt_t bit_size, int negate)

    Packs the coefficients of \code{poly} into bitfields of the given 
    \code{bit_size}, negating the coefficients before packing 
    if \code{negate} is set to $-1$.

int _fmpz_poly_bit_unpack(fmpz * poly, slong len, 
                               mp_srcptr arr, mp_bitcnt_t bit_size, int negate)

    Unpacks the polynomial of given length from the array as packed into 
    fields of the given \code{bit_size}, finally negating the coefficients 
    if \code{negate} is set to $-1$. Returns borrow, which is nonzero if a
    leading term with coefficient $\pm1$ should be added at
    position \code{len} of \code{poly}.

void _fmpz_poly_bit_unpack_unsigned(fmpz * poly, slong len, 
                                         mp_srcptr_t arr, mp_bitcnt_t bit_size)

    Unpacks the polynomial of given length from the array as packed into 
    fields of the given \code{bit_size}.  The coefficients are assumed to 
    be unsigned.

void fmpz_poly_bit_pack(fmpz_t f, const fmpz_poly_t poly, mp_bitcnt_t bit_size)

    Packs \code{poly} into bitfields of size \code{bit_size}, writing the
    result to \code{f}. The sign of \code{f} will be the same as that of
    the leading coefficient of \code{poly}.

void fmpz_poly_bit_unpack(fmpz_poly_t poly, const fmpz_t f,
        mp_bitcnt_t bit_size)

    Unpacks the polynomial with signed coefficients packed into
    fields of size \code{bit_size} as represented by the integer \code{f}.

void fmpz_poly_bit_unpack_unsigned(fmpz_poly_t poly, const fmpz_t f,
        mp_bitcnt_t bit_size)

    Unpacks the polynomial with unsigned coefficients packed into
    fields of size \code{bit_size} as represented by the integer \code{f}.
    It is required that \code{f} is nonnegative.

*******************************************************************************

    Multiplication

*******************************************************************************

void _fmpz_poly_mul_classical(fmpz * res, const fmpz * poly1, slong len1, 
                                               const fmpz * poly2, slong len2)

    Sets \code{(res, len1 + len2 - 1)} to the product of \code{(poly1, len1)} 
    and \code{(poly2, len2)}.

    Assumes \code{len1} and \code{len2} are positive.  Allows zero-padding 
    of the two input polynomials.  No aliasing of inputs with outputs is 
    allowed.

void fmpz_poly_mul_classical(fmpz_poly_t res, 
                              const fmpz_poly_t poly1, const fmpz_poly_t poly2)

    Sets \code{res} to the product of \code{poly1} and \code{poly2}, computed 
    using the classical or schoolbook method.

void _fmpz_poly_mullow_classical(fmpz * res, const fmpz * poly1, slong len1, 
                                       const fmpz * poly2, slong len2, slong n)

    Sets \code{(res, n)} to the first $n$ coefficients of \code{(poly1, len1)} 
    multiplied by \code{(poly2, len2)}.

    Assumes \code{0 < n <= len1 + len2 - 1}.  Assumes neither \code{len1} nor 
    \code{len2} is zero.

void fmpz_poly_mullow_classical(fmpz_poly_t res, 
                    const fmpz_poly_t poly1, const fmpz_poly_t poly2, slong n)

    Sets \code{res} to the first $n$ coefficients of \code{poly1 * poly2}.

void _fmpz_poly_mulhigh_classical(fmpz * res, const fmpz * poly1, slong len1, 
                                   const fmpz * poly2, slong len2, slong start)

    Sets the first \code{start} coefficients of \code{res} to zero and the 
    remainder to the corresponding coefficients of 
    \code{(poly1, len1) * (poly2, len2)}.

    Assumes \code{start <= len1 + len2 - 1}.  Assumes neither \code{len1} nor 
    \code{len2} is zero.

void fmpz_poly_mulhigh_classical(fmpz_poly_t res, 
                const fmpz_poly_t poly1, const fmpz_poly_t poly2, slong start)

    Sets the first \code{start} coefficients of \code{res} to zero and the 
    remainder to the corresponding coefficients of the product of \code{poly1}
    and \code{poly2}.

void _fmpz_poly_mulmid_classical(fmpz * res, const fmpz * poly1, slong len1, 
                                               const fmpz * poly2, slong len2)

    Sets \code{res} to the middle \code{len1 - len2 + 1} coefficients of 
    the product of \code{(poly1, len1)} and \code{(poly2, len2)}, i.e.\ the 
    coefficients from degree \code{len2 - 1} to \code{len1 - 1} inclusive.  
    Assumes that \code{len1 >= len2 > 0}.

void fmpz_poly_mulmid_classical(fmpz_poly_t res, 
                              const fmpz_poly_t poly1, const fmpz_poly_t poly2)

    Sets \code{res} to the middle \code{len(poly1) - len(poly2) + 1} 
    coefficients of \code{poly1 * poly2}, i.e.\ the coefficient from degree 
    \code{len2 - 1} to \code{len1 - 1} inclusive.  Assumes that 
    \code{len1 >= len2}.

void _fmpz_poly_mul_karatsuba(fmpz * res, const fmpz * poly1, slong len1, 
                                               const fmpz * poly2, slong len2)

    Sets \code{(res, len1 + len2 - 1)} to the product of \code{(poly1, len1)} 
    and \code{(poly2, len2)}.  Assumes \code{len1 >= len2 > 0}.  Allows 
    zero-padding of the two input polynomials.  No aliasing of inputs with 
    outputs is allowed.

void fmpz_poly_mul_karatsuba(fmpz_poly_t res, 
                              const fmpz_poly_t poly1, const fmpz_poly_t poly2)

    Sets \code{res} to the product of \code{poly1} and \code{poly2}.

void _fmpz_poly_mullow_karatsuba_n(fmpz * res, const fmpz * poly1, 
                                                  const fmpz * poly2, slong n)

    Sets \code{res} to the product of \code{poly1} and \code{poly2} and 
    truncates to the given length.  It is assumed that \code{poly1} and 
    \code{poly2} are precisely the given length, possibly zero padded.  
    Assumes $n$ is not zero.

void fmpz_poly_mullow_karatsuba_n(fmpz_poly_t res, 
                    const fmpz_poly_t poly1, const fmpz_poly_t poly2, slong n)

    Sets \code{res} to the product of \code{poly1} and \code{poly2} and 
    truncates to the given length.

void _fmpz_poly_mulhigh_karatsuba_n(fmpz * res, const fmpz * poly1, 
                                                const fmpz * poly2, slong len)

    Sets \code{res} to the product of \code{poly1} and \code{poly2} and 
    truncates at the top to the given length.  The first \code{len - 1} 
    coefficients are set to zero. It is assumed that \code{poly1} and 
    \code{poly2} are precisely the given length, possibly zero padded.  
    Assumes \code{len} is not zero.

void fmpz_poly_mulhigh_karatsuba_n(fmpz_poly_t res, 
                  const fmpz_poly_t poly1, const fmpz_poly_t poly2, slong len)

    Sets the first \code{len - 1} coefficients of the result to zero and the 
    remaining coefficients to the corresponding coefficients of the product of 
    \code{poly1} and \code{poly2}.  Assumes \code{poly1} and \code{poly2} are 
    at most of the given length.

void _fmpz_poly_mul_KS(fmpz * res, const fmpz * poly1, slong len1, 
                                               const fmpz * poly2, slong len2)

    Sets \code{(res, len1 + len2 - 1)} to the product of \code{(poly1, len1)} 
    and \code{(poly2, len2)}.

    Places no assumptions on \code{len1} and \code{len2}.  Allows zero-padding 
    of the two input polynomials.  Supports aliasing of inputs and outputs.

void fmpz_poly_mul_KS(fmpz_poly_t res, 
                              const fmpz_poly_t poly1, const fmpz_poly_t poly2)

    Sets \code{res} to the product of \code{poly1} and \code{poly2}.

void _fmpz_poly_mullow_KS(fmpz * res, const fmpz * poly1, slong len1, 
                                     const fmpz * poly2, slong len2, slong n)

    Sets \code{(res, n)} to the lowest $n$ coefficients of the product of 
    \code{(poly1, len1)} and \code{(poly2, len2)}.

    Assumes that \code{len1} and \code{len2} are positive, but does allow 
    for the polynomials to be zero-padded.  The polynomials may be zero, 
    too.  Assumes $n$ is positive.  Supports aliasing between \code{res}, 
    \code{poly1} and \code{poly2}.

void fmpz_poly_mullow_KS(fmpz_poly_t res, 
                  const fmpz_poly_t poly1, const fmpz_poly_t poly2, slong n)

    Sets \code{res} to the lowest $n$ coefficients of the product of 
    \code{poly1} and \code{poly2}.

void _fmpz_poly_mul_SS(fmpz * output, const fmpz * input1, slong length1, 
                                            const fmpz * input2, slong length2)

    Sets \code{(output, length1 + length2 - 1)} to the product of 
    \code{(input1, length1)} and \code{(input2, length2)}.

    We must have \code{len1 > 1} and \code{len2 > 1}.  Allows zero-padding 
    of the two input polynomials.  Supports aliasing of inputs and outputs.

void fmpz_poly_mul_SS(fmpz_poly_t res,
                           const fmpz_poly_t poly1, const fmpz_poly_t poly2)

    Sets \code{res} to the product of \code{poly1} and \code{poly2}. Uses the
    Sch\"{o}nhage-Strassen algorithm.

void _fmpz_poly_mullow_SS(fmpz * output, const fmpz * input1, slong length1, 
                                  const fmpz * input2, slong length2, slong n)

    Sets \code{(res, n)} to the lowest $n$ coefficients of the product of 
    \code{(poly1, len1)} and \code{(poly2, len2)}.

    Assumes that \code{len1} and \code{len2} are positive, but does allow 
    for the polynomials to be zero-padded.  We must have \code{len1 > 1} 
    and \code{len2 > 1}. Assumes $n$ is positive. Supports aliasing between 
    \code{res}, \code{poly1} and \code{poly2}.

void fmpz_poly_mullow_SS(fmpz_poly_t res,
                     const fmpz_poly_t poly1, const fmpz_poly_t poly2, slong n)

    Sets \code{res} to the lowest $n$ coefficients of the product of 
    \code{poly1} and \code{poly2}.

void _fmpz_poly_mul(fmpz * res, const fmpz * poly1, slong len1, 
                                               const fmpz * poly2, slong len2)

    Sets \code{(res, len1 + len2 - 1)} to the product of \code{(poly1, len1)} 
    and \code{(poly2, len2)}.  Assumes \code{len1 >= len2 > 0}.  Allows 
    zero-padding of the two input polynomials. Does not support aliasing 
    between the inputs and the output.


void fmpz_poly_mul(fmpz_poly_t res, 
                              const fmpz_poly_t poly1, const fmpz_poly_t poly2)

    Sets \code{res} to the product of \code{poly1} and \code{poly2}.  Chooses 
    an optimal algorithm from the choices above.

void _fmpz_poly_mullow(fmpz * res, const fmpz * poly1, slong len1, 
                                     const fmpz * poly2, slong len2, slong n)

    Sets \code{(res, n)} to the lowest $n$ coefficients of the product of 
    \code{(poly1, len1)} and \code{(poly2, len2)}.

    Assumes \code{len1 >= len2 > 0} and \code{0 < n <= len1 + len2 - 1}.  
    Allows for zero-padding in the inputs.  Does not support aliasing between 
    the inputs and the output.

void fmpz_poly_mullow(fmpz_poly_t res, 
                  const fmpz_poly_t poly1, const fmpz_poly_t poly2, slong n)

    Sets \code{res} to the lowest $n$ coefficients of the product of 
    \code{poly1} and \code{poly2}.

void fmpz_poly_mulhigh_n(fmpz_poly_t res, 
                    const fmpz_poly_t poly1, const fmpz_poly_t poly2, slong n)

    Sets the high $n$ coefficients of \code{res} to the high $n$ coefficients 
    of the product of \code{poly1} and \code{poly2}, assuming the latter are 
    precisely $n$ coefficients in length, zero padded if necessary.  The 
    remaining $n - 1$ coefficients may be arbitrary.

*******************************************************************************

    Squaring

*******************************************************************************

void _fmpz_poly_sqr_KS(fmpz * rop, const fmpz * op, slong len)

    Sets \code{(rop, 2*len - 1)} to the square of \code{(op, len)}, 
    assuming that \code{len > 0}.

    Supports zero-padding in \code{(op, len)}.  Does not support aliasing.

void fmpz_poly_sqr_KS(fmpz_poly_t rop, const fmpz_poly_t op)

    Sets \code{rop} to the square of the polynomial \code{op} using 
    Kronecker segmentation.

void _fmpz_poly_sqr_karatsuba(fmpz * rop, const fmpz * op, slong len)

    Sets \code{(rop, 2*len - 1)} to the square of \code{(op, len)}, 
    assuming that \code{len > 0}.

    Supports zero-padding in \code{(op, len)}.  Does not support aliasing.

void fmpz_poly_sqr_karatsuba(fmpz_poly_t rop, const fmpz_poly_t op)

    Sets \code{rop} to the square of the polynomial \code{op} using 
    the Karatsuba multiplication algorithm.

void _fmpz_poly_sqr_classical(fmpz * rop, const fmpz * op, slong len)

    Sets \code{(rop, 2*len - 1)} to the square of \code{(op, len)}, 
    assuming that \code{len > 0}.

    Supports zero-padding in \code{(op, len)}.  Does not support aliasing.

void fmpz_poly_sqr_classical(fmpz_poly_t rop, const fmpz_poly_t op)

    Sets \code{rop} to the square of the polynomial \code{op} using 
    the classical or schoolbook method.

void _fmpz_poly_sqr(fmpz * rop, const fmpz * op, slong len)

    Sets \code{(rop, 2*len - 1)} to the square of \code{(op, len)}, 
    assuming that \code{len > 0}.

    Supports zero-padding in \code{(op, len)}.  Does not support aliasing.

void fmpz_poly_sqr(fmpz_poly_t rop, const fmpz_poly_t op)

    Sets \code{rop} to the square of the polynomial \code{op}.

void _fmpz_poly_sqrlow_KS(fmpz * res, const fmpz * poly, slong len, slong n)

    Sets \code{(res, n)} to the lowest $n$ coefficients 
    of the square of \code{(poly, len)}.

    Assumes that \code{len} is positive, but does allow for the polynomial 
    to be zero-padded.  The polynomial may be zero, too.  Assumes $n$ is 
    positive.  Supports aliasing between \code{res} and \code{poly}.

void fmpz_poly_sqrlow_KS(fmpz_poly_t res, const fmpz_poly_t poly, slong n)

    Sets \code{res} to the lowest $n$ coefficients 
    of the square of \code{poly}.

void _fmpz_poly_sqrlow_karatsuba_n(fmpz * res, const fmpz * poly, slong n)

    Sets \code{(res, n)} to the square of \code{(poly, n)} truncated 
    to length $n$, which is assumed to be positive.  Allows for \code{poly} 
    to be zero-oadded. 

void fmpz_poly_sqrlow_karatsuba_n(fmpz_poly_t res, 
                                  const fmpz_poly_t poly, slong n)

    Sets \code{res} to the square of \code{poly} and 
    truncates to the given length.

void _fmpz_poly_sqrlow_classical(fmpz * res, 
                                 const fmpz * poly, slong len, slong n)

    Sets \code{(res, n)} to the first $n$ coefficients of the square 
    of \code{(poly, len)}.

    Assumes that \code{0 < n <= 2 * len - 1}.  

void fmpz_poly_sqrlow_classical(fmpz_poly_t res, 
                                const fmpz_poly_t poly, slong n)

    Sets \code{res} to the first $n$ coefficients of 
    the square of \code{poly}.

void _fmpz_poly_sqrlow(fmpz * res, const fmpz * poly, slong len, slong n)

    Sets \code{(res, n)} to the lowest $n$ coefficients 
    of the square of \code{(poly, len)}.

    Assumes \code{len1 >= len2 > 0} and \code{0 < n <= 2 * len - 1}.  
    Allows for zero-padding in the input.  Does not support aliasing 
    between the input and the output.

void fmpz_poly_sqrlow(fmpz_poly_t res, const fmpz_poly_t poly, slong n)

    Sets \code{res} to the lowest $n$ coefficients 
    of the square of \code{poly}.

*******************************************************************************

    Powering

*******************************************************************************

void _fmpz_poly_pow_multinomial(fmpz * res, 
                                        const fmpz * poly, slong len, ulong e)

    Computes \code{res = poly^e}.  This uses the J.C.P.~Miller pure 
    recurrence as follows:

    If $\ell$ is the index of the lowest non-zero coefficient in \code{poly}, 
    as a first step this method zeros out the lowest $e \ell$ coefficients of 
    \code{res}.  The recurrence above is then used to compute the remaining 
    coefficients.

    Assumes \code{len > 0}, \code{e > 0}.  Does not support aliasing.

void fmpz_poly_pow_multinomial(fmpz_poly_t res, 
                                               const fmpz_poly_t poly, ulong e)

    Computes \code{res = poly^e} using a generalisation of binomial expansion 
    called the J.C.P.~Miller pure recurrence~\citep{Knu1997, Zei1995}.  
    If $e$ is zero, returns one, so that in particular \code{0^0 = 1}.
    
    The formal statement of the recurrence is as follows.  Write the input 
    polynomial as $P(x) = p_0 + p_1 x + \dotsb + p_m x^m$ with $p_0 \neq 0$ 
    and let 
    \begin{equation*}
    P(x)^n = a(n, 0) + a(n, 1) x + \dotsb + a(n, mn) x^{mn}.
    \end{equation*}
    Then $a(n, 0) = p_0^n$ and, for all $1 \leq k \leq mn$, 
    \begin{equation*}
    a(n, k) = 
        (k p_0)^{-1} \sum_{i = 1}^m p_i \bigl( (n + 1) i - k \bigr) a(n, k-i).
    \end{equation*}
    
    % [1] D. Knuth, The Art of Computer Programming Vol. 2, Seminumerical 
    % Algorithms, Third Edition (Reading, Massachusetts: Addison-Wesley, 1997)
    %
    % [2] D. Zeilberger, The J.C.P. Miller Recurrence for Exponentiating a 
    % Polynomial, and its q-Analog, Journal of Difference Equations and 
    % Applications, 1995, Vol. 1, pp. 57--60

void _fmpz_poly_pow_binomial(fmpz * res, const fmpz * poly, ulong e)

    Computes \code{res = poly^e} when poly is of length~$2$, using binomial 
    expansion. 

    Assumes $e > 0$.  Does not support aliasing.

void fmpz_poly_pow_binomial(fmpz_poly_t res, const fmpz_poly_t poly, ulong e)

    Computes \code{res = poly^e} when \code{poly} is of length~$2$, using 
    binomial expansion.

    If the length of \code{poly} is not~$2$, raises an exception and aborts.

void _fmpz_poly_pow_addchains(fmpz * res, const fmpz * poly, slong len, 
                                                          const int * a, int n)

    Given a star chain $1 = a_0 < a_1 < \dotsb < a_n = e$ computes 
    \code{res = poly^e}.
    
    A star chain is an addition chain $1 = a_0 < a_1 < \dotsb < a_n$ such 
    that, for all $i > 0$, $a_i = a_{i-1} + a_j$ for some $j < i$.
    
    Assumes that $e > 2$, or equivalently $n > 1$, and \code{len > 0}.  Does 
    not support aliasing.

void fmpz_poly_pow_addchains(fmpz_poly_t res, const fmpz_poly_t poly, ulong e)
    
    Computes \code{res = poly^e} using addition chains whenever 
    $0 \leq e \leq 148$.

    If $e > 148$, raises an exception and aborts.

void _fmpz_poly_pow_binexp(fmpz * res, const fmpz * poly, slong len, ulong e)

    Sets \code{res = poly^e} using left-to-right binary exponentiation as 
    described in~\citep[p.~461]{Knu1997}.
    
    Assumes that \code{len > 0}, \code{e > 1}.  Assumes that \code{res} is 
    an array of length at least \code{e*(len - 1) + 1}.  Does not support 
    aliasing.

void fmpz_poly_pow_binexp(fmpz_poly_t res, const fmpz_poly_t poly, ulong e)

    Computes \code{res = poly^e} using the binary exponentiation algorithm.  
    If $e$ is zero, returns one, so that in particular \code{0^0 = 1}.

void _fmpz_poly_pow_small(fmpz * res, const fmpz * poly, slong len, ulong e)

    Sets \code{res = poly^e} whenever $0 \leq e \leq 4$.
    
    Assumes that \code{len > 0} and that \code{res} is an array of length 
    at least \code{e*(len - 1) + 1}.  Does not support aliasing.

void _fmpz_poly_pow(fmpz * res, const fmpz * poly, slong len, ulong e)

    Sets \code{res = poly^e}, assuming that \code{e, len > 0} and that 
    \code{res} has space for \code{e*(len - 1) + 1} coefficients.  Does 
    not support aliasing.

void fmpz_poly_pow(fmpz_poly_t res, const fmpz_poly_t poly, ulong e)

    Computes \code{res = poly^e}.  If $e$ is zero, returns one, 
    so that in particular \code{0^0 = 1}.

void _fmpz_poly_pow_trunc(fmpz * res, const fmpz * poly, ulong e, slong n)

    Sets \code{(res, n)} to \code{(poly, n)} raised to the power $e$ and 
    truncated to length $n$.

    Assumes that $e, n > 0$.  Allows zero-padding of \code{(poly, n)}.  
    Does not support aliasing of any inputs and outputs.

void fmpz_poly_pow_trunc(fmpz_poly_t res, 
                                     const fmpz_poly_t poly, ulong e, slong n)

    Notationally raises \code{poly} to the power $e$, truncates the result 
    to length $n$ and writes the result in \code{res}.  This is computed 
    much more efficiently than simply powering the polynomial and truncating.

    Thus, if $n = 0$ the result is zero.  Otherwise, whenever $e = 0$ the 
    result will be the constant polynomial equal to $1$.

    This function can be used to raise power series to a power in an 
    efficient way.

*******************************************************************************

    Shifting

*******************************************************************************

void _fmpz_poly_shift_left(fmpz * res, const fmpz * poly, slong len, slong n)

    Sets \code{(res, len + n)} to \code{(poly, len)} shifted left by 
    $n$ coefficients.  

    Inserts zero coefficients at the lower end.  Assumes that \code{len} 
    and $n$ are positive, and that \code{res} fits \code{len + n} elements.
    Supports aliasing between \code{res} and \code{poly}.

void fmpz_poly_shift_left(fmpz_poly_t res, const fmpz_poly_t poly, slong n)

    Sets \code{res} to \code{poly} shifted left by $n$ coeffs.  Zero 
    coefficients are inserted.

void _fmpz_poly_shift_right(fmpz * res, const fmpz * poly, slong len, slong n)

    Sets \code{(res, len - n)} to \code{(poly, len)} shifted right by 
    $n$ coefficients.  

    Assumes that \code{len} and $n$ are positive, that \code{len > n}, 
    and that \code{res} fits \code{len - n} elements.  Supports aliasing 
    between \code{res} and \code{poly}, although in this case the top 
    coefficients of \code{poly} are not set to zero.

void fmpz_poly_shift_right(fmpz_poly_t res, const fmpz_poly_t poly, slong n)

    Sets \code{res} to \code{poly} shifted right by $n$ coefficients.  If $n$ 
    is equal to or greater than the current length of \code{poly}, \code{res} 
    is set to the zero polynomial.

*******************************************************************************

    Bit sizes and norms

*******************************************************************************

ulong fmpz_poly_max_limbs(const fmpz_poly_t poly)

    Returns the maximum number of limbs required to store the absolute value 
    of coefficients of \code{poly}.  If \code{poly} is zero, returns $0$.

slong fmpz_poly_max_bits(const fmpz_poly_t poly)

    Computes the maximum number of bits $b$ required to store the absolute 
    value of coefficients of \code{poly}.  If all the coefficients of 
    \code{poly} are non-negative, $b$ is returned, otherwise $-b$ is returned.

void fmpz_poly_height(fmpz_t height, const fmpz_poly_t poly)

    Computes the height of \code{poly}, defined as the largest of the
    absolute values the coefficients of \code{poly}. Equivalently, this
    gives the infinity norm of the coefficients. If \code{poly} is zero,
    the height is $0$.

void _fmpz_poly_2norm(fmpz_t res, const fmpz * poly, slong len)

    Sets \code{res} to the Euclidean norm of \code{(poly, len)}, that is, 
    the integer square root of the sum of the squares of the coefficients 
    of \code{poly}.

void fmpz_poly_2norm(fmpz_t res, const fmpz_poly_t poly)

    Sets \code{res} to the Euclidean norm of \code{poly}, that is, the 
    integer square root of the sum of the squares of the coefficients of 
    \code{poly}.

mp_limb_t _fmpz_poly_2norm_normalised_bits(const fmpz * poly, slong len)

    Returns an upper bound on the number of bits of the normalised 
    Euclidean norm of \code{(poly, len)}, i.e. the number of bits of 
    the Euclidean norm divided by the absolute value of the leading 
    coefficient. The returned value will be no more than 1 bit too 
    large. 
    
    This is used in the computation of the Landau-Mignotte bound. 

    It is assumed that \code{len > 0}. The result only makes sense 
    if the leading coefficient is nonzero.

*******************************************************************************

    Greatest common divisor

*******************************************************************************

void _fmpz_poly_gcd_subresultant(fmpz * res, const fmpz * poly1, slong len1, 
                                               const fmpz * poly2, slong len2)

    Computes the greatest common divisor \code{(res, len2)} of 
    \code{(poly1, len1)} and \code{(poly2, len2)}, assuming 
    \code{len1 >= len2 > 0}.  The result is normalised to have 
    positive leading coefficient.  Aliasing between \code{res}, 
    \code{poly1} and \code{poly2} is supported.

void fmpz_poly_gcd_subresultant(fmpz_poly_t res, const fmpz_poly_t poly1, 
                                                       const fmpz_poly_t poly2)

    Computes the greatest common divisor \code{res} of \code{poly1} and 
    \code{poly2}, normalised to have non-negative leading coefficient.

    This function uses the subresultant algorithm as described 
    in~\citep[Algorithm~3.3.1]{Coh1996}.

int _fmpz_poly_gcd_heuristic(fmpz * res, const fmpz * poly1, slong len1, 
                                               const fmpz * poly2, slong len2)

    Computes the greatest common divisor \code{(res, len2)} of 
    \code{(poly1, len1)} and \code{(poly2, len2)}, assuming 
    \code{len1 >= len2 > 0}.  The result is normalised to have 
    positive leading coefficient.  Aliasing between \code{res}, 
    \code{poly1} and \code{poly2} is not supported. The function
    may not always succeed in finding the GCD. If it fails, the
    function returns 0, otherwise it returns 1.

int fmpz_poly_gcd_heuristic(fmpz_poly_t res, const fmpz_poly_t poly1, 
                                                       const fmpz_poly_t poly2)

    Computes the greatest common divisor \code{res} of \code{poly1} and 
    \code{poly2}, normalised to have non-negative leading coefficient.
    
    The function may not always succeed in finding the GCD. If it fails, 
    the function returns 0, otherwise it returns 1.

    This function uses the heuristic GCD algorithm (GCDHEU). The basic
    strategy is to remove the content of the polynomials, pack them 
    using Kronecker segmentation (given a bound on the size of the
    coefficients of the GCD) and take the integer GCD. Unpack the 
    result and test divisibility.

void _fmpz_poly_gcd_modular(fmpz * res, const fmpz * poly1, slong len1, 
                                              const fmpz * poly2, slong len2)

    Computes the greatest common divisor \code{(res, len2)} of 
    \code{(poly1, len1)} and \code{(poly2, len2)}, assuming 
    \code{len1 >= len2 > 0}.  The result is normalised to have 
    positive leading coefficient.  Aliasing between \code{res}, 
    \code{poly1} and \code{poly2} is not supported. 

void fmpz_poly_gcd_modular(fmpz_poly_t res,
                           const fmpz_poly_t poly1, const fmpz_poly_t poly2)

    Computes the greatest common divisor \code{res} of \code{poly1} and 
    \code{poly2}, normalised to have non-negative leading coefficient.
    
    This function uses the modular GCD algorithm. The basic
    strategy is to remove the content of the polynomials, reduce them 
    modulo sufficiently many primes and do CRT reconstruction until
    some bound is reached (or we can prove with trial division that
    we have the GCD).

void _fmpz_poly_gcd(fmpz * res, const fmpz * poly1, slong len1, 
                                               const fmpz * poly2, slong len2)

    Computes the greatest common divisor \code{res} of \code{(poly1, len1)} 
    and \code{(poly2, len2)}, assuming \code{len1 >= len2 > 0}.  The result 
    is normalised to have positive leading coefficient.

    Assumes that \code{res} has space for \code{len2} coefficients.  
    Aliasing between \code{res}, \code{poly1} and \code{poly2} is not 
    supported.

void fmpz_poly_gcd(fmpz_poly_t res, const fmpz_poly_t poly1, 
                                                       const fmpz_poly_t poly2)

    Computes the greatest common divisor \code{res} of \code{poly1} and 
    \code{poly2}, normalised to have non-negative leading coefficient.

void _fmpz_poly_xgcd_modular(fmpz_t r, fmpz * s, fmpz * t, 
                        const fmpz * f, slong len1, const fmpz * g, slong len2)

    Set $r$ to the resultant of \code{(f, len1)} and \code{(g, len2)}.
    If the resultant is zero, the function returns immediately. Otherwise it
    finds polynomials $s$ and $t$ such that \code{s*f + t*g = r}. The length
    of $s$ will be no greater than \code{len2} and the length of $t$ will be
    no greater than \code{len1} (both are zero padded if necessary).

    It is assumed that \code{len1 >= len2 > 0}. No aliasing of inputs and 
    outputs is permitted.

    The function assumes that $f$ and $g$ are primitive (have Gaussian content
    equal to 1). The result is undefined otherwise.

    Uses a multimodular algorithm. The resultant is first computed and 
    extended GCD's modulo various primes $p$ are computed and combined using
    CRT. When the CRT stabilises the resulting polynomials are simply reduced
    modulo further primes until a proven bound is reached.

void fmpz_poly_xgcd_modular(fmpz_t r, fmpz_poly_t s, fmpz_poly_t t,
                                    const fmpz_poly_t f, const fmpz_poly_t g)

    Set $r$ to the resultant of $f$ and $g$. If the resultant is zero, the
    function then returns immediately, otherwise $s$ and $t$ are found such
    that \code{s*f + t*g = r}.

    The function assumes that $f$ and $g$ are primitive (have Gaussian content
    equal to 1). The result is undefined otherwise.

    Uses the multimodular algorithm.

void _fmpz_poly_xgcd(fmpz_t r, fmpz * s, fmpz * t, 
                        const fmpz * f, slong len1, const fmpz * g, slong len2)

    Set $r$ to the resultant of \code{(f, len1)} and \code{(g, len2)}.
    If the resultant is zero, the function returns immediately. Otherwise it
    finds polynomials $s$ and $t$ such that \code{s*f + t*g = r}. The length
    of $s$ will be no greater than \code{len2} and the length of $t$ will be
    no greater than \code{len1} (both are zero padded if necessary).

    The function assumes that $f$ and $g$ are primitive (have Gaussian content
    equal to 1). The result is undefined otherwise.

    It is assumed that \code{len1 >= len2 > 0}. No aliasing of inputs and 
    outputs is permitted.

void fmpz_poly_xgcd(fmpz_t r, fmpz_poly_t s, fmpz_poly_t t,
                                    const fmpz_poly_t f, const fmpz_poly_t g)

    Set $r$ to the resultant of $f$ and $g$. If the resultant is zero, the
    function then returns immediately, otherwise $s$ and $t$ are found such
    that \code{s*f + t*g = r}.

    The function assumes that $f$ and $g$ are primitive (have Gaussian content
    equal to 1). The result is undefined otherwise.

void _fmpz_poly_lcm(fmpz * res, const fmpz * poly1, slong len1, 
                                              const fmpz * poly2, slong len2)

    Sets \code{(res, len1 + len2 - 1)} to the least common multiple 
    of the two polynomials \code{(poly1, len1)} and \code{(poly2, len2)}, 
    normalised to have non-negative leading coefficient.

    Assumes that \code{len1 >= len2 > 0}.

    Does not support aliasing.

void fmpz_poly_lcm(fmpz_poly_t res, const fmpz_poly_t poly1, 
                                                    const fmpz_poly_t poly2)

    Sets \code{res} to the least common multiple of the two 
    polynomials \code{poly1} and \code{poly2}, normalised to 
    have non-negative leading coefficient.

    If either of the two polynomials is zero, sets \code{res} 
    to zero.

    This ensures that the equality
    \begin{equation*}
    f g = \gcd(f, g) \operatorname{lcm}(f, g)
    \end{equation*}
    holds up to sign.

void _fmpz_poly_resultant(fmpz_t res, const fmpz * poly1, slong len1, 
                                      const fmpz * poly2, slong len2)

    Sets \code{res} to the resultant of \code{(poly1, len1)} and 
    \code{(poly2, len2)}, assuming that \code{len1 >= len2 > 0}.

void fmpz_poly_resultant(fmpz_t res, const fmpz_poly_t poly1, 
                                     const fmpz_poly_t poly2)

    Computes the resultant of \code{poly1} and \code{poly2}.

    For two non-zero polynomials $f(x) = a_m x^m + \dotsb + a_0$ and 
    $g(x) = b_n x^n + \dotsb + b_0$ of degrees $m$ and $n$, the resultant 
    is defined to be 
    \begin{equation*}
        a_m^n b_n^m \prod_{(x, y) : f(x) = g(y) = 0} (x - y).
    \end{equation*}
    For convenience, we define the resultant to be equal to zero if either 
    of the two polynomials is zero.

    This function uses the algorithm described 
    in~\citep[Algorithm~3.3.7]{Coh1996}.

*******************************************************************************

    Gaussian content

*******************************************************************************

void _fmpz_poly_content(fmpz_t res, const fmpz * poly, slong len)

    Sets \code{res} to the non-negative content of \code{(poly, len)}.  
    Aliasing between \code{res} and the coefficients of \code{poly} is 
    not supported.

void fmpz_poly_content(fmpz_t res, const fmpz_poly_t poly)

    Sets \code{res} to the non-negative content of \code{poly}.  The content 
    of the zero polynomial is defined to be zero.  Supports aliasing, that is, 
    \code{res} is allowed to be one of the coefficients of \code{poly}.

void _fmpz_poly_primitive_part(fmpz * res, const fmpz * poly, slong len)

    Sets \code{(res, len)} to \code{(poly, len)} divided by the content 
    of \code{(poly, len)}, and normalises the result to have non-negative 
    leading coefficient.

    Assumes that \code{(poly, len)} is non-zero.  Supports aliasing of 
    \code{res} and \code{poly}.

void fmpz_poly_primitive_part(fmpz_poly_t res, const fmpz_poly_t poly)

    Sets \code{res} to \code{poly} divided by the content of \code{poly}, 
    and normalises the result to have non-negative leading coefficient.  
    If \code{poly} is zero, sets \code{res} to zero.

*******************************************************************************

    Square-free

*******************************************************************************

int _fmpz_poly_is_squarefree(const fmpz * poly, slong len)

    Returns whether the polynomial \code{(poly, len)} is square-free.

int fmpz_poly_is_squarefree(const fmpz_poly_t poly)

    Returns whether the polynomial \code{poly} is square-free.  A non-zero 
    polynomial is defined to be square-free if it has no non-unit square 
    factors.  We also define the zero polynomial to be square-free.
    
    Returns~$1$ if the length of \code{poly} is at most~$2$.  Returns whether 
    the discriminant is zero for quadratic polynomials.  Otherwise, returns 
    whether the greatest common divisor of \code{poly} and its derivative has 
    length~$1$.

*******************************************************************************

    Euclidean division

*******************************************************************************

void _fmpz_poly_divrem_basecase(fmpz * Q, fmpz * R, const fmpz * A, 
                                        slong lenA, const fmpz * B, slong lenB)

    Computes \code{(Q, lenA - lenB + 1)}, \code{(R, lenA)} such that 
    $A = B Q + R$ and each coefficient of $R$ beyond \code{lenB} is reduced 
    modulo the leading coefficient of $B$. 
    If the leading coefficient of $B$ is $\pm 1$ or the division is exact, 
    this is the same thing as division over~$\Q$.

    Assumes that $\len(A), \len(B) > 0$.  Allows zero-padding in 
    \code{(A, lenA)}.  $R$ and $A$ may be aliased, but apart from this no 
    aliasing of input and output operands is allowed.

void fmpz_poly_divrem_basecase(fmpz_poly_t Q, fmpz_poly_t R, 
                                      const fmpz_poly_t A, const fmpz_poly_t B)

    Computes $Q$, $R$ such that $A = B Q + R$ and each coefficient of $R$ 
    beyond $\len(B) - 1$ is reduced modulo the leading coefficient of $B$.  
    If the leading coefficient of $B$ is $\pm 1$ or the division is exact, 
    this is the same thing as division over~$\Q$.  An exception is raised 
    if $B$ is zero.

void _fmpz_poly_divrem_divconquer_recursive(fmpz * Q, fmpz * BQ, fmpz * W, 
                                    const fmpz * A, const fmpz * B, slong lenB)

    Computes \code{(Q, lenB)}, \code{(BQ, 2 lenB - 1)} such that 
    $BQ = B \times Q$ and $A = B Q + R$ where each coefficient of $R$ beyond 
    $\len(B) - 1$ is reduced modulo the leading coefficient of $B$.  We 
    assume that $\len(A) = 2 \len(B) - 1$.  If the leading coefficient 
    of $B$ is $\pm 1$ or the division is exact, this is the same as division 
    over~$\Q$.

    Assumes $\len(B) > 0$.  Allows zero-padding in \code{(A, lenA)}.  Requires 
    a temporary array \code{(W, 2 lenB - 1)}.  No aliasing of input and output 
    operands is allowed.

    This function does not read the bottom $\len(B) - 1$ coefficients from 
    $A$, which means that they might not even need to exist in allocated 
    memory.

void _fmpz_poly_divrem_divconquer(fmpz * Q, fmpz * R, 
                        const fmpz * A, slong lenA, const fmpz * B, slong lenB)

    Computes \code{(Q, lenA - lenB + 1)}, \code{(R, lenA)} such that 
    $A = B Q + R$ and each coefficient of $R$ beyond $\len(B) - 1$ is 
    reduced modulo the leading coefficient of $B$.  If the leading 
    coefficient of $B$ is $\pm 1$ or the division is exact, this is 
    the same as division over~$\Q$.

    Assumes $\len(A) \geq \len(B) > 0$.  Allows zero-padding in 
    \code{(A, lenA)}.  No aliasing of input and output operands is 
    allowed.

void fmpz_poly_divrem_divconquer(fmpz_poly_t Q, fmpz_poly_t R, 
                                      const fmpz_poly_t A, const fmpz_poly_t B)

    Computes $Q$, $R$ such that $A = B Q + R$ and each coefficient of $R$ 
    beyond $\len(B) - 1$ is reduced modulo the leading coefficient of $B$. 
    If the leading coefficient of $B$ is $\pm 1$ or the division is exact, 
    this is the same as division over~$\Q$.  An exception is raised if $B$ 
    is zero.

void _fmpz_poly_divrem(fmpz * Q, fmpz * R, const fmpz * A, slong lenA, 
                                           const fmpz * B, slong lenB)

    Computes \code{(Q, lenA - lenB + 1)}, \code{(R, lenA)} such that 
    $A = B Q + R$ and each coefficient of $R$ beyond $\len(B) - 1$ is 
    reduced modulo the leading coefficient of $B$.  If the leading 
    coefficient of $B$ is $\pm 1$ or the division is exact, this is 
    the same thing as division over~$\Q$.

    Assumes $\len(A) \geq \len(B) > 0$.  Allows zero-padding in 
    \code{(A, lenA)}.  No aliasing of input and output operands is 
    allowed.

void fmpz_poly_divrem(fmpz_poly_t Q, fmpz_poly_t R, const fmpz_poly_t A, 
                                                    const fmpz_poly_t B)

    Computes $Q$, $R$ such that $A = B Q + R$ and each coefficient of $R$ 
    beyond $\len(B) - 1$ is reduced modulo the leading coefficient of $B$. 
    If the leading coefficient of $B$ is $\pm 1$ or the division is exact, 
    this is the same as division over~$\Q$.  An exception is raised if $B$ 
    is zero.

void _fmpz_poly_div_basecase(fmpz * Q, fmpz * R, const fmpz * A, slong lenA,
                                                   const fmpz * B, slong lenB)

    Computes the quotient \code{(Q, lenA - lenB + 1)} of \code{(A, lenA)} 
    divided by \code{(B, lenB)}.

    Notationally, computes $Q$, $R$ such that $A = B Q + R$ and each 
    coefficient of $R$ beyond $\len(B) - 1$ is reduced modulo the leading 
    coefficient of $B$. 

    If the leading coefficient of $B$ is $\pm 1$ or the division is exact, 
    this is the same as division over~$\Q$.

    Assumes $\len(A), \len(B) > 0$.  Allows zero-padding in \code{(A, lenA)}. 
    Requires a temporary array $R$ of size at least the (actual) length 
    of $A$. For convenience, $R$ may be \code{NULL}.  $R$ and $A$ may be 
    aliased, but apart from this no aliasing of input and output operands 
    is allowed.

void fmpz_poly_div_basecase(fmpz_poly_t Q, 
                                      const fmpz_poly_t A, const fmpz_poly_t B)

    Computes the quotient $Q$ of $A$ divided by $Q$.

    Notationally, computes $Q$, $R$ such that $A = B Q + R$ and each 
    coefficient of $R$ beyond $\len(B) - 1$ is reduced modulo the leading 
    coefficient of $B$.

    If the leading coefficient of $B$ is $\pm 1$ or the division is exact, 
    this is the same as division over~$\Q$.  An exception is raised if $B$ 
    is zero.

void _fmpz_poly_divremlow_divconquer_recursive(fmpz * Q, fmpz * BQ, 
                                    const fmpz * A, const fmpz * B, slong lenB)

    Divide and conquer division of \code{(A, 2 lenB - 1)} by \code{(B, lenB)}, 
    computing only the bottom $\len(B) - 1$ coefficients of $B Q$.

    Assumes $\len(B) > 0$.  Requires $B Q$ to have length at least 
    $2 \len(B) - 1$, although only the bottom $\len(B) - 1$ coefficients will 
    carry meaningful output.  Does not support any aliasing.  Allows 
    zero-padding in $A$, but not in $B$.

void _fmpz_poly_div_divconquer_recursive(fmpz * Q, fmpz * temp, 
                                    const fmpz * A, const fmpz * B, slong lenB)

    Recursive short division in the balanced case.

    Computes the quotient \code{(Q, lenB)} of \code{(A, 2 lenB - 1)} upon 
    division by \code{(B, lenB)}.  Requires $\len(B) > 0$.  Needs a 
    temporary array \code{temp} of length $2 \len(B) - 1$.  Does not support 
    any aliasing.

    For further details, see~\citep{Mul2000}.

void _fmpz_poly_div_divconquer(fmpz * Q, const fmpz * A, slong lenA, 
                                         const fmpz * B, slong lenB)

    Computes the quotient \code{(Q, lenA - lenB + 1)} of \code{(A, lenA)} 
    upon division by \code{(B, lenB)}.  Assumes that 
    $\len(A) \geq \len(B) > 0$.  Does not support aliasing.

fmpz_poly_div_divconquer(fmpz_poly_t Q, 
                         const fmpz_poly_t A, const fmpz_poly_t B)

    Computes the quotient $Q$ of $A$ divided by $B$.

    Notationally, computes $Q$, $R$ such that $A = B Q + R$ and each 
    coefficient of $R$ beyond $\len(B) - 1$ is reduced modulo the leading 
    coefficient of $B$. 

    If the leading coefficient of $B$ is $\pm 1$ or the division is exact, 
    this is the same as division over~$\Q$.  An exception is raised if $B$ 
    is zero.

void _fmpz_poly_div(fmpz * Q, const fmpz * A, slong lenA,
                              const fmpz * B, slong lenB)

    Computes the quotient \code{(Q, lenA - lenB + 1)} of \code{(A, lenA)} 
    divided by \code{(B, lenB)}.

    Notationally, computes $Q$, $R$ such that $A = B Q + R$ and each 
    coefficient of $R$ beyond $\len(B) - 1$ is reduced modulo the leading 
    coefficient of $B$.  If the leading coefficient of $B$ is $\pm 1$ or 
    the division is exact, this is the same as division over~$\Q$.

    Assumes $\len(A) \geq \len(B) > 0$.  Allows zero-padding in 
    \code{(A, lenA)}.  Aliasing of input and output operands is not 
    allowed.

void fmpz_poly_div(fmpz_poly_t Q, const fmpz_poly_t A, const fmpz_poly_t B)

    Computes the quotient $Q$ of $A$ divided by $B$.

    Notationally, computes $Q$, $R$ such that $A = B Q + R$ and each 
    coefficient of $R$ beyond $\len(B) - 1$ is reduced modulo the leading 
    coefficient of $B$.  If the leading coefficient of $B$ is $\pm 1$ or 
    the division is exact, this is the same as division over $Q$.  An 
    exception is raised if $B$ is zero.

void _fmpz_poly_rem_basecase(fmpz * R, const fmpz * A, slong lenA, 
                                       const fmpz * B, slong lenB)

    Computes the remainder \code{(R, lenA)} of \code{(A, lenA)} upon 
    division by \code{(B, lenB)}.

    Notationally, computes $Q$, $R$ such that $A = B Q + R$ and each 
    coefficient of $R$ beyond $\len(B) - 1$ is reduced modulo the leading 
    coefficient of $B$.  If the leading coefficient of $B$ is $\pm 1$ or 
    the division is exact, this is the same thing as division over~$\Q$.

    Assumes that $\len(A), \len(B) > 0$.  Allows zero-padding in 
    \code{(A, lenA)}.  $R$ and $A$ may be aliased, but apart from this no 
    aliasing of input and output operands is allowed.

void fmpz_poly_rem_basecase(fmpz_poly_t R, 
                                      const fmpz_poly_t A, const fmpz_poly_t B)

    Computes the remainder $R$ of $A$ upon division by $B$.

    Notationally, computes $Q$, $R$ such that $A = B Q + R$ and each 
    coefficient of $R$ beyond $\len(B) - 1$ is reduced modulo the leading 
    coefficient of $B$.  If the leading coefficient of $B$ is $\pm 1$ or 
    the division is exact, this is the same as division over~$\Q$.  An 
    exception is raised if $B$ is zero.

void _fmpz_poly_rem(fmpz * R, const fmpz * A, slong lenA, 
                              const fmpz * B, slong lenB)

    Computes the remainder \code{(R, lenA)} of \code{(A, lenA)} upon division 
    by \code{(B, lenB)}.

    Notationally, computes $Q$, $R$ such that $A = B Q + R$ and each 
    coefficient of $R$ beyond $\len(B) - 1$ is reduced modulo the leading 
    coefficient of $B$.  If the leading coefficient of $B$ is $\pm 1$ or 
    the division is exact, this is the same thing as division over~$\Q$.

    Assumes that $\len(A) \geq \len(B) > 0$.  Allows zero-padding in 
    \code{(A, lenA)}.  Aliasing of input and output operands is not allowed.

void fmpz_poly_rem(fmpz_poly_t R, const fmpz_poly_t A, const fmpz_poly_t B)

    Computes the remainder $R$ of $A$ upon division by $B$.

    Notationally, computes $Q$, $R$ such that $A = B Q + R$ and each 
    coefficient of $R$ beyond $\len(B) - 1$ is reduced modulo the leading 
    coefficient of $B$.  If the leading coefficient of $B$ is $\pm 1$ or 
    the division is exact, this is the same as division over~$\Q$.  An 
    exception is raised if $B$ is zero.

void _fmpz_poly_div_root(fmpz * Q, const fmpz * A, slong len, const fmpz_t c)

    Computes the quotient \code{(Q, len-1)} of \code{(A, len)} upon
    division by $x - c$.

    Supports aliasing of \code{Q} and \code{A}, but the result is
    undefined in case of partial overlap.

void fmpz_poly_div_root(fmpz_poly_t Q, const fmpz_poly_t A, const fmpz_t c)

    Computes the quotient \code{(Q, len-1)} of \code{(A, len)} upon
    division by $x - c$.

*******************************************************************************

    Division with precomputed inverse

*******************************************************************************

void _fmpz_poly_preinvert(fmpz * B_inv, const fmpz * B, slong n)

    Given a monic polynomial \code{B} of length \code{n}, compute a precomputed
    inverse \code{B_inv} of length \code{n} for use in the functions below. No 
    aliasing of B and $B_inv$ is permitted. We assume \code{n} is not zero.

void fmpz_poly_preinvert(fmpz_poly_t B_inv, const fmpz_poly_t B)

    Given a monic polynomial \code{B}, compute a precomputed inverse 
    \code{B_inv} for use in the functions below. An exception is raised if
    \code{B} is zero.

void _fmpz_poly_div_preinv(fmpz * Q, const fmpz * A, slong len1, 
                               const fmpz * B, const fmpz * B_inv, slong len2)

    Given a precomputed inverse \code{B_inv} of the polynomial \code{B} of 
    length \code{len2}, compute the quotient \code{Q} of \code{A} by \code{B}.
    We assume the length \code{len1} of \code{A} is at least \code{len2}. The
    polynomial \code{Q} must have space for \code{len1 - len2 + 1} 
    coefficients. No aliasing of operands is permitted.

void fmpz_poly_div_preinv(fmpz_poly_t Q, const fmpz_poly_t A, 
                                 const fmpz_poly_t B, const fmpz_poly_t B_inv)

    Given a precomputed inverse \code{B_inv} of the polynomial \code{B}, 
    compute the quotient \code{Q} of \code{A} by \code{B}. Aliasing of \code{B}
    and \code{B_inv} is not permitted. 

void _fmpz_poly_divrem_preinv(fmpz * Q, fmpz * A, slong len1, 
                               const fmpz * B, const fmpz * B_inv, slong len2)

    Given a precomputed inverse \code{B_inv} of the polynomial \code{B} of 
    length \code{len2}, compute the quotient \code{Q} of \code{A} by \code{B}.
    The remainder is then placed in \code{A}. We assume the length \code{len1}
    of \code{A} is at least \code{len2}. The polynomial \code{Q} must have
    space for \code{len1 - len2 + 1} coefficients. No aliasing of operands is 
    permitted.

void fmpz_poly_divrem_preinv(fmpz_poly_t Q, fmpz_poly_t R, 
            const fmpz_poly_t A, const fmpz_poly_t B, const fmpz_poly_t B_inv)

    Given a precomputed inverse \code{B_inv} of the polynomial \code{B}, 
    compute the quotient \code{Q} of \code{A} by \code{B} and the remainder
    \code{R}. Aliasing of \code{B} and \code{B_inv} is not permitted. 

fmpz ** _fmpz_poly_powers_precompute(const fmpz * B, slong len)

    Computes \code{2*len - 1} powers of $x$ modulo the polynomial $B$ of
    the given length. This is used as a kind of precomputed inverse in
    the remainder routine below.

void fmpz_poly_powers_precompute(fmpz_poly_powers_precomp_t pinv,
                                                              fmpz_poly_t poly)

    Computes \code{2*len - 1} powers of $x$ modulo the polynomial $B$ of
    the given length. This is used as a kind of precomputed inverse in
    the remainder routine below.

void _fmpz_poly_powers_clear(fmpz ** powers, slong len)

    Clean up resources used by precomputed powers which have been computed
    by\\
    \code{_fmpz_poly_powers_precompute}.

void fmpz_poly_powers_clear(fmpz_poly_powers_precomp_t pinv)

    Clean up resources used by precomputed powers which have been computed
    by\\
    \code{fmpz_poly_powers_precompute}.

void _fmpz_poly_rem_powers_precomp(fmpz * A, slong m, 
                                 const fmpz * B, slong n, fmpz ** const powers)

    Set $A$ to the remainder of $A$ divide $B$ given precomputed powers mod $B$
    provided by \code{_fmpz_poly_powers_precompute}. No aliasing is allowed.

void fmpz_poly_rem_powers_precomp(fmpz_poly_t R, 
                             const fmpz_poly_t A, const fmpz_poly_t B, 
                                        const fmpz_poly_powers_precomp_t B_inv)

    Set $R$ to the remainder of $A$ divide $B$ given precomputed powers mod $B$
    provided by \code{fmpz_poly_powers_precompute}.

*******************************************************************************

    Divisibility testing

*******************************************************************************

int _fmpz_poly_divides(fmpz * Q, const fmpz * A,
                  slong lenA, const fmpz * B, slong lenB)

    Returns 1 if \code{(B, lenB)} divides \code{(A, lenA)} exactly and
    sets $Q$ to the quotient, otherwise returns 0. 

    It is assumed that $\len(A) \geq \len(B) > 0$ and that $Q$ has space
    for $\len(A) - \len(B) + 1$ coefficients.

    Aliasing of $Q$ with either of the inputs is not permitted.

    This function is currently unoptimised and provided for convenience
    only.

int fmpz_poly_divides(fmpz_poly_t Q,
              const fmpz_poly_t A, const fmpz_poly_t B)

    Returns 1 if $B$ divides $A$ exactly and sets $Q$ to the quotient,
    otherwise returns 0.

    This function is currently unoptimised and provided for convenience
    only.

*******************************************************************************

    Power series division

*******************************************************************************

void _fmpz_poly_inv_series_newton(fmpz * Qinv, const fmpz * Q, slong n)

    Computes the first $n$ terms of the inverse power series of $Q$ using 
    Newton iteration.

    Assumes that $n \geq 1$, that $Q$ has length at least $n$ and constant 
    term~$\pm 1$.  Does not support aliasing.

id fmpz_poly_inv_series_newton(fmpz_poly_t Qinv, const fmpz_poly_t Q, slong n)

    Computes the first $n$ terms of the inverse power series of $Q$ 
    using Newton iteration, assuming that $Q$ has constant term~$\pm 1$ 
    and $n \geq 1$.

void _fmpz_poly_inv_series(fmpz * Qinv, const fmpz * Q, slong n)

    Computes the first $n$ terms of the inverse power series of $Q$.

    Assumes that $n \geq 1$, that $Q$ has length at least $n$ and constant 
    term~$1$.  Does not support aliasing.

void fmpz_poly_inv_series(fmpz_poly_t Qinv, const fmpz_poly_t Q, slong n)

    Computes the first $n$ terms of the inverse power series of $Q$, 
    assuming $Q$ has constant term~$1$ and $n \geq 1$.

void _fmpz_poly_div_series(fmpz * Q, const fmpz * A, const fmpz * B)

    Divides \code{(A, n)} by \code{(B, n)} as power series over $\Z$, 
    assuming $B$ has constant term~$1$ and $n \geq 1$.

    Only supports aliasing of \code{(Q, n)} and \code{(B, n)}.

void fmpz_poly_div_series(fmpz_poly_t Q, const fmpz_poly_t A, 
                                         const fmpz_poly_t B, slong n)

    Performs power series division in $\Z[[x]] / (x^n)$.  The function 
    considers the polynomials $A$ and $B$ as power series of length $n$ 
    starting with the constant terms.  The function assumes that $B$ has 
    constant term~$1$ and $n \geq 1$.

*******************************************************************************

    Pseudo division

*******************************************************************************

void _fmpz_poly_pseudo_divrem_basecase(fmpz * Q, fmpz * R, 
                     ulong * d, const fmpz * A, slong lenA, 
                          const fmpz * B, slong lenB, const fmpz_preinvn_t inv)

    If $\ell$ is the leading coefficient of $B$, then computes $Q$, $R$ such 
    that $\ell^d A = Q B + R$.  This function is used for simulating division 
    over~$\Q$.

    Assumes that $\len(A) \geq \len(B) > 0$.  Assumes that $Q$ can fit 
    $\len(A) - \len(B) + 1$ coefficients, and that $R$ can fit $\len(A)$ 
    coefficients.  Supports aliasing of \code{(R, lenA)} and \code{(A, lenA)}. 
    But other than this,  no aliasing of the inputs and outputs is suppported.

    An optional precomputed inverse of the leading coefficient of $B$ from
    \code{fmpz_preinvn_init} can be supplied. Otherwise \code{inv} should be
    \code{NULL}. 

void fmpz_poly_pseudo_divrem_basecase(fmpz_poly_t Q, fmpz_poly_t R, 
                           ulong * d, const fmpz_poly_t A, const fmpz_poly_t B)

    If $\ell$ is the leading coefficient of $B$, then computes $Q$, $R$ such 
    that $\ell^d A = Q B + R$.  This function is used for simulating division 
    over~$\Q$.

void _fmpz_poly_pseudo_divrem_divconquer(fmpz * Q, fmpz * R, 
                     ulong * d, const fmpz * A, slong lenB, 
                          const fmpz * B, slong lenB, const fmpz_preinvn_t inv)

    Computes \code{(Q, lenA - lenB + 1)}, \code{(R, lenA)} such that 
    $\ell^d A = B Q + R$, only setting the bottom $\len(B) - 1$ coefficients 
    of $R$ to their correct values.  The remaining top coefficients of 
    \code{(R, lenA)} may be arbitrary.

    Assumes $\len(A) \geq \len(B) > 0$.  Allows zero-padding in 
    \code{(A, lenA)}.  No aliasing of input and output operands is allowed.

    An optional precomputed inverse of the leading coefficient of $B$ from
    \code{fmpz_preinvn_init} can be supplied. Otherwise \code{inv} should be
    \code{NULL}. 

void fmpz_poly_pseudo_divrem_divconquer(fmpz_poly_t Q, fmpz_poly_t R, 
                           ulong * d, const fmpz_poly_t A, const fmpz_poly_t B)

    Computes $Q$, $R$, and $d$ such that $\ell^d A = B Q + R$, where $R$ has 
    length less than the length of $B$ and $\ell$ is the leading coefficient 
    of $B$.  An exception is raised if $B$ is zero.

void _fmpz_poly_pseudo_divrem_cohen(fmpz * Q, fmpz * R, const fmpz * A, 
                                        slong lenA, const fmpz * B, slong lenB)

    Assumes that $\len(A) \geq \len(B) > 0$.  Assumes that $Q$ can fit 
    $\len(A) - \len(B) + 1$ coefficients, and that $R$ can fit $\len(A)$ 
    coefficients.  Supports aliasing of \code{(R, lenA)} and \code{(A, lenA)}. 
    But other than this, no aliasing of the inputs and outputs is supported.

void fmpz_poly_pseudo_divrem_cohen(fmpz_poly_t Q, fmpz_poly_t R, 
                                      const fmpz_poly_t A, const fmpz_poly_t B)

    This is a variant of \code{fmpz_poly_pseudo_divrem} which computes 
    polynomials $Q$ and $R$ such that $\ell^d A = B Q + R$.  However, the 
    value of $d$ is fixed at $\max{\{0, \len(A) - \len(B) + 1\}}$.

    This function is faster when the remainder is not well behaved, i.e.\ 
    where it is not expected to be close to zero.  Note that this function 
    is not asymptotically fast.  It is efficient only for short polynomials, 
    e.g.\ when $\len(B) < 32$.

void _fmpz_poly_pseudo_rem_cohen(fmpz * R, const fmpz * A, slong lenA, 
                                                   const fmpz * B, slong lenB)

    Assumes that $\len(A) \geq \len(B) > 0$.  Assumes that $R$ can fit 
    $\len(A)$ coefficients.  Supports aliasing of \code{(R, lenA)} and 
    \code{(A, lenA)}.  But other than this, no aliasing of the inputs and 
    outputs is supported.

void fmpz_poly_pseudo_rem_cohen(fmpz_poly_t R, const fmpz_poly_t A, 
                                                           const fmpz_poly_t B)

    This is a variant of \code{fmpz_poly_pseudo_rem()} which computes 
    polynomials $Q$ and $R$ such that $\ell^d A = B Q + R$, but only 
    returns $R$.  However, the value of $d$ is fixed at 
    $\max{\{0, \len(A) - \len(B) + 1\}}$.

    This function is faster when the remainder is not well behaved, i.e.\ 
    where it is not expected to be close to zero.  Note that this function 
    is not asymptotically fast.  It is efficient only for short polynomials, 
    e.g.\ when $\len(B) < 32$.

    This function uses the algorithm described 
    in~\citep[Algorithm~3.1.2]{Coh1996}.

void _fmpz_poly_pseudo_divrem(fmpz * Q, fmpz * R, ulong * d, const fmpz * A, 
              slong lenA, const fmpz * B, slong lenB, const fmpz_preinvn_t inv)

    If $\ell$ is the leading coefficient of $B$, then computes 
    \code{(Q, lenA - lenB + 1)}, \code{(R, lenB - 1)} and $d$ such that 
    $\ell^d A = B Q + R$.  This function is used for simulating division 
    over~$\Q$.

    Assumes that $\len(A) \geq \len(B) > 0$.  Assumes that $Q$ can fit 
    $\len(A) - \len(B) + 1$ coefficients, and that $R$ can fit $\len(A)$ 
    coefficients, although on exit only the bottom $\len(B)$ coefficients 
    will carry meaningful data.

    Supports aliasing of \code{(R, lenA)} and \code{(A, lenA)}.  But other 
    than this, no aliasing of the inputs and outputs is suppported.

    An optional precomputed inverse of the leading coefficient of $B$ from
    \code{fmpz_preinvn_init} can be supplied. Otherwise \code{inv} should be
    \code{NULL}. 

void fmpz_poly_pseudo_divrem(fmpz_poly_t Q, fmpz_poly_t R, ulong * d, 
                                      const fmpz_poly_t A, const fmpz_poly_t B)

    Computes $Q$, $R$, and $d$ such that $\ell^d A = B Q + R$.

void _fmpz_poly_pseudo_div(fmpz * Q, ulong * d, const fmpz * A, slong lenA, 
                          const fmpz * B, slong lenB, const fmpz_preinvn_t inv)

    Pseudo-division, only returning the quotient.

void fmpz_poly_pseudo_div(fmpz_poly_t Q, ulong * d, const fmpz_poly_t A, 
                                                    const fmpz_poly_t B)

    Pseudo-division, only returning the quotient.

void _fmpz_poly_pseudo_rem(fmpz * R, ulong * d, const fmpz * A, slong lenA, 
                          const fmpz * B, slong lenB, const fmpz_preinvn_t inv)

    Pseudo-division, only returning the remainder.

void fmpz_poly_pseudo_rem(fmpz_poly_t R, ulong * d, const fmpz_poly_t A, 
                                                    const fmpz_poly_t B)

    Pseudo-division, only returning the remainder.

*******************************************************************************

    Derivative

*******************************************************************************

void _fmpz_poly_derivative(fmpz * rpoly, const fmpz * poly, slong len) 

    Sets \code{(rpoly, len - 1)} to the derivative of \code{(poly, len)}.  
    Also handles the cases where \code{len} is $0$ or $1$ correctly. 
    Supports aliasing of \code{rpoly} and \code{poly}.

void fmpz_poly_derivative(fmpz_poly_t res, const fmpz_poly_t poly)

    Sets \code{res} to the derivative of \code{poly}.

*******************************************************************************

    Evaluation

*******************************************************************************

void _fmpz_poly_evaluate_divconquer_fmpz(fmpz_t res, 
                                 const fmpz * poly, slong len, const fmpz_t a)

    Evaluates the polynomial \code{(poly, len)} at the integer~$a$ using 
    a divide and conquer approach.  Assumes that the length of the polynomial 
    is at least one.  Allows zero padding.  Does not allow aliasing between 
    \code{res} and \code{x}.

void fmpz_poly_evaluate_divconquer_fmpz(fmpz_t res, const fmpz_poly_t poly, 
                                        const fmpz_t a)

    Evaluates the polynomial \code{poly} at the integer $a$ using a divide 
    and conquer approach.

    Aliasing between \code{res} and \code{a} is supported, however, 
    \code{res} may not be part of \code{poly}.

void _fmpz_poly_evaluate_horner_fmpz(fmpz_t res, const fmpz * f, slong len, 
                                     const fmpz_t a)

    Evaluates the polynomial \code{(f, len)} at the integer $a$ using 
    Horner's rule, and sets \code{res} to the result.  Aliasing between 
    \code{res} and $a$ or any of the coefficients of $f$ is not supported.

void fmpz_poly_evaluate_horner_fmpz(fmpz_t res, const fmpz_poly_t f, 
                                    const fmpz_t a)

    Evaluates the polynomial $f$ at the integer $a$ using Horner's rule, and 
    sets \code{res} to the result.

    As expected, aliasing between \code{res} and \code{a} is supported.  
    However, \code{res} may not be aliased with a coefficient of $f$.

void _fmpz_poly_evaluate_fmpz(fmpz_t res, const fmpz * f, slong len, 
                              const fmpz_t a)

    Evaluates the polynomial \code{(f, len)} at the integer $a$ and sets 
    \code{res} to the result.  Aliasing between \code{res} and $a$ or any
    of the coefficients of $f$ is not supported.

void fmpz_poly_evaluate_fmpz(fmpz_t res, const fmpz_poly_t f, const fmpz_t a)

    Evaluates the polynomial $f$ at the integer $a$ and sets \code{res} 
    to the result.

    As expected, aliasing between \code{res} and $a$ is supported.  However, 
    \code{res} may not be aliased with a coefficient of $f$.

void _fmpz_poly_evaluate_horner_mpq(fmpz_t rnum, fmpz_t rden, 
                                    const fmpz * f, slong len, 
                                    const fmpz_t anum, const fmpz_t aden)

    Evaluates the polynomial \code{(f, len)} at the rational 
    \code{(anum, aden)} using Horner's rule, and sets \code{(rnum, rden)} to 
    the result in lowest terms.  
    
    Aliasing between \code{(rnum, rden)} and \code{(anum, aden)} or any of 
    the coefficients of $f$ is not supported.

void fmpz_poly_evaluate_horner_mpq(mpq_t res, const fmpz_poly_t f, 
                                                                 const mpq_t a)

    Evaluates the polynomial $f$ at the rational $a$ using Horner's rule, and 
    sets \code{res} to the result.

void _fmpz_poly_evaluate_mpq(fmpz_t rnum, fmpz_t rden, 
                             const fmpz * f, slong len, 
                             const fmpz_t anum, const fmpz_t aden)

    Evaluates the polynomial \code{(f, len)} at the rational 
    \code{(anum, aden)} and sets \code{(rnum, rden)} to the result in lowest 
    terms.

    Aliasing between \code{(rnum, rden)} and \code{(anum, aden)} or any of 
    the coefficients of $f$ is not supported.

void fmpz_poly_evaluate_mpq(mpq_t res, const fmpz_poly_t f, const mpq_t a)

    Evaluates the polynomial $f$ at the rational $a$ and sets \code{res} to 
    the result.

mp_limb_t _fmpz_poly_evaluate_mod(const fmpz * poly, slong len, mp_limb_t a, 
                                                   mp_limb_t n, mp_limb_t ninv)

    Evaluates \code{(poly, len)} at the value $a$ modulo $n$ and 
    returns the result.  The last argument \code{ninv} must be set 
    to the precomputed inverse of $n$, which can be obtained using 
    the function \code{n_preinvert_limb()}.

mp_limb_t fmpz_poly_evaluate_mod(const fmpz_poly_t poly, mp_limb_t a, 
                                                                   mp_limb_t n)

    Evaluates \code{poly} at the value $a$ modulo $n$ and returns the result. 

void fmpz_poly_evaluate_fmpz_vec(fmpz * res, const fmpz_poly_t f,
                                                const fmpz * a, slong n)

    Evaluates \code{f} at the $n$ values given in the vector \code{f},
    writing the results to \code{res}.

*******************************************************************************

    Newton basis

*******************************************************************************

void _fmpz_poly_monomial_to_newton(fmpz * poly, const fmpz * roots, slong n)

    Converts \code{(poly, n)} in-place from its coefficients given
    in the standard monomial basis to the Newton basis
    for the roots $r_0, r_1, \ldots, r_{n-2}$.
    In other words, this determines output coefficients $c_i$ such that
    $$c_0 + c_1(x-r_0) + c_2(x-r_0)(x-r_1) + \ldots +
        c_{n-1}(x-r_0)(x-r_1)\cdots(x-r_{n-2})$$
    is equal to the input polynomial.
    Uses repeated polynomial division.

void _fmpz_poly_newton_to_monomial(fmpz * poly, const fmpz * roots, slong n)

    Converts \code{(poly, n)} in-place from its coefficients given
    in the Newton basis for the roots $r_0, r_1, \ldots, r_{n-2}$
    to the standard monomial basis. In other words, this evaluates
    $$c_0 + c_1(x-r_0) + c_2(x-r_0)(x-r_1) + \ldots +
        c_{n-1}(x-r_0)(x-r_1)\cdots(x-r_{n-2})$$
    where $c_i$ are the input coefficients for \code{poly}.
    Uses Horner's rule.

*******************************************************************************

    Interpolation

*******************************************************************************

void
fmpz_poly_interpolate_fmpz_vec(fmpz_poly_t poly,
                                    const fmpz * xs, const fmpz * ys, slong n)

    Sets \code{poly} to the unique interpolating polynomial of degree at
    most $n - 1$ satisfying $f(x_i) = y_i$ for every pair $x_i, y_u$ in
    \code{xs} and \code{ys}, assuming that this polynomial has integer
    coefficients.

    If an interpolating polynomial with integer coefficients does not
    exist, the result is undefined.

    It is assumed that the $x$ values are distinct.

*******************************************************************************

    Composition

*******************************************************************************

void _fmpz_poly_compose_horner(fmpz * res, 
                const fmpz * poly1, slong len1, const fmpz * poly2, slong len2)

    Sets \code{res} to the composition of \code{(poly1, len1)} and 
    \code{(poly2, len2)}.

    Assumes that \code{res} has space for \code{(len1-1)*(len2-1) + 1} 
    coefficients.  Assumes that \code{poly1} and \code{poly2} are non-zero 
    polynomials.  Does not support aliasing between any of the inputs and 
    the output.

void fmpz_poly_compose_horner(fmpz_poly_t res, const fmpz_poly_t poly1, 
                                                       const fmpz_poly_t poly2)

    Sets \code{res} to the composition of \code{poly1} and \code{poly2}.  
    To be more precise, denoting \code{res}, \code{poly1}, and \code{poly2} 
    by $f$, $g$, and $h$, sets $f(t) = g(h(t))$.

    This implementation uses Horner's method.

void _fmpz_poly_compose_divconquer(fmpz * res, const fmpz * poly1, slong len1, 
                                               const fmpz * poly2, slong len2)

    Computes the composition of \code{(poly1, len1)} and \code{(poly2, len2)} 
    using a divide and conquer approach and places the result into \code{res}, 
    assuming \code{res} can hold the output of length 
    \code{(len1 - 1) * (len2 - 1) + 1}.

    Assumes \code{len1, len2 > 0}.  Does not support aliasing between 
    \code{res} and any of \code{(poly1, len1)} and \code{(poly2, len2)}.

void fmpz_poly_compose_divconquer(fmpz_poly_t res, const fmpz_poly_t poly1,
                                                   const fmpz_poly_t poly2)

    Sets \code{res} to the composition of \code{poly1} and \code{poly2}.  
    To be precise about the order of composition, denoting \code{res}, 
    \code{poly1}, and \code{poly2} by $f$, $g$, and $h$, respectively, 
    sets $f(t) = g(h(t))$.

void _fmpz_poly_compose(fmpz * res, 
                const fmpz * poly1, slong len1, const fmpz * poly2, slong len2)

    Sets \code{res} to the composition of \code{(poly1, len1)} and 
    \code{(poly2, len2)}.  

    Assumes that \code{res} has space for \code{(len1-1)*(len2-1) + 1} 
    coefficients.  Assumes that \code{poly1} and \code{poly2} are non-zero 
    polynomials.  Does not support aliasing between any of the inputs and 
    the output.

void fmpz_poly_compose(fmpz_poly_t res, const fmpz_poly_t poly1, 
                                                       const fmpz_poly_t poly2)

    Sets \code{res} to the composition of \code{poly1} and \code{poly2}.  
    To be precise about the order of composition, denoting \code{res}, 
    \code{poly1}, and \code{poly2} by $f$, $g$, and $h$, respectively, 
    sets $f(t) = g(h(t))$.

*******************************************************************************

    Taylor shift

*******************************************************************************

void _fmpz_poly_taylor_shift_horner(fmpz * poly, const fmpz_t c, slong n)

    Performs the Taylor shift composing \code{poly} by $x+c$ in-place.
    Uses an efficient version Horner's rule.

void fmpz_poly_taylor_shift_horner(fmpz_poly_t g, const fmpz_poly_t f,
    const fmpz_t c)

    Performs the Taylor shift composing \code{f} by $x+c$.

void _fmpz_poly_taylor_shift_divconquer(fmpz * poly, const fmpz_t c, slong n)

    Performs the Taylor shift composing \code{poly} by $x+c$ in-place.
    Uses the divide-and-conquer polynomial composition algorithm.

void fmpz_poly_taylor_shift_divconquer(fmpz_poly_t g, const fmpz_poly_t f,
    const fmpz_t c)

    Performs the Taylor shift composing \code{f} by $x+c$.
    Uses the divide-and-conquer polynomial composition algorithm.

void _fmpz_poly_taylor_shift(fmpz * poly, const fmpz_t c, slong n)

    Performs the Taylor shift composing \code{poly} by $x+c$ in-place.

void fmpz_poly_taylor_shift(fmpz_poly_t g, const fmpz_poly_t f, const fmpz_t c)

    Performs the Taylor shift composing \code{f} by $x+c$.

*******************************************************************************

    Power series composition

*******************************************************************************

void _fmpz_poly_compose_series_horner(fmpz * res,
      const fmpz * poly1, slong len1, const fmpz * poly2, slong len2, slong n)

    Sets \code{res} to the composition of \code{poly1} and \code{poly2}
    modulo $x^n$, where the constant term of \code{poly2} is required
    to be zero.

    Assumes that \code{len1, len2, n > 0}, that \code{len1, len2 <= n},
    and that\\ \code{(len1-1) * (len2-1) + 1 <= n}, and that \code{res} has
    space for \code{n} coefficients. Does not support aliasing between any
    of the inputs and the output.

    This implementation uses the Horner scheme.

void fmpz_poly_compose_series_horner(fmpz_poly_t res, 
                    const fmpz_poly_t poly1, const fmpz_poly_t poly2, slong n)

    Sets \code{res} to the composition of \code{poly1} and \code{poly2}
    modulo $x^n$, where the constant term of \code{poly2} is required
    to be zero.

    This implementation uses the Horner scheme.

void _fmpz_poly_compose_series_brent_kung(fmpz * res, const fmpz * poly1,
        slong len1, const fmpz * poly2, slong len2, slong n)

    Sets \code{res} to the composition of \code{poly1} and \code{poly2}
    modulo $x^n$, where the constant term of \code{poly2} is required
    to be zero.

    Assumes that \code{len1, len2, n > 0}, that \code{len1, len2 <= n},
    and that\\ \code{(len1-1) * (len2-1) + 1 <= n}, and that \code{res} has
    space for \code{n} coefficients. Does not support aliasing between any
    of the inputs and the output.

    This implementation uses Brent-Kung algorithm 2.1 \cite{BrentKung1978}.

void fmpz_poly_compose_series_brent_kung(fmpz_poly_t res, 
                const fmpz_poly_t poly1, const fmpz_poly_t poly2, slong n)

    Sets \code{res} to the composition of \code{poly1} and \code{poly2}
    modulo $x^n$, where the constant term of \code{poly2} is required
    to be zero.

    This implementation uses Brent-Kung algorithm 2.1 \cite{BrentKung1978}.

void _fmpz_poly_compose_series(fmpz * res, const fmpz * poly1, slong len1, 
                                      const fmpz * poly2, slong len2, slong n)

    Sets \code{res} to the composition of \code{poly1} and \code{poly2}
    modulo $x^n$, where the constant term of \code{poly2} is required
    to be zero.

    Assumes that \code{len1, len2, n > 0}, that \code{len1, len2 <= n},
    and that\\ \code{(len1-1) * (len2-1) + 1 <= n}, and that \code{res} has
    space for \code{n} coefficients. Does not support aliasing between any
    of the inputs and the output.

    This implementation automatically switches between the Horner scheme
    and Brent-Kung algorithm 2.1 depending on the size of the inputs.

void fmpz_poly_compose_series(fmpz_poly_t res, 
                    const fmpz_poly_t poly1, const fmpz_poly_t poly2, slong n)

    Sets \code{res} to the composition of \code{poly1} and \code{poly2}
    modulo $x^n$, where the constant term of \code{poly2} is required
    to be zero.

    This implementation automatically switches between the Horner scheme
    and Brent-Kung algorithm 2.1 depending on the size of the inputs.

*******************************************************************************

    Power series reversion

*******************************************************************************

void _fmpz_poly_revert_series_lagrange(fmpz * Qinv, const fmpz * Q, slong n)

    Sets \code{Qinv} to the compositional inverse or reversion of \code{Q}
    as a power series, i.e. computes $Q^{-1}$ such that
    $Q(Q^{-1}(x)) = Q^{-1}(Q(x)) = x \bmod x^n$. The arguments must
    both have length \code{n} and may not be aliased.
    It is required that $Q_0 = 0$ and $Q_1 = \pm 1$.

    This implementation uses the Lagrange inversion formula.

void fmpz_poly_revert_series_lagrange(fmpz_poly_t Qinv,
            const fmpz_poly_t Q, slong n)

    Sets \code{Qinv} to the compositional inverse or reversion of \code{Q}
    as a power series, i.e. computes $Q^{-1}$ such that
    $Q(Q^{-1}(x)) = Q^{-1}(Q(x)) = x \bmod x^n$.
    It is required that $Q_0 = 0$ and $Q_1 = \pm 1$.

    This implementation uses the Lagrange inversion formula.

void _fmpz_poly_revert_series_lagrange_fast(fmpz * Qinv,
        const fmpz * Q, slong n)

    Sets \code{Qinv} to the compositional inverse or reversion of \code{Q}
    as a power series, i.e. computes $Q^{-1}$ such that
    $Q(Q^{-1}(x)) = Q^{-1}(Q(x)) = x \bmod x^n$. The arguments must
    both have length \code{n} and may not be aliased.
    It is required that $Q_0 = 0$ and $Q_1 = \pm 1$.

    This implementation uses a reduced-complexity implementation
    of the Lagrange inversion formula.

void fmpz_poly_revert_series_lagrange_fast(fmpz_poly_t Qinv,
            const fmpz_poly_t Q, slong n)

    Sets \code{Qinv} to the compositional inverse or reversion of \code{Q}
    as a power series, i.e. computes $Q^{-1}$ such that
    $Q(Q^{-1}(x)) = Q^{-1}(Q(x)) = x \bmod x^n$.
    It is required that $Q_0 = 0$ and $Q_1 = \pm 1$.

    This implementation uses a reduced-complexity implementation
    of the Lagrange inversion formula.

void _fmpz_poly_revert_series_newton(fmpz * Qinv, const fmpz * Q, slong n)

    Sets \code{Qinv} to the compositional inverse or reversion of \code{Q}
    as a power series, i.e. computes $Q^{-1}$ such that
    $Q(Q^{-1}(x)) = Q^{-1}(Q(x)) = x \bmod x^n$. The arguments must
    both have length \code{n} and may not be aliased.
    It is required that $Q_0 = 0$ and $Q_1 = \pm 1$.

    This implementation uses Newton iteration \cite{BrentKung1978}.

void fmpz_poly_revert_series_newton(fmpz_poly_t Qinv,
        const fmpz_poly_t Q, slong n)

    Sets \code{Qinv} to the compositional inverse or reversion of \code{Q}
    as a power series, i.e. computes $Q^{-1}$ such that
    $Q(Q^{-1}(x)) = Q^{-1}(Q(x)) = x \bmod x^n$.
    It is required that $Q_0 = 0$ and $Q_1 = \pm 1$.

    This implementation uses Newton iteration \cite{BrentKung1978}.

void _fmpz_poly_revert_series(fmpz * Qinv, const fmpz * Q, slong n)

    Sets \code{Qinv} to the compositional inverse or reversion of \code{Q}
    as a power series, i.e. computes $Q^{-1}$ such that
    $Q(Q^{-1}(x)) = Q^{-1}(Q(x)) = x \bmod x^n$. The arguments must
    both have length \code{n} and may not be aliased.
    It is required that $Q_0 = 0$ and $Q_1 = \pm 1$.

    This implementation defaults to the fast version of
    Lagrange interpolation.

void fmpz_poly_revert_series(fmpz_poly_t Qinv, const fmpz_poly_t Q, slong n)

    Sets \code{Qinv} to the compositional inverse or reversion of \code{Q}
    as a power series, i.e. computes $Q^{-1}$ such that
    $Q(Q^{-1}(x)) = Q^{-1}(Q(x)) = x \bmod x^n$.
    It is required that $Q_0 = 0$ and $Q_1 = \pm 1$.

    This implementation defaults to the fast version of
    Lagrange interpolation.

*******************************************************************************

    Square root

*******************************************************************************

int _fmpz_poly_sqrt_classical(fmpz * res, const fmpz * poly, slong len)

    If \code{(poly, len)} is a perfect square, sets \code{(res, len / 2 + 1)}
    to the square root of \code{poly} with positive leading coefficient
    and returns 1. Otherwise returns 0.

    This function first uses various tests to detect nonsquares quickly.
    Then, it computes the square root iteratively from top to bottom,
    requiring $O(n^2)$ coefficient operations.

int fmpz_poly_sqrt_classical(fmpz_poly_t b, const fmpz_poly_t a)

    If \code{a} is a perfect square, sets \code{b} to the square root of
    \code{a} with positive leading coefficient and returns 1.
    Otherwise returns 0.

int _fmpz_poly_sqrt(fmpz * res, const fmpz * poly, slong len)

    If \code{(poly, len)} is a perfect square, sets \code{(res, len / 2 + 1)}
    to the square root of \code{poly} with positive leading coefficient
    and returns 1. Otherwise returns 0.

int fmpz_poly_sqrt(fmpz_poly_t b, const fmpz_poly_t a)

    If \code{a} is a perfect square, sets \code{b} to the square root of
    \code{a} with positive leading coefficient and returns 1.
    Otherwise returns 0.

*******************************************************************************

    Signature

*******************************************************************************

void _fmpz_poly_signature(slong * r1, slong * r2, const fmpz * poly, slong len)

    Computes the signature $(r_1, r_2)$ of the polynomial 
    \code{(poly, len)}.  Assumes that the polynomial is squarefree over~$\Q$.

void fmpz_poly_signature(slong * r1, slong * r2, const fmpz_poly_t poly)

    Computes the signature $(r_1, r_2)$ of the polynomial \code{poly}, 
    which is assumed to be square-free over~$\Q$.  The values of $r_1$ and 
    $2 r_2$ are the number of real and complex roots of the polynomial, 
    respectively.  For convenience, the zero polynomial is allowed, in which 
    case the output is $(0, 0)$.

    If the polynomial is not square-free, the behaviour is undefined and an 
    exception may be raised.

    This function uses the algorithm described 
    in~\citep[Algorithm~4.1.11]{Coh1996}.

*******************************************************************************

    Hensel lifting

*******************************************************************************

void fmpz_poly_hensel_build_tree(slong * link, fmpz_poly_t *v, fmpz_poly_t *w, 
                                 const nmod_poly_factor_t fac)

    Initialises and builds a Hensel tree consisting of two arrays $v$, $w$ 
    of polynomials an array of links, called \code{link}.

    The caller supplies a set of $r$ local factors (in the factor structure 
    \code{fac}) of some polynomial $F$ over $\mathbf{Z}$. They also supply 
    two arrays of initialised polynomials $v$ and $w$, each of length 
    $2r - 2$ and an array \code{link}, also of length $2r - 2$.

    We will have five arrays: a $v$ of \code{fmpz_poly_t}'s and a $V$ of 
    \code{nmod_poly_t}'s and also a $w$ and a $W$ and \code{link}.  Here's 
    the idea: we sort each leaf and node of a factor tree by degree, in 
    fact choosing to multiply the two smallest factors, then the next two 
    smallest (factors or products) etc.\ until a tree is made.  The tree 
    will be stored in the $v$'s. The first two elements of $v$ will be the 
    smallest modular factors, the last two elements of $v$ will multiply to 
    form $F$ itself.  Since $v$ will be rearranging the original factors we 
    will need to be able to recover the original order. For this we use the 
    array \code{link} which has nonnegative even numbers and negative numbers. 
    It is an array of \code{slong}'s which aligns with $V$ and $v$ if 
    \code{link} has a negative number in spot $j$ that means $V_j$ is an 
    original modular factor which has been lifted, if \code{link[j]} is a 
    nonnegative even number then $V_j$ stores a product of the two entries 
    at \code{V[link[j]]} and \code{V[link[j]+1]}.  
    $W$ and $w$ play the role of the extended GCD, at $V_0$, $V_2$, $V_4$, 
    etc.\ we have a new product, $W_0$, $W_2$, $W_4$, etc.\ are the XGCD 
    cofactors of the $V$'s. For example, 
    $V_0 W_0 + V_1 W_1 \equiv 1 \pmod{p^{\ell}}$ for some $\ell$.  These 
    will be lifted along with the entries in $V$.  It is not enough to just 
    lift each factor, we have to lift the entire tree and the tree of 
    XGCD cofactors.

void fmpz_poly_hensel_lift(fmpz_poly_t G, fmpz_poly_t H, 
    fmpz_poly_t A, fmpz_poly_t B, 
    const fmpz_poly_t f, 
    const fmpz_poly_t g, const fmpz_poly_t h, 
    const fmpz_poly_t a, const fmpz_poly_t b, 
    const fmpz_t p, const fmpz_t p1)

    This is the main Hensel lifting routine, which performs a Hensel step
    from polynomials mod $p$ to polynomials mod $P = p p_1$. One starts with 
    polynomials $f$, $g$, $h$ such that $f = gh \pmod p$. The polynomials 
    $a$, $b$ satisfy $ag + bh = 1 \pmod p$. 

    The lifting formulae are 
    \begin{align*}
    G & = \biggl( \bigl( \frac{f-gh}{p} \bigr) b \bmod g \biggr) p + g \\
    H & = \biggl( \bigl( \frac{f-gh}{p} \bigr) a \bmod h \biggr) p + h \\
    B & = \biggl( \bigl( \frac{1-aG-bH}{p} \bigr) b \bmod g \biggr) p + b \\
    A & = \biggl( \bigl( \frac{1-aG-bH}{p} \bigr) a \bmod h \biggr) p + a.
    \end{align*}

    Upon return we have $A G + B H = 1 \pmod P$ and $f = G H \pmod P$, 
    where $G = g \pmod p$ etc.

    We require that $1 < p_1 \leq p$ and that the input polynomials $f, g, h$ 
    have degree at least~$1$ and that the input polynomials $a$ and $b$ are 
    non-zero.

    The output arguments $G, H, A, B$ may only be aliased with 
    the input arguments $g, h, a, b$, respectively.

void fmpz_poly_hensel_lift_without_inverse(fmpz_poly_t Gout, fmpz_poly_t Hout, 
    const fmpz_poly_t f, const fmpz_poly_t g, const fmpz_poly_t h, 
    const fmpz_poly_t a, const fmpz_poly_t b, 
    const fmpz_t p, const fmpz_t p1)

    Given polynomials such that $f = gh \pmod p$ and $ag + bh = 1 \pmod p$, 
    lifts only the factors $g$ and $h$ modulo $P = p p_1$.

    See \code{fmpz_poly_hensel_lift()}.

void fmpz_poly_hensel_lift_only_inverse(fmpz_poly_t Aout, fmpz_poly_t Bout, 
    const fmpz_poly_t G, const fmpz_poly_t H, 
    const fmpz_poly_t a, const fmpz_poly_t b, 
    const fmpz_t p, const fmpz_t p1)

    Given polynomials such that $f = gh \pmod p$ and $ag + bh = 1 \pmod p$, 
    lifts only the cofactors $a$ and $b$ modulo $P = p p_1$.

    See \code{fmpz_poly_hensel_lift()}.

void fmpz_poly_hensel_lift_tree_recursive(slong *link, 
    fmpz_poly_t *v, fmpz_poly_t *w, fmpz_poly_t f, slong j, slong inv, 
    const fmpz_t p0, const fmpz_t p1)

    Takes a current Hensel tree \code{(link, v, w)} and a pair $(j,j+1)$ 
    of entries in the tree and lifts the tree from mod $p_0$ to 
    mod $P = p_0 p_1$, where $1 < p_1 \leq p_0$.

    Set \code{inv} to $-1$ if restarting Hensel lifting, $0$ if stopping 
    and $1$ otherwise. 

    Here $f = g h$ is the polynomial whose factors we are trying to lift. 
    We will have that \code{v[j]} is the product of \code{v[link[j]]} and 
    \code{v[link[j] + 1]} as described above.

    Does support aliasing of $f$ with one of the polynomials in 
    the lists $v$ and $w$.  But the polynomials in these two lists 
    are not allowed to be aliases of each other.

void fmpz_poly_hensel_lift_tree(slong *link, fmpz_poly_t *v, fmpz_poly_t *w, 
    fmpz_poly_t f, slong r, const fmpz_t p, slong e0, slong e1, slong inv)

    Computes $p_0 = p^{e_0}$ and $p_1 = p^{e_1 - e_0}$ for a small prime $p$ 
    and $P = p^{e_1}$.

    If we aim to lift to $p^b$ then $f$ is the polynomial whose factors we 
    wish to lift, made monic mod $p^b$. As usual, \code{(link, v, w)} is an 
    initialised tree.

    This starts the recursion on lifting the \emph{product tree} for lifting 
    from $p^{e_0}$ to $p^{e_1}$. The value of \code{inv} corresponds to that 
    given for the function \code{fmpz_poly_hensel_lift_tree_recursive()}. We 
    set $r$ to the number of local factors of $f$.

    In terms of the notation, above $P = p^{e_1}$, $p_0 = p^{e_0}$ and 
    $p_1 = p^{e_1-e_0}$.

    Assumes that $f$ is monic.

    Assumes that $1 < p_1 \leq p_0$, that is, $0 < e_1 \leq e_0$.

slong _fmpz_poly_hensel_start_lift(fmpz_poly_factor_t lifted_fac, slong *link, 
    fmpz_poly_t *v, fmpz_poly_t *w, const fmpz_poly_t f, 
    const nmod_poly_factor_t local_fac, slong N)

    This function takes the local factors in \code{local_fac} 
    and Hensel lifts them until they are known mod $p^N$, where 
    $N \geq 1$.

    These lifted factors will be stored (in the same ordering) in 
    \code{lifted_fac}. It is assumed that \code{link}, \code{v}, and 
    \code{w} are initialized arrays \code{fmpz_poly_t}'s with at least 
    $2*r - 2$ entries and that $r \geq 2$.  This is done outside of 
    this function so that you can keep them for restarting Hensel lifting 
    later. The product of local factors must be squarefree.

    The return value is an exponent which must be passed to the function\\ 
    \code{_fmpz_poly_hensel_continue_lift()} as \code{prev_exp} if the 
    Hensel lifting is to be resumed.

    Currently, supports the case when $N = 1$ for convenience, 
    although it is preferable in this case to simple iterate 
    over the local factors and convert them to polynomials over 
    $\mathbf{Z}$.

slong _fmpz_poly_hensel_continue_lift(fmpz_poly_factor_t lifted_fac, 
    slong *link, fmpz_poly_t *v, fmpz_poly_t *w, const fmpz_poly_t f, 
    slong prev, slong curr, slong N, const fmpz_t p)

    This function restarts a stopped Hensel lift.

    It lifts from \code{curr} to $N$. It also requires \code{prev} 
    (to lift the cofactors) given as the return value of the function 
    \code{_fmpz_poly_hensel_start_lift()} or the function\\ 
    \code{_fmpz_poly_hensel_continue_lift()}. The current lifted factors 
    are supplied in \code{lifted_fac} and upon return are updated
    there. As usual \code{link}, \code{v}, and \code{w} describe the 
    current Hensel tree, $r$ is the number of local factors and $p$ is 
    the small prime modulo whose power we are lifting to. It is required 
    that \code{curr} be at least $1$ and that \code{N > curr}.

    Currently, supports the case when \code{prev} and \code{curr} 
    are equal.

void fmpz_poly_hensel_lift_once(fmpz_poly_factor_t lifted_fac, 
                                const fmpz_poly_t f, 
                                const nmod_poly_factor_t local_fac, slong N)

    This function does a Hensel lift. 

    It lifts local factors stored in \code{local_fac} of $f$ to $p^N$, 
    where $N \geq 2$. The lifted factors will be stored in \code{lifted_fac}. 
    This lift cannot be restarted. This function is a convenience function 
    intended for end users. The product of local factors must be squarefree.

*******************************************************************************

    Input and output

    The functions in this section are not intended to be particularly fast. 
    They are intended mainly as a debugging aid.

    For the string output functions there are two variants.  The first uses a 
    simple string representation of polynomials which prints only the length 
    of the polynomial and the integer coefficients, whilst the latter variant, 
    appended with \code{_pretty}, uses a more traditional string 
    representation of polynomials which prints a variable name as part of the 
    representation. 

    The first string representation is given by a sequence of integers, in 
    decimal notation, separated by white space.  The first integer gives the 
    length of the polynomial; the remaining integers are the coefficients. 
    For example $5x^3 - x + 1$ is represented by the string 
    \code{"4  1 -1 0 5"}, and the zero polynomial is represented by \code{"0"}.
    The coefficients may be signed and arbitrary precision.

    The string representation of the functions appended by \code{_pretty} 
    includes only the non-zero terms of the polynomial, starting with the 
    one of highest degree.  Each term starts with a coefficient, prepended 
    with a sign, followed by the character \code{*}, followed by a variable 
    name, which must be passed as a string parameter to the function, 
    followed by a carot \code{^} followed by a non-negative exponent.

    If the sign of the leading coefficient is positive, it is omitted. Also 
    the exponents of the degree $1$ and $0$ terms are omitted, as is the 
    variable and the \code{*} character in the case of the degree $0$ 
    coefficient.  If the coefficient is plus or minus one, the coefficient 
    is omitted, except for the sign.

    Some examples of the \code{_pretty} representation are:

    \begin{lstlisting}
    5*x^3+7*x-4
    x^2+3
    -x^4+2*x-1
    x+1
    5
    \end{lstlisting}

*******************************************************************************

int _fmpz_poly_print(const fmpz * poly, slong len)

    Prints the polynomial \code{(poly, len)} to \code{stdout}.

    In case of success, returns a positive value.  In case of failure, 
    returns a non-positive value.

int fmpz_poly_print(const fmpz_poly_t poly)

    Prints the polynomial to \code{stdout}.

    In case of success, returns a positive value.  In case of failure, 
    returns a non-positive value.

int _fmpz_poly_print_pretty(const fmpz * poly, slong len, const char * x)

    Prints the pretty representation of \code{(poly, len)} to \code{stdout},
    using the string \code{x} to represent the indeterminate.

    In case of success, returns a positive value.  In case of failure, 
    returns a non-positive value.

int fmpz_poly_print_pretty(const fmpz_poly_t poly, const char * x)

    Prints the pretty representation of \code{poly} to \code{stdout},
    using the string \code{x} to represent the indeterminate.

    In case of success, returns a positive value.  In case of failure, 
    returns a non-positive value.

int _fmpz_poly_fprint(FILE * file, const fmpz * poly, slong len)

    Prints the polynomial \code{(poly, len)} to the stream \code{file}.

    In case of success, returns a positive value.  In case of failure, 
    returns a non-positive value.

int fmpz_poly_fprint(FILE * file, const fmpz_poly_t poly)

    Prints the polynomial to the stream \code{file}.

    In case of success, returns a positive value.  In case of failure, 
    returns a non-positive value.

int _fmpz_poly_fprint_pretty(FILE * file, 
                                       const fmpz * poly, slong len, char * x)

    Prints the pretty representation of \code{(poly, len)} to the stream 
    \code{file}, using the string \code{x} to represent the indeterminate.

    In case of success, returns a positive value.  In case of failure, 
    returns a non-positive value.

int fmpz_poly_fprint_pretty(FILE * file, const fmpz_poly_t poly, char * x)

    Prints the pretty representation of \code{poly} to the stream \code{file}, 
    using the string \code{x} to represent the indeterminate.

    In case of success, returns a positive value.  In case of failure, 
    returns a non-positive value.

int fmpz_poly_read(fmpz_poly_t poly)

    Reads a polynomial from \code{stdin}, storing the result in \code{poly}.

    In case of success, returns a positive number.  In case of failure, 
    returns a non-positive value.

int fmpz_poly_read_pretty(fmpz_poly_t poly, char **x)

    Reads a polynomial in pretty format from \code{stdin}.

    For further details, see the documentation for the function 
    \code{fmpz_poly_fread_pretty()}.

int fmpz_poly_fread(FILE * file, fmpz_poly_t poly)

    Reads a polynomial from the stream \code{file}, storing the result 
    in \code{poly}.

    In case of success, returns a positive number.  In case of failure, 
    returns a non-positive value.

int fmpz_poly_fread_pretty(FILE *file, fmpz_poly_t poly, char **x)

    Reads a polynomial from the file \code{file} and sets \code{poly} 
    to this polynomial.  The string \code{*x} is set to the variable 
    name that is used in the input.

    The parser is implemented via a finite state machine as follows:
    \begin{verbatim}
        state   event     next state
        ----------------------------
          0      '-'          1
                 D            2
                 V0           3
          1      D            2
                 V0           3
          2      D            2
                 '*'          4
                 '+', '-'     1
          3      V            3
                 '^'          5
                 '+', '-'     1
          4      V0           3
          5      D            6
          6      D            6
                 '+', '-'     1
    \end{verbatim}
    Here, {\tt D} refers to any digit, {\tt V0} to any character which 
    is allowed as the first character in the variable name (an alphetic 
    character), and {\tt V} to any character which is allowed in the 
    remaining part of the variable name (an alphanumeric character or 
    underscore).

    Once we encounter a character which does not fit into the above 
    pattern, we stop.

    Returns a positive value, equal to the number of characters read from 
    the file, in case of success.  Returns a non-positive value in case of 
    failure, which could either be a read error or the indicator of a 
    malformed input.

*******************************************************************************

    Modular reduction and reconstruction

*******************************************************************************

void fmpz_poly_get_nmod_poly(nmod_poly_t Amod, fmpz_poly_t A)

    Sets the coefficients of \code{Amod} to the coefficients in \code{A},
    reduced by the modulus of \code{Amod}.

void fmpz_poly_set_nmod_poly(fmpz_poly_t A, const nmod_poly_t Amod)

    Sets the coefficients of \code{A} to the residues in \code{Amod},
    normalised to the interval $-m/2 \le r < m/2$ where $m$ is the modulus.

void fmpz_poly_set_nmod_poly_unsigned(fmpz_poly_t A, const nmod_poly_t Amod)

    Sets the coefficients of \code{A} to the residues in \code{Amod},
    normalised to the interval $0 \le r < m$ where $m$ is the modulus.

void
_fmpz_poly_CRT_ui_precomp(fmpz * res, const fmpz * poly1, slong len1,
               const fmpz_t m1, mp_srcptr poly2, slong len2, mp_limb_t m2,
                mp_limb_t m2inv, fmpz_t m1m2, mp_limb_t c, int sign)

    Sets the coefficients in \code{res} to the CRT reconstruction modulo
    $m_1m_2$ of the residues \code{(poly1, len1)} and \code{(poly2, len2)}
    which are images modulo $m_1$ and $m_2$ respectively.
    The caller must supply the precomputed product of the input moduli as
    $m_1m_2$, the inverse of $m_1$ modulo $m_2$ as $c$, and
    the precomputed inverse of $m_2$ (in the form computed by
    \code{n_preinvert_limb}) as \code{m2inv}.

    If \code{sign} = 0, residues $0 <= r < m_1 m_2$ are computed, while
    if \code{sign} = 1, residues $-m_1 m_2/2 <= r < m_1 m_2/2$ are computed.

    Coefficients of \code{res} are written up to the maximum of
    \code{len1} and \code{len2}.

void
_fmpz_poly_CRT_ui(fmpz * res, const fmpz * poly1, slong len1,
               const fmpz_t m1, mp_srcptr poly2, slong len2, mp_limb_t m2,
                mp_limb_t m2inv, int sign)

    This function is identical to \code{_fmpz_poly_CRT_ui_precomp},
    apart from automatically computing $m_1m_2$ and $c$. It also
    aborts if $c$ cannot be computed.

void fmpz_poly_CRT_ui(fmpz_poly_t res, const fmpz_poly_t poly1,
                        const fmpz_t m, const nmod_poly_t poly2, int sign)

    Given \code{poly1} with coefficients modulo \code{m} and \code{poly2}
    with modulus $n$, sets \code{res} to the CRT reconstruction modulo $mn$
    with coefficients satisfying $-mn/2 \le c < mn/2$ (if sign = 1)
    or $0 \le c < mn$ (if sign = 0).

*******************************************************************************

    Products

*******************************************************************************

void _fmpz_poly_product_roots_fmpz_vec(fmpz * poly, const fmpz * xs, slong n)

    Sets \code{(poly, n + 1)} to the monic polynomial which is the product
    of $(x - x_0)(x - x_1) \cdots (x - x_{n-1})$, the roots $x_i$ being
    given by \code{xs}.

    Aliasing of the input and output is not allowed.


void fmpz_poly_product_roots_fmpz_vec(fmpz_poly_t poly,
        const fmpz * xs, slong n)

    Sets \code{poly} to the monic polynomial which is the product
    of $(x - x_0)(x - x_1) \cdots (x - x_{n-1})$, the roots $x_i$ being
    given by \code{xs}.

*******************************************************************************

    Newton basis conversion

*******************************************************************************

void _fmpz_poly_monomial_to_newton(fmpz * poly, const fmpz * roots, slong n)

    Converts the polynomial in-place from its coefficients in the
    monomial basis to the Newton basis $1, (x-r_0), (x-r_0)(x-r_1), \ldots$.
    Uses Horner's rule, requiring $O(n^2)$ operations.

void _fmpz_poly_newton_to_monomial(fmpz * poly, const fmpz * roots, slong n)

    Converts the polynomial in-place from its coefficients in the
    Newton basis $1, (x-r_0), (x-r_0)(x-r_1), \ldots$ to the monomial
    basis. Uses repeated polynomial division, requiring $O(n^2)$ operations.

*******************************************************************************

    Roots

*******************************************************************************

void _fmpz_poly_bound_roots(fmpz_t bound, const fmpz * poly, slong len)

void fmpz_poly_bound_roots(fmpz_t bound, const fmpz_poly_t poly)

    Computes a nonnegative integer \code{bound} that bounds the absolute
    value of all complex roots of \code{poly}. Uses Fujiwara's bound

    $$
    2 \max \left(
        \left|\frac{a_{n-1}}{a_n}\right|,
        \left|\frac{a_{n-2}}{a_n}\right|^{\frac{1}{2}}, \dots
        \left|\frac{a_1}{a_n}\right|^{\frac{1}{n-1}},
        \left|\frac{a_0}{2a_n}\right|^{\frac{1}{n}}
    \right)
    $$

    where the coefficients of the polynomial are $a_0, \ldots, a_n$.

