\[
\begin{aligned}
u&=\begin{cases}
\left\lbrack x^{n-1}\right\rbrack\dfrac{U_e\left(x^2,y\right)}{V\left(x^2,y\right)}&\text{ if }n-1\text{ is even,} \\
\left\lbrack x^{n-1}\right\rbrack\dfrac{xU_o\left(x^2,y\right)}{V\left(x^2,y\right)}&\text{ if }n-1\text{ is odd.}
\end{cases} \\
&=\begin{cases}
\left\lbrack x^{\left\lceil n/2\right\rceil-1}\right\rbrack\dfrac{U_e\left(x,y\right)}{V\left(x,y\right)}&\text{ if }n-1\text{ is even,} \\
\left\lbrack x^{\left\lceil n/2\right\rceil-1}\right\rbrack\dfrac{U_o\left(x,y\right)}{V\left(x,y\right)}&\text{ if }n-1\text{ is odd.}
\end{cases}
\end{aligned}
\]
#include<algorithm>#include<cassert>#include<cstdio>#include<tuple>#include<utility>#include<vector>usinguint=unsigned;usingull=unsignedlonglong;constexpruintMOD=998244353;constexpruintPowMod(uinta,ulle){for(uintres=1;;a=(ull)a*a%MOD){if(e&1)res=(ull)res*a%MOD;if((e/=2)==0)returnres;}}constexpruintInvMod(uinta){returnPowMod(a,MOD-2);}constexpruintQUAD_NONRESIDUE=3;constexprintLOG2_ORD=23;// __builtin_ctz(MOD - 1)constexpruintZETA=PowMod(QUAD_NONRESIDUE,(MOD-1)>>LOG2_ORD);constexpruintINV_ZETA=InvMod(ZETA);// 返回做 n 长 FFT 所需的单位根数组,长度为一半std::pair<std::vector<uint>,std::vector<uint>>GetFFTRoot(intn){assert((n&(n-1))==0);if(n/2==0)return{};std::vector<uint>root(n/2),inv_root(n/2);root[0]=inv_root[0]=1;for(inti=0;(1<<i)<n/2;++i)root[1<<i]=PowMod(ZETA,1LL<<(LOG2_ORD-i-2)),inv_root[1<<i]=PowMod(INV_ZETA,1LL<<(LOG2_ORD-i-2));for(inti=1;i<n/2;++i)root[i]=(ull)root[i-(i&(i-1))]*root[i&(i-1)]%MOD,inv_root[i]=(ull)inv_root[i-(i&(i-1))]*inv_root[i&(i-1)]%MOD;return{root,inv_root};}voidButterfly(intn,uinta[],constuintroot[]){assert((n&(n-1))==0);for(inti=n;i>=2;i/=2)for(intj=0;j<n;j+=i)for(intk=j;k<j+i/2;++k){constuintu=a[k];a[k+i/2]=(ull)a[k+i/2]*root[j/i]%MOD;if((a[k]+=a[k+i/2])>=MOD)a[k]-=MOD;if((a[k+i/2]=u+MOD-a[k+i/2])>=MOD)a[k+i/2]-=MOD;}}voidInvButterfly(intn,uinta[],constuintroot[]){assert((n&(n-1))==0);for(inti=2;i<=n;i*=2)for(intj=0;j<n;j+=i)for(intk=j;k<j+i/2;++k){constuintu=a[k];if((a[k]+=a[k+i/2])>=MOD)a[k]-=MOD;a[k+i/2]=(ull)(u+MOD-a[k+i/2])*root[j/i]%MOD;}}voidFFT(intn,uinta[],constuintroot[]){Butterfly(n,a,root);}voidInvFFT(intn,uinta[],constuintroot[]){InvButterfly(n,a,root);constuintinv_n=InvMod(n);for(inti=0;i<n;++i)a[i]=(ull)a[i]*inv_n%MOD;}// 形式幂级数复合,求出 f(g) mod x^n 要求 g(0) = 0std::vector<uint>FPSComposition(std::vector<uint>f,std::vector<uint>g,intn){assert(g.empty()||g[0]==0);intlen=1;while(len<n)len*=2;std::vector<uint>root,inv_root;std::tie(root,inv_root)=GetFFTRoot(len*4);// [y^(-1)] (f(y) / (-g(x) + y)) mod x^n in R[x]((y^(-1)))autoKinoshitaLi=[&](auto&&KinoshitaLi,conststd::vector<uint>&P,conststd::vector<uint>&Q,intd,intn){assert((int)P.size()==d*n);assert((int)Q.size()==d*n);if(n==1)returnP;std::vector<uint>dftQ(d*n*4);for(inti=0;i<d;++i)for(intj=0;j<n;++j)dftQ[i*n*2+j]=Q[i*n+j];dftQ[d*n*2]=1;FFT(d*n*4,dftQ.data(),root.data());std::vector<uint>V(d*n*2);for(inti=0;i<d*n*4;i+=2)V[i/2]=(ull)dftQ[i]*dftQ[i+1]%MOD;InvFFT(d*n*2,V.data(),inv_root.data());if((V[0]+=MOD-1)>=MOD)V[0]-=MOD;for(inti=1;i<d*2;++i)for(intj=0;j<n/2;++j)V[i*(n/2)+j]=V[i*n+j];V.resize(d*n);conststd::vector<uint>T=KinoshitaLi(KinoshitaLi,P,V,d*2,n/2);std::vector<uint>dftT(d*n*2);for(inti=0;i<d*2;++i)for(intj=0;j<n/2;++j)dftT[i*n+j]=T[i*(n/2)+j];FFT(d*n*2,dftT.data(),root.data());for(inti=0;i<d*n*4;i+=2){constuintu=dftQ[i];dftQ[i]=(ull)dftT[i/2]*dftQ[i+1]%MOD;dftQ[i+1]=(ull)dftT[i/2]*u%MOD;}InvFFT(d*n*4,dftQ.data(),inv_root.data());for(inti=0;i<d;++i)for(intj=0;j<n;++j)dftQ[i*n+j]=dftQ[(i+d)*(n*2)+j];dftQ.resize(d*n);returndftQ;};f.resize(len);g.resize(len);for(inti=0;i<len;++i)g[i]=(g[i]!=0?MOD-g[i]:0);std::vector<uint>res=KinoshitaLi(KinoshitaLi,f,g,1,len);res.resize(n);returnres;}// Power Projection: [x^(n-1)] (fg^i) for i=0,..,n-1 要求 g(0) = 0std::vector<uint>PowerProjection(std::vector<uint>f,std::vector<uint>g,intn){assert(g.empty()||g[0]==0);intlen=1;while(len<n)len*=2;std::vector<uint>root,inv_root;std::tie(root,inv_root)=GetFFTRoot(len*4);// [x^(n-1)] (f(x) / (-g(x) + y)) in R[x]((y^(-1)))autoKinoshitaLi=[&](auto&&KinoshitaLi,std::vector<uint>P,std::vector<uint>Q,intd,intn)->std::vector<uint>{assert((int)P.size()==d*n);assert((int)Q.size()==d*n);if(n==1)returnP;std::vector<uint>dftQ(d*n*4),dftP(d*n*4);for(inti=0;i<d;++i)for(intj=0;j<n;++j)dftP[i*n*2+j]=P[i*n+j],dftQ[i*n*2+j]=Q[i*n+j];dftQ[d*n*2]=1;FFT(d*n*4,dftP.data(),root.data());FFT(d*n*4,dftQ.data(),root.data());P.resize(d*n*2);Q.resize(d*n*2);for(inti=0;i<d*n*4;i+=2){constuintu=(ull)dftP[i]*dftQ[i+1]%MOD;constuintv=(ull)dftP[i+1]*dftQ[i]%MOD;P[i/2]=(ull)(u+MOD-v)*inv_root[i/2]%MOD;if(P[i/2]&1)P[i/2]+=MOD;P[i/2]/=2;Q[i/2]=(ull)dftQ[i]*dftQ[i+1]%MOD;}InvFFT(d*n*2,P.data(),inv_root.data());InvFFT(d*n*2,Q.data(),inv_root.data());if((Q[0]+=MOD-1)>=MOD)Q[0]-=MOD;for(inti=1;i<d*2;++i)for(intj=0;j<n/2;++j)P[i*(n/2)+j]=P[i*n+j],Q[i*(n/2)+j]=Q[i*n+j];P.resize(d*n);Q.resize(d*n);returnKinoshitaLi(KinoshitaLi,P,Q,d*2,n/2);};f.insert(f.begin(),len-n,0);f.resize(len);g.resize(len);for(inti=0;i<len;++i)g[i]=(g[i]!=0?MOD-g[i]:0);std::vector<uint>res=KinoshitaLi(KinoshitaLi,f,g,1,len);std::reverse(res.begin(),res.end());res.resize(n);returnres;}// 形式幂级数幂函数,计算 g^e mod x^n 要求 g(0) = 1std::vector<uint>FPSPow1(std::vector<uint>g,uinte,intn){assert(!g.empty()&&g[0]==1);if(n==1)returnstd::vector<uint>{1u};std::vector<uint>inv(n),f(n);inv[1]=f[0]=1;for(inti=2;i<n;++i)inv[i]=(ull)(MOD-MOD/i)*inv[MOD%i]%MOD;for(inti=1;i<n;++i)f[i]=(ull)f[i-1]*(e+MOD+1-i)%MOD*inv[i]%MOD;g[0]=0;returnFPSComposition(f,g,n);}// 形式幂级数复合逆// 计算 g mod x^n 满足 g(f) = f(g) = x 要求 g(0) = 0 且 g'(0) ≠ 0std::vector<uint>FPSReversion(std::vector<uint>f,intn){assert(f.size()>=2&&f[0]==0&&f[1]!=0);if(n==1)returnstd::vector<uint>{0u};f.resize(n);constuintinvf1=InvMod(f[1]);uintinvf1p=1;for(inti=0;i<n;++i)f[i]=(ull)f[i]*invf1p%MOD,invf1p=(ull)invf1p*invf1%MOD;std::vector<uint>inv(n);inv[1]=1;for(inti=2;i<n;++i)inv[i]=(ull)(MOD-MOD/i)*inv[MOD%i]%MOD;std::vector<uint>proj=PowerProjection(std::vector<uint>{1u},f,n);for(inti=1;i<n;++i)proj[i]=(ull)proj[i]*(n-1)%MOD*inv[i]%MOD;std::reverse(proj.begin(),proj.end());std::vector<uint>res=FPSPow1(proj,InvMod(MOD+1-n),n-1);for(inti=0;i<n-1;++i)res[i]=(ull)res[i]*invf1%MOD;res.insert(res.begin(),0);returnres;}intmain(){intn;std::scanf("%d",&n);std::vector<uint>f(n);for(inti=0;i<n;++i)std::scanf("%u",&f[i]);conststd::vector<uint>res=FPSReversion(f,n);for(inti=0;i<n;++i)std::printf("%u%c",res[i]," \n"[i+1==n]);return0;}
由转置原理导出
Power Projection 问题是 Modular Composition 的转置,Kinoshita 和 Li 指出我们前文的复合算法可以由 Power Projection 算法直接转置得到。同样的,如果优化可以应用于 Power Projection 算法,其也可以应用于 Modular Composition 算法。我们省略细节。
Daniel J. Bernstein. "Fast multiplication and its applications." Pages 325–384 in Algorithmic number theory: lattices, number fields, curves and cryptography, edited by Joe Buhler, Peter Stevenhagen, Cambridge University Press, 2008, ISBN 978-0521808545.