/* A Better Polynomial Template by zzq */ #pragma GCC optimize("-Ofast","-funroll-all-loops","-ffast-math") #pragma GCC optimize("-fno-math-errno") #pragma GCC optimize("-funsafe-math-optimizations") #pragma GCC optimize("-freciprocal-math") #pragma GCC optimize("-fno-trapping-math") #pragma GCC optimize("-ffinite-math-only") #pragma GCC optimize("-fno-stack-protector") #pragma GCC target ("avx2","sse4.2","fma") #include using namespace std; #define pb push_back #define mp make_pair typedef long long ll; #define fi first #define se second #define SS 524288 //max length of DFT #define PS (SS*20+1000) //pool for temp arrays #define PS2 (SS*7+1000) //another pool, see ocmul #define FS 666666 //length of fac, rfac const int MOD=998244353; ll qp(ll a,ll b) { ll ans=1; a%=MOD; while(b) { if(b&1) ans=ans*a%MOD; a=a*a%MOD; b>>=1; } return ans; } int getK(int n) {int s=1; while(sj) swap(p[i],p[j]); for(int l=t>>1;(j^=l)>=1);}} void prep(int n){static int t=1;rv[1]=1;for(;t>32)*ull(998244353);} #ifdef USE_INTRINSICS #warning intrinsics is on! #include inline __m128i my_mullo_epu32(const __m128i&a,const __m128i& b){ #if defined ( __SSE4_2__ ) return _mm_mullo_epi32(a,b); #else __m128i a13=_mm_shuffle_epi32(a,0xF5),b13=_mm_shuffle_epi32(b,0xF5), prod02=_mm_mul_epu32(a,b),prod13=_mm_mul_epu32(a13,b13), prod=_mm_unpacklo_epi64(_mm_unpacklo_epi32(prod02,prod13), _mm_unpackhi_epi32(prod02,prod13));return prod; #endif }inline __m128i my_mulhi_epu32(const __m128i&a,const __m128i& b){ __m128i a13=_mm_shuffle_epi32(a,0xF5),b13=_mm_shuffle_epi32(b,0xF5), prod02=_mm_mul_epu32(a,b),prod13=_mm_mul_epu32(a13, b13), prod=_mm_unpackhi_epi64(_mm_unpacklo_epi32(prod02,prod13), _mm_unpackhi_epi32(prod02,prod13));return prod;} #endif void ntt(us* x,int n,bool f=true){prep(n); int t=n; for(int m=1;m>=1;us *p=p0[m],*q=q0[m]; #ifdef USE_INTRINSICS if(t==1||t==2){ #endif us *xa=x,*xb=x+t;for(int i=0;i=us(MOD+MOD))*us(MOD+MOD); us v=my_mul(xb[j],p[i],q[i]);xa[j]=u+v;xb[j]=u-v+us(MOD+MOD);} #ifdef USE_INTRINSICS }else if(t==4){us *xa=x,*xb=x+t; for(int i=0;i=us(MOD+MOD))*us(MOD+MOD), x[i]-=(x[i]>=us(MOD))*us(MOD);if(f) bit_flip(x,n);} void intt(us* x,int n,bool f=true){prep(n); int t=1;if(f) bit_flip(x,n); for(int m=(n>>1);m;m>>=1){us *p=p1[m],*q=q1[m]; #ifdef USE_INTRINSICS if(t==1||t==2){ #endif us *xa=x,*xb=x+t;for(int i=0;i=us(MOD+MOD))*us(MOD+MOD); xb[j]=my_mul(u-v+us(MOD+MOD),p[i],q[i]);} #ifdef USE_INTRINSICS }else if(t==4){us *xa=x,*xb=x+t;for(int i=0;i=MOD)?(MOD):0)};} mi operator - (const mi& a,const mi& b) {return mi{a.w-b.w+((a.w=MOD)?MOD:0); return a;} mi& operator -= (mi& a,const mi& b) {a.w=a.w-b.w+((a.w=MOD)?MOD:0); return a;} mi& operator -- (mi& a) {a.w=a.w-1+(a.w?0:MOD); return a;} void ntt(mi* x,int n,bool f=true) {RawNTT::ntt((us*)x,n,f);} //make sure this works void intt(mi* x,int n,bool f=true) {RawNTT::intt((us*)x,n,f);} void fft(mi* x,int n,bool r,bool f=true) { if(r) intt(x,n,f); else ntt(x,n,f); } void cp(mi*t,const mi*s,int K) { if(s) memcpy(t,s,sizeof(mi)*K); else memset(t,0,sizeof(mi)*K); } void cp(mi*t,mi s,int K) { if(s.w==0) memset(t,0,sizeof(mi)*K); else for(int i=0;i>=1; } return x; } string to_string(mi f) {return to_string((int)f);} string pretty_guess(mi x,int max_dem=1000) { string s=to_string((int)x); auto upd=[&](string v) { if(v.size()> (istream& is,mi& m) { int x; is>>x; m=x; return is; } //+ basic ops mi fac[FS],rfac[FS]; struct Fac_Initer { Fac_Initer() { fac[0]=1; for(int i=1;i>1); mi*fo=alc(K,1),*fx=alc(K),*fw=alc(K); cp(fo,o,(K>>1)); fft(fo,K,0); cp(fx,x,K); fft(fx,K,0); for(int i=0;i>1),K>>1); cp(fw+(K>>1),0,K>>1); ntt(fw,K); for(int i=0;i>1);++i) o[i+(K>>1)]=-fw[i]; pt-=K+K+K; } void ginv(mi*x,mi*o,int n) { int K=getK(n); mi *fx=alc(K,1),*fo=alc(K); cp(fx,x,n); ginv_K(fx,fo,K); cp(o,fo,n); pt-=K+K; } void gdiv(mi*a,mi*b,mi*d,int n,int m) { int s=getK(max(n,m)); mi *ra=alc(s+s,1),*rb=alc(s+s,1); for(int i=0;i>1); mi*fh=alc(K,1),*gg=alc(K>>1),*rr=alc(K,1); cp(fh,h,(K>>1)); fft(fh,K,0); cp(gg,gh,K>>1); for(int i=0;i<(K>>1);++i) gg[i]=gg[i]*gg[i]; fft(gg,K>>1,1); for(int i=0;i<(K>>1);++i) rr[i+(K>>1)]=gg[i]-f[i]-f[i+(K>>1)]; fft(rr,K,0); for(int i=0;i>1);i>1);++i) fw[i]=fw[i+(K>>1)]; cp(fw+(K>>1),0,K>>1); fft(fw,K,0); for(int i=0;i>1);++i) h[i+(K>>1)]=-fw[i]; cp(gh,fg,K); pt-=K+K; } pt-=K+K+(K>>1); } void gsqrt(mi*f,mi*g,int n) { int s=getK(n); mi *mf=alc(s,1),*mg=alc(s),*mh=alc(s); cp(mf,f,n); gsqrt_K(mf,mg,mh,s,0); cp(g,mg,n); pt-=s+s+s; } void gexp_K(mi*f,mi*g,mi*h,int K,bool ch=1) { if(K==1) { g[0]=h[0]=1; return; } gexp_K(f,g,h,K>>1); mi*gg=alc(K>>1),*hh=alc(K>>1),*fh=0, *dg=alc(K>>1),*t1=alc(K,1),*t2=alc(K,1); dg[(K>>1)-1]=0; for(int i=0;i+1<(K>>1);++i) dg[i]=g[i+1]*(i+1); cp(gg,g,K>>1); mi c=0; for(int i=0;i<(K>>1);++i) c=c+dg[i]*h[((K>>1)-1)-i]; if(!ch) cp(hh,h,K>>1), fft(hh,K>>1,0); else { fh=alc(K,1); cp(fh,h,(K>>1)); fft(fh,K,0); for(int i=0;i>1]=fh[i]; } fft(gg,K>>1,0); fft(dg,K>>1,0); for(int i=0;i<(K>>1);++i) gg[i]=gg[i]*hh[i]; fft(gg,K>>1,1); for(int i=0;i<(K>>1);++i) t1[i+(K>>1)]=(i==0)-gg[i]; for(int i=0;i+1<(K>>1);++i) t2[i]=f[i+1]*(i+1); fft(t1,K,0); fft(t2,K,0); for(int i=0;i>1);++i) t1[i]=0; for(int i=0;i+1>1);++i) dg[i]=dg[i]*hh[i]; fft(dg,K>>1,1); mi r; for(int i=0;i<(K>>1);++i) r=(i+1==(K>>1))?c:(f[i+1]*(i+1)), t1[i]=t1[i]+r,t1[i+(K>>1)]=t1[i+(K>>1)]+dg[i]-r; t2[0]=0; for(int i=0;i+1>1); cp(t1+(K>>1),0,K>>1); fft(t1,K,0); fft(t2,K,0); for(int i=0;i>1);i>1)*3; if(ch) { mi *fg=alc(K),*fw=alc(K); cp(fg,g,K); fft(fg,K,0); for(int i=0;i>1);++i) fw[i]=fw[i+(K>>1)]; cp(fw+(K>>1),0,K>>1); fft(fw,K,0); for(int i=0;i>1);++i) h[i+(K>>1)]=-fw[i]; pt-=K+K+K; } } void gexp(mi*f,mi*g,int n) { int s=getK(n); mi *mf=alc(s,1),*mg=alc(s),*mh=alc(s); cp(mf,f,n); gexp_K(mf,mg,mh,s,0); cp(g,mg,n); pt-=s+s+s; } //+ mod_sqrt namespace QR{ typedef pair pll; ll pll_s; inline pll mul(pll a,pll b,ll p){ pll ans;ans.fi=a.fi*b.fi%p+a.se*b.se%p*pll_s%p; ans.se=a.fi*b.se%p+a.se*b.fi%p;ans.fi%=p; ans.se%=p;return ans;} pll qp(pll a,ll b,ll c) {pll ans(1,0);while(b) {if(b&1) ans=mul(ans,a,c); a=mul(a,a,c); b>>=1;}return ans;}ll qp(ll a,ll b,ll c) { ll ans=1;while(b) {if(b&1) ans=ans*a%c;a=a*a%c; b>>=1;} return ans;}int mod_sqrt(ll a,ll p=MOD) {if(!a) return 0; if(p==2) return 1;ll w,q;while(1) {w=rand()%p; q=w*w-a;q=(q%p+p)%p; if(qp(q,(p-1)/2,p)!=1)break;}pll_s=q;pll rst=qp(pll(w,1),(p+1)/2,p); ll ans=(rst.fi%p+p)%p;return min(ans,p-ans);} }using QR::mod_sqrt; //+ poly #include int default_shrink=-1; //mod x^n struct poly { vector coeff; int shrink_len; void rev() { fit_shrink(); reverse(coeff.begin(),coeff.end()); } void insert(mi x) { coeff.insert(coeff.begin(),x); shrink(); } mi& operator [] (int x) { if((x<0)||(shrink_len!=-1&&x>=shrink_len)) throw out_of_range("invalid offset"); if((int)coeff.size()=shrink_len)) throw out_of_range("invalid offset"); if((int)coeff.size()=shrink_len)) return 0; if((int)coeff.size() coeff_,int shrink_len_=default_shrink): coeff(coeff_),shrink_len(shrink_len_){ this->shrink(); } poly(vector coeff_,int shrink_len_=default_shrink): shrink_len(shrink_len_){ this->coeff.resize(coeff_.size()); for(int i=0;i<(int)coeff.size();++i) this->coeff[i]=coeff_[i]; this->shrink(); } void clean_maybe() { if(is_poly()) while(coeff.size()&&coeff.back()==0) coeff.pop_back(); } void clean() { assert(is_poly()); clean_maybe(); } void fit_shrink() { assert(is_series()); coeff.resize(shrink_len); } void set_shrink(int shrink_len_=default_shrink) { this->shrink_len=shrink_len_; this->shrink(); } void dump(char e=0,bool g=1,int l=9) const { auto format=[&](mi num) { return g?pretty_guess(num):to_string(num); }; int u=(int)coeff.size()-1; while(u>=0&&coeff[u]==0) --u; if(u<0) { printf("{}"); } else { for(int j=0;j<=u&&j<=l;++j) printf("%c%s","{,"[j!=0],format(coeff[j]).c_str()); if(u>l) printf("...%s(x^%d)",format(coeff[u]).c_str(),u); printf("}"); } if(shrink_len==-1) printf(" (poly)"); else printf(" (mod x^%d)",shrink_len); if(e) putchar(e); } const mi* coeff_ptr() const { if(!coeff.size()) return 0; return coeff.data(); } mi* coeff_ptr() { if(!coeff.size()) return 0; return coeff.data(); } int size() const { return coeff.size(); } void reserve(int l) { if(shrink_len!=-1) l=min(l,shrink_len); if(l>(int)coeff.size()) coeff.resize(l); } void print_shrink(char e) { fit_shrink(); for(int i=0;ishrink_len) coeff.resize(shrink_len); } bool is_poly() const {return shrink_len==-1;} bool is_series() const {return shrink_len!=-1;} mi eval(mi x) { assert(is_poly()); mi w=0; for(int i=size()-1;i>=0;--i) w=w*x+coeff[i]; return w; } }; poly polyi(mi x) { return poly(vector{x},-1); } poly operator"" _p(unsigned long long int x) { return poly(vector{int(x%MOD)},-1); } poly operator"" _p(const char *str,std::size_t len) { poly ans(-1); int sgn=1,phase=0,coeff=0,touch=0; ll cnum=0; auto clean=[&]() { if(phase==-1) ans[1]+=sgn*coeff; else if(phase==0) ans[0]+=sgn*(int)cnum; else if(phase==1) ans[cnum]+=sgn*coeff; else assert(0); phase=cnum=touch=0; }; for(int i=0;i<(int)len;++i) { if(str[i]=='+') clean(),sgn=1; else if(str[i]=='-') clean(),sgn=-1; else if(isdigit(str[i])) { assert(phase==0||phase==1); if(phase==0) touch=1,cnum=(cnum*10LL+str[i]-48)%MOD; else cnum=cnum*10LL+str[i]-48,assert(cnum<1e8); } else if(str[i]=='x') { assert(str[i+1]=='^'||str[i+1]=='+'||str[i+1]=='-'||str[i+1]==0); phase=-1; coeff=touch?cnum:1; cnum=0; } else if(str[i]=='^') { assert(phase==-1); phase=1; } } clean(); return ans; } //+ poly ops void share_shrink(poly&a,poly&b) { int l=max(a.shrink_len,b.shrink_len); a.set_shrink(l);b.set_shrink(l); } poly ginv(poly p) { p.fit_shrink(); ginv(p.coeff_ptr(),p.coeff_ptr(),p.shrink_len); return p; } poly gln(poly p) { p.fit_shrink(); gln(p.coeff_ptr(),p.coeff_ptr(),p.shrink_len); return p; } poly gsqrt(poly p,mi f0=mi(1)) { p.fit_shrink(); sqrt_f0=f0; gsqrt(p.coeff_ptr(),p.coeff_ptr(),p.shrink_len); return p; } poly gexp(poly p) { p.fit_shrink(); gexp(p.coeff_ptr(),p.coeff_ptr(),p.shrink_len); return p; } int merge_shrink(int s1,int s2) { if(s1==-1) return s2; if(s2==-1) return s1; assert(s1==s2); //usually s1=s2 return min(s1,s2); } poly operator + (const poly& a,const poly& b) { poly c(merge_shrink(a.shrink_len,b.shrink_len)); c.reserve(max(a.size(),b.size())); for(int i=0;i gdiv(poly a,poly b) { //(quotient, remainder) assert(a.is_poly()&&b.is_poly()); int n=a.size(),m=b.size(); assert(m>0); if(n0); if(n=1;--i) a[i]=a[i-1]*rfac[i]*fac[i-1]; if(a.size()) a[0]=0; return a; } poly gde(poly a) { //shrink actually changes if(!a.size()) return a; for(int i=1;i g, function gp, int f0,int len=default_shrink) { poly f(1); f[0]=f0; while(f.shrink_len!=len) { int old_len=f.shrink_len; int new_len=min(old_len*2,len); f.set_shrink(new_len); poly s=g(f); s.fit_shrink(); poly h=f; h.set_shrink(new_len-old_len); h=ginv(gp(h)); s.coeff.erase(s.coeff.begin(),s.coeff.begin()+old_len); s.set_shrink(new_len-old_len); s=h*s; s.set_shrink(new_len); s.coeff.insert(s.coeff.begin(),old_len,mi(0)); f=f-s; } return f; } poly gpow(poly p,string k) { int u=p.shrink_len,x=0; p.fit_shrink(); while(x=u*2) return poly(u); p.coeff.erase(p.coeff.begin(),p.coeff.begin()+x); mi v=p[0],s=qp(v,m1),iv=inv(v); for(mi&w:p.coeff) w=w*iv; p=gexp(m0*gln(p)); for(mi&w:p.coeff) w=w*s; p.coeff.insert(p.coeff.begin(),x*m1,mi(0)); p.fit_shrink(); return p; } poly gpow(poly p,ll k) { return gpow(p,to_string(k)); } poly gnewton_d( //solve f'=G(f), gp=(G,G') function(const poly&)> gp, int f0,int len=default_shrink) { poly f(1); f[0]=f0; while(f.shrink_len!=len) { int old_len=f.shrink_len; int new_len=min(old_len*2,len); f.set_shrink(new_len); auto fp=gp(f); poly gpf=fp.second,r=gexp(mi(-1)*gint(gpf)); f=gint((fp.first-gpf*f)*r); f[0]=f0; f=f*ginv(r); } return f; } poly gnewton_d( function g, function gp, int f0,int len=default_shrink) { return gnewton_d([&](const poly& s) { return make_pair(g(s),gp(s)); },f0,len); } poly gcompinv( //G^<-1>, gp=(G,G') function(const poly&)> gp, int f0,int len=default_shrink) { poly f(1); f[0]=f0; while(f.shrink_len!=len) { int old_len=f.shrink_len; int new_len=min(old_len*2,len); f.set_shrink(new_len); auto fp=gp(f); auto gf=fp.first; gf=mi(-1)*gf; gf.reserve(2);++gf[1]; f=f+gf*ginv(fp.second); } return f; } poly gcompinv( function g, function gp, int f0,int len=default_shrink) { return gcompinv([&](const poly& s) { return make_pair(g(s),gp(s)); },f0,len); } poly prod_recurse(poly*a,int n) { if(n==1) return *a; int m=n>>1; return prod_recurse(a,m)*prod_recurse(a+m,n-m); } poly prod(vector p) { if(!p.size()) return poly(-1); sort(p.begin(),p.end(),[&](const poly&a,const poly&b) { return a.size() b) { a.reserve(b.size()); for(int i=0;i=1); poly b(a.shrink_len); for(int i=0;i*u=1); while(n) { poly nb=b; for(int i=1;i>=1; } return a.get(0)*inv(b.get(0)); } vector BM(vector x) { vector ls,cur; int lf=0; mi ldt; for(int i=0;i c(i-lf-1); c.pb(-k); for(int j=0;j=(int)cur.size()) ls=cur,lf=i,ldt=t; cur=c; } return cur; } pair bm_poly(vector x) { vector f=BM(x); int k=f.size(); f.insert(f.begin(),mi(-1)); x.resize(k); poly r(f,-1),s(x,-1); poly u=r*s; u.coeff.resize(k); return make_pair(u,r); } mi linear_eval(vector x,ll n) { auto s=bm_poly(x); return linear_eval(s.first,s.second,n); } vector eval_tmp; void eval_build(int x,mi*a,int n) { if((int)eval_tmp.size(){mi(1),-*a},-1);return; } int m=(n+1)>>1;eval_build(x+x,a,m);eval_build(x+x+1,a+m,n-m); eval_tmp[x]=eval_tmp[x+x]*eval_tmp[x+x+1]; } poly mul_transpose(const poly& a,const poly& b) { //transposed mul assert(a.is_poly()&&b.size()>0&&b.is_poly()); if(a.size()>1; eval_recurse(x+x,mul_transpose(p,eval_tmp[x+x+1]),o,m); eval_recurse(x+x+1,mul_transpose(p,eval_tmp[x+x]),o+m,n-m); } vector multipoint_eval(poly p,vector q) { assert(p.is_poly()); if(!q.size()) return q; eval_build(1,q.data(),q.size()); int d=p.size(); p.set_shrink(d); p.rev(); poly o=eval_tmp[1]; o.set_shrink(d); p=p*ginv(o); p.rev(); p.set_shrink(-1); p.coeff.resize(q.size()); vector s(q.size()); eval_recurse(1,p,s.data(),q.size()); eval_tmp.clear(); return s; } vector interp_tmp; vector interp_y; void interp_build(int x,mi*a,int n) { if((int)interp_tmp.size(){-*a,mi(1)},-1); return; } int m=(n+1)>>1;interp_build(x+x,a,m);interp_build(x+x+1,a+m,n-m); interp_tmp[x]=interp_tmp[x+x]*interp_tmp[x+x+1]; } poly interp_calc(int x,int o,int n) { if(n==1) return poly(vector{interp_y[o]},-1); int m=(n+1)>>1; return interp_calc(x+x,o,m)*interp_tmp[x+x+1] +interp_calc(x+x+1,o+m,n-m)*interp_tmp[x+x]; } poly multipoint_interp(vector x,vector y) { assert(x.size()==y.size()); interp_build(1,x.data(),x.size()); interp_y=multipoint_eval(gde(interp_tmp[1]),x); for(int i=0;i<(int)y.size();++i) interp_y[i]=y[i]*inv(interp_y[i]); poly ans=interp_calc(1,0,x.size()); interp_tmp.clear(); interp_y.clear(); return ans; } poly powersum_recurse(mi*a,int n) { if(n==1) return poly({mi(1),-*a},-1); int m=n>>1; return powersum_recurse(a,m)*powersum_recurse(a+m,n-m); } vector powersum(vector x,int n) { poly s=powersum_recurse(x.data(),x.size()); s.set_shrink(n); poly t=s; for(int i=0;i<=(int)x.size()&&i o(n); for(int i=0;i gsincos(poly p) { assert(p.is_series()); mi j=qp(mi(3),(MOD-1)/4); poly a=gexp(j*p),b=gexp(-j*p); poly s=(a-b)*inv(2*j),c=(a+b)*inv(2); return make_pair(s,c); } //+ polygcd namespace polygcd { struct polym { poly a[2][2]; polym() { a[0][0].set_shrink(-1); a[0][1].set_shrink(-1); a[1][0].set_shrink(-1); a[1][1].set_shrink(-1); } }; pair operator * (const polym &a,const pair&b) { const poly *v[2]={&b.first,&b.second}; int mx=0; for(int i=0;i<2;++i) mx=max(mx,a.a[0][0].size()+v[0]->size()), mx=max(mx,a.a[0][1].size()+v[1]->size()), mx=max(mx,a.a[1][0].size()+v[0]->size()), mx=max(mx,a.a[1][1].size()+v[1]->size()); int k=getK(mx+1); mi*s[6]; for(int i=0;i<6;++i) s[i]=alc(k,1); for(int i=0;i<2;++i) for(int j=0;j<2;++j) cp(s[i*2+j],a.a[i][j].coeff_ptr(),a.a[i][j].size()), ntt(s[i*2+j],k,0); for(int w=0;w<2;++w) cp(s[w+4],v[w]->coeff_ptr(),v[w]->size()),ntt(s[w+4],k,0); pair op; for(int i=0;i<2;++i) { for(int j=0;j=0&&s[i*2][u]==0) --u; poly&o=i?op.second:op.first;o.set_shrink(-1); o.reserve(u+1);cp(o.coeff_ptr(),s[i*2],u+1); } pt-=k*6; return op; } polym operator * (const polym &a,const polym&b) { polym c; int mx=0; for(int i=0;i<2;++i) for(int j=0;j<2;++j) for(int k=0;k<2;++k) mx=max(mx,a.a[i][j].size()+b.a[j][k].size()); int k=getK(mx+1); mi*sa[2][2],*sb[2][2]; for(int i=0;i<2;++i) for(int j=0;j<2;++j) sa[i][j]=alc(k,1),sb[i][j]=alc(k,1); for(int i=0;i<2;++i) for(int j=0;j<2;++j) cp(sa[i][j],a.a[i][j].coeff_ptr(),a.a[i][j].size()), cp(sb[i][j],b.a[i][j].coeff_ptr(),b.a[i][j].size()), ntt(sa[i][j],k,0),ntt(sb[i][j],k,0); for(int i=0;i<2;++i) { for(int t=0;t=0&&sa[i][j][u]==0) --u; poly&o=c.a[i][j]; o.reserve(u+1);cp(o.coeff_ptr(),sa[i][j],u+1); } } pt-=k*8; return c; } polym hgcd(poly a,poly b) { assert(a.size()>b.size()); int m=a.size()/2; if(b.size()<=m) { polym w; w.a[0][0].coeff.push_back(1); w.a[1][1].coeff.push_back(1); return w; } poly a0=gshr(a,m),b0=gshr(b,m); polym r=hgcd(a0,b0); tie(a,b)=r*make_pair(a,b); if(b.size()<=m) return r; poly w; { auto d=gdiv(a,b); w=-d.first; a=b; b=d.second; } int k=m+m-(a.size()-1); assert(k>=0); a0=gshr(a,k),b0=gshr(b,k); swap(r.a[0][0].coeff,r.a[1][0].coeff); swap(r.a[0][1].coeff,r.a[1][1].coeff); r.a[1][0]=r.a[1][0]+w*r.a[0][0]; r.a[1][1]=r.a[1][1]+w*r.a[0][1]; return hgcd(a0,b0)*r; } polym cogcd(poly a,poly b) { polym r=hgcd(a,b); tie(a,b)=r*make_pair(a,b); if(!b.size()) return r; poly w; { auto d=gdiv(a,b); w=-d.first; a=b; b=d.second; } swap(r.a[0][0].coeff,r.a[1][0].coeff); swap(r.a[0][1].coeff,r.a[1][1].coeff); r.a[1][0]=r.a[1][0]+w*r.a[0][0]; r.a[1][1]=r.a[1][1]+w*r.a[0][1]; if(!b.size()) return r; return cogcd(a,b)*r; } poly ggcd(poly a,poly b) { assert(a.is_poly()&&b.is_poly()); polym s=cogcd(a,b); return s.a[0][0]*a+s.a[0][1]*b; } pair bm_poly_fast(vector x) { poly s(x,-1),t(-1); int n=x.size();t[n]=1; polym m0=hgcd(t,x); poly r=m0.a[1][1]; r.clean(); r=r*(-inv(r.get(0))); poly u=r*s; u.coeff.resize(n); u.clean(); return make_pair(u,r); } } using polygcd::ggcd; using polygcd::bm_poly_fast; //+ ocpoly namespace onlineconv { int oc_shrink=1e9; struct ocpoly { function get_handle; vector vis; vector val; string typ; //debug purpose mi get(int x) { #ifdef LOCAL if(!vis.size()) cerr<<" - Calling "<=0); if((int)vis.size()>x&&vis[x]) return val[x]; if((int)vis.size()<=x) vis.resize(x+1),val.resize(x+1); val[x]=get_handle(x); vis[x]=1; return val[x]; } poly await(int n) { poly s(-1); for(int i=0;iget(c); }; } ocpoly() {typ="custom";} ocpoly(ocpoly const&)=delete; ocpoly& operator = (ocpoly const&)=delete; }; struct ocint: public ocpoly { ocint(ocpoly*p,mi c=0) { typ="integ"; get_handle=[=](int u) { if(u==0) return c; return p->get(u-1)*rfac[u]*fac[u-1]; }; } }; struct ocde: public ocpoly { ocde(ocpoly*p) { typ="deriv";get_handle=[=](int u) {return p->get(u+1)*mi(u+1);}; } }; struct ocshr: public ocpoly { ocshr(ocpoly*p,int c) { typ="shiftr"; assert(c>=0); get_handle=[=](int u) {return p->get(u+c);}; } }; struct ocshl: public ocpoly { ocshl(ocpoly*p,int c) { typ="shiftl"; assert(c>=0); get_handle=[=](int u) { if(uget(u-c); }; } }; struct ocscale: public ocpoly { ocscale(ocpoly*p,mi c) { typ="scale";get_handle=[=](int u) {return p->get(u)*c;}; } }; struct ocadd: public ocpoly { ocadd(ocpoly*a,ocpoly*b) { typ="add"; get_handle=[=](int u) {return a->get(u)+b->get(u);}; } }; struct ocminus: public ocpoly { ocminus(ocpoly*a,ocpoly*b) { typ="minus"; get_handle=[=](int u) {return a->get(u)-b->get(u);}; } }; struct ocfixed: public ocpoly { ocfixed(const poly&p) { typ="fixed";get_handle=[=](int u) {return p.get(u);}; } }; struct occorner: public ocpoly { occorner(ocpoly*p,vector v) { typ="corner"; get_handle=[=](int u) { if(u<(int)v.size()) return v[u]; return p->get(u); }; } }; mi pool1[PS2],*ptr1=pool1; mi*alc1(int t,bool c=0) { if(c) cp(ptr1,0,t); ptr1+=t; assert(ptr1,int>> &st =*new vector,int>>(); vector,int>> &fa= *new vector,int>>(); vector &pool=*new vector(); mi*sa=new mi[128],*sb=new mi[128], *la=new mi[128],*lb=new mi[128]; get_handle=[=,&oa,&ob,&oaf,&obf,&cm,&cp,&st,&fa,&pool,&cpool] (int u) { if(u) this->get(u-1); auto alc0=[&](int s) { if(cpool+s>(int)pool.size()) { auto od=pool.data(); pool.resize(max(cpool+s,(int)pool.size()*2)); auto dt=pool.data()-od; if(dt) for(auto&x:st) if(x.first.first) x.first.first+=dt, x.first.second+=dt; } assert(cpool+s<=(int)pool.size()); mi*ptr=pool.data()+cpool; ::cp(ptr,0,s); cpool+=s; return ptr; }; auto cnt_bk=[&](int x) { int ans=0; for(int j=(int)st.size()-1;j>=0;--j) if(st[j].second==x) ++ans; else break; return ans; }; auto st_pop=[&]() { if(st.back().first.first) cpool-=st.back().second*2*(1+!(is_sqr)); assert(cpool>=0); st.pop_back(); }; //cerr<=oa+ob&&!(oaf&&obf)) { if((oa<=ob&&!oaf)||obf) { assert(!oaf); if(oa>u) break; if(a->get(oa)==0) ++oa; else oaf=1; } else { assert(!obf); if(ob>u) break; if(b->get(ob)==0) ++ob; else obf=1; } } if(uget((x)+oa)) #define gb(x) (b->get((x)+ob)) la[u&127]=ga(u), lb[u&127]=gb(u); if(u<128) sa[u]=ga(u), sb[u]=gb(u); if(u==0) cp[0]+=sa[0]*sb[0]; else cp[u]+=sa[0]*lb[u&127]+la[u&127]*sb[0]; if((cm+1)*2<=u) { int k=getK((u+1)*2); mi*tp=alc(k,1),*tq=alc(k,1); for(int i=0;i<=u;++i) tp[i]=ga(i); for(int i=0;i<=u;++i) tq[i]=gb(i); cp.coeff.clear(); cp.coeff.resize(u*2+1); ntt(tp,k,0); ntt(tq,k,0); for(int i=0;icm&&u!=cm*2+1) { auto gfa=[&](int cu,int of,int cs) { int id=cu*8+of; if((int)fa.size()(0,0),0)); if(fa[id].second) { assert(fa[id].second==cs); return fa[id].first; } fa[id].second=cs; mi*da=alc1(cs+cs,1),*db=0; if(!is_sqr) { db=alc1(cs+cs,1); for(int i=0;i16) { mi*da=alc0(cs*2*(1+(!is_sqr))),*db=da+(cs+cs); for(int i=0;i(0,0),cs)); int c=cnt_bk(cs); assert(c>=1&&c<=7); if(cs>16) { mi*tg=alc(cs+cs,1); int l=st.size(); for(int i=0;i fbd=gfa(cu,bd,cs), fbd2=gfa(cu,bd+1,cs); if(!is_sqr) { for(int j=0;j> &st =*new vector>(); vector> &fa= *new vector>(); vector &pool=*new vector(); mi*sa=new mi[128],*sb=new mi[128], *la=new mi[128],*lb=new mi[128]; get_handle=[=,&oa,&ob,&oaf,&obf,&cp,&st,&fa,&pool,&cpool] (int u) { if(u) this->get(u-1); auto alc0=[&](int s) { if(cpool+s>(int)pool.size()) { auto od=pool.data(); pool.resize(max(cpool+s,(int)pool.size()*2)); auto dt=pool.data()-od; if(dt) for(auto&x:st) if(x.first) x.first+=dt; } assert(cpool+s<=(int)pool.size()); mi*ptr=pool.data()+cpool; ::cp(ptr,0,s); cpool+=s; return ptr; }; auto cnt_bk=[&](int x) { int ans=0; for(int j=(int)st.size()-1;j>=0;--j) if(st[j].second==x) ++ans; else break; return ans; }; auto st_pop=[&]() { if(st.back().first) cpool-=st.back().second*2; assert(cpool>=0); st.pop_back(); }; while(u>=oa+ob&&!obf) { if(b->get(ob)==0) ++ob; else obf=1; } while(u>=oa+ob&&!oaf) { if(a->get(oa)==0) ++oa; else oaf=1; } if(uget((x)+oa)) #define gb(x) (b->get((x)+ob)) la[u&127]=ga(u), lb[u&127]=gb(u); if(u<128) sa[u]=ga(u); if(u<64) sb[u*2]=gb(u*2),sb[u*2+1]=gb(u*2+1); cp[u]+=la[u&127]*sb[0]; auto gfa=[&](int cu,int of,int cs) { int id=cu*8+of; if((int)fa.size()16) { mi*da=alc0(cs+cs); for(int i=0;i=1&&c<=3); if(cs>16) { mi*tg=alc(cs+cs,1); int l=st.size(); for(int i=0;ityp="exp_helper"; tmp->get_handle=[=](int u) { if(u==0) return mi(0); return a->get(u)*u; }; ocpoly*m=new ocmul(this,tmp); get_handle=[=](int u) { if(u==0) return mi(1); return m->get(u)*rfac[u]*fac[u-1]; }; } }; struct ocmpsetp: public ocpoly { //mpset without exp ocmpsetp(ocpoly*a,bool s) { typ="mpset1"; vector &tmp=*new vector(); tmp.push_back(0); get_handle=[=,&tmp](int u) { if(u) this->get(u-1); int l=u; if(u>=(int)tmp.size()) { l=1; int ns=tmp.size()*2; tmp.clear();tmp.resize(ns); } for(int j=l;j<=u;++j) { mi v=a->get(j); if(j==0) assert(v==0); if(v==0) continue; for(int i=1;i*j<(int)tmp.size();++i) { mi iv=rfac[i]*fac[i-1]; if(!(i&1)&&s) iv=-iv; tmp[i*j]+=iv*v; } } return tmp[u]; }; } }; struct ocinvmpsetp: public ocpoly { //invmpset without ln ocinvmpsetp(ocpoly*a,bool s) { typ="invmpset1"; vector &tmp=*new vector(); tmp.push_back(0); get_handle=[=,&tmp](int u) { if(u) this->get(u-1); int l=u; if(u>=(int)tmp.size()) { l=1; int ns=tmp.size()*2; tmp.clear();tmp.resize(ns); } for(int j=l;j<=u;++j) { mi v=a->get(j)-tmp[j]; if(j==0) assert(v==0); if(v==0) continue; for(int i=2;i*j<(int)tmp.size();++i) { mi iv=rfac[i]*fac[i-1]; if(!(i&1)&&s) iv=-iv; tmp[i*j]+=iv*v; } } return a->get(u)-tmp[u]; }; } }; struct ocmset: public ocpoly { ocmset(ocpoly*a) { typ="mset";copy(new ocexp(new ocmpsetp(a,0))); } }; struct ocpset: public ocpoly { ocpset(ocpoly*a) { typ="pset";copy(new ocexp(new ocmpsetp(a,1))); } }; struct ocinv: public ocpoly { ocinv(ocpoly*a) { typ="inv"; mi &oi=*new mi(); ocpoly *s=new ocmul(new occorner(a,vector{0}),this); get_handle=[=,&oi](int u) { if(!u) return oi=inv(a->get(0)); this->get(0); return s->get(u)*(-oi); }; } }; struct ocsqrt: public ocpoly { ocsqrt(ocpoly*a,mi f0=1) { typ="sqrt"; mi c=inv(f0*2); ocpoly *g=new occorner(this,vector{0}); ocpoly *s=new ocminus(a,new ocmul(g,g)); get_handle=[=](int u) { if(!u) { mi w=s->get(u); assert(f0*f0==w); return f0; } return s->get(u)*c; }; } }; struct ocquo: public ocpoly { ocquo(ocpoly*a,ocpoly*b) { typ="quo"; mi &oi=*new mi(); ocpoly *s=new ocminus(a, new ocmul(new occorner(b,vector{0}),this)); get_handle=[=,&oi](int u) { if(!u) return s->get(0)*(oi=inv(b->get(0))); this->get(0); return s->get(u)*oi; }; } }; struct ocln: public ocpoly { ocln(ocpoly*a) { typ="ln";copy(new ocint(new ocquo(new ocde(a),a))); } }; struct ocinvmset: public ocpoly { ocinvmset(ocpoly*a) { typ="invmset";copy(new ocinvmpsetp(new ocln(a),0)); } }; struct ocinvpset: public ocpoly { ocinvpset(ocpoly*a) { typ="invpset";copy(new ocinvmpsetp(new ocln(a),1)); } }; struct ocpow: public ocpoly { ocpow(ocpoly*a,string k) { typ="pow"; mi m0=0; ll m1=0,m2=0; for(auto c:k) m0=m0*10+int(c-48), m1=(m1*10+c-48)%(MOD-1), m2=min(m2*10+c-48,ll(1e9)); ocpoly *&s=*new ocpoly*(); s=0; int &pad=*new int(); pad=0; get_handle=[=,&pad,&s](int u) { if(m2==0) return (u==0)?mi(1):mi(0); if(u) this->get(u-1); if(pad==u&&a->get(u)==0) ++pad; if(uget(0),m1); } return s->get(u); }; } ocpow(ocpoly*a,ll k):ocpow(a,to_string(k)) { } }; struct ocamp: public ocpoly { ocamp(ocpoly*a,int k) { typ="amp"; get_handle=[=](int u) { if(u%k) return mi(0); return a->get(u/k); }; } }; //+ pipeline (sugar for ocpoly*) struct pipeline { ocpoly*p; pipeline() {p=new ocpoly();} pipeline(ocpoly *q) {assert(q);p=q;} pipeline(poly s) {p=new ocfixed(s);} mi get(int n) {return p->get(n);} poly await(int n) {return p->await(n);} void set(pipeline q) {p->copy(q.p);} operator ocpoly*() {return p;} }; #define pl pipeline pl operator + (pl a,pl b){return new ocadd(a,b);} pl operator - (pl a,pl b){return new ocminus(a,b);} pl operator * (pl a,pl b){return new ocmul(a,b);} pl operator * (pl a,poly b){return new ocmul_semi(a,new ocfixed(b));} pl operator * (poly a,pl b){return new ocmul_semi(b,new ocfixed(a));} pl operator * (pl a,mi b){return new ocscale(a,b);} pl operator * (mi a,pl b){return new ocscale(b,a);} pl operator / (pl a,mi b){return new ocscale(a,inv(b));} pl gcorner(pl a,vector b){return new occorner(a,b);} pl gscale(pl a,mi b){return new ocscale(a,b);} pl gshl(pl a,int b){return new ocshl(a,b);} pl gshr(pl a,int b){return new ocshr(a,b);} pl gamp(pl a,int b){return new ocamp(a,b);} pl gde(pl a){return new ocde(a);} pl gint(pl a){return new ocint(a);} pl ginv(pl a){return new ocinv(a);} pl gexp(pl a){return new ocexp(a);} pl gln(pl a){return new ocln(a);} pl gpow(pl a,string k){return new ocpow(a,k);} pl gpow(pl a,ll k){return gpow(a,to_string(k));} pl gquo(pl a,pipeline b){return new ocquo(a,b);} pl gsqrt(pl a,mi f0=mi(1)){return new ocsqrt(a,f0);} pl PSet(pl a){return new ocpset(a);} pl MSet(pl a){return new ocmset(a);} pl invPSet(pl a){return new ocinvpset(a);} pl invMSet(pl a){return new ocinvmset(a);} pl operator - (pl a){return new ocscale(a,mi(-1));} #undef pl } using namespace onlineconv; int n,m,a[233333]; int main() { scanf("%d%d",&n,&m); for(int i=0;i(a,a+n); auto w=polygcd::bm_poly_fast(s); const auto &x=w.second.coeff; for(int u=1;u<(int)x.size();++u) printf("%d%c",(int)x[u]," \n"[u+1==int(x.size())]); printf("%d\n",(int)linear_eval(w.first,w.second,m)); }