Line data Source code
1 : #line 2 "../src/kernel/none/halfgcd.c"
2 : /* Copyright (C) 2019 The PARI group.
3 :
4 : This file is part of the PARI/GP package.
5 :
6 : PARI/GP is free software; you can redistribute it and/or modify it under the
7 : terms of the GNU General Public License as published by the Free Software
8 : Foundation; either version 2 of the License, or (at your option) any later
9 : version. It is distributed in the hope that it will be useful, but WITHOUT
10 : ANY WARRANTY WHATSOEVER.
11 :
12 : Check the License for details. You should have received a copy of it, along
13 : with the package; see the file 'COPYING'. If not, write to the Free Software
14 : Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
15 :
16 : GEN
17 7110502 : ZM2_mul(GEN A, GEN B)
18 : {
19 7110502 : const long t = ZM2_MUL_LIMIT+2;
20 7110502 : GEN A11=gcoeff(A,1,1),A12=gcoeff(A,1,2), B11=gcoeff(B,1,1),B12=gcoeff(B,1,2);
21 7110502 : GEN A21=gcoeff(A,2,1),A22=gcoeff(A,2,2), B21=gcoeff(B,2,1),B22=gcoeff(B,2,2);
22 7110502 : if (lgefint(A11) < t || lgefint(B11) < t || lgefint(A22) < t || lgefint(B22) < t
23 112134 : || lgefint(A12) < t || lgefint(B12) < t || lgefint(A21) < t || lgefint(B21) < t)
24 : {
25 7001776 : GEN a = mulii(A11, B11), b = mulii(A12, B21);
26 7001137 : GEN c = mulii(A11, B12), d = mulii(A12, B22);
27 7001187 : GEN e = mulii(A21, B11), f = mulii(A22, B21);
28 7001228 : GEN g = mulii(A21, B12), h = mulii(A22, B22);
29 7001104 : retmkmat2(mkcol2(addii(a,b), addii(e,f)), mkcol2(addii(c,d), addii(g,h)));
30 : } else
31 : {
32 108726 : GEN M1 = mulii(addii(A11,A22), addii(B11,B22));
33 108726 : GEN M2 = mulii(addii(A21,A22), B11);
34 108726 : GEN M3 = mulii(A11, subii(B12,B22));
35 108726 : GEN M4 = mulii(A22, subii(B21,B11));
36 108726 : GEN M5 = mulii(addii(A11,A12), B22);
37 108726 : GEN M6 = mulii(subii(A21,A11), addii(B11,B12));
38 108726 : GEN M7 = mulii(subii(A12,A22), addii(B21,B22));
39 108726 : GEN T1 = addii(M1,M4), T2 = subii(M7,M5);
40 108726 : GEN T3 = subii(M1,M2), T4 = addii(M3,M6);
41 108726 : retmkmat2(mkcol2(addii(T1,T2), addii(M2,M4)),
42 : mkcol2(addii(M3,M5), addii(T3,T4)));
43 : }
44 : }
45 :
46 : static GEN
47 652 : matid2(void)
48 : {
49 652 : retmkmat2(mkcol2(gen_1,gen_0),
50 : mkcol2(gen_0,gen_1));
51 : }
52 :
53 : /* Return M*[q,1;1,0] */
54 : static GEN
55 2516808 : mulq(GEN M, GEN q)
56 : {
57 2516808 : GEN u, v, res = cgetg(3, t_MAT);
58 2516826 : u = addii(mulii(gcoeff(M,1,1), q), gcoeff(M,1,2));
59 2516802 : v = addii(mulii(gcoeff(M,2,1), q), gcoeff(M,2,2));
60 2516807 : gel(res,1) = mkcol2(u, v);
61 2516812 : gel(res,2) = gel(M,1);
62 2516812 : return res;
63 : }
64 : static GEN
65 64 : mulqab(GEN M, GEN q, GEN *ap, GEN *bp)
66 : {
67 64 : GEN b = subii(*ap, mulii(*bp, q));
68 64 : *ap = *bp; *bp = b;
69 64 : return mulq(M,q);
70 : }
71 :
72 : /* Return M*[q,1;1,0]^-1 */
73 :
74 : static GEN
75 1114 : mulqi(GEN M, GEN q, GEN *ap, GEN *bp)
76 : {
77 : GEN u, v, res, a;
78 1114 : a = addii(mulii(*ap, q), *bp);
79 1114 : *bp = *ap; *ap = a;
80 1114 : res = cgetg(3, t_MAT);
81 1114 : u = subii(gcoeff(M,1,1),mulii(gcoeff(M,1,2), q));
82 1114 : v = subii(gcoeff(M,2,1),mulii(gcoeff(M,2,2), q));
83 1114 : gel(res,1) = gel(M,2);
84 1114 : gel(res,2) = mkcol2(u,v);
85 1114 : return res;
86 : }
87 :
88 : /* test whether n is a power of 2 */
89 : static long
90 12812557 : isint2n(GEN n)
91 : {
92 : GEN x;
93 12812557 : long lx = lgefint(n), i;
94 12812557 : if (lx == 2) return 0;
95 12812557 : x = int_MSW(n);
96 12812557 : if (*(ulong*)x != 1UL<<expu(*(ulong*)x) ) return 0;
97 543779 : for (i = 3; i < lx; i++)
98 : {
99 536471 : x = int_precW(x); if (*x) return 0;
100 : }
101 7308 : return 1;
102 : }
103 :
104 : static long
105 12812911 : uexpi(GEN a)
106 12812911 : { return expi(a)+!isint2n(a); }
107 :
108 : static GEN
109 107953 : FIXUP0(GEN M, GEN *a, GEN *b, long m)
110 : {
111 107953 : long cnt=0;
112 137449 : while (expi(*b) >= m)
113 : {
114 29496 : GEN r, q = dvmdii(*a, *b, &r);
115 29496 : *a = *b; *b = r;
116 29496 : M = mulq(M, q);
117 29496 : cnt++;
118 : };
119 107953 : if (cnt>6) pari_err_BUG("FIXUP0");
120 107953 : return M;
121 : }
122 :
123 : static long
124 5809039 : signdet(GEN Q)
125 : {
126 5809039 : long a = Mod4(gcoeff(Q,1,1)), b = Mod4(gcoeff(Q,1,2));
127 5809672 : long c = Mod4(gcoeff(Q,2,1)), d = Mod4(gcoeff(Q,2,2));
128 5810484 : return ((a*d-b*c)&3)==1 ? 1 : -1;
129 : }
130 :
131 : static GEN
132 5642750 : ZM_inv2(GEN M)
133 : {
134 5642750 : long e = signdet(M);
135 9040750 : if (e==1) return mkmat22(gcoeff(M,2,2),negi(gcoeff(M,1,2)),
136 3397259 : negi(gcoeff(M,2,1)),gcoeff(M,1,1));
137 2246473 : else return mkmat22(negi(gcoeff(M,2,2)),gcoeff(M,1,2),
138 2246467 : gcoeff(M,2,1),negi(gcoeff(M,1,1)));
139 : }
140 :
141 : static GEN
142 1114 : lastq(GEN Q)
143 : {
144 1114 : GEN p = gcoeff(Q,1,1), q = gcoeff(Q,1,2), s = gcoeff(Q,2,2);
145 1114 : if (signe(q)==0) pari_err_BUG("halfgcd");
146 1114 : if (signe(s)==0) return p;
147 1114 : if (equali1(q)) return subiu(p,1);
148 1114 : return divii(p, q);
149 : }
150 :
151 : static GEN
152 6842 : mulT(GEN Q, GEN *ap, GEN *bp)
153 : {
154 6842 : *ap = addii(*ap, *bp);
155 6842 : *bp = negi(*bp);
156 6842 : return mkmat2(gel(Q,1),
157 6842 : mkcol2(subii(gcoeff(Q,1,1), gcoeff(Q,1,2))
158 6842 : , subii(gcoeff(Q,2,1), gcoeff(Q,2,2))));
159 : }
160 :
161 : static GEN
162 166493 : FIXUP1(GEN M, GEN a, GEN b, long m, long t, GEN *ap, GEN *bp)
163 : {
164 166493 : GEN Q = gel(M,1), a0 = gel(M,2), b0 = gel(M,3);
165 166493 : GEN q, am = remi2n(a, m), bm = remi2n(b, m);
166 166638 : if (signdet(Q)==-1)
167 : {
168 112285 : *ap = subii(mulii(bm, gcoeff(Q,1,2)),mulii(am, gcoeff(Q,2,2)));
169 112170 : *bp = subii(mulii(am, gcoeff(Q,2,1)),mulii(bm, gcoeff(Q,1,1)));
170 112464 : *ap = addii(*ap, shifti(addii(a0, gcoeff(Q,2,2)), m));
171 112349 : *bp = addii(*bp, shifti(subii(b0, gcoeff(Q,2,1)), m));
172 111802 : if (signe(*bp) >= 0)
173 104884 : return Q;
174 6918 : if (expi(addii(*ap,*bp)) >= m+t)
175 6842 : return mulT(Q, ap ,bp);
176 76 : q = lastq(Q);
177 76 : Q = mulqi(Q, q, ap, bp);
178 76 : if (cmpiu(q, 2)>=0)
179 64 : return mulqab(Q, subiu(q,1), ap, bp);
180 : else
181 12 : return mulqi(Q, lastq(Q), ap, bp);
182 : }
183 : else
184 : {
185 54291 : *ap = subii(mulii(am, gcoeff(Q,2,2)),mulii(bm, gcoeff(Q,1,2)));
186 54291 : *bp = subii(mulii(bm, gcoeff(Q,1,1)),mulii(am, gcoeff(Q,2,1)));
187 54291 : *ap = addii(*ap, shifti(subii(a0, gcoeff(Q,2,2)), m));
188 54291 : *bp = addii(*bp, shifti(addii(b0, gcoeff(Q,2,1)), m));
189 54291 : if (expi(*ap) >= m+t)
190 53265 : return FIXUP0(Q, ap, bp, m+t);
191 : else
192 1026 : return signe(gcoeff(Q,1,2))==0? Q: mulqi(Q, lastq(Q), ap, bp);
193 : }
194 : }
195 :
196 : static long
197 12758307 : magic_threshold(GEN a)
198 12758307 : { return (3+uexpi(a))>>1; }
199 :
200 : static GEN
201 9530368 : HGCD_basecase(GEN y, GEN x)
202 : {
203 9530368 : pari_sp av = avma;
204 : GEN d, d1, q, r;
205 : GEN u, u1, v, v1;
206 : ulong xu, xu1, xv, xv1; /* Lehmer stage recurrence matrix */
207 : int lhmres; /* Lehmer stage return value */
208 :
209 9530368 : long m = magic_threshold(y);
210 :
211 : /* There is no special case for single-word numbers since this is
212 : * mainly meant to be used with large moduli. */
213 9530286 : if (cmpii(y,x) <= 0)
214 : {
215 92217 : d = x; d1 = y;
216 92217 : u = gen_1; u1 = gen_0;
217 92217 : v = gen_0; v1 = gen_1;
218 : } else
219 : {
220 9438066 : d = y; d1 = x;
221 9438066 : u = gen_0; u1 = gen_1;
222 9438066 : v = gen_1; v1 = gen_0;
223 : }
224 28388194 : while (lgefint(d) > 3 && expi(d1) >= m + BITS_IN_LONG + 1)
225 : {
226 : /* do a Lehmer-Jebelean round */
227 19192728 : lhmres = lgcdii((ulong *)d, (ulong *)d1, &xu, &xu1, &xv, &xv1, 0);
228 :
229 19198807 : if (lhmres)
230 : {
231 18222109 : if (lhmres == 1 || lhmres == -1)
232 : {
233 448178 : if (xv1 == 1)
234 : {
235 386219 : r = subii(d,d1); d = d1; d1 = r;
236 386209 : r = addii(u,u1); u = u1; u1 = r;
237 386036 : r = addii(v,v1); v = v1; v1 = r;
238 : }
239 : else
240 : {
241 61959 : r = subii(d, mului(xv1,d1)); d = d1; d1 = r;
242 61959 : r = addii(u, mului(xv1,u1)); u = u1; u1 = r;
243 61959 : r = addii(v, mului(xv1,v1)); v = v1; v1 = r;
244 : }
245 : }
246 : else
247 : {
248 17773931 : r = subii(muliu(d,xu), muliu(d1,xv));
249 17770991 : d1 = subii(muliu(d,xu1), muliu(d1,xv1)); d = r;
250 17770942 : r = addii(muliu(u,xu), muliu(u1,xv));
251 17767611 : u1 = addii(muliu(u,xu1), muliu(u1,xv1)); u = r;
252 17767568 : r = addii(muliu(v,xu), muliu(v1,xv));
253 17767546 : v1 = addii(muliu(v,xu1), muliu(v1,xv1)); v = r;
254 17770700 : if (lhmres&1) togglesign(d); else togglesign(d1);
255 : }
256 : } /* lhmres != 0 */
257 19195976 : if (expi(d1) < m) break;
258 :
259 18858879 : if (lhmres <= 0 && signe(d1))
260 : {
261 1020890 : q = dvmdii(d,d1,&r);
262 1020902 : d = d1; d1 = r;
263 1020902 : r = addii(u, mulii(q,u1)); u = u1; u1 = r;
264 1020430 : r = addii(v, mulii(q,v1)); v = v1; v1 = r;
265 : }
266 18858451 : if (gc_needed(av,1))
267 : {
268 0 : if(DEBUGMEM>1) pari_warn(warnmem,"ratlift");
269 0 : gerepileall(av, 6, &d, &d1, &u, &u1, &v, &v1);
270 : }
271 : }
272 76506332 : while (expi(d1) >= m)
273 : {
274 66980969 : GEN r, q = dvmdii(d,d1, &r);
275 66991769 : d = d1; d1 = r; swap(u,u1); swap(v,v1);
276 66991769 : u1 = addii(mulii(u, q), u1);
277 66975658 : v1 = addii(mulii(v, q), v1);
278 : }
279 9523410 : return gerepilecopy(av, mkvec3(mkmat22(u1,u,v1,v), d, d1));
280 : }
281 :
282 : static GEN HGCD(GEN x, GEN y);
283 :
284 : /*
285 : Based on
286 : Klaus Thull and Chee K. Yap,
287 : A unified approach to HGCD algorithms for polynomials andintegers,
288 : 1990, Manuscript.
289 : URL: http://cs.nyu.edu/cs/faculty/yap/papers.
290 : */
291 :
292 : static GEN
293 112310 : HGCD_split(GEN a, GEN b)
294 : {
295 112310 : pari_sp av = avma;
296 112310 : long m = magic_threshold(a), t, l, k, tp;
297 : GEN a0, b0, ap, bp, c, d, c0, d0, cp, dp, R, S, T, q, r;
298 112202 : if (signe(b) < 0 || cmpii(a,b)<0) pari_err_BUG("HGCD_split");
299 112298 : if (expi(b) < m)
300 430 : return gerepilecopy(av, mkvec3(matid2(), a, b));
301 111801 : a0 = addiu(shifti(a, -m), 1);
302 111927 : if (cmpiu(a0,7) <= 0)
303 : {
304 0 : R = FIXUP0(matid2(), &a, &b, m);
305 0 : return gerepilecopy(av, mkvec3(R, a, b));
306 : }
307 111880 : b0 = shifti(b,-m);
308 112050 : t = magic_threshold(a0);
309 111767 : R = FIXUP1(HGCD(a0,b0),a, b, m, t, &ap, &bp);
310 111379 : if (expi(bp) < m)
311 56660 : return gerepilecopy(av, mkvec3(R, ap, bp));
312 54688 : q = dvmdii(ap, bp, &r);
313 54688 : c = bp; d = r;
314 54688 : if (cmpiu(shifti(c,-m),6) <= 0)
315 : {
316 21 : R = FIXUP0(mulq(R, q), &c, &d, m);
317 21 : return gerepilecopy(av, mkvec3(R, c, d));
318 : }
319 54667 : l = uexpi(c);
320 54667 : k = 2*m-l-1; if (k<0) pari_err_BUG("halfgcd");
321 54667 : c0 = addiu(shifti(c, -k), 1); if (cmpiu(c0,8)<0) pari_err_BUG("halfgcd");
322 54667 : d0 = shifti(d, -k);
323 54667 : tp = magic_threshold(c0);
324 54667 : S = FIXUP1(HGCD(c0,d0), c, d, k, tp, &cp, &dp);
325 54667 : if (!(expi(cp)>=m+1 && m+1 > expi(dp))) pari_err_BUG("halfgcd");
326 54667 : T = FIXUP0(ZM2_mul(mulq(R, q), S), &cp, &dp, m);
327 54667 : return gerepilecopy(av, mkvec3(T, cp, dp));
328 : }
329 :
330 : static GEN
331 9642443 : HGCD(GEN x, GEN y)
332 : {
333 9642443 : if (lgefint(y)-2 < HALFGCD_LIMIT)
334 9530360 : return HGCD_basecase(x, y);
335 : else
336 112083 : return HGCD_split(x, y);
337 : }
338 :
339 : static GEN
340 16839162 : HGCD0(GEN x, GEN y)
341 : {
342 16839162 : if (signe(y) >= 0 && cmpii(x, y) >= 0)
343 9472262 : return HGCD(x, y);
344 7366918 : if (cmpii(x, y) < 0)
345 : {
346 6829448 : GEN M = HGCD0(y, x), Q = gel(M,1);
347 6828370 : return mkvec3(mkmat22(gcoeff(Q,2,1),gcoeff(Q,2,2),gcoeff(Q,1,1),gcoeff(Q,1,2)),
348 6829622 : gel(M,2),gel(M,3));
349 : } /* Now y <= x*/
350 537716 : if (signe(x) <= 0)
351 : { /* y <= x <=0 */
352 3898 : GEN M = HGCD(negi(y), negi(x)), Q = gel(M,1);
353 3898 : return mkvec3(mkmat22(negi(gcoeff(Q,2,1)),negi(gcoeff(Q,2,2)),
354 3898 : negi(gcoeff(Q,1,1)),negi(gcoeff(Q,1,2))),
355 3898 : gel(M,2),gel(M,3));
356 : }
357 : else /* y <= 0 <=x */
358 : {
359 533818 : GEN M = HGCD0(x, negi(y)), Q = gel(M,1);
360 533818 : return mkvec3(mkmat22(gcoeff(Q,1,1),gcoeff(Q,1,2),negi(gcoeff(Q,2,1)),negi(gcoeff(Q,2,2))),
361 533818 : gel(M,2),gel(M,3));
362 : }
363 : }
364 :
365 : GEN
366 5643327 : halfgcdii(GEN A, GEN B)
367 : {
368 5643327 : pari_sp av = avma;
369 5643327 : GEN M, Q, a, b, m = abscmpii(A, B)>0 ? A: B;
370 5643404 : M = HGCD0(A,B); Q = gel(M,1); a = gel(M,2); b = gel(M,3);
371 8074733 : while (signe(b) && abscmpii(sqri(b), m) >= 0)
372 : {
373 2432418 : GEN r, q = dvmdii(a, b, &r);
374 2432416 : a = b; b = r;
375 2432416 : Q = mulq(Q, q);
376 : }
377 5642611 : return gerepilecopy(av, mkvec2(ZM_inv2(Q),mkcol2(a,b)));
378 : }
|