Let $f(l, r) = \gcd(a_l, \dots, a_r)$.
As $a_i \le 10^9$, there are at most $30$ distinct values among $f(1, r), \dots, f(r, r)$.
Similarly, there are at most $30$ distinct values among $f(l, l), \dots, f(l, n)$.
Record the partition points, and computing $f$ can be replaced by binary search.
This leads to an $O(n\log C + qn^2\log\log C)$ algorithm.
// f(l, r) = gcd(a[l..r]).  For a fixed right end i, f(j, i) takes at most
// ~log2(max a) distinct values as j varies, so v[i] stores one entry per
// distinct value: {idx, val}, where idx is the LARGEST j with f(j, i) == val
// (the right end of the segment of left ends sharing that gcd).
int a[100000];
struct P { int idx, val; };
std::vector<P> v[100000];

// Build v[0..n-1].  v[i] is derived from v[i-1]: gcd every stored segment
// value with a[i], append the singleton segment [i, i], then merge adjacent
// entries whose values became equal.  Each list has O(log C) entries, so the
// whole build runs in O(n log C) gcd operations.
void build(int n) {
    std::vector<P> vc;  // segment list for the current right end i
    for (int i = 0; i < n; i++) {
        // Extend every existing segment's gcd by a[i].
        for (P &p : vc)
            p.val = std::gcd(p.val, a[i]);
        vc.push_back({i, a[i]});
        // Merge runs of equal values, keeping the largest idx of each run.
        std::vector<P> merged;
        for (const P &p : vc) {
            if (!merged.empty() && merged.back().val == p.val)
                merged.back().idx = p.idx;  // extend the run to the right
            else
                merged.push_back(p);
        }
        vc = std::move(merged);
        v[i] = vc;
    }
}

// Sum of f(j, i) over all l <= j <= i < r (right end exclusive).
// For each pair, gcd(a[j..i]) is looked up by binary search in v[i]:
// the first entry whose idx is >= j is the segment covering j.
long solve(int l, int r) {
    long ans = 0;
    for (int i = l; i < r; i++) {
        for (int j = l; j <= i; j++)
            ans += std::partition_point(v[i].begin(), v[i].end(),
                [j](P p) { return p.idx < j; })->val;
    }
    return ans;
}
The nested loops in solve
can be optimized. For example:
long ans = 0;
for (int i = l; i < r; i++) {
    // Walk the O(log C) segments of v[i]; the segment ending at p.idx
    // covers left ends lb..p.idx, all sharing gcd p.val.
    int lb = l;
    for (P p : v[i])
        if (lb <= p.idx) {
            // Widen BEFORE multiplying: p.val can be up to 1e9 and the
            // count up to n, so int * int would overflow.
            ans += static_cast<long>(p.val) * (p.idx - lb + 1);
            lb = p.idx + 1;
        }
}
return ans;
This improves the time complexity to $O(qn\log C)$.
It is not hard to improve it to $O((n+q)\log C)$.