CometOJ Contest #3 C

题目链接：https://cometoj.com/contest/38/problem/C?problem_id=1542&myself=0&result=0&page=1&contestID=38&problemID=C

题目大意

　　略

准备工作

　　已知原序列为 a₁, a₂……a_n。

　　设 b₁, b₂……b_p 为原序列的一个子序列。

　　定义 Ans(b₁, b₂……b_p) 为对应序列的完美子序列种数。

　　定义 Sum(b₁, b₂……b_p) 为对应序列的所有非空子序列的整数之和。

　　定义 Sum[b₁, b₂……b_p] = b₁ + b₂……+ b_p。

　　找一下规律：Sum(b₁, b₂……b_p) = 2^p-1 * Sum[b₁, b₂……b_p]。

分析1

　　首先说一下，分析1是为分析2服务的，分析1给出的代码很暴力，不仅超时，还超内存，但是最容易理解。

　　一开始我的DP思路是搞一个二维数组，即 dp[i][j] 表示前 i 个元素中，满足序列长度为 j 的完美序列个数。如果我这样定义，那么答案就为$\sum_{i = 1}^n dp[n][i]$。然而转移方程根本不晓得怎么推，还是得把模 m 余数为 0 的子序列都找出来，并且你在算完 dp[i][j] 之后算 dp[i + 1][j + 1] 的时候，之前某些模 m 余数不为 0 的子序列在加入 a[i + 1] 之后就有可能为 0，因此子序列模 m 的余数也应该作为一个维度。

　　重新定义一下dp数组：dp[i][j][k]，表示前 i 个元素中，满足序列长度为 j ，模 m 余数为 k 的子序列个数。这样的话答案就为$\sum_{j = 1}^n dp[n][j][0]$。

　　在这个定义下，如果我们已经算出来了 dp[i][j][k]，那么对于 a[i + 1]，有拿和不拿两种选择，对应递推公式如下：

拿：dp[i + 1][j + 1][(2 * k + a[i+1] * 2^j) % m] += dp[i][j][k]。
不拿：dp[i + 1][j][k] = dp[i][j][k]。

　　初始 dp[0][0][0] = 1，因为 m | 0 。

　　为了减少空间占用，可以使用滚动数组。

代码如下

 #include <bits/stdc++.h>

 using namespace std;

 #define INIT() std::ios::sync_with_stdio(false);std::cin.tie(0);

 #define Rep(i,n) for (int i = 0; i < (n); ++i)

 #define For(i,s,t) for (int i = (s); i <= (t); ++i)

 #define rFor(i,t,s) for (int i = (t); i >= (s); --i)

 #define ForLL(i, s, t) for (LL i = LL(s); i <= LL(t); ++i)

 #define rForLL(i, t, s) for (LL i = LL(t); i >= LL(s); --i)

 #define foreach(i,c) for (__typeof(c.begin()) i = c.begin(); i != c.end(); ++i)

 #define rforeach(i,c) for (__typeof(c.rbegin()) i = c.rbegin(); i != c.rend(); ++i)

 #define pr(x) cout << #x << " = " << x << "  "

 #define prln(x) cout << #x << " = " << x << endl

 #define LOWBIT(x) ((x)&(-x))

 #define ALL(x) x.begin(),x.end()

 #define INS(x) inserter(x,x.begin())

 #define ms0(a) memset(a,0,sizeof(a))

 #define msI(a) memset(a,inf,sizeof(a))

 #define msM(a) memset(a,-1,sizeof(a))

 #define mcy(d,s) memcpy(d, s, sizeof(s))

 #define MP make_pair

 #define PB push_back

 #define ft first

 #define sd second

 template<typename T1, typename T2>

 istream &operator>>(istream &in, pair<T1, T2> &p) {

     in >> p.first >> p.second;

     return in;

 }

 template<typename T>

 istream &operator>>(istream &in, vector<T> &v) {

     for (auto &x: v)

         in >> x;

     return in;

 }

 template<typename T1, typename T2>

 ostream &operator<<(ostream &out, const std::pair<T1, T2> &p) {

     out << "[" << p.first << ", " << p.second << "]" << "\n";

     return out;

 }

 inline int gc(){

     static const int BUF = 1e7;

     static char buf[BUF], *bg = buf + BUF, *ed = bg;

     if(bg == ed) fread(bg = buf, , BUF, stdin);

     return *bg++;

 } 

 inline int ri(){

     int x = , f = , c = gc();

     for(; c<||c>; f = c=='-'?-:f, c=gc());

     for(; c>&&c<; x = x* + c - , c=gc());

     return x*f;

 }

 typedef long long LL;

 typedef unsigned long long uLL;

 typedef pair< double, double > PDD;

 typedef pair< int, int > PII;

 typedef pair< string, int > PSI;

 typedef set< int > SI;

 typedef vector< int > VI;

 typedef map< int, int > MII;

 typedef pair< LL, LL > PLL;

 typedef vector< LL > VL;

 typedef vector< VL > VVL;

 const double EPS = 1e-;

 const LL inf = 0x7fffffff;

 const LL infLL = 0x7fffffffffffffffLL;

 const LL mod = 1e9 + ;

 const int maxN = 5e3 + ;

 const LL ONE = ;

 const LL evenBits = 0xaaaaaaaaaaaaaaaa;

 const LL oddBits = 0x5555555555555555;

 // Calculate x^y % p

 inline LL pow_mod(LL x, LL y, LL p = mod){

     LL ret = ;

     while(y){

         if(y & ) ret = (ret * x) % p;

         x = (x * x) % p;

         y >>= ;

     }

     return ret;

 } 

 int n, m, a[maxN];

 LL ans;

 // dp[i][j][k] 表示在前i个元素组成的序列中，选择了j个数，结果为k(mod m)的序列种数

 LL dp[][maxN][maxN];

 int main(){

     INIT();

     cin >> n >> m;

     For(i, , n) cin >> a[i];

     int now; //< 滚动数组标识

     dp[][][] = ; //< 算上空序列，方便计算

     For(i, , n) {

         now = i % ;

         mcy(dp[now], dp[!now]);

         For(j, , n) { // 枚举子数列长度

             Rep(k, m) { // 枚举余数

                 // 选a[i]

                 int tmp = ( * k + a[i] * pow_mod(, j, m)) % m; //< 产生新的余数

                 dp[now][j + ][tmp] += dp[!now][j][k];

                 // 不选a[i]不必讨论，因为 mcy 函数复制的同时搞定了这个分支

                 // dp[now][j][k] = dp[!now][j][k];

             }

         }

     }

     For(j, , n) ans = (ans + dp[now][j][]) % mod;

     cout << ans << endl;

     return ;

 }

分析2

　　分析2提供了两点优化，但程序仍然通过不了（爆内存），分析3将讨论如何降低空间复杂度。

　　设 x 为 m 中质因子 2 的个数。

　　由于 m <= 5000，所以 0 <= x <= 12。

　　则 m 可写成：

$$
\begin{align*}
m &= 2^0 * q_0 \\
&= 2^1 * q_1 \\
&……… \\
&……… \\
&= 2^x * q_x
\end{align*}
$$

　　如果序列 (b₁, b₂……b_p) 是完美的，一定有 m | Sum(b₁, b₂……b_p)，也就是说一定有：

$$
\begin{align*}
&2^0 | 2^{p-1}\quad and\quad q_0 | Sum[b1, b2……bp] \\
or\quad &2^1 | 2^{p-1}\quad and\quad q_1 | Sum[b1, b2……bp] \\
or\quad &……………………………… \\
or\quad &……………………………… \\
or\quad &2^x | 2^{p-1}\quad and\quad q_x | Sum[b1, b2……bp]
\end{align*}
$$

　　上面至少有一个式子是成立的。

　　可以发现，当 p > x 时，要证明序列 (b₁, b₂……b_p) 是完美的，只要$q_x | Sum[b1, b2……bp]$成立即可。

　　又，在 0 <= p <= x 时，要证明序列 (b₁, b₂……b_p) 是完美的，只要$q_p | Sum[b1, b2……bp]$成立即可。

　　因此，当 p > x 时，只要讨论 $m = 2^x * q_x$的拆分情况即可；当 0 <= p <= x 时，只要讨论 $m = 2^p * q_p$ 的拆分情况即可。

　　也就是说，对于每一个子序列，都有唯一一个判定式$q_j | Sum[b1, b2……bp]$(0 <= j <= x) 能判断这个序列是否是完美的。

　　于是 dp 数组的第三维就不用算出整个和然后模 m 了，而只要算出 Sum[b1, b2……bp] 然后去模对应的 q_j 即可。

　　以上是第一个优化。

　　第二个优化是关于枚举余数部分，其实并没有必要从 0 枚举到 m - 1，只需要枚举到 min(m, q_j * 2) 即可，由于第一个优化的关系，随着 j 的增加， q_j 两倍两倍地减小，上一级产生的余数刚好为这一级的两倍，所以在不超过 m 的情况下，只需枚举到 q_j * 2。

代码如下

 #include <bits/stdc++.h>

 using namespace std;

 #define INIT() std::ios::sync_with_stdio(false);std::cin.tie(0);

 #define Rep(i,n) for (int i = 0; i < (n); ++i)

 #define For(i,s,t) for (int i = (s); i <= (t); ++i)

 #define rFor(i,t,s) for (int i = (t); i >= (s); --i)

 #define ForLL(i, s, t) for (LL i = LL(s); i <= LL(t); ++i)

 #define rForLL(i, t, s) for (LL i = LL(t); i >= LL(s); --i)

 #define foreach(i,c) for (__typeof(c.begin()) i = c.begin(); i != c.end(); ++i)

 #define rforeach(i,c) for (__typeof(c.rbegin()) i = c.rbegin(); i != c.rend(); ++i)

 #define pr(x) cout << #x << " = " << x << "  "

 #define prln(x) cout << #x << " = " << x << endl

 #define LOWBIT(x) ((x)&(-x))

 #define ALL(x) x.begin(),x.end()

 #define INS(x) inserter(x,x.begin())

 #define ms0(a) memset(a,0,sizeof(a))

 #define msI(a) memset(a,inf,sizeof(a))

 #define msM(a) memset(a,-1,sizeof(a))

 #define mcy(d,s) memcpy(d, s, sizeof(s))

 #define MP make_pair

 #define PB push_back

 #define ft first

 #define sd second

 template<typename T1, typename T2>

 istream &operator>>(istream &in, pair<T1, T2> &p) {

     in >> p.first >> p.second;

     return in;

 }

 template<typename T>

 istream &operator>>(istream &in, vector<T> &v) {

     for (auto &x: v)

         in >> x;

     return in;

 }

 template<typename T1, typename T2>

 ostream &operator<<(ostream &out, const std::pair<T1, T2> &p) {

     out << "[" << p.first << ", " << p.second << "]" << "\n";

     return out;

 }

 inline int gc(){

     static const int BUF = 1e7;

     static char buf[BUF], *bg = buf + BUF, *ed = bg;

     if(bg == ed) fread(bg = buf, , BUF, stdin);

     return *bg++;

 } 

 inline int ri(){

     int x = , f = , c = gc();

     for(; c<||c>; f = c=='-'?-:f, c=gc());

     for(; c>&&c<; x = x* + c - , c=gc());

     return x*f;

 }

 typedef long long LL;

 typedef unsigned long long uLL;

 typedef pair< double, double > PDD;

 typedef pair< int, int > PII;

 typedef pair< string, int > PSI;

 typedef set< int > SI;

 typedef vector< int > VI;

 typedef map< int, int > MII;

 typedef pair< LL, LL > PLL;

 typedef vector< LL > VL;

 typedef vector< VL > VVL;

 const double EPS = 1e-;

 const LL inf = 0x7fffffff;

 const LL infLL = 0x7fffffffffffffffLL;

 const LL mod = 1e9 + ;

 const int maxN = 5e3 + ;

 const LL ONE = ;

 const LL evenBits = 0xaaaaaaaaaaaaaaaa;

 const LL oddBits = 0x5555555555555555;

 // 计算x的二进制位数

 inline int getBits(LL x) {

     int cnt = ;

     while(x >>= ) ++cnt;

     return cnt;

 } 

 int n, m, a[maxN];

 LL ans;

 // dp[i][j][k] 表示在前i个元素组成的序列中，选择了j个数，结果为k(mod m)的序列种数

 LL dp[][maxN][maxN];

 // pow2[i] = 2^i

 int pow2[] = {, , , , , , , , , , , , , };

 int main(){

     INIT();

     cin >> n >> m;

     For(i, , n) cin >> a[i];

     int x = getBits(LOWBIT(m)) - ; //< x = m中质因子2的个数

     int now; //< 滚动数组标识 

     dp[][][] = ; //< 算上空序列，方便计算

     For(i, , n) {

         now = i % ;

         mcy(dp[now], dp[!now]);

         For(j, , n) { // 枚举子数列长度

             int q = m / pow2[x];

             if(j < x) q = m / pow2[j];

             Rep(k, min(m, q << )) { // 枚举余数

                 // 选a[i]

                 int tmp = (k + a[i]) % q;

                 if(j < x) dp[now][j + ][tmp] += dp[!now][j][k];

                 else dp[now][j + ][tmp] += dp[!now][j][k];

                 // 不选a[i]不必讨论，因为 mcy 函数复制的同时搞定了这个分支

                 // dp[now][j][k] = dp[!now][j][k];

             }

         }

     }

     For(j, , n) ans = (ans + dp[now][j][]) % mod;

     cout << ans << endl;

     return ;

 }

分析3

　　分析2中，当 p >= x 时，序列的完美只与$q_x | Sum[b1, b2……bp]$有关，即不同 dp[i][j][k] （x <= j <= n）的 k 都是序列对 q_x 取模得到的，余数相同是可以合并的，并不需要专门有个 j 来标识不同，因此可以用 dp[i][x][k] 来代表 $\sum_{j = x}^n dp[i][j][k]$。于是，dp数组就有如下分段定义：

当 0 <= j < x，时，dp[i][j][k] 表示前 i 个元素中，满足序列长度为 j ，模 q_j 余数为 k 的子序列个数。
当 j == x，时，dp[i][j][k] 表示前 i 个元素中，满足所有序列长度大于等于 j ，模 q_x 余数为 k 的子序列个数。

　　这里只叙述一下 j == x 时的状态转移方程：

拿：dp[i + 1][j][(k + a[i+1]) % q_x] += dp[i][j][k]。
不拿：dp[i + 1][j][k] = dp[i][j][k]。

　　PS：x 是能等于 0 的，所以在计算答案时要变化一下。

代码如下

 #include <bits/stdc++.h>

 using namespace std;

 #define INIT() std::ios::sync_with_stdio(false);std::cin.tie(0);

 #define Rep(i,n) for (int i = 0; i < (n); ++i)

 #define For(i,s,t) for (int i = (s); i <= (t); ++i)

 #define rFor(i,t,s) for (int i = (t); i >= (s); --i)

 #define ForLL(i, s, t) for (LL i = LL(s); i <= LL(t); ++i)

 #define rForLL(i, t, s) for (LL i = LL(t); i >= LL(s); --i)

 #define foreach(i,c) for (__typeof(c.begin()) i = c.begin(); i != c.end(); ++i)

 #define rforeach(i,c) for (__typeof(c.rbegin()) i = c.rbegin(); i != c.rend(); ++i)

 #define pr(x) cout << #x << " = " << x << "  "

 #define prln(x) cout << #x << " = " << x << endl

 #define LOWBIT(x) ((x)&(-x))

 #define ALL(x) x.begin(),x.end()

 #define INS(x) inserter(x,x.begin())

 #define ms0(a) memset(a,0,sizeof(a))

 #define msI(a) memset(a,inf,sizeof(a))

 #define msM(a) memset(a,-1,sizeof(a))

 #define mcy(d,s) memcpy(d, s, sizeof(s))

 #define MP make_pair

 #define PB push_back

 #define ft first

 #define sd second

 template<typename T1, typename T2>

 istream &operator>>(istream &in, pair<T1, T2> &p) {

     in >> p.first >> p.second;

     return in;

 }

 template<typename T>

 istream &operator>>(istream &in, vector<T> &v) {

     for (auto &x: v)

         in >> x;

     return in;

 }

 template<typename T1, typename T2>

 ostream &operator<<(ostream &out, const std::pair<T1, T2> &p) {

     out << "[" << p.first << ", " << p.second << "]" << "\n";

     return out;

 }

 inline int gc(){

     static const int BUF = 1e7;

     static char buf[BUF], *bg = buf + BUF, *ed = bg;

     if(bg == ed) fread(bg = buf, , BUF, stdin);

     return *bg++;

 } 

 inline int ri(){

     int x = , f = , c = gc();

     for(; c<||c>; f = c=='-'?-:f, c=gc());

     for(; c>&&c<; x = x* + c - , c=gc());

     return x*f;

 }

 typedef long long LL;

 typedef unsigned long long uLL;

 typedef pair< double, double > PDD;

 typedef pair< int, int > PII;

 typedef pair< string, int > PSI;

 typedef set< int > SI;

 typedef vector< int > VI;

 typedef map< int, int > MII;

 typedef pair< LL, LL > PLL;

 typedef vector< LL > VL;

 typedef vector< VL > VVL;

 const double EPS = 1e-;

 const LL inf = 0x7fffffff;

 const LL infLL = 0x7fffffffffffffffLL;

 const LL mod = 1e9 + ;

 const int maxN = 5e3 + ;

 const LL ONE = ;

 const LL evenBits = 0xaaaaaaaaaaaaaaaa;

 const LL oddBits = 0x5555555555555555;

 // 计算x的二进制位数

 inline int getBits(LL x) {

     int cnt = ;

     while(x >>= ) ++cnt;

     return cnt;

 } 

 int n, m, a[maxN];

 LL ans;

 LL dp[][][maxN];

 // pow2[i] = 2^i

 int pow2[] = {, , , , , , , , , , , , , };

 int main(){

     INIT();

     cin >> n >> m;

     For(i, , n) cin >> a[i];

     int x = getBits(LOWBIT(m)) - ; //< x = m中质因子2的个数

     int now; //< 滚动数组标识 

     dp[][][] = ; //< 算上空序列，方便计算

     For(i, , n) {

         now = i % ;

         mcy(dp[now], dp[!now]);

         For(j, , min(x, i)) {

             int q = m / pow2[j];

             Rep(k, min(m, q << )) { //< k表示可能用到的余数

                 if(dp[!now][j][k]) {

                     int tmp = (k + a[i]) % q; //< 产生新余数

                     // j < x 代表取了 j 个的情况

                     // j == x 代表取了 j 个以上的情况

                     if(j < x) dp[now][j + ][tmp] += dp[!now][j][k] % mod;

                     else dp[now][j][tmp] += dp[!now][j][k] % mod;

                 }

             }

         }

     }

     // 这里做了一下改变，因为x可能等于0，如果从1开始加的话会漏掉很多情况

     For(j, , x) ans = (ans + dp[now][j][]) % mod;

     cout << ans -  << endl; //< 最后要减去空序列的一种

     return ;

 }

 /*

 15 11

 1 2 5 7 8 4 3 7 8 3 3 9 9 7 5

 2979

 16 8

 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1

 64839

 15 6

 1 2 5 7 8 4 3 7 8 3 3 9 9 7 5

 6547

 15 6

 1 2 5 7 8 4 3 7 8 3 3 9 9 7 5

 10938

 */