【无聊乱搞】如何用 std::set 过 gamma

一道毒瘤题

\(\gamma\) by DPair

题目描述

维护一个正整数集 \(S\)，元素 \(\in\) 值域 \(U\)，需要支持：

\(\texttt{1 l r}\)：\(S\gets S\cup [l,r]\)；
\(\texttt{2 l r}\)：\(S \gets \{x|x\in S \land x\notin [l,r]\}\)；
\(\texttt{3 l r}\)：求满足 \(x\in [l,r]\land x\notin S\) 的最小 \(x\)；
\(\texttt{4 l r}\)：求 \(\sum_{x\in[l,r]}[x\in S]\)。

数据规模

\(1\le U \le 10^{18}\)
\(1\le Q\le 5\times 10^6\)
\(1000\ ms,\texttt{-O2}\)
随机数据

Naive Solution

注意到操作 1、2 相当于区间赋值。

那么不难想到 ODT。然而基于 std::set 的 ODT 实现常数过大，不过手写链表可以通过。

但是为了挑战自我笔者决定使用 std::set 通过这道题。

下面是一份来自 DPair 的 Naive ODT 实现（我自己懒得写）：

// One run of the naive ODT: the closed range [l, r] carrying the value `val`.
struct NODE {
    LL l;
    LL r;
    // mutable: the value may be changed through a (const) set iterator,
    // since it does not take part in the ordering.
    mutable int val;

    NODE(LL left, LL right = -1, int value = 0)
        : l(left), r(right), val(value) {}

    // Runs are ordered by their left endpoint only.
    bool operator<(const NODE &other) const {
        return l < other.l;
    }
};

// The naive ODT itself: all runs, kept sorted by left endpoint (see NODE::operator<).
set <NODE> ODT;

// Shorthand for an iterator into ODT.
typedef set <NODE> :: iterator IT;

// Makes sure a run starts exactly at x and returns an iterator to it.
// If no run starts at x, the run preceding the lower_bound position is
// removed and re-inserted as two runs [L, x - 1] and [x, R] with the same value.
inline IT split(LL x){
    IT pos = ODT.lower_bound(NODE(x));
    const bool alreadyCut = (pos != ODT.end()) && (pos -> l == x);
    if(!alreadyCut){
        -- pos;                       // step back to the run to be cut
        const NODE whole = *pos;      // copy before the node is erased
        ODT.erase(pos);
        ODT.insert(NODE(whole.l, x - 1, whole.val));
        pos = ODT.insert(NODE(x, whole.r, whole.val)).first;
    }
    return pos;
}

// Overwrites [l, r] with `val`: every run inside the range is dropped and a
// single run [l, r] is inserted instead.
inline void assign(LL l, LL r, int val){
    // split(r + 1) runs first: split() erases and re-inserts the node it cuts,
    // so cutting at l first could leave that iterator dangling when r + 1
    // falls into the same run.
    IT last = split(r + 1);
    IT first = split(l);
    ODT.erase(first, last);
    ODT.insert(NODE(l, r, val));
}

// Returns the sum of length * val over all runs inside [l, r]
// (the range is cut at l and r + 1 first).
inline LL query1(LL l, LL r){
    IT stop = split(r + 1);
    LL total = 0;
    for(IT cur = split(l); cur != stop; ++ cur){
        const LL len = cur -> r - cur -> l + 1;
        total += len * (cur -> val);
    }
    return total;
}

// Returns the left endpoint of the first run inside [l, r] whose value is 0,
// or 0 when every run in the range has a non-zero value.
inline LL query2(LL l, LL r){
    IT stop = split(r + 1);
    for(IT cur = split(l); cur != stop; ++ cur)
        if(cur -> val == 0) return cur -> l;
    return 0;
}

上面这份代码的复杂度为 \(O(Q\log U)\)，但由于常数过大，会被链表吊打。

Improved Solution

我们并不打算更换算法，而是在原来的代码上优化实现。

Improvement #1：只维护一种颜色

考虑到我们的值只有两种：\(0,1\)。那么考虑只保留其中一种值，这样 set 维护的连续段数理论上会减少一半。

那么到底维护 \(0\) 还是 \(1\) 呢？看询问：4 操作其实 \(0,1\) 都差不多，但是 3 操作就不太一样了。如果维护 \(1\)，需要找到第一个不连续的位置，如果存在大量虚假的断点（即两个不同的连续段实际上相邻），就会浪费不少时间；而维护 \(0\) 就不一样了，我们只要取第一个迭代器的左端点，就是第一个 \(0\) 的位置，或者由左右迭代器相等判断无解。

下面是在原来基础上略加修改的 split 函数：

// Stored intervals as (left, right) pairs, kept in std::pair's lexicographic order.
std::set<std::pair<LL, LL> > odt;

// Creates a breakpoint in front of position p (the classic ODT "split"):
// if the interval preceding the lower_bound position reaches p or beyond, it
// is replaced by [left, p - 1] and [p, right]. Returns the iterator to the
// piece starting at p, or the lower_bound position when no cut was needed.
setIt CutItv(LL p) {
  setIt nxt = odt.lower_bound(std::make_pair(p, 0));
  if (nxt == odt.begin()) return nxt;

  setIt cur = nxt;
  --cur;
  if (cur->second < p) return nxt;  // predecessor ends before p: nothing to cut

  const std::pair<LL, LL> whole = *cur;
  odt.erase(cur);
  odt.insert(std::make_pair(whole.first, p - 1));
  return odt.insert(std::make_pair(p, whole.second)).first;
}

Improvement #2：`mutable`

所谓 mutable，即“可变的”，具体解释如下：

mutable 的意思是“可变的”，让我们可以在后面的操作中修改 v 的值。在 C++ 中，mutable 是为了突破 const 的限制而设置的。被 mutable 修饰的变量（mutable 只能用于修饰类中的非静态数据成员），将永远处于可变的状态，即使在一个 const 函数中。这意味着，我们可以直接修改已经插入 set 的元素的 v 值，而不用将该元素取出后重新加入 set 。

——OI Wiki

其中上面 DPair 的实现中也用到了 mutable，不过，如上所说，仅仅是修饰了值的变量。

然而其实 r 也是可以 mutable 的，并且在新的 split(CutItv) 实现中也没有用好这个特性，可以发现它可以使我们的 split 少一次 erase、少一次 insert，是非常可观的一个优化。

Improvement #3：`emplace`

在 C++11 中，std::set 中有了一种新的插入元素的方法：emplace。

它和 insert 的功能集合一样（包括返回值），但是 emplace 是原位构造元素，相比 insert 可以避免大量的不必要的复制移动，从而常数进一步得到优化。

详情可见 cppreference - std::set<Key,Compare,Allocator>::emplace

结合优化#2 的代码：

// A stored interval [l, r]. Only `l` is compared, so `r` is declared mutable
// and may be changed in place through a set iterator.
struct Interval {
  LL l;
  mutable LL r;

  Interval(LL left, LL right) : l(left), r(right) {}

  bool operator<(const Interval& other) const {
    return l < other.l;
  }
};

// The interval set; it starts out holding the single interval [1, 1e18].
std::set<Interval> odt({Interval(1, (LL)1e18)});

// Creates a breakpoint in front of p. Because `r` is mutable, the interval
// that covers p is shortened in place to [l, p - 1] and only the tail
// [p, r] has to be inserted.
std::set<Interval>::iterator CutItv(LL p) {
  auto nxt = odt.lower_bound(Interval(p, 0ll));
  if (nxt == odt.begin()) return nxt;

  auto cur = nxt;
  --cur;
  if (cur->r < p) return nxt;  // predecessor ends before p: nothing to cut

  const LL oldRight = cur->r;
  cur->r = p - 1;
  return odt.emplace(p, oldRight).first;
}

Improvement #4：`emplace_hint`

emplace 很快，但 emplace_hint 更快，前提是在用的好的时候。

emplace_hint 相比 emplace 又多了一个参数 hint（一个迭代器），插入操作会在容器中尽可能接近于 hint 的位置进行。这意味着插入操作可以节约很大一部分查找的时间。

emplace_hint 改良实现：

// Creates a breakpoint in front of p. The interval covering p is shortened in
// place to [l, p - 1] (r is mutable) and the tail [p, r] is inserted with a
// position hint. Returns the iterator to the interval starting at p, or the
// first interval starting after p when nothing had to be cut.
std::set<Interval>::iterator CutItv(LL p) {
  auto nxt = odt.lower_bound(Interval(p, 0ll));
  if (nxt == odt.begin()) return nxt;

  auto cur = nxt;
  --cur;
  if (cur->r < p) return nxt;  // predecessor ends before p: nothing to cut

  const LL oldRight = cur->r;
  cur->r = p - 1;
  // emplace_hint is only guaranteed amortised O(1) when the new element goes
  // immediately *before* the hint, so pass the successor (nxt), not the
  // predecessor.
  return odt.emplace_hint(nxt, p, oldRight);
}

不仅仅是 split 部分，其他涉及插入操作的地方都可以这样操作：

// Replaces everything stored inside [l, r] by the single interval [l, r].
void Insert(LL l, LL r) {
  auto last = CutItv(r + 1);
  auto first = CutItv(l);
  // erase() returns the iterator following the removed range, which is
  // exactly the position the new interval belongs in front of.
  auto hint = odt.erase(first, last);
  odt.emplace_hint(hint, l, r);
}

Improvement #5：及时合并虚假断点

也许现在的连续段应该是这样：\([1,100]\)；

但可能你的 std::set 中是这样：\([1, 15],[16,51],\cdots,[81,89],[90,100]\)。这很难受，白白增大了 set 的大小。

于是我们在 Insert、getMex、getSum 三个操作之后都加一个机制，把 set 中与区间对应的两个迭代器周围相邻的段合并。

实测 \([1, 10^{18}]\) 这样的区间，随机数据下所有时刻 set 的大小的平均值仅为 \(12\)（Naive 的 ODT 实现大小为 \(100\) 左右）。

Final Version

最后又发现 set 中的元素只按左端点排序，右端点又是可变的，于是又有了 std::map 的版本，详见第二个代码：

#include <algorithm>

#include <cstdio>

#include <iterator>

#include <set>

typedef long long LL;

// Pseudo-random generator for the operations. The layout and constants
// (624-word state, offset 397, tempering masks) follow the MT19937 recipe.
// NOTE(review): all arithmetic is done on `int` and is deliberately left
// exactly as it was so the generated sequence does not change -- confirm
// against the judge's reference generator before touching any types.
// The only change is dropping the `register` specifier, which was removed
// from the language in C++17.
namespace My_Rand{

  int index, MT[624];  // read position and generator state

  // Seeds the state array from `seed` and resets the read position.
  inline void sd(int seed){
    index = 0;
    MT[0] = seed;
    for (int i = 1;i < 624;i ++){
      int t = 1812433253 * (MT[i - 1] ^ (MT[i - 1] >> 30)) + i;
      MT[i] = t & 0xffffffff;
    }
  }

  // Regenerates all 624 state words in place.
  inline void rotate(){
    for (int i = 0;i < 624;i ++){
      int tmp = (MT[i] & 0x80000000) + (MT[(i + 1) % 624] & 0x7fffffff);
      MT[i] = MT[(i + 397) % 624] ^ (tmp >> 1);
      if(tmp & 1) MT[i] ^= 2567483615;
    }
  }

  // Returns the next tempered value. The state is regenerated whenever the
  // read position is 0, i.e. on the first call after sd() and every 624 calls.
  inline int rd(){
    if(!index) rotate();
    int ret = MT[index];
    ret = ret ^ (ret >> 11);
    ret = ret ^ ((ret << 7) & 2636928640);
    ret = ret ^ ((ret << 15) & 4022730752);
    ret = ret ^ (ret >> 18);
    index = (index + 1) % 624;
    return ret;
  }

  const LL limit = 1000000000;

  // Produces the next operation: `opt` from one draw, then l and r, each
  // assembled as hi * limit + lo from two draws XOR-ed with the previous
  // answer (reduced modulo limit). The endpoints are swapped so that l <= r.
  inline void gen(int &opt, LL &l, LL &r, LL ans){
    opt = rd() % 4 + 1;
    ans = ans % limit;
    l = ((rd() ^ ans) % limit) * limit + ((rd() ^ ans) % limit);
    r = ((rd() ^ ans) % limit) * limit + ((rd() ^ ans) % limit);
    if(l > r) std::swap(l, r);
  }

} // namespace My_Rand

// A stored interval [l, r]. Only `l` takes part in the ordering, so `r` is
// mutable and can be adjusted in place through a set iterator.
struct Interval {
  LL l; mutable LL r;
  inline Interval(LL l, LL r) : l(l), r(r) { }
  inline bool operator < (const Interval& rhs) const { return l < rhs.l; }
};

// Disjoint intervals ordered by left endpoint; starts out as [1, 1e18].
// getSum() subtracts the stored positions from the range length, i.e. the
// set holds the positions that are NOT counted.
std::set<Interval> odt({Interval(1, (LL)1e18)});

// Makes p an interval boundary: an interval [a, b] with a < p <= b is cut
// into [a, p - 1] and [p, b]. Returns the first interval whose left endpoint
// is >= p (odt.end() if there is none).
std::set<Interval>::iterator CutItv(LL p) {
  auto nxt = odt.lower_bound(Interval(p, 0ll));
  if (nxt == odt.begin()) return nxt;
  auto cur = std::prev(nxt);
  if (cur->r < p) return nxt;  // predecessor ends before p: nothing to cut
  const LL tail = cur->r;
  cur->r = p - 1;
  // The new piece goes right in front of nxt, which is the hint position
  // for which emplace_hint is amortised O(1).
  return odt.emplace_hint(nxt, p, tail);
}

// Fuses *it into its predecessor when the two are exactly adjacent
// (prev.r + 1 == it.l). Returns the surviving interval, or `it` unchanged
// when nothing was merged (including it == begin() or it == end()).
static std::set<Interval>::iterator MergeIntoPrev(std::set<Interval>::iterator it) {
  if (it == odt.begin() || it == odt.end()) return it;
  auto before = std::prev(it);
  if (before->r + 1 != it->l) return it;
  before->r = it->r;
  odt.erase(it);
  return before;
}

// Stores the whole range [l, r], then glues the new interval to an adjacent
// left and/or right neighbour so no fake breakpoints remain.
void Insert(LL l, LL r) {
  auto itr = CutItv(r + 1), itl = CutItv(l);
  auto it = odt.emplace_hint(odt.erase(itl, itr), l, r);
  it = MergeIntoPrev(it);         // left neighbour
  MergeIntoPrev(std::next(it));   // right neighbour
}

// Removes every stored position inside [l, r].
void Erase(LL l, LL r) {
  auto itr = CutItv(r + 1), itl = CutItv(l);
  odt.erase(itl, itr);
}

// Returns the smallest stored position inside [l, r], or 0 when none is
// stored there. The cuts made for the query are undone afterwards.
LL getMex(LL l, LL r) {
  auto itr = CutItv(r + 1), itl = CutItv(l);
  if (itl == itr) return 0;
  const LL ans = itl->l;
  // Only real adjacency is merged; a neighbour separated by a gap must stay
  // separate. itl != itr here, so erasing itl cannot invalidate itr.
  MergeIntoPrev(itl);
  MergeIntoPrev(itr);
  return ans;
}

// Returns how many positions of [l, r] are NOT stored in the set.
// The cuts made for the query are undone afterwards.
LL getSum(LL l, LL r) {
  auto itr = CutItv(r + 1), itl = CutItv(l);
  LL ret = 0;
  for (auto it = itl; it != itr; ++it) ret += it->r - it->l + 1;
  // When itl == itr, merge only once so an erased iterator is never reused.
  if (itl != itr) MergeIntoPrev(itl);
  MergeIntoPrev(itr);
  return r - l + 1 - ret;
}

signed main() {

  int seed, Q;

  scanf("%d%d", &Q, &seed);

  My_Rand::sd(seed);

  LL last = 0ll, axor = 0ll;

  while (Q--) {

    int opt; LL l, r;

    My_Rand::gen(opt, l, r, last);

    if (opt == 2) Insert(l, r);

    else if (opt == 1) Erase(l, r);

    else if (opt == 3) axor ^= (last = getMex(l, r));

    else axor ^= (last = getSum(l, r));

  }

  printf("%lld\n", axor);

  return 0;

}

#include <algorithm>
#include <cstdio>
#include <iterator>
#include <map>

typedef long long LL;

// Pseudo-random generator for the operations. The layout and constants
// (624-word state, offset 397, tempering masks) follow the MT19937 recipe.
// NOTE(review): all arithmetic is done on `int` and is deliberately left
// exactly as it was so the generated sequence does not change -- confirm
// against the judge's reference generator before touching any types.
// The only change is dropping the `register` specifier, which was removed
// from the language in C++17.
namespace My_Rand{

  int index, MT[624];  // read position and generator state

  // Seeds the state array from `seed` and resets the read position.
  inline void sd(int seed){
    index = 0;
    MT[0] = seed;
    for (int i = 1;i < 624;i ++){
      int t = 1812433253 * (MT[i - 1] ^ (MT[i - 1] >> 30)) + i;
      MT[i] = t & 0xffffffff;
    }
  }

  // Regenerates all 624 state words in place.
  inline void rotate(){
    for (int i = 0;i < 624;i ++){
      int tmp = (MT[i] & 0x80000000) + (MT[(i + 1) % 624] & 0x7fffffff);
      MT[i] = MT[(i + 397) % 624] ^ (tmp >> 1);
      if(tmp & 1) MT[i] ^= 2567483615;
    }
  }

  // Returns the next tempered value. The state is regenerated whenever the
  // read position is 0, i.e. on the first call after sd() and every 624 calls.
  inline int rd(){
    if(!index) rotate();
    int ret = MT[index];
    ret ^= (ret >> 11);
    ret ^= ((ret << 7) & 2636928640);
    ret ^= ((ret << 15) & 4022730752);
    ret ^= (ret >> 18);
    (++index) %= 624;
    return ret;
  }

  const LL limit = 1000000000;

  // Produces the next operation: `opt` in 1..4 from the low two bits of one
  // draw, then l and r, each assembled as hi * limit + lo from two draws
  // XOR-ed with the previous answer (reduced modulo limit). The endpoints are
  // swapped so that l <= r.
  inline void gen(int &opt, LL &l, LL &r, LL ans){
    opt = (rd() & 3) + 1;
    ans = ans % limit;
    l = ((rd() ^ ans) % limit) * limit + ((rd() ^ ans) % limit);
    r = ((rd() ^ ans) % limit) * limit + ((rd() ^ ans) % limit);
    if(l > r) std::swap(l, r);
  }

} // namespace My_Rand

// Disjoint intervals stored as left endpoint -> right endpoint; starts out as
// [1, 1e18]. getSum() subtracts the stored positions from the range length,
// i.e. the map holds the positions that are NOT counted.
std::map<LL, LL> odt({std::make_pair(1, (LL)1e18)});

// Makes p an interval boundary: an interval [a, b] with a < p <= b is cut
// into [a, p - 1] and [p, b]. Returns the first interval whose left endpoint
// is >= p (odt.end() if there is none).
std::map<LL, LL>::iterator CutItv(LL p) {
  // lower_bound, not upper_bound: an interval that already starts at p must
  // be returned untouched. With upper_bound it would be treated as the
  // interval to cut and its right end overwritten with p - 1.
  auto nxt = odt.lower_bound(p);
  if (nxt == odt.begin()) return nxt;
  auto cur = std::prev(nxt);
  if (cur->second < p) return nxt;  // predecessor ends before p: nothing to cut
  const LL tail = cur->second;
  cur->second = p - 1;
  // The new piece goes right in front of nxt, which is the hint position
  // for which emplace_hint is amortised O(1).
  return odt.emplace_hint(nxt, p, tail);
}

// Fuses *it into its predecessor when the two are exactly adjacent
// (prev.second + 1 == it.first). Returns the surviving interval, or `it`
// unchanged when nothing was merged (including it == begin() or end()).
static std::map<LL, LL>::iterator MergeIntoPrev(std::map<LL, LL>::iterator it) {
  if (it == odt.begin() || it == odt.end()) return it;
  auto before = std::prev(it);
  if (before->second + 1 != it->first) return it;
  before->second = it->second;
  odt.erase(it);
  return before;
}

// Stores the whole range [l, r], then glues the new interval to an adjacent
// left and/or right neighbour so no fake breakpoints remain.
void Insert(LL l, LL r) {
  auto itr = CutItv(r + 1), itl = CutItv(l);
  // erase() already returns the element the new interval goes in front of;
  // decrementing it would be undefined when it is begin().
  auto it = odt.emplace_hint(odt.erase(itl, itr), l, r);
  it = MergeIntoPrev(it);         // left neighbour
  MergeIntoPrev(std::next(it));   // right neighbour
}

// Removes every stored position inside [l, r].
void Erase(LL l, LL r) {
  auto itr = CutItv(r + 1), itl = CutItv(l);
  odt.erase(itl, itr);
}

// Returns the smallest stored position inside [l, r], or 0 when none is
// stored there. The cuts made for the query are undone afterwards.
LL getMex(LL l, LL r) {
  auto itr = CutItv(r + 1), itl = CutItv(l);
  if (itl == itr) return 0;
  const LL ans = itl->first;
  // Only real adjacency is merged; a neighbour separated by a gap must stay
  // separate. itl != itr here, so erasing itl cannot invalidate itr.
  MergeIntoPrev(itl);
  MergeIntoPrev(itr);
  return ans;
}

// Returns how many positions of [l, r] are NOT stored in the map.
// The cuts made for the query are undone afterwards.
LL getSum(LL l, LL r) {
  auto itr = CutItv(r + 1), itl = CutItv(l);
  LL ans = 0;
  for (auto it = itl; it != itr; ++it)
    ans += it->second - it->first + 1;
  // When itl == itr, merge only once so an erased iterator is never reused.
  if (itl != itr) MergeIntoPrev(itl);
  MergeIntoPrev(itr);
  return r - l + 1 - ans;
}

signed main() {

  int seed, Q;

  scanf("%d%d", &Q, &seed);

  My_Rand::sd(seed);

  LL last = 0ll, axor = 0ll;

  ++Q; while (--Q) {

    int opt; LL l, r;

    My_Rand::gen(opt, l, r, last);

    if (opt == 2) Insert(l, r);

    else if (opt == 1) Erase(l, r);

    else if (opt == 3) axor ^= (last = getMex(l, r));

    else axor ^= (last = getSum(l, r));

  }

  return printf("%lld\n", axor), 0;

}

End

这道题就这样卡过去了，甚至比链表还快一点。

也许有人问：为什么不手写平衡树？然而开了 O2 的 std::set 说实话并不比手写慢，而且手写实现难度更大。

所以千万不要低估 STL 的实力，在用得好的情况下并不会逊色于手写 DS。

当然前提是对 STL 足够熟悉，并且能够灵活运用。

后记

原文地址：https://www.cnblogs.com/-Wallace-/p/14019091.html
本文作者：@-Wallace-
转载请附上出处。