The C++ toolkit you'll keep reusing

Every snippet below is something you'll write in real contest rounds. Copy them into a personal template, type them by hand once or twice, then trust the muscle memory.

Why C++. Python is fine for Bronze, tolerable for Silver, and an uphill battle at Gold. By Platinum, almost every accepted solution is C++. Switching languages mid-contest is too expensive — pick C++ now.

Personal template

A reasonable starting main.cpp. Copy at the start of every contest problem.

#include <bits/stdc++.h>
using namespace std;

#define all(x) (x).begin(), (x).end()
#define rall(x) (x).rbegin(), (x).rend()
#define sz(x) (int)(x).size()

using ll  = long long;
using ull = unsigned long long;
using pii = pair<int,int>;
using pll = pair<ll,ll>;

const int INF = 0x3f3f3f3f;
const ll  LINF = 0x3f3f3f3f3f3f3f3fLL;

int main() {
    ios_base::sync_with_stdio(false);
    cin.tie(nullptr);

    int n; cin >> n;
    vector<int> a(n);
    for (auto& x : a) cin >> x;

    // ... solve ...
}

<bits/stdc++.h> pulls in everything; that's fine in contests. The sync_with_stdio(false) plus cin.tie(nullptr) pair speeds cin/cout up to roughly scanf/printf levels.

Fast I/O

Always start with ios_base::sync_with_stdio(false); cin.tie(nullptr);.
Don't mix scanf with cin after the sync is off.
For very large output (10⁶+ tokens), prefer a single ostringstream or pre-built string, then one cout <<.
Use "\n" instead of endl — endl flushes after every line.

STL containers, in order of how often you use them

Container	What it is	When
`vector<T>`	Dynamic array, contiguous	Almost everything. Default container.
`array<T,N>`	Fixed-size array	When N is a compile-time constant and you want stack allocation.
`pair<A,B>` / `tuple`	Lightweight bundle	Storing edges `(u,v,w)`, points `(x,y)`, sorting by composite key.
`set<T>` / `multiset`	Balanced BST, O(log n) ops	Dynamic ordered set, `lower_bound` on changing data, dynamic kth element.
`map<K,V>` / `unordered_map`	Keyed storage	Counting, grouping, lookup. `unordered_map` is faster on average but vulnerable to anti-hash tests.
`priority_queue<T>`	Binary heap (max by default)	Dijkstra, greedy "pick smallest/largest next".
`deque<T>`	Double-ended queue	BFS queue, monotonic deque for sliding-window min/max.
`stack<T>` / `queue<T>`	Single-end wrappers over deque	DFS via stack, BFS via queue (or just use deque directly).
`bitset<N>`	Packed bool array, 64× faster	Subset DP up to N≈10⁵, "reachability" queries.

Snippets you'll copy a lot

Iterating with index

for (int i = 0; i < (int)a.size(); ++i) { /* ... */ }
for (int i = 0; i < n; ++i) for (int j = 0; j < m; ++j) { /* ... */ }

Reading a graph (adjacency list)

int n, m;
cin >> n >> m;
vector<vector<int>> adj(n + 1);
for (int i = 0; i < m; ++i) {
    int u, v; cin >> u >> v;
    adj[u].push_back(v);
    adj[v].push_back(u);  // omit for directed
}

Iterative BFS

vector<int> dist(n + 1, -1);
queue<int> q;
q.push(start); dist[start] = 0;
while (!q.empty()) {
    int u = q.front(); q.pop();
    for (int v : adj[u]) if (dist[v] == -1) {
        dist[v] = dist[u] + 1;
        q.push(v);
    }
}

Iterative DFS (avoids stack overflow at n=10⁵)

vector<bool> seen(n + 1, false);
stack<int> st;
st.push(start);
while (!st.empty()) {
    int u = st.top(); st.pop();
    if (seen[u]) continue;
    seen[u] = true;
    for (int v : adj[u]) if (!seen[v]) st.push(v);
}

Dijkstra (non-negative weights)

vector<ll> dist(n + 1, LINF);
priority_queue<pair<ll,int>, vector<pair<ll,int>>, greater<>> pq;
dist[s] = 0; pq.push({0, s});
while (!pq.empty()) {
    auto [d, u] = pq.top(); pq.pop();
    if (d > dist[u]) continue;
    for (auto [v, w] : adj[u]) {
        if (dist[u] + w < dist[v]) {
            dist[v] = dist[u] + w;
            pq.push({dist[v], v});
        }
    }
}

Union-Find (DSU)

struct DSU {
    vector<int> p, r;
    DSU(int n) : p(n), r(n, 0) { iota(p.begin(), p.end(), 0); }
    int find(int x) { return p[x] == x ? x : p[x] = find(p[x]); }
    bool unite(int a, int b) {
        a = find(a); b = find(b);
        if (a == b) return false;
        if (r[a] < r[b]) swap(a, b);
        p[b] = a;
        if (r[a] == r[b]) r[a]++;
        return true;
    }
};

Fenwick tree (BIT) for prefix sums

struct BIT {
    int n; vector<ll> f;
    BIT(int n) : n(n), f(n + 1, 0) {}
    void add(int i, ll v) { for (; i <= n; i += i & -i) f[i] += v; }
    ll sum(int i) { ll s = 0; for (; i > 0; i -= i & -i) s += f[i]; return s; }
    ll range(int l, int r) { return sum(r) - sum(l - 1); }
};

Binary search on the answer

auto ok = [&](ll x) -> bool { /* can we achieve x? */ };
ll lo = 1, hi = 1e18;
while (lo < hi) {
    ll mid = lo + (hi - lo) / 2;
    if (ok(mid)) hi = mid;
    else         lo = mid + 1;
}
// lo is the smallest x for which ok(x) is true

Coordinate compression

vector<int> vals = a;  // copy of values to compress
sort(all(vals));
vals.erase(unique(all(vals)), vals.end());
auto cidx = [&](int x) { return lower_bound(all(vals), x) - vals.begin(); };

Segment tree (point update, range sum)

struct SegTree {
    int n; vector<ll> t;
    SegTree(int n) : n(n), t(4 * n, 0) {}
    void update(int p, int l, int r, int i, ll v) {
        if (l == r) { t[p] = v; return; }
        int m = (l + r) / 2;
        if (i <= m) update(2*p, l, m, i, v);
        else        update(2*p+1, m+1, r, i, v);
        t[p] = t[2*p] + t[2*p+1];
    }
    ll query(int p, int l, int r, int ql, int qr) {
        if (qr < l || r < ql) return 0;
        if (ql <= l && r <= qr) return t[p];
        int m = (l + r) / 2;
        return query(2*p, l, m, ql, qr) + query(2*p+1, m+1, r, ql, qr);
    }
    void update(int i, ll v) { update(1, 0, n - 1, i, v); }
    ll query(int l, int r) { return query(1, 0, n - 1, l, r); }
};

Swap + for any associative op (min, max, gcd) to get other range queries.

Sliding window min / max (monotonic deque)

// minimum of every window of size k in array a[0..n-1]
deque<int> dq;
vector<int> res;
for (int i = 0; i < n; ++i) {
    while (!dq.empty() && a[dq.back()] >= a[i]) dq.pop_back();
    dq.push_back(i);
    if (dq.front() <= i - k) dq.pop_front();
    if (i >= k - 1) res.push_back(a[dq.front()]);
}

Two pointers (longest window satisfying a predicate)

int l = 0;
ll cur = 0, best = 0;
for (int r = 0; r < n; ++r) {
    cur += a[r];                       // expand right
    while (cur > K) cur -= a[l++];     // shrink while invalid
    best = max(best, (ll)(r - l + 1));
}

Prefix sums (1-D and 2-D)

// 1-D
vector<ll> ps(n + 1, 0);
for (int i = 0; i < n; ++i) ps[i + 1] = ps[i] + a[i];
// range [l, r] inclusive: ps[r + 1] - ps[l]

// 2-D
vector<vector<ll>> ps2(R + 1, vector<ll>(C + 1, 0));
for (int i = 0; i < R; ++i) for (int j = 0; j < C; ++j)
    ps2[i + 1][j + 1] = a[i][j] + ps2[i][j + 1] + ps2[i + 1][j] - ps2[i][j];
// rectangle [r1, c1] - [r2, c2] inclusive:
// ps2[r2 + 1][c2 + 1] - ps2[r1][c2 + 1] - ps2[r2 + 1][c1] + ps2[r1][c1]

String hashing (polynomial, single mod)

const ll HMOD = (1LL << 61) - 1;  // Mersenne; use uint64_t math carefully
const ll BASE = 131;
vector<ll> pw, h;

void buildHash(const string& s) {
    int n = s.size();
    pw.assign(n + 1, 1); h.assign(n + 1, 0);
    for (int i = 0; i < n; ++i) {
        pw[i + 1] = pw[i] * BASE % HMOD;
        h[i + 1]  = (h[i] * BASE + s[i]) % HMOD;
    }
}
ll subHash(int l, int r) {  // s[l..r] inclusive, 0-indexed
    return (h[r + 1] - h[l] * pw[r - l + 1] % HMOD + HMOD * HMOD) % HMOD;
}

For real contests, double-hash (two independent moduli) is safer against anti-hash test data.

Modular exponentiation

const ll MOD = 1'000'000'007;
ll mpow(ll b, ll e, ll m = MOD) {
    ll r = 1 % m;
    b %= m;
    while (e > 0) {
        if (e & 1) r = r * b % m;
        b = b * b % m;
        e >>= 1;
    }
    return r;
}
ll minv(ll a, ll m = MOD) { return mpow(a, m - 2, m); }  // m prime

Common gotchas

Integer overflow. When values reach 10⁹ and you multiply two of them, you need long long. Watch out for a * b where both are int — cast at least one with (ll)a * b.
1-indexed vs 0-indexed. Pick one per problem and stick with it. Mixing is the #1 source of off-by-ones.
Uninitialized arrays. Global arrays are zeroed; local arrays are garbage. Initialize explicitly: vector<int> a(n, 0);.
Recursive DFS depth. Default stack is ~1 MB ≈ 64K calls. At n=10⁵ on a chain graph, you'll segfault. Use iterative DFS or compile with a bigger stack.
unordered_map collisions. Adversarial test data can force quadratic behavior. If you use it, salt with a random seed.
cout precision. For floating-point answers, set cout << fixed << setprecision(9);.

Local testing

Build a tiny harness so you can run sample tests without copying into the judge:

// compile with sanitizers when debugging
g++ -std=c++17 -O2 -Wall -Wextra -fsanitize=undefined,address -o sol sol.cpp

// run with input from file
./sol < in.txt

A 5-line brute.cpp that solves small n by brute force, plus a 10-line generator, will catch 90% of "WA on test 4" bugs before submitting. Diff the brute output against the real solution on random inputs.