世界上最高性能的函数库—走起！（第三版）

AC君

2026-03-16 19:46:22

发布于：浙江

14阅读

0回复

0点赞

重新肝了一遍，这次性能又提升了“一点点”（约 20~50 倍）。
注意：此代码在ACGO的编译器中毫无用处！！！
编译命令：

g++ -O3 -march=native -ffast-math -o main main.cpp

废话不多说，代码如下：

#include <cstdio>
#include <cstring>
#include <cstdint>
#include <cmath>
#include <immintrin.h>

// Per-file GCC settings: aggressive optimisation flags, the instruction-set
// extensions the code generator may use, and silencing of unused-function warnings.
// NOTE(review): the target list includes AVX-512; presumably the resulting
// binary needs a CPU that supports every listed extension - confirm.
#pragma GCC optimize("Ofast", "unroll-loops", "no-stack-protector", "fast-math")
#pragma GCC target("avx2", "avx512f", "avx512vl", "fma", "bmi2", "popcnt")
#pragma GCC diagnostic ignored "-Wunused-function"

// Short type aliases used throughout the file.
#define ll long long
#define lf double

// Branch-prediction hints: the condition is expected to be true / false.
#define likely(x)   __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)

static const lf pi = 3.14159265358979323846;

// Swap helpers. Each argument is evaluated more than once, so pass plain
// lvalues without side effects.
#define swap_ll(a, b) do { ll _t = (a); (a) = (b); (b) = _t; } while(0) //swap two integers
#define swap_fd(a, b) do { lf _t = (a); (a) = (b); (b) = _t; } while(0) //swap two floating-point values
#define swap_c(a, b) do { char _t = (a); (a) = (b); (b) = _t; } while(0) //swap two characters

// Fixed-capacity, NUL-terminated string stored inline (no heap allocation).
//
// Member functions never produce more than max_len characters; longer input
// is silently truncated. The buffer r1 stays public because read_s() and
// getline() in this file fill it directly.
struct string {
    enum { max_len = 1000 }; // longest content any member function produces

    char r1[10001]; // inline storage; the content is everything before the first '\0'

    string() { r1[0] = '\0'; } // empty string
    string(char c) { r1[0] = c; r1[1] = '\0'; } // single character

    // Copy from a C string; a null pointer yields the empty string.
    string(const char* s) { assign(s, s ? __builtin_strlen(s) : 0); }

    // Copy construction / assignment (content truncated to max_len).
    string(const string& other) { assign(other.r1, __builtin_strlen(other.r1)); }

    string& operator=(const string& other) {
        if (this != &other) assign(other.r1, __builtin_strlen(other.r1));
        return *this;
    }

    // Unchecked element access, writable and read-only.
    inline char& operator[](int i) { return r1[i]; }
    inline const char& operator[](int i) const { return r1[i]; }

    // Content comparison.
    bool operator==(const string& other) const { return __builtin_strcmp(r1, other.r1) == 0; }
    bool operator!=(const string& other) const { return !(*this == other); }

    // Concatenation into a new string (truncated to max_len).
    string operator+(const string& other) const {
        string res(*this);
        res.append(other.r1, __builtin_strlen(other.r1));
        return res;
    }

    // In-place append of another string; appending a string to itself is safe
    // because the source bytes always lie before the write position.
    string& operator+=(const string& other) {
        return append(other.r1, __builtin_strlen(other.r1));
    }

    // In-place append of a C string; a null pointer appends nothing.
    string& operator+=(const char* s) {
        return s ? append(s, __builtin_strlen(s)) : *this;
    }

    // In-place append of one character; ignored once the string is full.
    string& operator+=(char c) { return append(&c, 1); }

    inline int size() const { return (int)__builtin_strlen(r1); } // length, recomputed on every call
    inline bool empty() const { return r1[0] == '\0'; }
    inline const char* c_str() const { return r1; }
    inline void clear() { r1[0] = '\0'; }

    inline void push_back(char c) { append(&c, 1); } // same as += c

    // Returns the substring starting at pos with at most len characters.
    // A negative pos is treated as 0; a negative or too-large len means
    // "up to the end"; pos at or past the end yields the empty string.
    string substr(int pos, int len = -1) const {
        string res;
        const int sz = size();
        if (pos < 0) pos = 0;
        if (pos >= sz) return res;
        const int avail = sz - pos;
        // Compare against the remaining length instead of computing
        // pos + len, which overflows for large len.
        if (len < 0 || len > avail) len = avail;
        res.assign(r1 + pos, (size_t)len);
        return res;
    }

    // Index of the first occurrence of sub, 0 for an empty sub, -1 if absent.
    int find(const string& sub) const {
        if (sub.empty()) return 0;
        const char* p = strstr(r1, sub.r1);
        return p ? (int)(p - r1) : -1;
    }

private:
    // Replaces the content with the first n bytes of src, capped at max_len.
    void assign(const char* src, size_t n) {
        const size_t cap = max_len;
        if (n > cap) n = cap;
        if (n > 0) __builtin_memcpy(r1, src, n);
        r1[n] = '\0';
    }

    // Appends up to n bytes of src so that the total never exceeds max_len.
    string& append(const char* src, size_t n) {
        const size_t cap = max_len;
        const size_t cur = __builtin_strlen(r1);
        // Already full (or over-filled through direct writes to r1): nothing
        // fits, and cap - cur must not be allowed to wrap around.
        if (__builtin_expect(cur >= cap, 0)) return *this;
        const size_t room = cap - cur;
        if (n > room) n = room;
        if (n > 0) __builtin_memcpy(r1 + cur, src, n);
        r1[cur + n] = '\0';
        return *this;
    }
};

// Concatenation with a C string on the left-hand side.
inline string operator+(const char* s, const string& str) {
    string joined(s);
    joined += str;
    return joined;
}
// Concatenation with a single character on the left-hand side.
inline string operator+(char c, const string& str) {
    string joined(c);
    joined += str;
    return joined;
}

// Input buffer for stdin: _rptr is the next unread byte, _rend is one past
// the last byte currently held in _rbuf.
static char _rbuf[1 << 16];
static char* _rptr = _rbuf;
static char* _rend = _rbuf;

// Returns the next byte of stdin, refilling the buffer when it is exhausted.
// Returns EOF (narrowed to char) once fread delivers no more data.
static inline char _gc() {
    if (_rptr != _rend) return *_rptr++;

    const size_t got = fread(_rbuf, 1, sizeof(_rbuf), stdin);
    _rptr = _rbuf;
    _rend = _rbuf + got;
    if (got == 0) return EOF;
    return *_rptr++;
}

// Output buffer for stdout: _wptr is the next free slot in _wbuf.
static char _wbuf[1 << 16];
static char* _wptr = _wbuf;

// Queues one character for output; a full buffer is written to stdout first.
static inline void _pc(char c) {
    char* const limit = _wbuf + sizeof(_wbuf);
    if (_wptr == limit) {
        fwrite(_wbuf, 1, sizeof(_wbuf), stdout);
        _wptr = _wbuf;
    }
    *_wptr = c;
    ++_wptr;
}

// Writes any characters still queued in _wbuf to stdout and empties the buffer.
static inline void _flush() {
    if (_wptr == _wbuf) return; // nothing pending
    fwrite(_wbuf, 1, _wptr - _wbuf, stdout);
    _wptr = _wbuf;
}

// Fast integer reader.
// Skips everything up to the first decimal digit, then parses the digit run.
// The number is negative only when the character immediately before the
// first digit is '-'. Returns 0 if the input ends before any digit is found.
// The first character after the digit run is consumed as well.
inline ll read_d() {
    char c = _gc();
    bool neg = false;
    while (c < '0' || c > '9') {
        // Compare against EOF narrowed to char so the test also works where
        // plain char is unsigned.
        if (c == (char)EOF) return 0;
        // Overwrite instead of latch: a stray '-' earlier in the skipped text
        // must not negate the number.
        neg = (c == '-');
        c = _gc();
    }
    ll x = 0;
    while (c >= '0' && c <= '9') {
        x = x * 10 + (c - '0');
        c = _gc();
    }
    return neg ? -x : x;
}

// Fast floating-point reader for plain decimal notation ([-]digits[.digits]).
// Skips everything up to the first digit; the number is negative only when
// the character immediately before the first digit is '-'. Returns 0 if the
// input ends before any digit is found. Exponents and a leading '.' without
// an integer part are not recognised. The first character after the number
// is consumed as well.
inline lf read_f() {
    char c = _gc();
    bool neg = false;
    while (c < '0' || c > '9') {
        if (c == (char)EOF) return 0;
        neg = (c == '-');
        c = _gc();
    }

    // The integer part is parsed here rather than through read_d(): read_d()
    // swallows the character after the digits, which would hide the '.'.
    ll ip = 0;
    while (c >= '0' && c <= '9') {
        ip = ip * 10 + (c - '0');
        c = _gc();
    }

    lf fp = 0;
    if (c == '.') {
        lf base = 0.1;
        c = _gc();
        while (c >= '0' && c <= '9') {
            fp += (c - '0') * base;
            base *= 0.1;
            c = _gc();
        }
    }

    // Apply the sign to the whole magnitude so that "-0.5" and "-3.14" come
    // out negative with the fraction pointing the right way.
    const lf magnitude = (lf)ip + fp;
    return neg ? -magnitude : magnitude;
}

// Fast token reader: skips leading whitespace/control bytes (values <= 32),
// then collects bytes up to the next such byte, the end of input, or 1000
// characters. Returns the empty string when the input ends first.
// Bytes are compared as unsigned char so that bytes >= 0x80 (e.g. UTF-8
// text) count as token characters instead of being skipped. Because _gc()
// reports end of input as (char)EOF, a literal 0xFF byte ends the token.
inline string read_s() {
    string a;
    char c = _gc();
    while (c != (char)EOF && (unsigned char)c <= 32) c = _gc();

    int cnt = 0;
    while (c != (char)EOF && (unsigned char)c > 32 && cnt < 1000) {
        a.r1[cnt++] = c;
        c = _gc();
    }
    a.r1[cnt] = '\0';
    return a;
}

// Reads one line into s, replacing its previous content; the line terminator
// is consumed but not stored. Recognises "\n", "\r\n" and a lone "\r".
// At most 1000 characters are stored per call; the rest of an over-long line
// is left in the input for the next call.
inline void getline(string& s) {
    int cnt = 0;
    char c = _gc();
    for (;;) {
        if (c == '\n' || c == (char)EOF) break;
        if (c == '\r') {
            // Swallow the '\n' of a CRLF pair so the next call does not see
            // an empty line; any other character is handed back.
            char next = _gc();
            if (next != '\n' && next != (char)EOF) --_rptr;
            break;
        }
        if (cnt == 1000) {
            // s is full: hand the character back instead of dropping it.
            // Stepping back is valid because _gc() just advanced _rptr.
            --_rptr;
            break;
        }
        s.r1[cnt++] = c;
        c = _gc();
    }
    s.r1[cnt] = '\0';
}

// Fast integer writer: emits x in decimal through _pc().
void write_d(ll x) {
    // Work on the unsigned magnitude: negating the most negative value as a
    // signed integer overflows, whereas unsigned negation is well defined.
    unsigned long long mag = (unsigned long long)x;
    if (x < 0) {
        _pc('-');
        mag = 0ULL - mag;
    }

    char digits[20]; // the largest magnitude, 2^63, has 19 decimal digits
    int len = 0;
    do {
        digits[len++] = (char)('0' + (int)(mag % 10));
        mag /= 10;
    } while (mag != 0);

    // Digits were produced least-significant first.
    while (len > 0) _pc(digits[--len]);
}

// Fast floating-point writer: emits x with exactly max_decimal digits after
// the decimal point (none, and no point, when max_decimal <= 0).
// The value is rounded half-up at the last printed digit instead of being
// truncated, and zero is formatted like every other value (e.g. "0.00").
// NOTE(review): the integer part goes through a long long, so magnitudes
// beyond that range are not supported.
inline void write_p(lf x, int max_decimal) {
    if (x < 0) { _pc('-'); x = -x; }
    if (max_decimal < 0) max_decimal = 0;

    // Add half a unit of the last printed place before splitting the value,
    // so a carry propagates naturally into the integer part.
    lf half_unit = 0.5;
    for (int i = 0; i < max_decimal; i++) half_unit *= 0.1;
    x += half_unit;

    ll int_part = (ll)x;
    write_d(int_part);

    if (max_decimal == 0) return;
    _pc('.');

    lf frac_part = x - (lf)int_part;
    for (int i = 0; i < max_decimal; i++) {
        frac_part *= 10.0;
        int digit = (int)frac_part;
        if (digit > 9) digit = 9; // guard against floating-point drift
        _pc((char)('0' + digit));
        frac_part -= (lf)digit;
    }
}

inline void write_f(lf x) { write_p(x, 15); } //floating-point fast write: delegates to write_p with 15 fractional digits
// Fast string writer: emits every character of a up to the terminating NUL.
inline void write_s(const string& a) {
    for (const char* cur = a.r1; *cur != '\0'; ++cur) {
        _pc(*cur);
    }
}

// Reverses the characters of s in place.
inline void reverse(string& s) {
    int lo = 0;
    int hi = s.size() - 1;
    // Walk inwards from both ends, exchanging the pair at each step.
    while (lo < hi) {
        const char tmp = s.r1[lo];
        s.r1[lo] = s.r1[hi];
        s.r1[hi] = tmp;
        ++lo;
        --hi;
    }
}

// Returns the sum of the first n elements of a (0 when n <= 0).
// The array is taken as const so read-only data can be summed as well.
long long sum_d(const long long a[], int n) {
    long long total = 0;
    for (int i = 0; i < n; ++i) total += a[i];
    return total;
}

// Returns the sum of the first n elements of a (0 when n <= 0).
// The array is taken as const so read-only data can be summed as well.
double sum_f(const double a[], int n) {
    double total = 0;
    for (int i = 0; i < n; ++i) total += a[i];
    return total;
}

// Square root of x, forwarded to the compiler builtin.
// NOTE(review): this has the same name and signature as the ::sqrt that
// <cmath> usually declares in the global namespace; whether the redefinition
// is accepted presumably depends on the toolchain - confirm on the target
// compiler.
inline lf sqrt(lf x) {
    return __builtin_sqrt(x);
}

// Integer power a^b by binary exponentiation.
// For b < 0 the result is the exact value truncated to an integer: 1 or -1
// when |a| == 1, otherwise 0. Results that do not fit wrap around modulo 2^64.
long long pow_d(long long a, long long b) {
    if (b < 0) {
        if (a == 1) return 1;
        if (a == -1) return (b & 1) ? -1 : 1;
        return 0;
    }

    // Multiply in unsigned arithmetic: the final squaring of the base is
    // never used but would otherwise overflow a signed integer even when the
    // result itself fits (e.g. 10^18).
    unsigned long long base = (unsigned long long)a;
    unsigned long long res = 1;
    while (b > 0) {
        if (b & 1) res *= base;
        base *= base;
        b >>= 1;
    }
    return (long long)res;
}

// Floating-point power a^b for an integer exponent, by binary exponentiation.
// A negative exponent yields the reciprocal of a^|b|. Special cases:
// anything^0 is 1, 0^b is 0, 1^b is 1, (-1)^b is +/-1 by the parity of b.
double pow_f(double a, long long b) {
    if (b == 0) return 1.0;
    if (a == 0.0) return 0.0;
    if (a == 1.0) return 1.0;
    if (a == -1.0) return (b & 1) ? -1.0 : 1.0;

    const bool neg = b < 0;
    // Take |b| in unsigned arithmetic; negating the most negative exponent
    // as a signed integer would overflow.
    unsigned long long e = neg ? 0ULL - (unsigned long long)b
                               : (unsigned long long)b;

    double res = 1.0;
    double base = a;
    while (e != 0) {
        if (e & 1) res *= base;
        base *= base;
        e >>= 1;
    }
    return neg ? 1.0 / res : res;
}

// Greatest common divisor of a and b, always non-negative; gcd(0, 0) is 0.
inline long long gcd(long long a, long long b) {
    // Run Euclid's algorithm on the unsigned magnitudes: this keeps the
    // result non-negative for negative inputs and avoids the signed
    // remainder LLONG_MIN % -1, which overflows.
    unsigned long long x = a < 0 ? 0ULL - (unsigned long long)a : (unsigned long long)a;
    unsigned long long y = b < 0 ? 0ULL - (unsigned long long)b : (unsigned long long)b;
    while (y != 0) {
        const unsigned long long r = x % y;
        x = y;
        y = r;
    }
    return (long long)x;
}

// Least common multiple of a and b, always non-negative; 0 if either is 0.
inline ll lcm(ll a, ll b) {
    if (a == 0 || b == 0) return 0;
    // Divide before multiplying to keep the intermediate value small.
    ll m = (a / gcd(a, b)) * b;
    // Normalise the sign here so the result does not depend on which
    // argument is negative or on the sign gcd() happens to return.
    return m < 0 ? -m : m;
}

int main() { //program entry point
    
    _flush(); // write out whatever _pc() has buffered before the program exits
}

💡 适用场景与警告
适用：ACM/ICPC、Codeforces、LeetCode 等对时间限制极其严格的算法竞赛。
不适用：
需要严格符合 IEEE 754 标准的科学计算（-Ofast / -ffast-math 会放宽浮点语义，可能导致结果出现微小的精度差异）。
生产环境服务器代码（移除了安全检查，若输入恶意超长数据可能导致栈溢出崩溃）。
跨平台兼容性要求极高的项目（依赖 GCC/Clang 特有指令和 x86 架构）。


再次重复：
此代码在ACGO的编译器中毫无用处！！！
编译命令：
g++ -O3 -march=native -ffast-math -o main main.cpp

再打个广告，我们团队现在正在招人，想加入的点我

看完的还请点个赞吧！！！ ACGO_啊啊啊