无垠之码

深度剖析代码之道


跨平台崩溃报告收集系统-breakpad

Breakpad是一个由Google开发的开源跨平台崩溃报告收集系统,主要作用是在程序崩溃时生成崩溃转储minidump文件,供开发者进行后续分析和调试。

breakpad

上图摘自官方文档。可以看出,开发者在构建系统中将调试信息从可执行文件中分离;分发给用户运行的是一个包含breakpad客户端的可执行文件;crash收集平台则存储原始版本的调试信息以及用户自动上传的minidump文件。

# Get depot_tools, which supplies the `fetch` command used below.
git clone https://chromium.googlesource.com/chromium/tools/depot_tools.git
# Put depot_tools on PATH; replace /path/to with the directory you cloned into.
export PATH=/path/to/depot_tools:$PATH
# Check out the Breakpad sources and enter the source directory.
fetch breakpad && cd src
# Configure, build, run the test suite, then install.
./configure && make && make check && make install

使用Breakpad捕获崩溃时,有两种常见的集成模式:进程内(崩溃处理逻辑运行在发生崩溃的同一进程中)和进程外(崩溃捕获由另一个监控进程完成,发生崩溃的进程将信号或信息传递给守护进程,由后者写minidump)。下面通过两份简短的代码片段,介绍breakpad的使用方法和这两种集成模式。

1.In-process崩溃收集


示例函数crash对空地址进行赋值,导致进程崩溃(coredump);回调函数dump_callback会自动将minidump文件上传至预设服务器对应的版本目录,供开发者分析崩溃原因。

#include "client/linux/handler/exception_handler.h"
#include "common/linux/http_upload.h"

using google_breakpad::ExceptionHandler;
using google_breakpad::HTTPUpload;
using google_breakpad::MinidumpDescriptor;

static bool upload_minidump(const MinidumpDescriptor& descriptor) {
    std::map<string, string> parameters;
    std::map<string, string> files = {{"upload_file_minidump", descriptor.path()}};
    std::string url = "http://192.168.5.170:1127/post";

    parameters["prod"] = "test";
    parameters["ver"] = "0.1.3";

    std::string response, error;
    bool success = HTTPUpload::SendRequest(url, parameters, files, "", "", "", &response, NULL, &error);
    return success;
}

// Minidump callback handed to the ExceptionHandler: prints the dump path and, when
// `succeeded` is true, uploads the dump. The upload result does not affect the
// return value; `succeeded` is returned unchanged.
// NOTE(review): this is invoked from the crash signal handler, where printf and
// network I/O are not async-signal-safe (man 7 signal-safety).
static bool dump_callback(const MinidumpDescriptor& descriptor, void* context, bool succeeded) {
    printf("Dump path: %s\n", descriptor.path());
    if (!succeeded) {
        return false;
    }
    upload_minidump(descriptor);
    return true;
}

// Deliberately crashes the process by storing through a null pointer, giving the
// installed exception handler something to capture. The pointee is volatile,
// presumably so the compiler does not optimize the store away.
void crash() {
  volatile int *a = (int *)(NULL);
  *a = 1;
}

int main(int argc, char *argv[]) {
  MinidumpDescriptor descriptor("/tmp/test");
  ExceptionHandler eh(descriptor, NULL, dump_callback, NULL, true, -1);
  crash();
  return 0;
}

注意: dump_callback运行在信号处理函数的上下文中,其中调用的函数需满足async-signal-safe要求(参见man 7 signal-safety)。printf不在此列,upload_minidump发起的网络请求同样可能不安全;推荐的做法是在回调中只做异步通知,由单独的线程或进程完成上传。

下面的命令行展示了如何编译breakpad程序、导出符号信息,以及如何分析minidump文件并将其转换为coredump文件:

# Build the example with debug info (-g) and link it against the Breakpad client library.
g++ -g main.cpp -o test -I/usr/local/include/breakpad -lbreakpad_client -lpthread -ldl
# Export the symbols of the binary into a text .sym file.
dump_syms ./test > test.sym
# The first line of the .sym file carries the module id used as the directory name below.
head -n 1 test.sym # MODULE Linux x86_64 73E8BB4A39577BA8E1D961E5BC79884E0 test
# Store the symbols under ./symbols/<module name>/<module id>/.
mkdir -p ./symbols/test/73E8BB4A39577BA8E1D961E5BC79884E0
mv test.sym ./symbols/test/73E8BB4A39577BA8E1D961E5BC79884E0
./test # run the test program
# Print the stack from a minidump using the symbols above. The .dmp names here and on
# the next line are examples; use the file name produced by your own run.
minidump_stackwalk 536cf784-f4d3-42d1-6d6eee8d-aec67373.dmp ./symbols
minidump-2-core 9c6af92c-29ba-4043-979cee99-de95fce3.dmp -o ./test.core # convert the .dmp file into a core dump

2.Out-process崩溃收集


进程外的崩溃收集略微麻烦:用户进程先调用CreateReportChannel创建通信通道;发生崩溃时,filter_callback会fork出一个子进程,并等待子进程启动crash server来处理此次crash事件;子进程通过exec将自身替换为minidump可执行文件,并在其中启动crash server。父子进程之间通过pipe通信。

#include <fcntl.h>
#include <linux/limits.h>
#include <poll.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#include <iostream>
#include <string>
#include "client/linux/crash_generation/crash_generation_server.h"
#include "client/linux/handler/exception_handler.h"
#include "common/linux/eintr_wrapper.h"
#include "common/linux/linux_libc_support.h"
#include "third_party/lss/linux_syscall_support.h"

using google_breakpad::CrashGenerationServer;
using google_breakpad::ExceptionHandler;
using google_breakpad::MinidumpDescriptor;
using std::string;
using std::to_string;

// Startup handshake message sent over the pipe to the crashing process.
// The reader treats status == 0 as success; the exec-failure path writes -1.
struct exc_dump_resp {
    int status;
};
// Context passed to filter_callback through the ExceptionHandler's callback context.
struct self_exc_ctx {
    // Path of the helper executable that the forked child execs.
    string exc_elf_path;
    // Server end of the report channel; its number is passed to the helper as argv[2].
    int server_fd;
    // Client end of the report channel; handed to the ExceptionHandler.
    int client_fd;
};
// Deliberately crashes the process by storing through a null pointer, giving the
// installed exception handler something to capture. The pointee is volatile,
// presumably so the compiler does not optimize the store away.
void crash() {
    volatile int* a = (int*)(NULL);
    *a = 1;
}

/*
 * fork to exec exception dump
 * argv[0]: elf
 * argv[1]: pipe fd
 * argv[2]: server fd
 */
void self_exec_exc_dump(int pipe, struct self_exc_ctx* ctx) {
    char* pipe_string = strdup(to_string(pipe).c_str());
    char* server_string = strdup(to_string(ctx->server_fd).c_str());
    char* exc_elf_path = strdup(ctx->exc_elf_path.c_str());
    char* argv[] = {exc_elf_path, pipe_string, server_string, NULL};
    execv(exc_elf_path, argv);
    // perhaps, never reach
    printf("%s: execv failed: %s \r\n", __FUNCTION__, strerror(errno));
    struct exc_dump_resp exc_dump_resp = {.status = -1};
    write(pipe, &exc_dump_resp, sizeof(exc_dump_resp));
    close(pipe);
    exit(-1);
};

static bool wait_for_exc_dump_startup(int fd) {
    struct pollfd pfd = {0, 0, 0};
    pfd.fd = fd;
    pfd.events = POLLIN | POLLERR;
    bool result = false;
    bool recv_msg = false;
    struct exc_dump_resp exc_dump_resp;
    do {
        switch (poll(&pfd, 1, 5000)) {
            case -1:
                if (errno == EINTR) continue;
                printf("%s: poll failed: %s \r\n", __FUNCTION__, strerror(errno));
                goto done;
            case 0:  // timeout reach
                continue;
            default:
                recv_msg = true;
                break;
        }
    } while (!recv_msg);
    if ((pfd.revents & POLLIN) != POLLIN) goto done;
    read(fd, &exc_dump_resp, sizeof(exc_dump_resp));
    result = exc_dump_resp.status == 0 ? true : false;
done:
    return result;
}

// Filter callback registered with the ExceptionHandler. `context` is the
// self_exc_ctx supplied at registration.
// Forks a child that execs the crash-server helper, then waits for the helper's
// startup handshake on a pipe.
// Returns true when the helper reported a successful start, false otherwise.
static bool filter_callback(void* context) {
    struct self_exc_ctx* ctx = reinterpret_cast<struct self_exc_ctx*>(context);

    // The helper must inherit server_fd across execv, so clear close-on-exec on it.
    const int fd_flags = fcntl(ctx->server_fd, F_GETFD, 0);
    if (fd_flags < 0 || fcntl(ctx->server_fd, F_SETFD, fd_flags & ~FD_CLOEXEC) < 0) {
        printf("%s: fcntl failed: %s \r\n", __FUNCTION__, strerror(errno));
        return false;
    }

    int pipe_fd[2];
    if (pipe(pipe_fd) < 0) {
        printf("%s: create pipe failed: %s \r\n", __FUNCTION__, strerror(errno));
        return false;
    }

    const pid_t pid = fork();
    if (pid < 0) {
        printf("%s: fork failed: %s \r\n", __FUNCTION__, strerror(errno));
        close(pipe_fd[0]);
        close(pipe_fd[1]);
        return false;
    }
    if (pid == 0) {
        // Child: only the write end is needed.
        close(pipe_fd[0]);
        self_exec_exc_dump(pipe_fd[1], ctx);
        // self_exec_exc_dump execs or exits; make sure the child can never fall
        // through into the parent's code path.
        _exit(EXIT_FAILURE);
    }

    // Parent: drop our copy of the write end so that a child which dies without
    // answering shows up as a hang-up on the read end instead of an endless wait.
    close(pipe_fd[1]);
    const bool started = wait_for_exc_dump_startup(pipe_fd[0]);
    close(pipe_fd[0]);
    return started;
}

// Builds the path of the helper binary: the directory of the current executable
// (resolved through /proc/self/exe) followed by "minidump".
// Returns an empty string when the link cannot be read.
static string get_self_exc_elf_path() {
    // Zero-filled and read with size - 1, so the buffer always stays NUL-terminated.
    char exe_link[PATH_MAX] = {0};
    if (readlink("/proc/self/exe", exe_link, sizeof(exe_link) - 1) < 0) {
        printf("%s: readlink failed: %s \r\n", __FUNCTION__, strerror(errno));
        return string();
    }

    string helper_path(exe_link);
    // Keep everything up to and including the last '/', dropping the executable's name.
    const string::size_type name_start = helper_path.rfind('/') + 1;
    helper_path.erase(name_start);
    helper_path += "minidump";
    return helper_path;
}

int main(int argc, char* argv[]) {
    struct self_exc_ctx* exc_ctx = new struct self_exc_ctx;
    exc_ctx->exc_elf_path = get_self_exc_elf_path();
    MinidumpDescriptor dump_desp(".");
    if (!CrashGenerationServer::CreateReportChannel(&exc_ctx->server_fd, &exc_ctx->client_fd)) {
        printf("crasher: CreateReportChannel failed! \r\n");
        return 1;
    }
    ExceptionHandler eh(dump_desp, filter_callback, NULL, exc_ctx, true, exc_ctx->client_fd);
    crash();
    delete exc_ctx;
    return 0;
}
#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
#include "client/linux/crash_generation/client_info.h"
#include "client/linux/crash_generation/crash_generation_server.h"

using google_breakpad::ClientInfo;
using google_breakpad::CrashGenerationServer;
using std::string;

// Mutex/condition pair: main() sleeps on `condition` and dump_reqs() signals it.
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t condition = PTHREAD_COND_INITIALIZER;
// Startup handshake message written to the pipe whose fd arrives in argv[1];
// main() sends status 0 once the crash server has started.
// NOTE(review): presumably this must keep the same layout as the struct read on the
// other end of the pipe; confirm against the crashing program.
struct exc_dump_resp {
    int status;
};

// Dump callback registered with the CrashGenerationServer: logs the client's pid and
// the dump file path, then wakes main().
void dump_reqs(void* context, const ClientInfo* client_info, const string* file_path) {
    printf("%s: dump client %d, %s\r\n", __FUNCTION__, client_info->pid(), file_path->c_str());

    // Signal under the same mutex that main() passes to pthread_cond_wait.
    pthread_mutex_lock(&mutex);
    pthread_cond_signal(&condition);
    pthread_mutex_unlock(&mutex);
}

void exit_callback(void* context, const ClientInfo* client_info) {
    pid_t pid = client_info->pid();
    printf("%s: dump client %d\r\n", __FUNCTION__, pid);
};

int main(int argc, char** argv) {
    const int required_args = 3;
    if (argc < required_args) {
        fprintf(stderr, "usage: handler: <pipe fd> <server fd>\n");
        return -1;
    }
    int pipe = atoi(argv[1]);
    int server_fd = atoi(argv[2]);
    string dump_path = "/tmp";
    pthread_mutex_lock(&mutex);
    CrashGenerationServer crash_server(server_fd, dump_reqs, NULL, exit_callback, NULL, true, &dump_path);
    if (!crash_server.Start()) {
        fprintf(stderr, "fail to start crash server");
        return -1;
    }
    struct exc_dump_resp exc_dump_resp = {.status = 0};
    write(pipe, &exc_dump_resp, sizeof(exc_dump_resp));
    pthread_cond_wait(&condition, &mutex);
    pthread_mutex_unlock(&mutex);
    crash_server.Stop();
    return 0;
}

3.参考文献

  1. https://chromium.googlesource.com/breakpad/breakpad/+/master/docs/getting_started_with_breakpad.md
  2. https://github.com/google/breakpad
  3. https://github.com/electron/mini-breakpad-server
comments powered by Disqus