Linux单实例程序使用文件锁时子进程残留导致锁无法释放的解决方案咨询

阿华AIGC实验室

2026-4-28

我来给你几个靠谱的解决方案，解决子进程残留导致锁文件无法释放的问题——核心根源其实是system()创建的子进程会继承主进程的锁文件描述符，主进程被kill后子进程还攥着这个fd，导致flock锁一直被持有。

1. 用fork()+exec替换system()，主动关闭子进程的锁文件描述符

system()会让子进程继承父进程所有打开的文件描述符，这是问题的关键。换成fork()+exec的方式，就能在子进程里主动关闭锁文件的fd，断绝它和锁的关联：

// Replacement for the original system(command) call.
pid_t pid = fork();
if (pid == 0) {
    // Child: drop the inherited lock descriptor so it no longer keeps the flock alive.
    close(pidFile);
    // Run the command through the shell, matching what system() does.
    execl("/bin/sh", "sh", "-c", command, (char*)NULL);
    // Reached only if exec failed.
    perror("execl failed");
    // Use _exit(), not exit(): the child must not run the parent's atexit
    // handlers or flush stdio buffers it inherited across fork().
    // 127 is the status system() reports when the shell cannot be executed.
    _exit(127);
} else if (pid > 0) {
    // Parent: wait for the child if the business logic requires it.
    int status;
    if (waitpid(pid, &status, 0) == -1) {
        perror("waitpid failed");
    }
} else {
    perror("fork failed");
}

这样锁文件的fd就只有主进程自己持有：主进程被kill后，系统会自动回收它的fd，flock锁也随之释放，不会再被子进程卡住。

2. 给锁文件加上PID校验，处理异常退出场景

万一主进程被kill -9强制终止（这种情况信号处理函数也救不了），或者系统出现异常导致锁文件残留，我们可以在锁文件里写入当前进程的PID，启动时验证这个PID对应的进程是否还活着（注意：flock锁由内核维护，持有该fd的进程全部退出后会自动释放，残留的锁文件本身并不会占用锁；PID校验主要用于在拿不到锁时判断原主进程是否还在运行）：

#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <sys/file.h>
#include <sys/wait.h>
#include <unistd.h>

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>

using namespace std;

// Path of the lock file that enforces single-instance execution.
static const char kLockPath[] = "/var/run/my-app.lock";

// Reads the PID recorded in the lock file.
// Returns the PID, or -1 when the file is empty, unreadable, or does not
// start with a positive decimal number.
static pid_t readRecordedPid(int fd) {
    char buf[32];
    if (lseek(fd, 0, SEEK_SET) == -1) {
        return -1;
    }
    ssize_t len = read(fd, buf, sizeof(buf) - 1);
    if (len <= 0) {
        return -1;
    }
    buf[len] = '\0';

    // strtol instead of atoi: garbage and 0 must not reach kill(), because
    // kill(0, 0) would probe the caller's own process group.
    errno = 0;
    char *end = NULL;
    long value = strtol(buf, &end, 10);
    if (errno != 0 || end == buf || value <= 0) {
        return -1;
    }
    return (pid_t)value;
}

// Replaces the lock file's contents with the calling process's PID.
// Returns true on success; on failure errno describes the problem.
static bool writeOwnPid(int fd) {
    char buf[32];
    int len = snprintf(buf, sizeof(buf), "%ld\n", (long)getpid());
    if (len < 0 || (size_t)len >= sizeof(buf)) {
        errno = EOVERFLOW;
        return false;
    }
    // Rewind as well as truncate: the offset may have been advanced by an
    // earlier read, and writing past offset 0 would leave a hole in the file.
    if (ftruncate(fd, 0) == -1 || lseek(fd, 0, SEEK_SET) == -1) {
        return false;
    }
    ssize_t written = write(fd, buf, (size_t)len);
    if (written == -1) {
        return false;
    }
    if (written != (ssize_t)len) {
        errno = EIO;  // short write
        return false;
    }
    return true;
}

// Acquires the single-instance lock, records this process's PID in the lock
// file, then runs a shell command in a child that does not hold the lock.
// Exits with EXIT_SUCCESS after printing a message when another instance is
// running, and with EXIT_FAILURE when the lock file cannot be opened or
// flock() fails for any reason other than contention.
int main() {
    // Initialization...

    // O_CLOEXEC: the descriptor is closed automatically in any exec'd program
    // (including ones started through system()/popen()), so no child can keep
    // the lock alive after this process dies.
    int pidFile = open(kLockPath, O_CREAT | O_RDWR | O_CLOEXEC, 0666);
    if (pidFile == -1) {
        perror("open lock file failed");
        exit(EXIT_FAILURE);
    }

    if (flock(pidFile, LOCK_EX | LOCK_NB) != 0) {
        if (errno != EWOULDBLOCK) {
            perror("flock failed");
            exit(EXIT_FAILURE);
        }

        // Someone else holds the lock. If the PID recorded in the file no
        // longer exists, the holder is not the original instance, so try once
        // more in case it has released the lock in the meantime. The file is
        // NOT truncated here: its contents are only replaced after the lock
        // has actually been acquired.
        pid_t oldPid = readRecordedPid(pidFile);
        // kill(pid, 0) sends no signal; it only checks whether the process exists.
        bool ownerGone = oldPid > 0 && kill(oldPid, 0) == -1 && errno == ESRCH;
        if (!ownerGone || flock(pidFile, LOCK_EX | LOCK_NB) != 0) {
            cout << "Program is already running!\n";
            exit(EXIT_SUCCESS);
        }
    }

    // Lock acquired: record our PID. The lock itself is what guarantees
    // exclusivity, so a failure here is reported but is not fatal.
    if (!writeOwnPid(pidFile)) {
        perror("write pid to lock file failed");
    }

    // Perform other work or enter the main loop...
    // fork+exec is used here in place of the original system() call.
    const char* command = "your_command_here";
    pid_t pid = fork();
    if (pid == 0) {
        // Child: release its copy of the lock descriptor right away.
        close(pidFile);
        execl("/bin/sh", "sh", "-c", command, (char*)NULL);
        // Reached only if exec failed.
        perror("execl failed");
        // _exit() avoids running the parent's atexit handlers and flushing
        // stdio buffers inherited across fork(); 127 matches system().
        _exit(127);
    } else if (pid > 0) {
        int status;
        // Retry when the wait is interrupted by a signal.
        while (waitpid(pid, &status, 0) == -1) {
            if (errno != EINTR) {
                perror("waitpid failed");
                break;
            }
        }
    } else {
        perror("fork failed");
    }

    return EXIT_SUCCESS;
}

这种方式能处理旧进程异常退出、锁文件残留的情况，避免程序因此无法启动。

3. 捕获终止信号做清理（针对正常终止场景）

如果主进程是被SIGTERM、SIGINT等常规信号终止，可以注册信号处理函数，在退出前主动关闭锁文件（注意：kill -9无法被捕获，所以这只是辅助手段）：

#include <signal.h>

// Descriptor of the lock file; -1 until the file has been opened.
int g_pidFile = -1;

// Handler for termination signals: closes the lock file descriptor (if it
// has been opened) and terminates the process with a success status.
void signalHandler(int sig) {
    (void)sig;  // the same cleanup applies to every signal handled here
    if (g_pidFile != -1) {
        close(g_pidFile);
    }
    // _exit() rather than exit(): exit() runs atexit handlers and flushes
    // stdio, neither of which is async-signal-safe inside a signal handler.
    _exit(EXIT_SUCCESS);
}

// Installs the termination-signal handlers and opens the lock file.
int main() {
    // Register the handlers with sigaction(): unlike signal(), its behavior
    // is specified consistently across platforms.
    struct sigaction action = {};
    action.sa_handler = signalHandler;
    sigemptyset(&action.sa_mask);
    if (sigaction(SIGTERM, &action, NULL) == -1 ||
        sigaction(SIGINT, &action, NULL) == -1) {
        perror("sigaction failed");
    }

    // Open the lock file. O_CLOEXEC keeps the descriptor from leaking into
    // exec'd child programs.
    g_pidFile = open("/var/run/my-app.lock", O_CREAT | O_RDWR | O_CLOEXEC, 0666);
    if (g_pidFile == -1) {
        perror("open lock file failed");
        exit(EXIT_FAILURE);
    }
    // Further logic...
}

本文的提问来源于Stack Exchange，提问作者：DEKKER