我写了第二个测试用例。我不得不添加一个单独的答案,因为它太长了,无法放入包含示例输出的第一个答案。
首先,这里是
tracer.c:
#include <unistd.h>#include <stdlib.h>#include <sys/types.h>#include <sys/ptrace.h>#include <sys/prctl.h>#include <sys/wait.h>#include <sys/user.h>#include <dirent.h>#include <string.h>#include <signal.h>#include <errno.h>#include <stdio.h>#ifndef SINGLESTEPS#define SINGLESTEPS 10#endifsize_t get_tids(pid_t **const listptr, size_t *const sizeptr, const pid_t pid){ char dirname[64]; DIR *dir; pid_t *list; size_t size, used = 0; if (!listptr || !sizeptr || pid < (pid_t)1) { errno = EINVAL; return (size_t)0; } if (*sizeptr > 0) { list = *listptr; size = *sizeptr; } else { list = *listptr = NULL; size = *sizeptr = 0; } if (snprintf(dirname, sizeof dirname, "/proc/%d/task/", (int)pid) >= (int)sizeof dirname) { errno = ENOTSUP; return (size_t)0; } dir = opendir(dirname); if (!dir) { errno = ESRCH; return (size_t)0; } while (1) { struct dirent *ent; int value; chardummy; errno = 0; ent = readdir(dir); if (!ent) break; if (sscanf(ent->d_name, "%d%c", &value, &dummy) != 1) continue; if (value < 1) continue; if (used >= size) { size = (used | 127) + 128; list = realloc(list, size * sizeof list[0]); if (!list) { closedir(dir); errno = ENOMEM; return (size_t)0; } *listptr = list; *sizeptr = size; } list[used++] = (pid_t)value; } if (errno) { const int saved_errno = errno; closedir(dir); errno = saved_errno; return (size_t)0; } if (closedir(dir)) { errno = EIO; return (size_t)0; } if (used < 1) { errno = ESRCH; return (size_t)0; } if (used >= size) { size = used + 1; list = realloc(list, size * sizeof list[0]); if (!list) { errno = ENOMEM; return (size_t)0; } *listptr = list; *sizeptr = size; } list[used] = (pid_t)0; errno = 0; return used;}static int wait_process(const pid_t pid, int *const statusptr){ int status; pid_t p; do { status = 0; p = waitpid(pid, &status, WUNTRACED | WCONTINUED); } while (p == (pid_t)-1 && errno == EINTR); if (p != pid) return errno = ESRCH; if (statusptr) *statusptr = status; return errno = 0;}static int continue_process(const pid_t pid, int *const 
statusptr){ int status; pid_t p; do { if (kill(pid, SIGCONT) == -1) return errno = ESRCH; do { status = 0; p = waitpid(pid, &status, WUNTRACED | WCONTINUED); } while (p == (pid_t)-1 && errno == EINTR); if (p != pid) return errno = ESRCH; } while (WIFSTOPPED(status)); if (statusptr) *statusptr = status; return errno = 0;}void show_registers(FILE *const out, pid_t tid, const char *const note){ struct user_regs_struct regs; long r; do { r = ptrace(PTRACE_GETREGS, tid, ®s, ®s); } while (r == -1L && errno == ESRCH); if (r == -1L) return;#if (defined(__x86_64__) || defined(__i386__)) && __WORDSIZE == 64 if (note && *note) fprintf(out, "Task %d: RIP=0x%016lx, RSP=0x%016lx. %sn", (int)tid, regs.rip, regs.rsp, note); else fprintf(out, "Task %d: RIP=0x%016lx, RSP=0x%016lx.n", (int)tid, regs.rip, regs.rsp);#elif (defined(__x86_64__) || defined(__i386__)) && __WORDSIZE == 32 if (note && *note) fprintf(out, "Task %d: EIP=0x%08lx, ESP=0x%08lx. %sn", (int)tid, regs.eip, regs.esp, note); else fprintf(out, "Task %d: EIP=0x%08lx, ESP=0x%08lx.n", (int)tid, regs.eip, regs.esp);#endif}int main(int argc, char *argv[]){ pid_t *tid = 0; size_t tids = 0; size_t tids_max = 0; size_t t, s; long r; pid_t child; int status; if (argc < 2 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) { fprintf(stderr, "n"); fprintf(stderr, "Usage: %s [ -h | --help ]n", argv[0]); fprintf(stderr, " %s COMMAND [ ARGS ... 
]n", argv[0]); fprintf(stderr, "n"); fprintf(stderr, "This program executes COMMAND in a child process,n"); fprintf(stderr, "and waits for it to stop (via a SIGSTOP signal).n"); fprintf(stderr, "When that occurs, the register state of each threadn"); fprintf(stderr, "is dumped to standard output, then the child processn"); fprintf(stderr, "is sent a SIGCONT signal.n"); fprintf(stderr, "n"); return 1; } child = fork(); if (child == (pid_t)-1) { fprintf(stderr, "fork() failed: %s.n", strerror(errno)); return 1; } if (!child) { prctl(PR_SET_DUMPABLE, (long)1); prctl(PR_SET_PTRACER, (long)getppid()); fflush(stdout); fflush(stderr); execvp(argv[1], argv + 1); fprintf(stderr, "%s: %s.n", argv[1], strerror(errno)); return 127; } fprintf(stderr, "Tracer: Waiting for child (pid %d) events.nn", (int)child); fflush(stderr); while (1) { if (wait_process(child, &status)) break; if (WIFEXITED(status) || WIFSIGNALED(status)) { errno = 0; break; } if (!WIFSTOPPED(status)) continue; tids = get_tids(&tid, &tids_max, child); if (!tids) break; printf("Process %d has %d tasks,", (int)child, (int)tids); fflush(stdout); for (t = 0; t < tids; t++) { do { r = ptrace(PTRACE_ATTACH, tid[t], (void *)0, (void *)0); } while (r == -1L && (errno == EBUSY || errno == EFAULT || errno == ESRCH)); if (r == -1L) { const int saved_errno = errno; while (t-->0) do { r = ptrace(PTRACE_DETACH, tid[t], (void *)0, (void *)0); } while (r == -1L && (errno == EBUSY || errno == EFAULT || errno == ESRCH)); tids = 0; errno = saved_errno; break; } } if (!tids) { const int saved_errno = errno; if (continue_process(child, &status)) break; printf(" failed to attach (%s).n", strerror(saved_errno)); fflush(stdout); if (WIFConTINUED(status)) continue; errno = 0; break; } printf(" attached to all.nn"); fflush(stdout); for (t = 0; t < tids; t++) show_registers(stdout, tid[t], ""); printf("n"); fflush(stdout); for (s = 0; s < SINGLESTEPS; s++) { do { r = ptrace(PTRACE_SINGLESTEP, tid[tids-1], (void *)0, (void *)0); } while 
(r == -1L && errno == ESRCH); if (!r) { for (t = 0; t < tids - 1; t++) show_registers(stdout, tid[t], ""); show_registers(stdout, tid[tids-1], "Advanced by one step."); printf("n"); fflush(stdout); } else { fprintf(stderr, "Single-step failed: %s.n", strerror(errno)); fflush(stderr); } } for (t = 0; t < tids; t++) do { r = ptrace(PTRACE_DETACH, tid[t], (void *)0, (void *)0); } while (r == -1 && (errno == EBUSY || errno == EFAULT || errno == ESRCH)); tids = 0; if (continue_process(child, &status)) break; if (WIFConTINUED(status)) { printf("Detached. Waiting for new stop events.nn"); fflush(stdout); continue; } errno = 0; break; } if (errno) fprintf(stderr, "Tracer: Child lost (%s)n", strerror(errno)); else if (WIFEXITED(status)) fprintf(stderr, "Tracer: Child exited (%d)n", WEXITSTATUS(status)); else if (WIFSIGNALED(status)) fprintf(stderr, "Tracer: Child died from signal %dn", WTERMSIG(status)); else fprintf(stderr, "Tracer: Child vanishedn"); fflush(stderr); return status;}
tracer.c 执行指定的命令,并等待该命令收到 SIGSTOP 信号。(tracer.c 本身不会发送该信号;您可以让被跟踪的程序自行停止,或从外部向它发送信号。)
命令停止后,
tracer.c将ptrace附加到每个线程,并以固定数量的步长(
SINGLESTEPS编译时常数)单步执行其中一个线程,从而显示每个线程的相关寄存器状态。
之后,它将与命令分离,并向其发送
SIGCONT信号以使其继续正常运行。
这是一个简单的测试程序
worker.c,我用于测试:
#include <pthread.h>#include <signal.h>#include <string.h>#include <errno.h>#include <stdio.h>#ifndef THREADS#define THREADS 2#endifvolatile sig_atomic_t done = 0;void catch_done(int signum){ done = signum;}int install_done(const int signum){ struct sigaction act; sigemptyset(&act.sa_mask); act.sa_handler = catch_done; act.sa_flags = 0; if (sigaction(signum, &act, NULL)) return errno; else return 0;}void *worker(void *data){ volatile unsigned long *const counter = data; while (!done) __sync_add_and_fetch(counter, 1UL); return (void *)(unsigned long)__sync_or_and_fetch(counter, 0UL);}int main(void){ unsigned long counter = 0UL; pthread_t thread[THREADS]; pthread_attr_t attrs; size_t i; if (install_done(SIGHUP) || install_done(SIGTERM) || install_done(SIGUSR1)) { fprintf(stderr, "Worker: Cannot install signal handlers: %s.n", strerror(errno)); return 1; } pthread_attr_init(&attrs); pthread_attr_setstacksize(&attrs, 65536); for (i = 0; i < THREADS; i++) if (pthread_create(&thread[i], &attrs, worker, &counter)) { done = 1; fprintf(stderr, "Worker: Cannot create thread: %s.n", strerror(errno)); return 1; } pthread_attr_destroy(&attrs); worker(&counter); for (i = 0; i < THREADS; i++) pthread_join(thread[i], NULL); return 0;}
例如,使用以下命令编译两者:
gcc -W -Wall -O3 -fomit-frame-pointer worker.c -pthread -o worker
gcc -W -Wall -O3 -fomit-frame-pointer tracer.c -o tracer
并在单独的终端或后台运行,例如
./tracer ./worker &
跟踪器显示工作者的PID:
Tracer: Waiting for child (pid 24275) events.
此时,子进程正在正常运行。当您向子进程发送 SIGSTOP 时,动作开始。跟踪器检测到它,进行所需的跟踪,然后分离并让子进程正常继续:
kill -STOP 24275

Process 24275 has 3 tasks, attached to all.

Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a5d, RSP=0x00007f399cfa6ee8.

Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a5d, RSP=0x00007f399cfa6ee8. Advanced by one step.

Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a63, RSP=0x00007f399cfa6ee8. Advanced by one step.

Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a65, RSP=0x00007f399cfa6ee8. Advanced by one step.

Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a58, RSP=0x00007f399cfa6ee8. Advanced by one step.

Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a5d, RSP=0x00007f399cfa6ee8. Advanced by one step.

Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a63, RSP=0x00007f399cfa6ee8. Advanced by one step.

Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a65, RSP=0x00007f399cfa6ee8. Advanced by one step.

Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a58, RSP=0x00007f399cfa6ee8. Advanced by one step.

Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a5d, RSP=0x00007f399cfa6ee8. Advanced by one step.

Task 24275: RIP=0x0000000000400a5d, RSP=0x00007fff6895c428.
Task 24276: RIP=0x0000000000400a5d, RSP=0x00007f399cfb7ee8.
Task 24277: RIP=0x0000000000400a63, RSP=0x00007f399cfa6ee8. Advanced by one step.

Detached. Waiting for new stop events.
您可以根据需要多次重复上述操作。请注意,我选择 SIGSTOP 信号作为触发器,因为这样 tracer.c 还可以用作按请求生成复杂的多线程核心转储的基础(因为多线程进程只需向自身发送 SIGSTOP 即可触发它)。
在上面的示例中,所有线程都在其中自旋的 worker() 函数的反汇编如下:
0x400a50: eb 0b                 jmp    0x400a5d
0x400a52: 66 0f 1f 44 00 00     nopw   0x0(%rax,%rax,1)
0x400a58: f0 48 83 07 01        lock addq $0x1,(%rdi)      = fourth step
0x400a5d: 8b 05 00 00 00 00     mov    0x0(%rip),%eax      = first step
0x400a63: 85 c0                 test   %eax,%eax           = second step
0x400a65: 74 f1                 je     0x400a58            = third step
0x400a67: 48 8b 07              mov    (%rdi),%rax
0x400a6a: 48 89 c2              mov    %rax,%rdx
0x400a6d: f0 48 0f b1 07        lock cmpxchg %rax,(%rdi)
0x400a72: 75 f6                 jne    0x400a6a
0x400a74: 48 89 d0              mov    %rdx,%rax
0x400a77: c3                    retq
现在,此测试程序仅演示了如何停止进程、附加到其所有线程、让其中一个线程单步执行所需数量的指令,然后让所有线程正常继续;它尚不能证明同样的方法也适用于让特定线程正常继续(通过 PTRACE_CONT)。但是,我在下面描述的细节向我表明,相同的方法应该同样适用于 PTRACE_CONT。
我在编写上述测试程序时遇到的主要问题(或者说意外)是必须使用如下循环:
long r;
do {
    r = ptrace(PTRACE_cmd, tid, ...);
} while (r == -1L && (errno == EBUSY || errno == EFAULT || errno == ESRCH));
特别是对于 ESRCH 这种情况(其他情况只是因为 ptrace 手册页的描述才添加的)。
您会看到,大多数 ptrace 命令仅在任务停止时才被允许。但是,当任务仍在完成例如单步命令时,它并未停止。因此,使用上述循环——可以再添加毫秒级的 nanosleep 或类似操作以避免浪费 CPU——可以确保在尝试发出新的 ptrace 命令之前,前一个命令已经完成(因此任务已停止)。
Kerrek SB,我相信您在测试程序中遇到的麻烦至少有一部分是由这个问题引起的?对我个人而言,这真是一个“D’oh!”(恍然大悟)的时刻:意识到这当然是必要的,因为 ptrace 跟踪本质上是异步的,而不是同步的。
(这种异步性也是我上面提到的 SIGCONT-PTRACE_CONT 相互作用的原因。我相信只要使用上面所示的循环妥善处理,这种相互作用就不再是问题——而且实际上是完全可以理解的。)
针对此答案的评论补充如下:
Linux 内核在 task_struct 结构中使用一组任务状态标志(定义请参阅 include/linux/sched.h)来跟踪每个任务的状态。ptrace() 面向用户空间的一侧在 kernel/ptrace.c 中定义。
当 PTRACE_SINGLESTEP 或 PTRACE_CONT 被调用时,kernel/ptrace.c:ptrace_continue() 处理大部分细节。它最后通过调用 wake_up_state(child, __TASK_TRACED)(即 kernel/sched/core.c::try_to_wake_up(child, __TASK_TRACED, 0))完成。
当进程通过 SIGSTOP 信号停止时,所有任务都将被停止,并最终处于“已停止,未跟踪”状态。
附加到每一个任务(通过 PTRACE_ATTACH 或 PTRACE_SEIZE,请参阅 kernel/ptrace.c:ptrace_attach())会修改任务状态。然而,ptrace 状态位(参见 include/linux/ptrace.h:PT_ 常量)与任务可运行状态位(参见 include/linux/sched.h:TASK_ 常量)是相互独立的。
附加到任务并向进程发送 SIGCONT 信号后,停止状态不会立即被修改(我相信),因为任务同时也在被跟踪。执行 PTRACE_SINGLESTEP 或 PTRACE_CONT 最终会进入 kernel/sched/core.c::try_to_wake_up(child, __TASK_TRACED, 0),这将更新任务状态,并将任务移至运行队列。
现在,我尚未找到对应代码路径的复杂部分是:下次调度该任务时,内核如何更新任务状态。我的测试表明,使用单步执行(这又是另一个任务状态标志)时,只有任务状态被更新,并且单步标志被清除。看来 PTRACE_CONT 不那么可靠;我相信这是因为单步标志会“强制”任务状态发生改变。也许在继续信号的传递与状态改变之间存在“竞争条件”?
(进一步编辑:内核开发人员肯定期望调用 wait(),例如参见此线程。)
In other words, after noticing that the process has stopped (note that you can
use /proc/PID/stat or /proc/PID/status
/* Illustrative fragment: attach to every task of a stopped process, send it
 * SIGCONT, wait for the continue event, single-step every task once, then
 * read every task's registers. The empty error branches are left as TODOs,
 * as in the original. */
pid_t  pid, p;   /* process owning the tasks */
pid_t *tid;      /* task ID array (was the non-existent type tid_t) */
size_t tids;     /* number of entries in tid[] */
long   result;
int    status;
size_t i;

/* Attach to each task, yielding the CPU between retries. */
for (i = 0; i < tids; i++) {
    while (1) {
        result = ptrace(PTRACE_ATTACH, tid[i], (void *)0, (void *)0);
        if (result == -1L && (errno == ESRCH || errno == EBUSY || errno == EFAULT || errno == EIO)) {
            sched_yield();
            continue;
        }
        break;
    }
    if (result == -1L) {
        /* TODO: handle the attach failure (errno is set). */
    }
}

/* Send SIGCONT to the process. */
if (kill(pid, SIGCONT)) {
    /* TODO: handle the kill() failure (errno is set). */
}

/* Wait until the process reports that it has been continued. */
while (1) {
    errno = 0;
    status = 0;
    p = waitpid(pid, &status, WCONTINUED);
    if (p == (pid_t)-1) {
        if (errno == EINTR)
            continue;
        else
            break;
    } else
    if (p != pid) {
        errno = ESRCH;
        break;
    } else
    if (WIFCONTINUED(status)) {
        errno = 0;
        break;
    }
}
if (errno) {
    /* TODO: handle the wait failure (errno is set). */
}

/* Single-step each task once, retrying while ptrace() reports ESRCH. */
for (i = 0; i < tids; i++) {
    while (1) {
        result = ptrace(PTRACE_SINGLESTEP, tid[i], (void *)0, (void *)0);
        if (result == -1L && errno == ESRCH) {
            sched_yield();
            continue;
        }
        break;
    }
    if (result == -1L) {
        /* TODO: handle the single-step failure (errno is set). */
    }
}

/* Read the registers of each task, retrying on transient errors. */
for (i = 0; i < tids; i++) {
    struct user_regs_struct regs;

    while (1) {
        result = ptrace(PTRACE_GETREGS, tid[i], &regs, &regs);
        if (result == -1L && (errno == ESRCH || errno == EBUSY || errno == EFAULT || errno == EIO)) {
            sched_yield();
            continue;
        }
        break;
    }
    if (result == -1L) {
        /* TODO: handle the register read failure (errno is set). */
    }
}
if the process is not a child, and
not yet attached to), I believe the procedure shown in the code above is the
most robust one.
After the above, all tasks should be attached and in the expected state, so
that e.g. PTRACE_CONT works without further tricks.
If the behaviour changes in future kernels – I do believe the interaction
between the STOP/CONT signals and ptracing is something that might change; at
least a question to the LKML developers about this behaviour would be
warranted! –, the above procedure will still work robustly. (Erring on the
side of caution, by using a loop to PTRACE_SINGLESTEP a few times, might also
be a good idea.)
The difference to PTRACE_CONT is that if the behaviour changes in the future,
the initial PTRACE_CONT might actually continue the process, causing the
ptrace() calls that follow it to fail. With PTRACE_SINGLESTEP, the process
will stop, allowing further ptrace() calls to succeed.
Questions?
Questions?
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)