Error[8]: Undefined offset: 126, File: /www/wwwroot/outofmemory.cn/tmp/plugin_ss_superseo_model_superseo.php, Line: 114
File: /www/wwwroot/outofmemory.cn/tmp/plugin_ss_superseo_model_superseo.php, Line: 473, decode(

文章托管在gitee上 Android Notes , 同步csdn

Trace生成流程

从第2篇可知,Watchdog的Trace生成过程如下:

下面看判断条件

} else if (waitState == WAITED_HALF) {
    if (!waitedHalf) {
        Slog.i(TAG, "WAITED_HALF");
        waitedHalf = true;  // 设置标志,防止反复进入
        // We've waited half, but we'd need to do the stack trace dump w/o the lock.
        pids = new ArrayList<>(mInterestingJavaPids); // 需要dump的进程
        doWaitedHalfDump = true; // 设置标志,会生成trace
    } else {
        continue;
    }
} else {  // OVERDUE, 会dump 第二次
    // something is overdue!
    blockedCheckers = getBlockedCheckersLocked();
    subject = describeCheckersLocked(blockedCheckers);
    allowRestart = mAllowRestart;
    pids = new ArrayList<>(mInterestingJavaPids);
}
输出第一次Trace
[+++]
添加 dump Java进程

pids 表示需要dump的Java进程信息,通过mInterestingJavaPids所得,默认添加系统进程pid到mInterestingJavaPids,会输出system_server进程的trace。在一些特殊进程启动、退出时,会从mInterestingJavaPids添加、移除pid

[+++]
添加 dump native进程

getInterestingNativePids() 参数获取要打印的native进程

static ArrayList getInterestingNativePids() {
    HashSet pids = new HashSet<>();
    addInterestingAidlPids(pids); // 添加 AIDL_INTERFACE_PREFIXES_OF_INTEREST 中的进程
    addInterestingHidlPids(pids); // 添加 HAL_INTERFACES_OF_INTEREST 中的进程

    // 添加NATIVE_STACKS_OF_INTEREST中的进程
    int[] nativePids = Process.getPidsForCommands(NATIVE_STACKS_OF_INTEREST);
    if (nativePids != null) {
        for (int i : nativePids) {
            pids.add(i);
        }
    }

    return new ArrayList(pids);
}
addInterestingAidlPids
private static void addInterestingAidlPids(HashSet pids) {
    ServiceDebugInfo[] infos = ServiceManager.getServiceDebugInfo();
    if (infos == null) return;

    for (ServiceDebugInfo info : infos) { // 添加 aidl 进程
        for (String prefix : AIDL_INTERFACE_PREFIXES_OF_INTEREST) {
            if (info.name.startsWith(prefix)) {
                pids.add(info.debugPid);
            }
        }
    }
}
addInterestingHidlPids
private static void addInterestingHidlPids(HashSet pids) {
    try {
        IServiceManager serviceManager = IServiceManager.getService();
        ArrayList dump =
                serviceManager.debugDump();
        for (IServiceManager.InstanceDebugInfo info : dump) {
            if (info.pid == IServiceManager.PidConstant.NO_PID) {
                continue;
            }
            // 添加 hidl 进程
            if (!HAL_INTERFACES_OF_INTEREST.contains(info.interfaceName)) {
                continue;
            }

            pids.add(info.pid);
        }
    } catch (RemoteException e) {
        Log.w(TAG, e);
    }
}
输出第二次Trace
long anrTime = SystemClock.uptimeMillis();
StringBuilder report = new StringBuilder();
report.append(MemoryPressureUtil.currentPsiState());
ProcessCpuTracker processCpuTracker = new ProcessCpuTracker(false);
StringWriter tracesFileException = new StringWriter();
final File stack = ActivityManagerService.dumpStackTraces(
        pids, processCpuTracker, new SparseArray<>(), getInterestingNativePids(),
        tracesFileException, subject);

subject 是卡顿原因的主题,通过 Watchdog.HandlerChecker#describeBlockedStateLocked 获取

[+++]
dump kernel log
[+++]
输出Watchdog到dropbox
[+++]
合并Trace

这部分将会合并两次的Trace。通过流将两个trace文件写入到新文件,生成一个新文件。这部分本地还没有最新代码,等更新了S最新代码,再上代码。

ActivityManagerService.dumpStackTraces 实现

从上面的分析来看,trace生成的关键实现是AMS#dumpStackTraces。这部分调用比较长,实际上是system_server通过socket连接debuggerd模块,让后者dump相关进程的信息,然后将trace信息写入指定的文件。

/**
 * If a stack trace dump file is configured, dump process stack traces.
 * @param firstPids of dalvik VM processes to dump stack traces for first
 * @param lastPids of dalvik VM processes to dump stack traces for last
 * @param nativePids optional list of native pids to dump stack crawls
 * @param logExceptionCreatingFile optional writer to which we log errors creating the file
 * @param subject optional line related to the error
 */
public static File dumpStackTraces(ArrayList firstPids,
        ProcessCpuTracker processCpuTracker, SparseArray lastPids,
        ArrayList nativePids, StringWriter logExceptionCreatingFile,
        String subject) {
    return dumpStackTraces(firstPids, processCpuTracker, lastPids, nativePids,
            logExceptionCreatingFile, null, subject);
}

调用重载方法

/**
 * @param firstPidOffsets Optional, when it's set, it receives the start/end offset
 *                        of the very first pid to be dumped.
 */
/* package */ static File dumpStackTraces(ArrayList firstPids,
        ProcessCpuTracker processCpuTracker, SparseArray lastPids,
        ArrayList nativePids, StringWriter logExceptionCreatingFile,
        long[] firstPidOffsets, String subject) {
    ArrayList extraPids = null;

    Slog.i(TAG, "dumpStackTraces pids=" + lastPids + " nativepids=" + nativePids);

    // Measure CPU usage as soon as we're called in order to get a realistic sampling
    // of the top users at the time of the request.
    ...

    final File tracesDir = new File(ANR_TRACE_DIR); // ANR_TRACE_DIR = "/data/anr";
    // Each set of ANR traces is written to a separate file and dumpstate will process
    // all such files and add them to a captured bug report if they're recent enough.
    maybePruneOldTraces(tracesDir); // 删除过旧的trace文件

    // NOTE: We should consider creating the file in native code atomically once we've
    // gotten rid of the old scheme of dumping and lot of the code that deals with paths
    // can be removed.
    File tracesFile;
    try { // 创建trace文件 anr_yyyy-MM-dd-HH-mm-ss-SSS
        tracesFile = createAnrDumpFile(tracesDir);
    } catch (IOException e) {
        Slog.w(TAG, "Exception creating ANR dump file:", e);
        if (logExceptionCreatingFile != null) {
            logExceptionCreatingFile.append("----- Exception creating ANR dump file -----\n");
            e.printStackTrace(new PrintWriter(logExceptionCreatingFile));
        }
        return null;
    }
    // 标题,Trace文件第一行,Subject: Blocked in monitor *** on xxx thread
    if (subject != null) {
        try (FileOutputStream fos = new FileOutputStream(tracesFile, true)) {
            String header = "Subject: " + subject + "\n";
            fos.write(header.getBytes(StandardCharsets.UTF_8));
        } catch (IOException e) {
            Slog.w(TAG, "Exception writing subject to ANR dump file:", e);
        }
    }
    // 开始执行dump
    Pair offsets = dumpStackTraces(
            tracesFile.getAbsolutePath(), firstPids, nativePids, extraPids);
    if (firstPidOffsets != null) {
        if (offsets == null) {
            firstPidOffsets[0] = firstPidOffsets[1] = -1;
        } else {
            firstPidOffsets[0] = offsets.first; // Start offset to the ANR trace file
            firstPidOffsets[1] = offsets.second; // End offset to the ANR trace file
        }
    }
    return tracesFile;
}
dumpStackTraces
/**
 * @return The start/end offset of the trace of the very first PID
 */
public static Pair dumpStackTraces(String tracesFile, ArrayList firstPids,
        ArrayList nativePids, ArrayList extraPids) {

    Slog.i(TAG, "Dumping to " + tracesFile);

    // We don't need any sort of inotify based monitoring when we're dumping traces via
    // tombstoned. Data is piped to an "intercept" FD installed in tombstoned so we're in full
    // control of all writes to the file in question.

    // We must complete all stack dumps within 20 seconds.
    long remainingTime = 20 * 1000;

    // As applications are usually interested with the ANR stack traces, but we can't share with
    // them the stack traces other than their own stacks. So after the very first PID is
    // dumped, remember the current file size.
    long firstPidStart = -1;
    long firstPidEnd = -1;

    // First collect all of the stacks of the most important pids.
    if (firstPids != null) {
        int num = firstPids.size();
        for (int i = 0; i < num; i++) {
            final int pid = firstPids.get(i);
            // We don't copy ANR traces from the system_server intentionally.
            final boolean firstPid = i == 0 && MY_PID != pid;
            File tf = null;
            if (firstPid) {
                tf = new File(tracesFile);
                firstPidStart = tf.exists() ? tf.length() : 0;
            }

            Slog.i(TAG, "Collecting stacks for pid " + pid);
            // dump Java 进程信息
            final long timeTaken = dumpJavaTracesTombstoned(pid, tracesFile,
                                                            remainingTime);

            remainingTime -= timeTaken;
            if (remainingTime <= 0) {
                Slog.e(TAG, "Aborting stack trace dump (current firstPid=" + pid
                        + "); deadline exceeded.");
                return firstPidStart >= 0 ? new Pair<>(firstPidStart, firstPidEnd) : null;
            }

            if (firstPid) {
                firstPidEnd = tf.length();
            }
            if (DEBUG_ANR) {
                Slog.d(TAG, "Done with pid " + firstPids.get(i) + " in " + timeTaken + "ms");
            }
        }
    }

    // Next collect the stacks of the native pids
    if (nativePids != null) {
        for (int pid : nativePids) {
            Slog.i(TAG, "Collecting stacks for native pid " + pid);
            final long nativeDumpTimeoutMs = Math.min(NATIVE_DUMP_TIMEOUT_MS, remainingTime);

            final long start = SystemClock.elapsedRealtime();
            // dump native 进程
            Debug.dumpNativeBacktraceToFileTimeout(
                    pid, tracesFile, (int) (nativeDumpTimeoutMs / 1000));
            final long timeTaken = SystemClock.elapsedRealtime() - start;

            remainingTime -= timeTaken;
            if (remainingTime <= 0) {
                Slog.e(TAG, "Aborting stack trace dump (current native pid=" + pid +
                    "); deadline exceeded.");
                return firstPidStart >= 0 ? new Pair<>(firstPidStart, firstPidEnd) : null;
            }

            if (DEBUG_ANR) {
                Slog.d(TAG, "Done with native pid " + pid + " in " + timeTaken + "ms");
            }
        }
    }

    // Lastly, dump stacks for all extra PIDs from the CPU tracker.
    if (extraPids != null) {
        for (int pid : extraPids) {
            Slog.i(TAG, "Collecting stacks for extra pid " + pid);

            final long timeTaken = dumpJavaTracesTombstoned(pid, tracesFile, remainingTime);

            remainingTime -= timeTaken;
            if (remainingTime <= 0) {
                Slog.e(TAG, "Aborting stack trace dump (current extra pid=" + pid +
                        "); deadline exceeded.");
                return firstPidStart >= 0 ? new Pair<>(firstPidStart, firstPidEnd) : null;
            }

            if (DEBUG_ANR) {
                Slog.d(TAG, "Done with extra pid " + pid + " in " + timeTaken + "ms");
            }
        }
    }
    Slog.i(TAG, "Done dumping");
    return firstPidStart >= 0 ? new Pair<>(firstPidStart, firstPidEnd) : null;
}

下面以 dumpJavaTracesTombstoned 为例。

dumpJavaTracesTombstoned
[+++]

接下来通过 Debug实现连接 debuggerd 来完成dump

Debug#dumpJavaBacktraceToFileTimeout

Debug的dumpJavaBacktraceToFileTimeout方法是一个native方法

[+++]

该native的实现在 android_os_Debug.cpp, 对应的jni函数是 android_os_Debug_dumpJavaBacktraceToFileTimeout

[+++]
android_os_Debug_dumpJavaBacktraceToFileTimeout
[+++]

此jni实现直接调用了 dumpTraces

[+++]
dump_backtrace_to_file_timeout
/// @system/core/debuggerd/client/debuggerd_client.cpp
int dump_backtrace_to_file_timeout(pid_t tid, DebuggerdDumpType dump_type, int timeout_secs,
                                   int fd) {
  android::base::unique_fd copy(dup(fd));
  if (copy == -1) {
    return -1;
  }

  // debuggerd_trigger_dump results in every thread in the process being interrupted
  // by a signal, so we need to fetch the wchan data before calling that.
  std::string wchan_data = get_wchan_data(tid);

  int timeout_ms = timeout_secs > 0 ? timeout_secs * 1000 : 0;
  // 开始连接 debuggerd的 tombstoned
  int ret = debuggerd_trigger_dump(tid, dump_type, timeout_ms, std::move(copy)) ? 0 : -1;

  // Dump wchan data, since only privileged processes (CAP_SYS_ADMIN) can read
  // kernel stack traces (/proc/*/stack).
  dump_wchan_data(wchan_data, fd, tid);

  return ret;
}
debuggerd_trigger_dump
/// @system/core/debuggerd/client/debuggerd_client.cpp
bool debuggerd_trigger_dump(pid_t tid, DebuggerdDumpType dump_type, unsigned int timeout_ms,
                            unique_fd output_fd) {
  pid_t pid = tid;
  if (dump_type == kDebuggerdJavaBacktrace) {
    // Java dumps always get sent to the tgid, so we need to resolve our tid to a tgid.
    android::procinfo::ProcessInfo procinfo;
    std::string error;
    if (!android::procinfo::GetProcessInfo(tid, &procinfo, &error)) {
      LOG(ERROR) << "libdebugged_client: failed to get process info: " << error;
      return false;
    }
    pid = procinfo.pid;
  }

  LOG(INFO) << "libdebuggerd_client: started dumping process " << pid;
  unique_fd sockfd;
  const auto end = std::chrono::steady_clock::now() + std::chrono::milliseconds(timeout_ms);
  auto time_left = [&end]() { return end - std::chrono::steady_clock::now(); };
  auto set_timeout = [timeout_ms, &time_left](int sockfd) { // 定义设置 timeout 函数
    if (timeout_ms <= 0) {
      return sockfd;
    }

    auto remaining = time_left();
    if (remaining < decltype(remaining)::zero()) {
      LOG(ERROR) << "libdebuggerd_client: timeout expired";
      return -1;
    }

    struct timeval timeout;
    populate_timeval(&timeout, remaining);

    if (setsockopt(sockfd, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof(timeout)) != 0) {
      PLOG(ERROR) << "libdebuggerd_client: failed to set receive timeout";
      return -1;
    }
    if (setsockopt(sockfd, SOL_SOCKET, SO_SNDTIMEO, &timeout, sizeof(timeout)) != 0) {
      PLOG(ERROR) << "libdebuggerd_client: failed to set send timeout";
      return -1;
    }

    return sockfd;
  };

  sockfd.reset(socket(AF_LOCAL, SOCK_SEQPACKET, 0)); // 创建 socket
  if (sockfd == -1) {
    PLOG(ERROR) << "libdebugger_client: failed to create socket";
    return false;
  }
  // 连接 socket tombstoned_intercept
  if (socket_local_client_connect(set_timeout(sockfd.get()), kTombstonedInterceptSocketName,
                                  ANDROID_SOCKET_NAMESPACE_RESERVED, SOCK_SEQPACKET) == -1) {
    PLOG(ERROR) << "libdebuggerd_client: failed to connect to tombstoned";
    return false;
  }

  InterceptRequest req = {
      .dump_type = dump_type,
      .pid = pid,
  };
  if (!set_timeout(sockfd)) {
    PLOG(ERROR) << "libdebugger_client: failed to set timeout";
    return false;
  }

  // Create an intermediate pipe to pass to the other end.
  unique_fd pipe_read, pipe_write;
  if (!Pipe(&pipe_read, &pipe_write)) { // 创建 pipe
    PLOG(ERROR) << "libdebuggerd_client: failed to create pipe";
    return false;
  }

  std::string pipe_size_str;
  int pipe_buffer_size = 1024 * 1024;
  // 读取 pipe max size
  if (android::base::ReadFileToString("/proc/sys/fs/pipe-max-size", &pipe_size_str)) {
    pipe_size_str = android::base::Trim(pipe_size_str);

    if (!android::base::ParseInt(pipe_size_str.c_str(), &pipe_buffer_size, 0)) {
      LOG(FATAL) << "failed to parse pipe max size '" << pipe_size_str << "'";
    }
  }

  if (fcntl(pipe_read.get(), F_SETPIPE_SZ, pipe_buffer_size) != pipe_buffer_size) {
    PLOG(ERROR) << "failed to set pipe buffer size";
  }
  // 发送pipe的write fd传给对端
  ssize_t rc = SendFileDescriptors(set_timeout(sockfd), &req, sizeof(req), pipe_write.get());
  pipe_write.reset();
  if (rc != sizeof(req)) {
    PLOG(ERROR) << "libdebuggerd_client: failed to send output fd to tombstoned";
    return false;
  }

  // Check to make sure we've successfully registered.
  InterceptResponse response; // 接受服务端的响应
  rc = TEMP_FAILURE_RETRY(recv(set_timeout(sockfd.get()), &response, sizeof(response), MSG_TRUNC));
  if (rc == 0) {
    LOG(ERROR) << "libdebuggerd_client: failed to read initial response from tombstoned: "
               << "timeout reached?";
    return false;
  } else if (rc == -1) {
    PLOG(ERROR) << "libdebuggerd_client: failed to read initial response from tombstoned";
    return false;
  } else if (rc != sizeof(response)) {
    LOG(ERROR) << "libdebuggerd_client: received packet of unexpected length from tombstoned while "
                  "reading initial response: expected "
               << sizeof(response) << ", received " << rc;
    return false;
  }

  if (response.status != InterceptStatus::kRegistered) {
    LOG(ERROR) << "libdebuggerd_client: unexpected registration response: "
               << static_cast(response.status);
    return false;
  }

  if (!send_signal(tid, dump_type)) {
    return false;
  }
  // 读取服务端响应
  rc = TEMP_FAILURE_RETRY(recv(set_timeout(sockfd.get()), &response, sizeof(response), MSG_TRUNC));
  if (rc == 0) {
    LOG(ERROR) << "libdebuggerd_client: failed to read status response from tombstoned: "
                  "timeout reached?";
    return false;
  } else if (rc == -1) {
    PLOG(ERROR) << "libdebuggerd_client: failed to read status response from tombstoned";
    return false;
  } else if (rc != sizeof(response)) {
    LOG(ERROR) << "libdebuggerd_client: received packet of unexpected length from tombstoned while "
                  "reading confirmation response: expected "
               << sizeof(response) << ", received " << rc;
    return false;
  }

  if (response.status != InterceptStatus::kStarted) {
    response.error_message[sizeof(response.error_message) - 1] = '<= 0) {
      remaining_ms = -1;
    } else if (remaining_ms < 0) {
      LOG(ERROR) << "libdebuggerd_client: timeout expired";
      return false;
    }

    struct pollfd pfd = {
        .fd = pipe_read.get(), .events = POLLIN, .revents = 0,
    };

    rc = poll(&pfd, 1, remaining_ms); // 通过 poll 监听 read fd
    if (rc == -1) {
      if (errno == EINTR) {
        continue;
      } else {
        PLOG(ERROR) << "libdebuggerd_client: error while polling";
        return false;
      }
    } else if (rc == 0) {
      LOG(ERROR) << "libdebuggerd_client: timeout expired";
      return false;
    }

    char buf[1024];
    rc = TEMP_FAILURE_RETRY(read(pipe_read.get(), buf, sizeof(buf))); // 读取数据。
    if (rc == 0) {
      // Done.
      break;
    } else if (rc == -1) {
      PLOG(ERROR) << "libdebuggerd_client: error while reading";
      return false;
    }

    if (!android::base::WriteFully(output_fd.get(), buf, rc)) { // 写数据
      PLOG(ERROR) << "libdebuggerd_client: error while writing";
      return false;
    }
  }

  LOG(INFO) << "libdebuggerd_client: done dumping process " << pid;

  return true;
}
';
    LOG(ERROR) << "libdebuggerd_client: tombstoned reported failure: " << response.error_message;
    return false;
  }

  // Forward output from the pipe to the output fd.
  while (true) { // 读取服务端写过来的数据,然后输出到dump文件
    auto remaining_ms = std::chrono::duration_cast(time_left()).count();
    if (timeout_ms 
<===>)
File: /www/wwwroot/outofmemory.cn/tmp/route_read.php, Line: 126, InsideLink()
File: /www/wwwroot/outofmemory.cn/tmp/index.inc.php, Line: 165, include(/www/wwwroot/outofmemory.cn/tmp/route_read.php)
File: /www/wwwroot/outofmemory.cn/index.php, Line: 30, include(/www/wwwroot/outofmemory.cn/tmp/index.inc.php)
Android 12 Watchdog_java_内存溢出

Android 12 Watchdog

Android 12 Watchdog,第1张

文章托管在gitee上 Android Notes , 同步csdn

Trace生成流程

从第2篇可知,Watchdog的Trace生成过程如下:

  • 当等待时间 >Max/2 , 即评估状态为 WAITED_HALF,则会输出第一次Trace
  • 当等待时间 >Max, 即评估状态为 OVERDUE,则会输出第二次Trace,以及一些其他的信息(kernel log,binder相关信息,dropbox等)
  • 最终,会将两次生成的Trace合二为一,生成一个最终的Trace。

下面看判断条件

} else if (waitState == WAITED_HALF) {
    if (!waitedHalf) {
        Slog.i(TAG, "WAITED_HALF");
        waitedHalf = true;  // 设置标志,防止反复进入
        // We've waited half, but we'd need to do the stack trace dump w/o the lock.
        pids = new ArrayList<>(mInterestingJavaPids); // 需要dump的进程
        doWaitedHalfDump = true; // 设置标志,会生成trace
    } else {
        continue;
    }
} else {  // OVERDUE, 会dump 第二次
    // something is overdue!
    blockedCheckers = getBlockedCheckersLocked();
    subject = describeCheckersLocked(blockedCheckers);
    allowRestart = mAllowRestart;
    pids = new ArrayList<>(mInterestingJavaPids);
}
输出第一次Trace
if (doWaitedHalfDump) {
    // We've waited half the deadlock-detection interval.  Pull a stack
    // trace and wait another half.
    ActivityManagerService.dumpStackTraces(pids, null, null,
            getInterestingNativePids(), null, subject);
    continue;
}
添加 dump Java进程

pids 表示需要dump的Java进程信息,通过mInterestingJavaPids所得,默认添加系统进程pid到mInterestingJavaPids,会输出system_server进程的trace。在一些特殊进程启动、退出时,会从mInterestingJavaPids添加、移除pid

/**
 * Notifies the watchdog when a Java process with {@code pid} is started.
 * This process may have its stack trace dumped during an ANR.
 */
public void processStarted(String processName, int pid) { // app 启动后回调
    if (isInterestingJavaProcess(processName)) {
        Slog.i(TAG, "Interesting Java process " + processName + " started. Pid " + pid);
        synchronized (mLock) {
            mInterestingJavaPids.add(pid);
        }
    }
}

// 兴趣进程判断
private static boolean isInterestingJavaProcess(String processName) {
    return processName.equals(StorageManagerService.sMediaStoreAuthorityProcessName)
            || processName.equals("com.android.phone");
}


/**
 * Notifies the watchdog when a Java process with {@code pid} dies.
 */
public void processDied(String processName, int pid) { // app 退出时回调
    if (isInterestingJavaProcess(processName)) {
        Slog.i(TAG, "Interesting Java process " + processName + " died. Pid " + pid);
        synchronized (mLock) {
            mInterestingJavaPids.remove(Integer.valueOf(pid));
        }
    }
}
添加 dump native进程

getInterestingNativePids() 参数获取要打印的native进程

static ArrayList getInterestingNativePids() {
    HashSet pids = new HashSet<>();
    addInterestingAidlPids(pids); // 添加 AIDL_INTERFACE_PREFIXES_OF_INTEREST 中的进程
    addInterestingHidlPids(pids); // 添加 HAL_INTERFACES_OF_INTEREST 中的进程

    // 添加NATIVE_STACKS_OF_INTEREST中的进程
    int[] nativePids = Process.getPidsForCommands(NATIVE_STACKS_OF_INTEREST);
    if (nativePids != null) {
        for (int i : nativePids) {
            pids.add(i);
        }
    }

    return new ArrayList(pids);
}
addInterestingAidlPids
private static void addInterestingAidlPids(HashSet pids) {
    ServiceDebugInfo[] infos = ServiceManager.getServiceDebugInfo();
    if (infos == null) return;

    for (ServiceDebugInfo info : infos) { // 添加 aidl 进程
        for (String prefix : AIDL_INTERFACE_PREFIXES_OF_INTEREST) {
            if (info.name.startsWith(prefix)) {
                pids.add(info.debugPid);
            }
        }
    }
}
addInterestingHidlPids
private static void addInterestingHidlPids(HashSet pids) {
    try {
        IServiceManager serviceManager = IServiceManager.getService();
        ArrayList dump =
                serviceManager.debugDump();
        for (IServiceManager.InstanceDebugInfo info : dump) {
            if (info.pid == IServiceManager.PidConstant.NO_PID) {
                continue;
            }
            // 添加 hidl 进程
            if (!HAL_INTERFACES_OF_INTEREST.contains(info.interfaceName)) {
                continue;
            }

            pids.add(info.pid);
        }
    } catch (RemoteException e) {
        Log.w(TAG, e);
    }
}
输出第二次Trace
long anrTime = SystemClock.uptimeMillis();
StringBuilder report = new StringBuilder();
report.append(MemoryPressureUtil.currentPsiState());
ProcessCpuTracker processCpuTracker = new ProcessCpuTracker(false);
StringWriter tracesFileException = new StringWriter();
final File stack = ActivityManagerService.dumpStackTraces(
        pids, processCpuTracker, new SparseArray<>(), getInterestingNativePids(),
        tracesFileException, subject);

subject 是卡顿原因的主题,通过 Watchdog.HandlerChecker#describeBlockedStateLocked 获取

String describeBlockedStateLocked() {
    if (mCurrentMonitor == null) {
        return "Blocked in handler on " + mName + " (" + getThread().getName() + ")";
    } else {
        return "Blocked in monitor " + mCurrentMonitor.getClass().getName()
                + " on " + mName + " (" + getThread().getName() + ")";
    }
}
dump kernel log
// Trigger the kernel to dump all blocked threads, and backtraces on all CPUs to the kernel log
doSysRq('w');
doSysRq('l');

private void doSysRq(char c) {
    try { // 往 sysrq-trigger 写命令
        FileWriter sysrq_trigger = new FileWriter("/proc/sysrq-trigger");
        sysrq_trigger.write(c);
        sysrq_trigger.close();
    } catch (IOException e) {
        Slog.w(TAG, "Failed to write to /proc/sysrq-trigger", e);
    }
}

输出Watchdog到dropbox
// Try to add the error to the dropbox, but assuming that the ActivityManager
// itself may be deadlocked.  (which has happened, causing this statement to
// deadlock and the watchdog as a whole to be ineffective)
Thread dropboxThread = new Thread("watchdogWriteToDropbox") {
        public void run() {
            // If a watched thread hangs before init() is called, we don't have a
            // valid mActivity. So we can't log the error to dropbox.
            if (mActivity != null) {
                mActivity.addErrorToDropBox( // 实际通过DropBoxManager向DropBoxManagerService发起调用
                        "watchdog", null, "system_server", null, null, null,
                        null, report.toString(), stack, null, null, null,
                        errorId);
            }
        }
    };
dropboxThread.start();
try {
    dropboxThread.join(2000);  // wait up to 2 seconds for it to return.
} catch (InterruptedException ignored) {}
合并Trace

这部分将会合并两次的Trace。通过流将两个trace文件写入到新文件,生成一个新文件。这部分本地还没有最新代码,等更新了S最新代码,再上代码。

ActivityManagerService.dumpStackTraces 实现

从上面的分析来看,trace生成的关键实现是AMS#dumpStackTraces。这部分调用比较长,实际上是system_server通过socket连接debuggerd模块,让后者dump相关进程的信息,然后将trace信息写入指定的文件。

/**
 * If a stack trace dump file is configured, dump process stack traces.
 * @param firstPids of dalvik VM processes to dump stack traces for first
 * @param lastPids of dalvik VM processes to dump stack traces for last
 * @param nativePids optional list of native pids to dump stack crawls
 * @param logExceptionCreatingFile optional writer to which we log errors creating the file
 * @param subject optional line related to the error
 */
public static File dumpStackTraces(ArrayList firstPids,
        ProcessCpuTracker processCpuTracker, SparseArray lastPids,
        ArrayList nativePids, StringWriter logExceptionCreatingFile,
        String subject) {
    return dumpStackTraces(firstPids, processCpuTracker, lastPids, nativePids,
            logExceptionCreatingFile, null, subject);
}

调用重载方法

/**
 * @param firstPidOffsets Optional, when it's set, it receives the start/end offset
 *                        of the very first pid to be dumped.
 */
/* package */ static File dumpStackTraces(ArrayList firstPids,
        ProcessCpuTracker processCpuTracker, SparseArray lastPids,
        ArrayList nativePids, StringWriter logExceptionCreatingFile,
        long[] firstPidOffsets, String subject) {
    ArrayList extraPids = null;

    Slog.i(TAG, "dumpStackTraces pids=" + lastPids + " nativepids=" + nativePids);

    // Measure CPU usage as soon as we're called in order to get a realistic sampling
    // of the top users at the time of the request.
    ...

    final File tracesDir = new File(ANR_TRACE_DIR); // ANR_TRACE_DIR = "/data/anr";
    // Each set of ANR traces is written to a separate file and dumpstate will process
    // all such files and add them to a captured bug report if they're recent enough.
    maybePruneOldTraces(tracesDir); // 删除过旧的trace文件

    // NOTE: We should consider creating the file in native code atomically once we've
    // gotten rid of the old scheme of dumping and lot of the code that deals with paths
    // can be removed.
    File tracesFile;
    try { // 创建trace文件 anr_yyyy-MM-dd-HH-mm-ss-SSS
        tracesFile = createAnrDumpFile(tracesDir);
    } catch (IOException e) {
        Slog.w(TAG, "Exception creating ANR dump file:", e);
        if (logExceptionCreatingFile != null) {
            logExceptionCreatingFile.append("----- Exception creating ANR dump file -----\n");
            e.printStackTrace(new PrintWriter(logExceptionCreatingFile));
        }
        return null;
    }
    // 标题,Trace文件第一行,Subject: Blocked in monitor *** on xxx thread
    if (subject != null) {
        try (FileOutputStream fos = new FileOutputStream(tracesFile, true)) {
            String header = "Subject: " + subject + "\n";
            fos.write(header.getBytes(StandardCharsets.UTF_8));
        } catch (IOException e) {
            Slog.w(TAG, "Exception writing subject to ANR dump file:", e);
        }
    }
    // 开始执行dump
    Pair offsets = dumpStackTraces(
            tracesFile.getAbsolutePath(), firstPids, nativePids, extraPids);
    if (firstPidOffsets != null) {
        if (offsets == null) {
            firstPidOffsets[0] = firstPidOffsets[1] = -1;
        } else {
            firstPidOffsets[0] = offsets.first; // Start offset to the ANR trace file
            firstPidOffsets[1] = offsets.second; // End offset to the ANR trace file
        }
    }
    return tracesFile;
}
dumpStackTraces
  • 执行 dump Java 进程trace, 调用 dumpJavaTracesTombstoned
  • 执行 dump native 进程trace, 调用Debug.dumpNativeBacktraceToFileTimeout
  • 执行 extra 进程trace, 调用dumpJavaTracesTombstoned
/**
 * @return The start/end offset of the trace of the very first PID
 */
public static Pair dumpStackTraces(String tracesFile, ArrayList firstPids,
        ArrayList nativePids, ArrayList extraPids) {

    Slog.i(TAG, "Dumping to " + tracesFile);

    // We don't need any sort of inotify based monitoring when we're dumping traces via
    // tombstoned. Data is piped to an "intercept" FD installed in tombstoned so we're in full
    // control of all writes to the file in question.

    // We must complete all stack dumps within 20 seconds.
    long remainingTime = 20 * 1000;

    // As applications are usually interested with the ANR stack traces, but we can't share with
    // them the stack traces other than their own stacks. So after the very first PID is
    // dumped, remember the current file size.
    long firstPidStart = -1;
    long firstPidEnd = -1;

    // First collect all of the stacks of the most important pids.
    if (firstPids != null) {
        int num = firstPids.size();
        for (int i = 0; i < num; i++) {
            final int pid = firstPids.get(i);
            // We don't copy ANR traces from the system_server intentionally.
            final boolean firstPid = i == 0 && MY_PID != pid;
            File tf = null;
            if (firstPid) {
                tf = new File(tracesFile);
                firstPidStart = tf.exists() ? tf.length() : 0;
            }

            Slog.i(TAG, "Collecting stacks for pid " + pid);
            // dump Java 进程信息
            final long timeTaken = dumpJavaTracesTombstoned(pid, tracesFile,
                                                            remainingTime);

            remainingTime -= timeTaken;
            if (remainingTime <= 0) {
                Slog.e(TAG, "Aborting stack trace dump (current firstPid=" + pid
                        + "); deadline exceeded.");
                return firstPidStart >= 0 ? new Pair<>(firstPidStart, firstPidEnd) : null;
            }

            if (firstPid) {
                firstPidEnd = tf.length();
            }
            if (DEBUG_ANR) {
                Slog.d(TAG, "Done with pid " + firstPids.get(i) + " in " + timeTaken + "ms");
            }
        }
    }

    // Next collect the stacks of the native pids
    if (nativePids != null) {
        for (int pid : nativePids) {
            Slog.i(TAG, "Collecting stacks for native pid " + pid);
            final long nativeDumpTimeoutMs = Math.min(NATIVE_DUMP_TIMEOUT_MS, remainingTime);

            final long start = SystemClock.elapsedRealtime();
            // dump native 进程
            Debug.dumpNativeBacktraceToFileTimeout(
                    pid, tracesFile, (int) (nativeDumpTimeoutMs / 1000));
            final long timeTaken = SystemClock.elapsedRealtime() - start;

            remainingTime -= timeTaken;
            if (remainingTime <= 0) {
                Slog.e(TAG, "Aborting stack trace dump (current native pid=" + pid +
                    "); deadline exceeded.");
                return firstPidStart >= 0 ? new Pair<>(firstPidStart, firstPidEnd) : null;
            }

            if (DEBUG_ANR) {
                Slog.d(TAG, "Done with native pid " + pid + " in " + timeTaken + "ms");
            }
        }
    }

    // Lastly, dump stacks for all extra PIDs from the CPU tracker.
    if (extraPids != null) {
        for (int pid : extraPids) {
            Slog.i(TAG, "Collecting stacks for extra pid " + pid);

            final long timeTaken = dumpJavaTracesTombstoned(pid, tracesFile, remainingTime);

            remainingTime -= timeTaken;
            if (remainingTime <= 0) {
                Slog.e(TAG, "Aborting stack trace dump (current extra pid=" + pid +
                        "); deadline exceeded.");
                return firstPidStart >= 0 ? new Pair<>(firstPidStart, firstPidEnd) : null;
            }

            if (DEBUG_ANR) {
                Slog.d(TAG, "Done with extra pid " + pid + " in " + timeTaken + "ms");
            }
        }
    }
    Slog.i(TAG, "Done dumping");
    return firstPidStart >= 0 ? new Pair<>(firstPidStart, firstPidEnd) : null;
}

下面以 dumpJavaTracesTombstoned 为例。

dumpJavaTracesTombstoned
/**
 * Dump java traces for process {@code pid} to the specified file. If java trace dumping
 * fails, a native backtrace is attempted. Note that the timeout {@code timeoutMs} only applies
 * to the java section of the trace, a further {@code NATIVE_DUMP_TIMEOUT_MS} might be spent
 * attempting to obtain native traces in the case of a failure. Returns the total time spent
 * capturing traces.
 */
private static long dumpJavaTracesTombstoned(int pid, String fileName, long timeoutMs) {
    final long timeStart = SystemClock.elapsedRealtime();
    // 通过 Debug 实现
    boolean javaSuccess = Debug.dumpJavaBacktraceToFileTimeout(pid, fileName,
            (int) (timeoutMs / 1000));
    if (javaSuccess) {
        // Check that something is in the file, actually. Try-catch should not be necessary,
        // but better safe than sorry.
        try {
            long size = new File(fileName).length();
            if (size < JAVA_DUMP_MINIMUM_SIZE) { // 空文件
                Slog.w(TAG, "Successfully created Java ANR file is empty!");
                javaSuccess = false;
            }
        } catch (Exception e) {
            Slog.w(TAG, "Unable to get ANR file size", e);
            javaSuccess = false;
        }
    }
    if (!javaSuccess) {
        Slog.w(TAG, "Dumping Java threads failed, initiating native stack dump.");
        if (!Debug.dumpNativeBacktraceToFileTimeout(pid, fileName,
                (NATIVE_DUMP_TIMEOUT_MS / 1000))) { // 尝试dump native进程
            Slog.w(TAG, "Native stack dump failed!");
        }
    }

    return SystemClock.elapsedRealtime() - timeStart;
}

接下来通过 Debug实现连接 debuggerd 来完成dump

Debug#dumpJavaBacktraceToFileTimeout

Debug的dumpJavaBacktraceToFileTimeout方法是一个native方法

/// @frameworks/base/core/java/android/os/Debug.java
/**
 * Append the Java stack traces of a given native process to a specified file.
 *
 * @param pid pid to dump.
 * @param file path of file to append dump to.
 * @param timeoutSecs time to wait in seconds, or 0 to wait forever.
 * @hide
 */
public static native boolean dumpJavaBacktraceToFileTimeout(int pid, String file,

该native的实现在 android_os_Debug.cpp, 对应的jni函数是 android_os_Debug_dumpJavaBacktraceToFileTimeout

/// @frameworks/base/core/jni/android_os_Debug.cpp
{ "dumpJavaBacktraceToFileTimeout", "(ILjava/lang/String;I)Z",
        (void*)android_os_Debug_dumpJavaBacktraceToFileTimeout },
android_os_Debug_dumpJavaBacktraceToFileTimeout
static jboolean android_os_Debug_dumpJavaBacktraceToFileTimeout(JNIEnv* env, jobject clazz,
        jint pid, jstring fileName, jint timeoutSecs) {
    const bool ret = dumpTraces(env, pid, fileName, timeoutSecs, kDebuggerdJavaBacktrace);
    return ret ? JNI_TRUE : JNI_FALSE;
}

此jni实现直接调用了 dumpTraces

static bool dumpTraces(JNIEnv* env, jint pid, jstring fileName, jint timeoutSecs,
                       DebuggerdDumpType dumpType) {
    const ScopedUtfChars fileNameChars(env, fileName);
    if (fileNameChars.c_str() == nullptr) {
        return false;
    }
    // 打开trace文件
    android::base::unique_fd fd(open(fileNameChars.c_str(),
                                     O_CREAT | O_WRONLY | O_NOFOLLOW | O_CLOEXEC | O_APPEND,
                                     0666));
    if (fd < 0) {
        PLOG(ERROR) << "Can't open " << fileNameChars.c_str();
        return false;
    }
    // libdebuggerd_client 连接 tombstoned 来执行dump,此函数在 debuggerd/client.h 定义
    int res = dump_backtrace_to_file_timeout(pid, dumpType, timeoutSecs, fd);
    if (fdatasync(fd.get()) != 0) {
        PLOG(ERROR) << "Failed flushing trace.";
    }
    return res == 0;
}
dump_backtrace_to_file_timeout
/// @system/core/debuggerd/client/debuggerd_client.cpp
int dump_backtrace_to_file_timeout(pid_t tid, DebuggerdDumpType dump_type, int timeout_secs,
                                   int fd) {
  android::base::unique_fd copy(dup(fd));
  if (copy == -1) {
    return -1;
  }

  // debuggerd_trigger_dump results in every thread in the process being interrupted
  // by a signal, so we need to fetch the wchan data before calling that.
  std::string wchan_data = get_wchan_data(tid);

  int timeout_ms = timeout_secs > 0 ? timeout_secs * 1000 : 0;
  // 开始连接 debuggerd的 tombstoned
  int ret = debuggerd_trigger_dump(tid, dump_type, timeout_ms, std::move(copy)) ? 0 : -1;

  // Dump wchan data, since only privileged processes (CAP_SYS_ADMIN) can read
  // kernel stack traces (/proc/*/stack).
  dump_wchan_data(wchan_data, fd, tid);

  return ret;
}
debuggerd_trigger_dump
  • 通过 socket 连接 tombstoned, 与其进行交互
  • 创建pipe,将write端传给服务端,本地端通过poll监听read fd
  • 接受来自对端的trace数据,并将之写入trace文件
/// @system/core/debuggerd/client/debuggerd_client.cpp
bool debuggerd_trigger_dump(pid_t tid, DebuggerdDumpType dump_type, unsigned int timeout_ms,
                            unique_fd output_fd) {
  pid_t pid = tid;
  if (dump_type == kDebuggerdJavaBacktrace) {
    // Java dumps always get sent to the tgid, so we need to resolve our tid to a tgid.
    android::procinfo::ProcessInfo procinfo;
    std::string error;
    if (!android::procinfo::GetProcessInfo(tid, &procinfo, &error)) {
      LOG(ERROR) << "libdebugged_client: failed to get process info: " << error;
      return false;
    }
    pid = procinfo.pid;
  }

  LOG(INFO) << "libdebuggerd_client: started dumping process " << pid;
  unique_fd sockfd;
  const auto end = std::chrono::steady_clock::now() + std::chrono::milliseconds(timeout_ms);
  auto time_left = [&end]() { return end - std::chrono::steady_clock::now(); };
  auto set_timeout = [timeout_ms, &time_left](int sockfd) { // 定义设置 timeout 函数
    if (timeout_ms <= 0) {
      return sockfd;
    }

    auto remaining = time_left();
    if (remaining < decltype(remaining)::zero()) {
      LOG(ERROR) << "libdebuggerd_client: timeout expired";
      return -1;
    }

    struct timeval timeout;
    populate_timeval(&timeout, remaining);

    if (setsockopt(sockfd, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof(timeout)) != 0) {
      PLOG(ERROR) << "libdebuggerd_client: failed to set receive timeout";
      return -1;
    }
    if (setsockopt(sockfd, SOL_SOCKET, SO_SNDTIMEO, &timeout, sizeof(timeout)) != 0) {
      PLOG(ERROR) << "libdebuggerd_client: failed to set send timeout";
      return -1;
    }

    return sockfd;
  };

  sockfd.reset(socket(AF_LOCAL, SOCK_SEQPACKET, 0)); // 创建 socket
  if (sockfd == -1) {
    PLOG(ERROR) << "libdebugger_client: failed to create socket";
    return false;
  }
  // 连接 socket tombstoned_intercept
  if (socket_local_client_connect(set_timeout(sockfd.get()), kTombstonedInterceptSocketName,
                                  ANDROID_SOCKET_NAMESPACE_RESERVED, SOCK_SEQPACKET) == -1) {
    PLOG(ERROR) << "libdebuggerd_client: failed to connect to tombstoned";
    return false;
  }

  InterceptRequest req = {
      .dump_type = dump_type,
      .pid = pid,
  };
  if (!set_timeout(sockfd)) {
    PLOG(ERROR) << "libdebugger_client: failed to set timeout";
    return false;
  }

  // Create an intermediate pipe to pass to the other end.
  unique_fd pipe_read, pipe_write;
  if (!Pipe(&pipe_read, &pipe_write)) { // 创建 pipe
    PLOG(ERROR) << "libdebuggerd_client: failed to create pipe";
    return false;
  }

  std::string pipe_size_str;
  int pipe_buffer_size = 1024 * 1024;
  // 读取 pipe max size
  if (android::base::ReadFileToString("/proc/sys/fs/pipe-max-size", &pipe_size_str)) {
    pipe_size_str = android::base::Trim(pipe_size_str);

    if (!android::base::ParseInt(pipe_size_str.c_str(), &pipe_buffer_size, 0)) {
      LOG(FATAL) << "failed to parse pipe max size '" << pipe_size_str << "'";
    }
  }

  if (fcntl(pipe_read.get(), F_SETPIPE_SZ, pipe_buffer_size) != pipe_buffer_size) {
    PLOG(ERROR) << "failed to set pipe buffer size";
  }
  // 发送pipe的write fd传给对端
  ssize_t rc = SendFileDescriptors(set_timeout(sockfd), &req, sizeof(req), pipe_write.get());
  pipe_write.reset();
  if (rc != sizeof(req)) {
    PLOG(ERROR) << "libdebuggerd_client: failed to send output fd to tombstoned";
    return false;
  }

  // Check to make sure we've successfully registered.
  InterceptResponse response; // 接受服务端的响应
  rc = TEMP_FAILURE_RETRY(recv(set_timeout(sockfd.get()), &response, sizeof(response), MSG_TRUNC));
  if (rc == 0) {
    LOG(ERROR) << "libdebuggerd_client: failed to read initial response from tombstoned: "
               << "timeout reached?";
    return false;
  } else if (rc == -1) {
    PLOG(ERROR) << "libdebuggerd_client: failed to read initial response from tombstoned";
    return false;
  } else if (rc != sizeof(response)) {
    LOG(ERROR) << "libdebuggerd_client: received packet of unexpected length from tombstoned while "
                  "reading initial response: expected "
               << sizeof(response) << ", received " << rc;
    return false;
  }

  if (response.status != InterceptStatus::kRegistered) {
    LOG(ERROR) << "libdebuggerd_client: unexpected registration response: "
               << static_cast(response.status);
    return false;
  }

  if (!send_signal(tid, dump_type)) {
    return false;
  }
  // 读取服务端响应
  rc = TEMP_FAILURE_RETRY(recv(set_timeout(sockfd.get()), &response, sizeof(response), MSG_TRUNC));
  if (rc == 0) {
    LOG(ERROR) << "libdebuggerd_client: failed to read status response from tombstoned: "
                  "timeout reached?";
    return false;
  } else if (rc == -1) {
    PLOG(ERROR) << "libdebuggerd_client: failed to read status response from tombstoned";
    return false;
  } else if (rc != sizeof(response)) {
    LOG(ERROR) << "libdebuggerd_client: received packet of unexpected length from tombstoned while "
                  "reading confirmation response: expected "
               << sizeof(response) << ", received " << rc;
    return false;
  }

  if (response.status != InterceptStatus::kStarted) {
    response.error_message[sizeof(response.error_message) - 1] = '<= 0) {
      remaining_ms = -1;
    } else if (remaining_ms < 0) {
      LOG(ERROR) << "libdebuggerd_client: timeout expired";
      return false;
    }

    struct pollfd pfd = {
        .fd = pipe_read.get(), .events = POLLIN, .revents = 0,
    };

    rc = poll(&pfd, 1, remaining_ms); // 通过 poll 监听 read fd
    if (rc == -1) {
      if (errno == EINTR) {
        continue;
      } else {
        PLOG(ERROR) << "libdebuggerd_client: error while polling";
        return false;
      }
    } else if (rc == 0) {
      LOG(ERROR) << "libdebuggerd_client: timeout expired";
      return false;
    }

    char buf[1024];
    rc = TEMP_FAILURE_RETRY(read(pipe_read.get(), buf, sizeof(buf))); // 读取数据。
    if (rc == 0) {
      // Done.
      break;
    } else if (rc == -1) {
      PLOG(ERROR) << "libdebuggerd_client: error while reading";
      return false;
    }

    if (!android::base::WriteFully(output_fd.get(), buf, rc)) { // 写数据
      PLOG(ERROR) << "libdebuggerd_client: error while writing";
      return false;
    }
  }

  LOG(INFO) << "libdebuggerd_client: done dumping process " << pid;

  return true;
}
';
    LOG(ERROR) << "libdebuggerd_client: tombstoned reported failure: " << response.error_message;
    return false;
  }

  // Forward output from the pipe to the output fd.
  while (true) { // 读取服务端写过来的数据,然后输出到dump文件
    auto remaining_ms = std::chrono::duration_cast(time_left()).count();
    if (timeout_ms 

欢迎分享,转载请注明来源:内存溢出

原文地址: http://outofmemory.cn/langs/797594.html

(0)
打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2022-05-06
下一篇 2022-05-06

发表评论

登录后才能评论

评论列表(0条)

保存