LINUX源码分析之进程和系统调用-洪萨配资

概述

GLIBC（GNU C Library）是 GNU 项目提供的 C 标准库实现，作为 Linux 系统的核心组件之一。它为应用程序提供基础的系统调用封装、内存管理、字符串处理等接口，是大多数 Linux 发行版的默认 C 库。

本文主要分析Linux下的进程创建流程，内核版本为6.18，glibc版本为2.31。

冷知识：pthread库没有单独的代码仓库，它是glibc的一个子模块。

在Linux内核中，进程和线程共用task_struct数据结构，因此可以说是同一种内核对象的不同状态。

系统调用

Linux的所有进/线程都通过syscall间接创建内核对象，无论使用系统函数（如fork）还是pthread库都是如此。但使用的系统调用编号有可能不同，多数情况下使用__NR_clone，少数情况下使用__NR_fork，vfork使用__NR_vfork。

pthread

下面是nptl\pthread_create.c中的pthread_create：

int __pthread_create_2_1 (pthread_t *newthread, const pthread_attr_t *attr, void *(*start_routine) (void *), void *arg) { STACK_VARIABLES; const struct pthread_attr *iattr = (struct pthread_attr *) attr; struct pthread_attr default_attr; bool free_cpuset = false; bool c11 = (attr == ATTR_C11_THREAD); if (iattr == NULL || c11) { lll_lock (__default_pthread_attr_lock, LLL_PRIVATE); default_attr = __default_pthread_attr; size_t cpusetsize = default_attr.cpusetsize; if (cpusetsize > 0) { cpu_set_t *cpuset; if (__glibc_likely (__libc_use_alloca (cpusetsize))) cpuset = __alloca (cpusetsize); else { cpuset = malloc (cpusetsize); if (cpuset == NULL) { lll_unlock (__default_pthread_attr_lock, LLL_PRIVATE); return ENOMEM; } free_cpuset = true; } memcpy (cpuset, default_attr.cpuset, cpusetsize); default_attr.cpuset = cpuset; } lll_unlock (__default_pthread_attr_lock, LLL_PRIVATE); iattr = &default_attr; } struct pthread *pd = NULL; int err = ALLOCATE_STACK (iattr, &pd); int retval = 0; if (__glibc_unlikely (err != 0)) /* Something went wrong. Maybe a parameter of the attributes is invalid or we could not allocate memory. Note we have to translate error codes. */ { retval = err == ENOMEM ? EAGAIN : err; goto out; } /* Initialize the TCB. All initializations with zero should be performed in 'get_cached_stack'. This way we avoid doing this if the stack freshly allocated with 'mmap'. */ #if TLS_TCB_AT_TP /* Reference to the TCB itself. */ pd->header.self = pd; /* Self-reference for TLS. */ pd->header.tcb = pd; #endif /* Store the address of the start routine and the parameter. Since we do not start the function directly the stillborn thread will get the information from its thread descriptor. */ pd->start_routine = start_routine; pd->arg = arg; pd->c11 = c11; /* Copy the thread attribute flags. */ struct pthread *self = THREAD_SELF; pd->flags = ((iattr->flags & ~(ATTR_FLAG_SCHED_SET | ATTR_FLAG_POLICY_SET)) | (self->flags & (ATTR_FLAG_SCHED_SET | ATTR_FLAG_POLICY_SET))); /* Initialize the field for the ID of the thread which is waiting for us. This is a self-reference in case the thread is created detached. */ pd->joinid = iattr->flags & ATTR_FLAG_DETACHSTATE ? pd : NULL; /* The debug events are inherited from the parent. */ pd->eventbuf = self->eventbuf; /* Copy the parent's scheduling parameters. The flags will say what is valid and what is not. */ pd->schedpolicy = self->schedpolicy; pd->schedparam = self->schedparam; /* Copy the stack guard canary. */ #ifdef THREAD_COPY_STACK_GUARD THREAD_COPY_STACK_GUARD (pd); #endif /* Copy the pointer guard value. */ #ifdef THREAD_COPY_POINTER_GUARD THREAD_COPY_POINTER_GUARD (pd); #endif /* Setup tcbhead. */ tls_setup_tcbhead (pd); /* Verify the sysinfo bits were copied in allocate_stack if needed. */ #ifdef NEED_DL_SYSINFO CHECK_THREAD_SYSINFO (pd); #endif /* Inform start_thread (above) about cancellation state that might translate into inherited signal state. */ pd->parent_cancelhandling = THREAD_GETMEM (THREAD_SELF, cancelhandling); /* Determine scheduling parameters for the thread. */ if (__builtin_expect ((iattr->flags & ATTR_FLAG_NOTINHERITSCHED) != 0, 0) && (iattr->flags & (ATTR_FLAG_SCHED_SET | ATTR_FLAG_POLICY_SET)) != 0) { /* Use the scheduling parameters the user provided. */ if (iattr->flags & ATTR_FLAG_POLICY_SET) { pd->schedpolicy = iattr->schedpolicy; pd->flags |= ATTR_FLAG_POLICY_SET; } if (iattr->flags & ATTR_FLAG_SCHED_SET) { /* The values were validated in pthread_attr_setschedparam. */ pd->schedparam = iattr->schedparam; pd->flags |= ATTR_FLAG_SCHED_SET; } if ((pd->flags & (ATTR_FLAG_SCHED_SET | ATTR_FLAG_POLICY_SET)) != (ATTR_FLAG_SCHED_SET | ATTR_FLAG_POLICY_SET)) collect_default_sched (pd); } if (__glibc_unlikely (__nptl_nthreads == 1)) _IO_enable_locks (); /* Pass the descriptor to the caller. */ *newthread = (pthread_t) pd; LIBC_PROBE (pthread_create, 4, newthread, attr, start_routine, arg); /* One more thread. We cannot have the thread do this itself, since it might exist but not have been scheduled yet by the time we've returned and need to check the value to behave correctly. We must do it before creating the thread, in case it does get scheduled first and then might mistakenly think it was the only thread. In the failure case, we momentarily store a false value; this doesn't matter because there is no kosher thing a signal handler interrupting us right here can do that cares whether the thread count is correct. */ atomic_increment (&__nptl_nthreads); /* Our local value of stopped_start and thread_ran can be accessed at any time. The PD->stopped_start may only be accessed if we have ownership of PD (see CONCURRENCY NOTES above). */ bool stopped_start = false; bool thread_ran = false; /* Start the thread. */ if (__glibc_unlikely (report_thread_creation (pd))) { stopped_start = true; /* We always create the thread stopped at startup so we can notify the debugger. */ retval = create_thread (pd, iattr, &stopped_start, STACK_VARIABLES_ARGS, &thread_ran); if (retval == 0) { /* We retain ownership of PD until (a) (see CONCURRENCY NOTES above). */ /* Assert stopped_start is true in both our local copy and the PD copy. */ assert (stopped_start); assert (pd->stopped_start); /* Now fill in the information about the new thread in the newly created thread's data structure. We cannot let the new thread do this since we don't know whether it was already scheduled when we send the event. */ pd->eventbuf.eventnum = TD_CREATE; pd->eventbuf.eventdata = pd; /* Enqueue the descriptor. */ do pd->nextevent = __nptl_last_event; while (atomic_compare_and_exchange_bool_acq (&__nptl_last_event, pd, pd->nextevent) != 0); /* Now call the function which signals the event. See CONCURRENCY NOTES for the nptl_db interface comments. */ __nptl_create_event (); } } else retval = create_thread (pd, iattr, &stopped_start, STACK_VARIABLES_ARGS, &thread_ran); if (__glibc_unlikely (retval != 0)) { if (thread_ran) /* State (c) or (d) and we may not have PD ownership (see CONCURRENCY NOTES above). We can assert that STOPPED_START must have been true because thread creation didn't fail, but thread attribute setting did. */ /* See bug 19511 which explains why doing nothing here is a resource leak for a joinable thread. */ assert (stopped_start); else { /* State (e) and we have ownership of PD (see CONCURRENCY NOTES above). */ /* Oops, we lied for a second. */ atomic_decrement (&__nptl_nthreads); /* Perhaps a thread wants to change the IDs and is waiting for this stillborn thread. */ if (__glibc_unlikely (atomic_exchange_acq (&pd->setxid_futex, 0) == -2)) futex_wake (&pd->setxid_futex, 1, FUTEX_PRIVATE); /* Free the resources. */ __deallocate_stack (pd); } /* We have to translate error codes. */ if (retval == ENOMEM) retval = EAGAIN; } else { /* We don't know if we have PD ownership. Once we check the local stopped_start we'll know if we're in state (a) or (b) (see CONCURRENCY NOTES above). */ if (stopped_start) /* State (a), we own PD. The thread blocked on this lock either because we're doing TD_CREATE event reporting, or for some other reason that create_thread chose. Now let it run free. */ lll_unlock (pd->lock, LLL_PRIVATE); /* We now have for sure more than one thread. The main thread might not yet have the flag set. No need to set the global variable again if this is what we use. */ THREAD_SETMEM (THREAD_SELF, header.multiple_threads, 1); } out: if (__glibc_unlikely (free_cpuset)) free (default_attr.cpuset); return retval; } versioned_symbol (libpthread, __pthread_create_2_1, pthread_create, GLIBC_2_1);

pthread_create这个符号被转换成了__pthread_create_2_1，而实际的创建线程的函数是create_thread。create_thread内部调用syscall，具体行为与操作系统和CPU有关。

create_thread在glibc中有2个定义，nptl\createthread.c中的始终返回错误，在Linux下编译时使用的是sysdeps\unix\sysv\linux\createthread.c：

/* Low-level thread creation for NPTL. Linux version. Copyright (C) 2002-2020 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@redhat.com>, 2002. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ #include <sched.h> #include <setjmp.h> #include <signal.h> #include <stdlib.h> #include <atomic.h> #include <ldsodefs.h> #include <tls.h> #include <stdint.h> #include <arch-fork.h> #ifdef __NR_clone2 # define ARCH_CLONE __clone2 #else # define ARCH_CLONE __clone #endif /* See the comments in pthread_create.c for the requirements for these two macros and the create_thread function. */ #define START_THREAD_DEFN \ static int __attribute__ ((noreturn)) start_thread (void *arg) #define START_THREAD_SELF arg /* pthread_create.c defines this using START_THREAD_DEFN We need a forward declaration here so we can take its address. */ static int start_thread (void *arg) __attribute__ ((noreturn)); static int create_thread (struct pthread *pd, const struct pthread_attr *attr, bool *stopped_start, STACK_VARIABLES_PARMS, bool *thread_ran) { /* Determine whether the newly created threads has to be started stopped since we have to set the scheduling parameters or set the affinity. */ if (attr != NULL && (__glibc_unlikely (attr->cpuset != NULL) || __glibc_unlikely ((attr->flags & ATTR_FLAG_NOTINHERITSCHED) != 0))) *stopped_start = true; pd->stopped_start = *stopped_start; if (__glibc_unlikely (*stopped_start)) /* See CONCURRENCY NOTES in nptl/pthread_creat.c. */ lll_lock (pd->lock, LLL_PRIVATE); /* We rely heavily on various flags the CLONE function understands: CLONE_VM, CLONE_FS, CLONE_FILES These flags select semantics with shared address space and file descriptors according to what POSIX requires. CLONE_SIGHAND, CLONE_THREAD This flag selects the POSIX signal semantics and various other kinds of sharing (itimers, POSIX timers, etc.). CLONE_SETTLS The sixth parameter to CLONE determines the TLS area for the new thread. CLONE_PARENT_SETTID The kernels writes the thread ID of the newly created thread into the location pointed to by the fifth parameters to CLONE. Note that it would be semantically equivalent to use CLONE_CHILD_SETTID but it is be more expensive in the kernel. CLONE_CHILD_CLEARTID The kernels clears the thread ID of a thread that has called sys_exit() in the location pointed to by the seventh parameter to CLONE. The termination signal is chosen to be zero which means no signal is sent. */ const int clone_flags = (CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SYSVSEM | CLONE_SIGHAND | CLONE_THREAD | CLONE_SETTLS | CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID | 0); TLS_DEFINE_INIT_TP (tp, pd); if (__glibc_unlikely (ARCH_CLONE (&start_thread, STACK_VARIABLES_ARGS, clone_flags, pd, &pd->tid, tp, &pd->tid) == -1)) return errno; /* It's started now, so if we fail below, we'll have to cancel it and let it clean itself up. */ *thread_ran = true; /* Now we have the possibility to set scheduling parameters etc. */ if (attr != NULL) { INTERNAL_SYSCALL_DECL (err); int res; /* Set the affinity mask if necessary. */ if (attr->cpuset != NULL) { assert (*stopped_start); res = INTERNAL_SYSCALL (sched_setaffinity, err, 3, pd->tid, attr->cpusetsize, attr->cpuset); if (__glibc_unlikely (INTERNAL_SYSCALL_ERROR_P (res, err))) err_out: { /* The operation failed. We have to kill the thread. We let the normal cancellation mechanism do the work. */ pid_t pid = __getpid (); INTERNAL_SYSCALL_DECL (err2); (void) INTERNAL_SYSCALL_CALL (tgkill, err2, pid, pd->tid, SIGCANCEL); return INTERNAL_SYSCALL_ERRNO (res, err); } } /* Set the scheduling parameters. */ if ((attr->flags & ATTR_FLAG_NOTINHERITSCHED) != 0) { assert (*stopped_start); res = INTERNAL_SYSCALL (sched_setscheduler, err, 3, pd->tid, pd->schedpolicy, &pd->schedparam); if (__glibc_unlikely (INTERNAL_SYSCALL_ERROR_P (res, err))) goto err_out; } } return 0; }

注意事项：同一个系统调用在不同平台下的编号可能不同。例如clone：

fork

tools\include\nolibc\sys.h：

/* * pid_t fork(void); */ #ifndef sys_fork static __attribute__((unused)) pid_t sys_fork(void) { #if defined(__NR_clone) /* note: some archs only have clone() and not fork(). Different archs * have a different API, but most archs have the flags on first arg and * will not use the rest with no other flag. */ return my_syscall5(__NR_clone, SIGCHLD, 0, 0, 0, 0); #else return my_syscall0(__NR_fork); #endif } #endif static __attribute__((unused)) pid_t fork(void) { return __sysret(sys_fork()); }

由于__NR_clone一般都有定义，fork在一般情况下使用__NR_clone而非__NR_fork。