Skip to content

Commit

Permalink
taskCopyContext should not require holding task.mu.
Browse files Browse the repository at this point in the history
The primary existing user (ptrace) does not do this, and it leads to lock
inversion with MemoryManager.mappingMu.

PiperOrigin-RevId: 724526712
  • Loading branch information
nlacasse authored and gvisor-bot committed Feb 10, 2025
1 parent d6454b4 commit 15d71fb
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 83 deletions.
10 changes: 2 additions & 8 deletions pkg/sentry/kernel/task_usermem.go
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,8 @@ func (cc *taskCopyContext) CopyScratchBuffer(size int) []byte {
}

func (cc *taskCopyContext) getMemoryManager() (*mm.MemoryManager, error) {
cc.t.mu.Lock()
defer cc.t.mu.Unlock()
tmm := cc.t.MemoryManager()
if tmm == nil {
return nil, linuxerr.ESRCH
Expand All @@ -360,10 +362,6 @@ func (cc *taskCopyContext) getMemoryManager() (*mm.MemoryManager, error) {
}

// CopyInBytes implements marshal.CopyContext.CopyInBytes.
//
// Preconditions: Same as usermem.IO.CopyIn, plus:
// - The caller must be running on the task goroutine or hold the cc.t.mu
// - t's AddressSpace must be active.
func (cc *taskCopyContext) CopyInBytes(addr hostarch.Addr, dst []byte) (int, error) {
tmm, err := cc.getMemoryManager()
if err != nil {
Expand All @@ -374,10 +372,6 @@ func (cc *taskCopyContext) CopyInBytes(addr hostarch.Addr, dst []byte) (int, err
}

// CopyOutBytes implements marshal.CopyContext.CopyOutBytes.
//
// Preconditions: Same as usermem.IO.CopyOut, plus:
// - The caller must be running on the task goroutine or hold the cc.t.mu
// - t's AddressSpace must be active.
func (cc *taskCopyContext) CopyOutBytes(addr hostarch.Addr, src []byte) (int, error) {
tmm, err := cc.getMemoryManager()
if err != nil {
Expand Down
138 changes: 63 additions & 75 deletions pkg/sentry/syscalls/linux/sys_process_vm.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,12 @@
package linux

import (
"fmt"

"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/hostarch"
"gvisor.dev/gvisor/pkg/marshal"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/mm"
"gvisor.dev/gvisor/pkg/usermem"
)

Expand Down Expand Up @@ -80,87 +78,77 @@ func processVMOp(t *kernel.Task, args arch.SyscallArguments, op processVMOpType)
return 0, nil, linuxerr.EPERM
}

// Figure out which processes and arguments (local or remote) are for
// writing and which are for reading, based on the operation.
var opArgs processVMOpArgs
switch op {
case processVMOpRead:
// Read from remote process and write into local.
opArgs = processVMOpArgs{
readCtx: remoteTask.CopyContext(t, usermem.IOOpts{}),
readAddr: rvec,
readIovecCount: riovcnt,
writeCtx: t.CopyContext(t, usermem.IOOpts{AddressSpaceActive: true}),
writeAddr: lvec,
writeIovecCount: liovcnt,
}
case processVMOpWrite:
// Read from local process and write into remote.
opArgs = processVMOpArgs{
readCtx: t.CopyContext(t, usermem.IOOpts{AddressSpaceActive: true}),
readAddr: lvec,
readIovecCount: liovcnt,
writeCtx: remoteTask.CopyContext(t, usermem.IOOpts{}),
writeAddr: rvec,
writeIovecCount: riovcnt,
}
default:
panic(fmt.Sprintf("unknown process vm op type: %v", op))
// Calculate MemoryManager, IOOpts, and iovecs for each of the local
// and remote operations.
localIovecs, err := t.CopyInIovecsAsSlice(lvec, liovcnt)
if err != nil {
return 0, nil, err
}

var (
n int
err error
)
if t == remoteTask {
// No need to lock remote process's task mutex since it is the
// same as this process.
n, err = doProcessVMOpMaybeLocked(t, opArgs)
localOps := processVMOps{
mm: t.MemoryManager(),
ioOpts: usermem.IOOpts{AddressSpaceActive: true},
iovecs: localIovecs,
}
remoteIovecs, err := t.CopyInIovecsAsSlice(rvec, riovcnt)
if err != nil {
return 0, nil, err
}
remoteOps := processVMOps{
iovecs: remoteIovecs,
}
if remoteTask == t {
// No need to take remoteTask.mu to fetch the memory manager,
// and we can assume address space is active.
remoteOps.mm = t.MemoryManager()
remoteOps.ioOpts = usermem.IOOpts{AddressSpaceActive: true}
} else {
// Need to take remote process's task mutex to pin
// remoteTask.MemoryManager().
// Grab the remoteTask memory manager, and pin it by adding
// ourselves as a user.
remoteTask.WithMuLocked(func(*kernel.Task) {
if remoteTask.MemoryManager() == nil {
err = linuxerr.ESRCH
return
}
n, err = doProcessVMOpMaybeLocked(t, opArgs)
remoteOps.mm = remoteTask.MemoryManager()
})
// Check that the remoteTask memory manager exists.
if remoteOps.mm == nil {
return 0, nil, linuxerr.ESRCH
}
if !remoteOps.mm.IncUsers() {
return 0, nil, linuxerr.EFAULT
}
defer remoteOps.mm.DecUsers(t)
}

// Finally time to copy some bytes. The order depends on whether we are
// "reading" or "writing".
var n int
switch op {
case processVMOpRead:
// Copy from remote process to local.
n, err = processVMCopyIovecs(t, remoteOps, localOps)
case processVMOpWrite:
// Copy from local process to remote.
n, err = processVMCopyIovecs(t, localOps, remoteOps)
}
if n == 0 && err != nil {
return 0, nil, err
}
return uintptr(n), nil, nil
}

// processVMOpArgs describes one process_vm_readv/process_vm_writev
// operation: the copy context and user iovec array to read from, and the
// copy context and user iovec array to write to.
type processVMOpArgs struct {
	// readCtx copies bytes in from the source address space.
	readCtx marshal.CopyContext
	// readAddr is the user address of the source iovec array.
	readAddr hostarch.Addr
	// readIovecCount is the number of entries in the source iovec array.
	readIovecCount int
	// writeCtx copies bytes out to the destination address space.
	writeCtx marshal.CopyContext
	// writeAddr is the user address of the destination iovec array.
	writeAddr hostarch.Addr
	// writeIovecCount is the number of entries in the destination iovec array.
	writeIovecCount int
}

// maxScratchBufferSize is the maximum size of a scratch buffer (1 MiB). It
// should be sufficiently large to minimize the number of trips through MM.
const maxScratchBufferSize = 1 << 20

func doProcessVMOpMaybeLocked(t *kernel.Task, args processVMOpArgs) (int, error) {
// Copy IOVecs in to kernel.
readIovecs, err := t.CopyInIovecsAsSlice(args.readAddr, args.readIovecCount)
if err != nil {
return 0, err
}
writeIovecs, err := t.CopyInIovecsAsSlice(args.writeAddr, args.writeIovecCount)
if err != nil {
return 0, err
}
// processVMOps bundles the state needed to copy to or from one side (local
// or remote) of a process_vm_readv/process_vm_writev operation.
type processVMOps struct {
	// mm is the memory manager of the task whose address space is being
	// read or written. For a remote task, the caller pins it with
	// mm.IncUsers before use and releases it with mm.DecUsers after.
	mm *mm.MemoryManager
	// ioOpts are the options passed to mm.CopyIn/mm.CopyOut (e.g.
	// AddressSpaceActive when the address space is the calling task's own).
	ioOpts usermem.IOOpts
	// iovecs holds the remaining address ranges to copy; entries are
	// advanced and consumed as the copy progresses.
	iovecs []hostarch.AddrRange
}

func processVMCopyIovecs(t *kernel.Task, readOps, writeOps processVMOps) (int, error) {
// Get scratch buffer from the calling task.
// Its size should be the size of the largest read iovec.
var bufSize int
for _, readIovec := range readIovecs {
for _, readIovec := range readOps.iovecs {
if int(readIovec.Length()) > bufSize {
bufSize = int(readIovec.Length())
}
Expand All @@ -172,40 +160,40 @@ func doProcessVMOpMaybeLocked(t *kernel.Task, args processVMOpArgs) (int, error)

// Number of bytes written.
var n int
for len(readIovecs) != 0 && len(writeIovecs) != 0 {
readIovec := readIovecs[0]
for len(readOps.iovecs) != 0 && len(writeOps.iovecs) != 0 {
readIovec := readOps.iovecs[0]
length := readIovec.Length()
if length == 0 {
readIovecs = readIovecs[1:]
readOps.iovecs = readOps.iovecs[1:]
continue
}
if length > maxScratchBufferSize {
length = maxScratchBufferSize
}
buf = buf[0:int(length)]
bytes, err := args.readCtx.CopyInBytes(readIovec.Start, buf)
bytes, err := readOps.mm.CopyIn(t, readIovec.Start, buf, readOps.ioOpts)
if bytes == 0 {
return n, err
}
readIovecs[0].Start += hostarch.Addr(bytes)
readOps.iovecs[0].Start += hostarch.Addr(bytes)

start := 0
for bytes > start && len(writeIovecs) > 0 {
writeLength := int(writeIovecs[0].Length())
for bytes > start && len(writeOps.iovecs) > 0 {
writeLength := int(writeOps.iovecs[0].Length())
if writeLength == 0 {
writeIovecs = writeIovecs[1:]
writeOps.iovecs = writeOps.iovecs[1:]
continue
}
if writeLength > (bytes - start) {
writeLength = bytes - start
}
out, err := args.writeCtx.CopyOutBytes(writeIovecs[0].Start, buf[start:writeLength+start])
out, err := writeOps.mm.CopyOut(t, writeOps.iovecs[0].Start, buf[start:writeLength+start], writeOps.ioOpts)
n += out
start += out
if out != writeLength {
return n, err
}
writeIovecs[0].Start += hostarch.Addr(out)
writeOps.iovecs[0].Start += hostarch.Addr(out)
}
}
return n, nil
Expand Down

0 comments on commit 15d71fb

Please sign in to comment.