1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
|
From 982a314bd3000a16c3128afadb36a8ff41029adc Mon Sep 17 00:00:00 2001
From: Julien Grall <jgrall@amazon.com>
Date: Tue, 7 Jun 2022 14:06:11 +0200
Subject: [PATCH 17/51] xen: io: Fix race between sending an I/O and domain
shutdown
Xen provides hypercalls to shutdown (SCHEDOP_shutdown{,_code}) and
resume a domain (XEN_DOMCTL_resumedomain). They can be used for checkpoint
where the expectation is the domain should continue as nothing happened
afterwards.
hvmemul_do_io() and handle_pio() will act differently if the return
code of hvm_send_ioreq() (resp. hvmemul_do_pio_buffer()) is X86EMUL_RETRY.
In this case, the I/O state will be reset to STATE_IOREQ_NONE (i.e
no I/O is pending) and/or the PC will not be advanced.
If the shutdown request happens right after the I/O was sent to the
IOREQ, then emulation code will end up to re-execute the instruction
and therefore forward again the same I/O (at least when reading IO port).
This would be problem if the access has a side-effect. A dumb example,
is a device implementing a counter which is incremented by one for every
access. When running shutdown/resume in a loop, the value read by the
OS may not be the old value + 1.
Add an extra boolean in the structure hvm_vcpu_io to indicate whether
the I/O was suspended. This is then used in place of checking the domain
is shutting down in hvmemul_do_io() and handle_pio() as they should
act on suspend (i.e. vcpu_start_shutdown_deferral() returns false) rather
than shutdown.
Signed-off-by: Julien Grall <jgrall@amazon.com>
Reviewed-by: Paul Durrant <paul@xen.org>
master commit: b7e0d8978810b534725e94a321736496928f00a5
master date: 2022-05-06 17:16:22 +0100
---
xen/arch/arm/ioreq.c | 3 ++-
xen/arch/x86/hvm/emulate.c | 3 ++-
xen/arch/x86/hvm/io.c | 7 ++++---
xen/common/ioreq.c | 4 ++++
xen/include/xen/sched.h | 5 +++++
5 files changed, 17 insertions(+), 5 deletions(-)
diff --git a/xen/arch/arm/ioreq.c b/xen/arch/arm/ioreq.c
index 308650b40051..fbccef212bf1 100644
--- a/xen/arch/arm/ioreq.c
+++ b/xen/arch/arm/ioreq.c
@@ -80,9 +80,10 @@ enum io_state try_fwd_ioserv(struct cpu_user_regs *regs,
return IO_ABORT;
vio->req = p;
+ vio->suspended = false;
rc = ioreq_send(s, &p, 0);
- if ( rc != IO_RETRY || v->domain->is_shutting_down )
+ if ( rc != IO_RETRY || vio->suspended )
vio->req.state = STATE_IOREQ_NONE;
else if ( !ioreq_needs_completion(&vio->req) )
rc = IO_HANDLED;
diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
index 76a2ccfafe23..7da348b5d486 100644
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -239,6 +239,7 @@ static int hvmemul_do_io(
ASSERT(p.count);
vio->req = p;
+ vio->suspended = false;
rc = hvm_io_intercept(&p);
@@ -334,7 +335,7 @@ static int hvmemul_do_io(
else
{
rc = ioreq_send(s, &p, 0);
- if ( rc != X86EMUL_RETRY || currd->is_shutting_down )
+ if ( rc != X86EMUL_RETRY || vio->suspended )
vio->req.state = STATE_IOREQ_NONE;
else if ( !ioreq_needs_completion(&vio->req) )
rc = X86EMUL_OKAY;
diff --git a/xen/arch/x86/hvm/io.c b/xen/arch/x86/hvm/io.c
index 93f1d1503fa6..80915f27e488 100644
--- a/xen/arch/x86/hvm/io.c
+++ b/xen/arch/x86/hvm/io.c
@@ -138,10 +138,11 @@ bool handle_pio(uint16_t port, unsigned int size, int dir)
case X86EMUL_RETRY:
/*
- * We should not advance RIP/EIP if the domain is shutting down or
- * if X86EMUL_RETRY has been returned by an internal handler.
+ * We should not advance RIP/EIP if the vio was suspended (e.g.
+ * because the domain is shutting down) or if X86EMUL_RETRY has
+ * been returned by an internal handler.
*/
- if ( curr->domain->is_shutting_down || !vcpu_ioreq_pending(curr) )
+ if ( vio->suspended || !vcpu_ioreq_pending(curr) )
return false;
break;
diff --git a/xen/common/ioreq.c b/xen/common/ioreq.c
index d732dc045df9..42414b750bef 100644
--- a/xen/common/ioreq.c
+++ b/xen/common/ioreq.c
@@ -1256,6 +1256,7 @@ int ioreq_send(struct ioreq_server *s, ioreq_t *proto_p,
struct vcpu *curr = current;
struct domain *d = curr->domain;
struct ioreq_vcpu *sv;
+ struct vcpu_io *vio = &curr->io;
ASSERT(s);
@@ -1263,7 +1264,10 @@ int ioreq_send(struct ioreq_server *s, ioreq_t *proto_p,
return ioreq_send_buffered(s, proto_p);
if ( unlikely(!vcpu_start_shutdown_deferral(curr)) )
+ {
+ vio->suspended = true;
return IOREQ_STATUS_RETRY;
+ }
list_for_each_entry ( sv,
&s->ioreq_vcpu_list,
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 28146ee404e6..9671062360ac 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -159,6 +159,11 @@ enum vio_completion {
struct vcpu_io {
/* I/O request in flight to device model. */
enum vio_completion completion;
+ /*
+ * Indicate whether the I/O was not handled because the domain
+ * is about to be paused.
+ */
+ bool suspended;
ioreq_t req;
};
--
2.35.1
|