// Copyright 2018 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "VkQueue.hpp"

#include "VkCommandBuffer.hpp"
#include "VkFence.hpp"
#include "VkSemaphore.hpp"
#include "VkStringify.hpp"
#include "VkTimelineSemaphore.hpp"
#include "Device/Renderer.hpp"
#include "WSI/VkSwapchainKHR.hpp"

#include "marl/defer.h"
#include "marl/scheduler.h"
#include "marl/thread.h"
#include "marl/trace.h"

#include <cstring>
namespace vk {
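
// Makes a deep copy of the given VkSubmitInfo array in a single host allocation.
// vkQueueSubmit() may return before the work is executed on the queue thread, and
// the application's pointers are only required to remain valid for the duration of
// the call, so the queue must own a self-contained copy. The first pass below
// computes the total size of every referenced array; the second pass copies them
// into place.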
Queue::SubmitInfo *Queue::DeepCopySubmitInfo(uint32_t submitCount, const VkSubmitInfo *pSubmits)
{
	size_t submitSize = sizeof(SubmitInfo) * submitCount;
	size_t totalSize = submitSize;
	for(uint32_t i = 0; i < submitCount; i++)
	{
		totalSize += pSubmits[i].waitSemaphoreCount * sizeof(VkSemaphore);
		totalSize += pSubmits[i].waitSemaphoreCount * sizeof(VkPipelineStageFlags);
		totalSize += pSubmits[i].signalSemaphoreCount * sizeof(VkSemaphore);
		totalSize += pSubmits[i].commandBufferCount * sizeof(VkCommandBuffer);

		for(const auto *extension = reinterpret_cast<const VkBaseInStructure *>(pSubmits[i].pNext);
		    extension != nullptr; extension = reinterpret_cast<const VkBaseInStructure *>(extension->pNext))
		{
			switch(extension->sType)
			{
			case VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO:
				{
					const auto *tlsSubmitInfo = reinterpret_cast<const VkTimelineSemaphoreSubmitInfo *>(extension);
					totalSize += tlsSubmitInfo->waitSemaphoreValueCount * sizeof(uint64_t);
					totalSize += tlsSubmitInfo->signalSemaphoreValueCount * sizeof(uint64_t);
				}
				break;
			case VK_STRUCTURE_TYPE_DEVICE_GROUP_SUBMIT_INFO:
				// SwiftShader doesn't use device group submit info because it only supports a single physical device.
				// However, this extension is core in Vulkan 1.1, so we must treat it as a valid structure type.
				break;
			case VK_STRUCTURE_TYPE_MAX_ENUM:
				// dEQP tests that this value is ignored.
				break;
			default:
				UNSUPPORTED("submitInfo[%d]->pNext sType: %s", i, vk::Stringify(extension->sType).c_str());
				break;
			}
		}
	}

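	// Allocate a single block that holds the SubmitInfo array followed by all of
	// the arrays it references; `mem` is advanced past each region as it is filled.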
	uint8_t *mem = static_cast<uint8_t *>(
	    vk::allocateHostMemory(totalSize, vk::REQUIRED_MEMORY_ALIGNMENT, vk::NULL_ALLOCATION_CALLBACKS, vk::Fence::GetAllocationScope()));

	auto submits = new(mem) SubmitInfo[submitCount];
	mem += submitSize;

	for(uint32_t i = 0; i < submitCount; i++)
	{
		submits[i].commandBufferCount = pSubmits[i].commandBufferCount;
		submits[i].signalSemaphoreCount = pSubmits[i].signalSemaphoreCount;
		submits[i].waitSemaphoreCount = pSubmits[i].waitSemaphoreCount;

		submits[i].pWaitSemaphores = nullptr;
		submits[i].pWaitDstStageMask = nullptr;
		submits[i].pSignalSemaphores = nullptr;
		submits[i].pCommandBuffers = nullptr;

		if(pSubmits[i].waitSemaphoreCount > 0)
		{
			size_t size = pSubmits[i].waitSemaphoreCount * sizeof(VkSemaphore);
			submits[i].pWaitSemaphores = reinterpret_cast<const VkSemaphore *>(mem);
			memcpy(mem, pSubmits[i].pWaitSemaphores, size);
			mem += size;

			size = pSubmits[i].waitSemaphoreCount * sizeof(VkPipelineStageFlags);
			submits[i].pWaitDstStageMask = reinterpret_cast<const VkPipelineStageFlags *>(mem);
			memcpy(mem, pSubmits[i].pWaitDstStageMask, size);
			mem += size;
		}

		if(pSubmits[i].signalSemaphoreCount > 0)
		{
			size_t size = pSubmits[i].signalSemaphoreCount * sizeof(VkSemaphore);
			submits[i].pSignalSemaphores = reinterpret_cast<const VkSemaphore *>(mem);
			memcpy(mem, pSubmits[i].pSignalSemaphores, size);
			mem += size;
		}

		if(pSubmits[i].commandBufferCount > 0)
		{
			size_t size = pSubmits[i].commandBufferCount * sizeof(VkCommandBuffer);
			submits[i].pCommandBuffers = reinterpret_cast<const VkCommandBuffer *>(mem);
			memcpy(mem, pSubmits[i].pCommandBuffers, size);
			mem += size;
		}

		submits[i].waitSemaphoreValueCount = 0;
		submits[i].pWaitSemaphoreValues = nullptr;
		submits[i].signalSemaphoreValueCount = 0;
		submits[i].pSignalSemaphoreValues = nullptr;

		for(const auto *extension = reinterpret_cast<const VkBaseInStructure *>(pSubmits[i].pNext);
		    extension != nullptr; extension = reinterpret_cast<const VkBaseInStructure *>(extension->pNext))
		{
			switch(extension->sType)
			{
			case VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO:
				{
					const VkTimelineSemaphoreSubmitInfo *tlsSubmitInfo = reinterpret_cast<const VkTimelineSemaphoreSubmitInfo *>(extension);

					if(tlsSubmitInfo->waitSemaphoreValueCount > 0)
					{
						submits[i].waitSemaphoreValueCount = tlsSubmitInfo->waitSemaphoreValueCount;
						size_t size = tlsSubmitInfo->waitSemaphoreValueCount * sizeof(uint64_t);
						submits[i].pWaitSemaphoreValues = reinterpret_cast<uint64_t *>(mem);
						memcpy(mem, tlsSubmitInfo->pWaitSemaphoreValues, size);
						mem += size;
					}

					if(tlsSubmitInfo->signalSemaphoreValueCount > 0)
					{
						submits[i].signalSemaphoreValueCount = tlsSubmitInfo->signalSemaphoreValueCount;
						size_t size = tlsSubmitInfo->signalSemaphoreValueCount * sizeof(uint64_t);
						submits[i].pSignalSemaphoreValues = reinterpret_cast<uint64_t *>(mem);
						memcpy(mem, tlsSubmitInfo->pSignalSemaphoreValues, size);
						mem += size;
					}
				}
				break;
			case VK_STRUCTURE_TYPE_DEVICE_GROUP_SUBMIT_INFO:
				// SwiftShader doesn't use device group submit info because it only supports a single physical device.
				// However, this extension is core in Vulkan 1.1, so we must treat it as a valid structure type.
				break;
			case VK_STRUCTURE_TYPE_MAX_ENUM:
				// dEQP tests that this value is ignored.
				break;
			default:
				UNSUPPORTED("submitInfo[%d]->pNext sType: %s", i, vk::Stringify(extension->sType).c_str());
				break;
			}
		}
	}

	return submits;
}

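// Each queue owns a dedicated worker thread running taskLoop(), so command buffer
// execution happens asynchronously to the application thread that submits work.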
Queue::Queue(Device *device, marl::Scheduler *scheduler)
    : device(device)
{
	queueThread = std::thread(&Queue::taskLoop, this, scheduler);
}

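// The destructor enqueues a KILL_THREAD task behind any outstanding work, so joining
// the worker thread guarantees that all previously queued tasks have been processed.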
Queue::~Queue()
{
	Task task;
	task.type = Task::KILL_THREAD;
	pending.put(task);

	queueThread.join();
	ASSERT_MSG(pending.count() == 0, "queue has work after worker thread shutdown");

	garbageCollect();
}

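// Enqueues work for the worker thread and returns immediately. The submit info is
// deep-copied since the caller's pointers need not remain valid after this call.
// If a fence is provided, its counted event is incremented so that it only signals
// once the worker thread has finished executing this batch.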
VkResult Queue::submit(uint32_t submitCount, const VkSubmitInfo *pSubmits, Fence *fence)
{
	garbageCollect();

	Task task;
	task.submitCount = submitCount;
	task.pSubmits = DeepCopySubmitInfo(submitCount, pSubmits);
	if(fence)
	{
		task.events = fence->getCountedEvent();
		task.events->add();
	}

	pending.put(task);

	return VK_SUCCESS;
}

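// Runs on the worker thread: waits on the wait semaphores, executes the command
// buffers in submission order, then signals the signal semaphores.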
void Queue::submitQueue(const Task &task)
{
	if(renderer == nullptr)
	{
		renderer.reset(new sw::Renderer(device));
	}

	for(uint32_t i = 0; i < task.submitCount; i++)
	{
		SubmitInfo &submitInfo = task.pSubmits[i];
		for(uint32_t j = 0; j < submitInfo.waitSemaphoreCount; j++)
		{
			if(auto *sem = DynamicCast<TimelineSemaphore>(submitInfo.pWaitSemaphores[j]))
			{
				ASSERT(j < submitInfo.waitSemaphoreValueCount);
				sem->wait(submitInfo.pWaitSemaphoreValues[j]);
			}
			else if(auto *sem = DynamicCast<BinarySemaphore>(submitInfo.pWaitSemaphores[j]))
			{
				sem->wait(submitInfo.pWaitDstStageMask[j]);
			}
			else
			{
				UNSUPPORTED("Unknown semaphore type");
			}
		}

		{
			CommandBuffer::ExecutionState executionState;
			executionState.renderer = renderer.get();
			executionState.events = task.events.get();
			for(uint32_t j = 0; j < submitInfo.commandBufferCount; j++)
			{
				Cast(submitInfo.pCommandBuffers[j])->submit(executionState);
			}
		}

		for(uint32_t j = 0; j < submitInfo.signalSemaphoreCount; j++)
		{
			if(auto *sem = DynamicCast<TimelineSemaphore>(submitInfo.pSignalSemaphores[j]))
			{
				ASSERT(j < submitInfo.signalSemaphoreValueCount);
				sem->signal(submitInfo.pSignalSemaphoreValues[j]);
			}
			else if(auto *sem = DynamicCast<BinarySemaphore>(submitInfo.pSignalSemaphores[j]))
			{
				sem->signal();
			}
			else
			{
				UNSUPPORTED("Unknown semaphore type");
			}
		}
	}

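	// Hand the deep-copied submit info back for deferred freeing; it is released
	// on an application thread by the next garbageCollect() call.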
	if(task.pSubmits)
	{
		toDelete.put(task.pSubmits);
	}

	if(task.events)
	{
		// TODO: fix renderer signaling so that work submitted separately from (but before) a fence
		// is guaranteed complete by the time the fence signals.
		renderer->synchronize();
		task.events->done();
	}
}

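// Worker thread entry point. The marl scheduler is bound for the lifetime of the
// thread so that marl primitives used during command buffer execution can block
// and schedule tasks from here.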
void Queue::taskLoop(marl::Scheduler *scheduler)
{
	marl::Thread::setName("Queue<%p>", this);
	scheduler->bind();
	defer(scheduler->unbind());

	while(true)
	{
		Task task = pending.take();

		switch(task.type)
		{
		case Task::KILL_THREAD:
			ASSERT_MSG(pending.count() == 0, "queue has remaining work!");
			return;
		case Task::SUBMIT_QUEUE:
			submitQueue(task);
			break;
		default:
			UNREACHABLE("task.type %d", static_cast<int>(task.type));
			break;
		}
	}
}

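// Blocks until all tasks queued before this call have been processed, by enqueuing
// an empty task that carries only a counted event and waiting for it to signal.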
VkResult Queue::waitIdle()
{
	// Wait for task queue to flush.
	auto event = std::make_shared<sw::CountedEvent>();
	event->add();  // done() is called at the end of submitQueue()

	Task task;
	task.events = event;
	pending.put(task);

	event->wait();

	garbageCollect();

	return VK_SUCCESS;
}

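// Frees the deep-copied submit info retired by the worker thread via the
// `toDelete` chan. tryTake() is non-blocking, so this only drains entries that
// are already queued.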
void Queue::garbageCollect()
{
	while(true)
	{
		auto v = toDelete.tryTake();
		if(!v.second) { break; }
		vk::freeHostMemory(v.first, NULL_ALLOCATION_CALLBACKS);
	}
}

#ifndef __ANDROID__
VkResult Queue::present(const VkPresentInfoKHR *presentInfo)
{
	// This is a hack to deal with screen tearing for now.
	// Threading needs to be implemented correctly using VkSemaphore
	// to get rid of it. b/132458423
	waitIdle();

	for(uint32_t i = 0; i < presentInfo->waitSemaphoreCount; i++)
	{
		auto *semaphore = vk::DynamicCast<BinarySemaphore>(presentInfo->pWaitSemaphores[i]);
		semaphore->wait();
	}

	VkResult commandResult = VK_SUCCESS;

	for(uint32_t i = 0; i < presentInfo->swapchainCount; i++)
	{
		auto *swapchain = vk::Cast(presentInfo->pSwapchains[i]);
		VkResult perSwapchainResult = swapchain->present(presentInfo->pImageIndices[i]);

		if(presentInfo->pResults)
		{
			presentInfo->pResults[i] = perSwapchainResult;
		}

		// Keep track of the worst result code. VK_SUBOPTIMAL_KHR is a success code, so it should
		// not override failure codes, but it should not get replaced by a VK_SUCCESS result either.
		if(perSwapchainResult != VK_SUCCESS)
		{
			if(commandResult == VK_SUCCESS || commandResult == VK_SUBOPTIMAL_KHR)
			{
				commandResult = perSwapchainResult;
			}
		}
	}

	return commandResult;
}
#endif

void Queue::beginDebugUtilsLabel(const VkDebugUtilsLabelEXT *pLabelInfo)
{
	// Optional debug label region
}

void Queue::endDebugUtilsLabel()
{
	// Close debug label region opened with beginDebugUtilsLabel()
}

void Queue::insertDebugUtilsLabel(const VkDebugUtilsLabelEXT *pLabelInfo)
{
	// Optional single debug label
}

}  // namespace vk