1 files changed, 64 insertions, 12 deletions
diff --git a/base/src/mpu/kernel/kernel.c b/base/src/mpu/kernel/kernel.c
index 60a8667..e43e7ae 100644
--- a/base/src/mpu/kernel/kernel.c
+++ b/base/src/mpu/kernel/kernel.c
@@ -37,7 +37,6 @@
 
 #include <stddef.h>
 #include <stdint.h>
-#include <string.h>
 #include <spu_mfcio.h>
 
 #include "config.h"
@@ -70,10 +69,42 @@ static uint64_t workload_ea;
 static struct mars_workload_context schedule_workload;
 static uint16_t schedule_workload_id;
 
-/* module entry */
+/* cached copy of the workload module whose exec code was loaded last */
+static struct mars_workload_module cached_workload_module = {0, 0, 0, 0};
+
+/* workload module entry point; it is passed the kernel syscalls table */
 typedef void (*module_entry)(
 	const struct mars_kernel_syscalls *kernel_syscalls);
 
+static int kernel_memcmp(const void *s1, const void *s2, int size)
+{
+	const unsigned char *ptr_1 = s1;
+	const unsigned char *ptr_2 = s2;
+	const unsigned char *ptr_e = ptr_1 + size;	/* end of s1 range */
+
+	while (ptr_1 < ptr_e) {
+		if (*ptr_1 != *ptr_2)
+			return 1;	/* differ; no ordering is reported */
+		ptr_1++;
+		ptr_2++;
+	}
+
+	return 0;	/* no differing byte found */
+}
+
+static void kernel_memcpy(void *dst, const void *src, int size)
+{
+	unsigned char *ptr_1 = dst;
+	const unsigned char *ptr_2 = src;
+	const unsigned char *ptr_e = ptr_1 + size;	/* end of dst range */
+
+	while (ptr_1 < ptr_e) {	/* forward copy, one byte at a time */
+		*ptr_1 = *ptr_2;
+		ptr_1++;
+		ptr_2++;
+	}
+}
+
 static void dma_put(void *ls, uint64_t ea, uint32_t size)
 {
 	mfc_put((volatile void *)ls, ea, size, MARS_KERNEL_DMA_TAG, 0, 0);
@@ -629,6 +660,35 @@ static void __attribute__((noinline)) run_workload(void)
 	((module_entry)workload.module.entry)(&kernel_syscalls);
 }
 
+static void load_workload_module(void)
+{
+	__vector unsigned char *bss_ptr, *bss_end;
+
+	/* zero the bss section, located right after the exec code */
+	bss_ptr = (__vector unsigned char *)(MARS_WORKLOAD_MODULE_BASE_ADDR +
+					     workload.module.exec_size);
+	bss_end = (__vector unsigned char *)((void *)bss_ptr +
+					     workload.module.bss_size);
+
+	while (bss_ptr < bss_end)	/* whole-vector stores only */
+		*bss_ptr++ = spu_splats((unsigned char)0);
+
+	/* reload the exec code only if the module differs from cached */
+	if (kernel_memcmp(&cached_workload_module, &workload.module,
+			  sizeof(struct mars_workload_module))) {
+		/* load the exec code into mpu storage from host storage */
+		dma_get((void *)MARS_WORKLOAD_MODULE_BASE_ADDR,
+			workload.module.exec_ea, workload.module.exec_size);
+
+		/* cache the whole module struct that was just loaded */
+		kernel_memcpy(&cached_workload_module, &workload.module,
+			      sizeof(struct mars_workload_module));
+	}	/* NOTE(review): on a hit the old image is reused as-is */
+
+	/* sync before executing loaded code */
+	spu_sync();
+}
+
 static int scheduler(void)
 {
 	int status;
@@ -650,16 +710,8 @@ static int scheduler(void)
 	if (status == MARS_WORKLOAD_RESERVED_NONE)
 		return MARS_KERNEL_STATUS_IDLE;
 
-	/* load the exec code into mpu storage from host storage */
-	dma_get((void *)MARS_WORKLOAD_MODULE_BASE_ADDR,
-		workload.module.exec_ea, workload.module.exec_size);
-
-	/* 0 the bss section */
-	memset((void *)MARS_WORKLOAD_MODULE_BASE_ADDR +
-	       workload.module.exec_size, 0, workload.module.bss_size);
-
-	/* sync before executing loaded code */
-	spu_sync();
+	/* load the workload module */
+	load_workload_module();
 
 	/* run workload */
 	run_workload();