From a10a64b189b24d668172132a5bb88e8051e3c9a4 Mon Sep 17 00:00:00 2001
From: Piotr Gorski <lucjan.lucjanov@gmail.com>
Date: Thu, 28 Aug 2025 14:32:50 +0200
Subject: [PATCH] bore

Signed-off-by: Piotr Gorski <lucjan.lucjanov@gmail.com>
---
 include/linux/sched.h      |  29 +++
 include/linux/sched/bore.h |  39 ++++
 init/Kconfig               |  17 ++
 kernel/Kconfig.hz          |  17 ++
 kernel/fork.c              |   8 +
 kernel/futex/waitwake.c    |  11 ++
 kernel/sched/Makefile      |   1 +
 kernel/sched/bore.c        | 393 +++++++++++++++++++++++++++++++++++++
 kernel/sched/core.c        |  12 ++
 kernel/sched/debug.c       |  61 +++++-
 kernel/sched/fair.c        | 111 +++++++++--
 kernel/sched/features.h    |   3 +
 kernel/sched/sched.h       |   9 +
 13 files changed, 699 insertions(+), 12 deletions(-)
 create mode 100644 include/linux/sched/bore.h
 create mode 100644 kernel/sched/bore.c

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0d1d70ade..2a0aba074 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -782,6 +782,32 @@ struct kmap_ctrl {
 #endif
 };
 
+#ifdef CONFIG_SCHED_BORE
+#define BORE_BC_TIMESTAMP_SHIFT 16
+
+struct bore_bc {	/* burst cache: one penalty sample plus the time it was taken */
+	u64				timestamp:	48;	/* sample time, stored as now >> BORE_BC_TIMESTAMP_SHIFT */
+	u64				penalty:	16;	/* cached penalty returned to task_fork_bore() for new tasks */
+};
+
+struct bore_ctx {	/* per-task BORE state, embedded in task_struct as ->bore */
+	struct bore_bc	subtree;	/* fork-time penalty cache over this task's children/descendants */
+	struct bore_bc	group;	/* fork-time penalty cache over the thread group; used on the group leader */
+	u64				burst_time;	/* runtime accumulated in the current burst */
+	u16				prev_penalty;	/* smoothed penalty carried over from earlier bursts */
+	u16				curr_penalty;	/* penalty computed from burst_time */
+	union {
+		u16			penalty;	/* effective penalty: max(prev_penalty, curr_penalty); 0 for kernel threads */
+		struct {
+			u8		_;	/* first byte of penalty in memory; never read on its own */
+			u8		score;	/* second byte of penalty; added to the priority index in effective_prio_bore() */
+		};	/* NOTE(review): score == penalty >> 8 only on little-endian layouts - confirm for big-endian */
+	};
+	bool			stop_update;	/* set while BORE reweights the task so update_curr_bore() returns early */
+	bool			futex_waiting;	/* set around schedule() in futex_wait_queue() */
+};
+#endif /* CONFIG_SCHED_BORE */
+
 struct task_struct {
 #ifdef CONFIG_THREAD_INFO_IN_TASK
 	/*
@@ -842,6 +868,9 @@ struct task_struct {
 #ifdef CONFIG_SCHED_CLASS_EXT
 	struct sched_ext_entity		scx;
 #endif
+#ifdef CONFIG_SCHED_BORE
+	struct bore_ctx			bore;
+#endif /* CONFIG_SCHED_BORE */
 	const struct sched_class	*sched_class;
 
 #ifdef CONFIG_SCHED_CORE
diff --git a/include/linux/sched/bore.h b/include/linux/sched/bore.h
new file mode 100644
index 000000000..5afb5fed4
--- /dev/null
+++ b/include/linux/sched/bore.h
@@ -0,0 +1,39 @@
+#ifndef _KERNEL_SCHED_BORE_H
+#define _KERNEL_SCHED_BORE_H
+
+#include <linux/sched.h>
+#include <linux/sched/cputime.h>
+#include <linux/atomic.h>
+#include <linux/list.h>
+#include <linux/rcupdate.h>
+
+#define SCHED_BORE_AUTHOR   "Masahito Suzuki"
+#define SCHED_BORE_PROGNAME "BORE CPU Scheduler modification"
+
+#define SCHED_BORE_VERSION  "6.5.2"
+
+extern u8   __read_mostly sched_bore;
+extern u8   __read_mostly sched_burst_inherit_type;
+extern u8   __read_mostly sched_burst_smoothness;
+extern u8   __read_mostly sched_burst_penalty_offset;
+extern uint __read_mostly sched_burst_penalty_scale;
+extern uint __read_mostly sched_burst_cache_lifetime;
+
+extern u8   effective_prio_bore(struct task_struct *p);
+extern void update_curr_bore(struct task_struct *p, u64 delta_exec);
+extern void restart_burst_bore(struct task_struct *p);
+extern void restart_burst_rescale_deadline_bore(struct task_struct *p);
+extern void task_fork_bore(struct task_struct *p, struct task_struct *parent,
+													u64 clone_flags, u64 now);
+extern void sched_init_bore(void);
+extern void reset_task_bore(struct task_struct *p);
+
+extern int  sched_bore_update_handler(const struct ctl_table *table,
+	int write, void __user *buffer, size_t *lenp, loff_t *ppos);
+extern int  sched_burst_inherit_type_update_handler(const struct ctl_table *table,
+	int write, void __user *buffer, size_t *lenp, loff_t *ppos);
+
+extern void reweight_entity(
+	struct cfs_rq *cfs_rq, struct sched_entity *se, unsigned long weight);
+
+#endif /* _KERNEL_SCHED_BORE_H */
diff --git a/init/Kconfig b/init/Kconfig
index 45990792c..e375aebc5 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1349,6 +1349,23 @@ config CHECKPOINT_RESTORE
 
 	  If unsure, say N here.
 
+config SCHED_BORE
+	bool "Burst-Oriented Response Enhancer"
+	default y
+	help
+	  On desktop and mobile systems, one might prefer interactive
+	  tasks to stay responsive no matter what runs in the background.
+
+	  Enabling this kernel feature modifies the scheduler to distinguish
+	  tasks by their burst time (runtime since the task last slept or
+	  yielded) and to prioritize those that run in shorter bursts.
+	  Such tasks usually include window compositors, widget backends,
+	  terminal emulators, video players, games and so on.
+	  At a small cost to scheduling fairness, it may improve
+	  responsiveness, especially under heavy background workloads.
+
+	  If unsure, say Y here.
+
 config SCHED_AUTOGROUP
 	bool "Automatic process group scheduling"
 	select CGROUPS
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 38ef6d068..4ee584ce9 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -57,3 +57,20 @@ config HZ
 
 config SCHED_HRTICK
 	def_bool HIGH_RES_TIMERS
+
+config MIN_BASE_SLICE_NS
+	int "Default value for min_base_slice_ns"
+	default 2000000
+	help
+	 The BORE scheduler derives the base slice from the configured HZ
+	 by rounding min_base_slice_ns up to a whole number of ticks:
+
+	 base_slice_ns =
+	 	1000000000/HZ * DIV_ROUND_UP(min_base_slice_ns, 1000000000/HZ)
+
+	 This option sets the default lower bound of the base slice
+	 to prevent loss of task throughput due to overscheduling.
+
+	 Setting this value too high can cause the system to boot with
+	 an unnecessarily large base slice, resulting in high scheduling
+	 latency and poor system responsiveness.
diff --git a/kernel/fork.c b/kernel/fork.c
index 97c9afe3e..e14b29847 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -113,6 +113,10 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#ifdef CONFIG_SCHED_BORE
+#include <linux/sched/bore.h>
+#endif /* CONFIG_SCHED_BORE */
+
 #include <trace/events/sched.h>
 
 #define CREATE_TRACE_POINTS
@@ -2524,6 +2528,10 @@ __latent_entropy struct task_struct *copy_process(
 	 * Need tasklist lock for parent etc handling!
 	 */
 	write_lock_irq(&tasklist_lock);
+#ifdef CONFIG_SCHED_BORE
+	if (likely(p->pid))
+		task_fork_bore(p, current, clone_flags, p->start_time);
+#endif /* CONFIG_SCHED_BORE */
 
 	/* CLONE_PARENT re-uses the old parent */
 	if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
diff --git a/kernel/futex/waitwake.c b/kernel/futex/waitwake.c
index a9056acb7..e60425086 100644
--- a/kernel/futex/waitwake.c
+++ b/kernel/futex/waitwake.c
@@ -4,6 +4,9 @@
 #include <linux/sched/task.h>
 #include <linux/sched/signal.h>
 #include <linux/freezer.h>
+#ifdef CONFIG_SCHED_BORE
+#include <linux/sched/bore.h>
+#endif // CONFIG_SCHED_BORE
 
 #include "futex.h"
 
@@ -367,7 +370,15 @@ void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q,
 		 * is no timeout, or if it has yet to expire.
 		 */
 		if (!timeout || timeout->task)
+#ifdef CONFIG_SCHED_BORE
+		{
+			current->bore.futex_waiting = true;
+#endif // CONFIG_SCHED_BORE
 			schedule();
+#ifdef CONFIG_SCHED_BORE
+			current->bore.futex_waiting = false;
+		}
+#endif // CONFIG_SCHED_BORE
 	}
 	__set_current_state(TASK_RUNNING);
 }
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 976092b7b..83cbd093b 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -32,3 +32,4 @@ obj-y += core.o
 obj-y += fair.o
 obj-y += build_policy.o
 obj-y += build_utility.o
+obj-$(CONFIG_SCHED_BORE) += bore.o
diff --git a/kernel/sched/bore.c b/kernel/sched/bore.c
new file mode 100644
index 000000000..c9e76eda3
--- /dev/null
+++ b/kernel/sched/bore.c
@@ -0,0 +1,393 @@
+/*
+ *  Burst-Oriented Response Enhancer (BORE) CPU Scheduler
+ *  Copyright (C) 2021-2025 Masahito Suzuki <firelzrd@gmail.com>
+ */
+#include <linux/cpuset.h>
+#include <linux/sched/task.h>
+#include <linux/sched/bore.h>
+#include "sched.h"
+
+#ifdef CONFIG_SCHED_BORE
+u8   __read_mostly sched_bore                   = 1; /* nonzero: add the burst score to task priority */
+u8   __read_mostly sched_burst_inherit_type     = 2; /* fork inheritance: 1 parent, 2 ancestor hub, else none */
+u8   __read_mostly sched_burst_smoothness       = 1; /* shift applied when a penalty rises between bursts */
+u8   __read_mostly sched_burst_penalty_offset   = 24; /* tolerance (<< 8) subtracted from the burst's log2-based greed */
+uint __read_mostly sched_burst_penalty_scale    = 1536; /* penalty multiplier in 1/1024 units */
+uint __read_mostly sched_burst_cache_lifetime   = 75000000; /* max age of a cached fork penalty, in the units of `now` */
+static int __maybe_unused maxval_prio    =   39; /* highest index accepted by effective_prio_bore() */
+static int __maybe_unused maxval_6_bits  =   63; /* sysctl upper bounds */
+static int __maybe_unused maxval_8_bits  =  255;
+static int __maybe_unused maxval_12_bits = 4095;
+
+#define MAX_BURST_PENALTY ((40U << 8) - 1)
+#define BURST_CACHE_STOP_COUNT 63
+
+static u32 (*inherit_penalty_fn)(struct task_struct *, u64, u64);
+
+static inline u32 log2p1_u64_u32fp(u64 v, u8 fp) { /* fls64(v) as integer part plus fp fractional bits; 0 when v == 0 */
+	if (!v) return 0;
+	u32 exponent = fls64(v), /* position of the highest set bit */
+		mantissa = (u32)(v << (64 - exponent) << 1 >> (64 - fp)); /* the fp bits just below the leading one */
+	return exponent << fp | mantissa;
+}
+
+static inline u32 calc_burst_penalty(u64 burst_time) { /* map burst length to a penalty with 8 fractional bits, capped at MAX_BURST_PENALTY */
+	u32 greed = log2p1_u64_u32fp(burst_time, 8),
+		tolerance = sched_burst_penalty_offset << 8,
+		penalty = max(0, (s32)(greed - tolerance)), /* bursts below the tolerance cost nothing */
+		scaled_penalty = penalty * sched_burst_penalty_scale >> 10; /* scale is in 1/1024 units */
+	return min(MAX_BURST_PENALTY, scaled_penalty);
+}
+
+static inline u64 rescale_slice(u64 delta, u8 old_prio, u8 new_prio) { /* re-express delta, scaled for old_prio, in terms of new_prio */
+	u64 unscaled, rescaled;
+	unscaled = mul_u64_u32_shr(delta   , sched_prio_to_weight[old_prio], 10); /* multiply by the old weight, >> 10 */
+	rescaled = mul_u64_u32_shr(unscaled, sched_prio_to_wmult [new_prio], 22); /* multiply by the new inverse weight, >> 22 */
+	return rescaled;
+}
+
+static inline u32 binary_smooth(u32 new, u32 old) { /* fall to a lower value at once; rise by only 1/2^smoothness of the gap */
+	if (new <= old) return new;
+
+	u32 increment = new - old,
+		shift = sched_burst_smoothness,
+		divisor = 1U << shift;
+
+	return old + ((increment + divisor - 1) >> shift); /* rounded up, so a rise always moves by at least 1 */
+}
+
+static void reweight_task_by_prio(struct task_struct *p, int prio) { /* set p's load weight to the table entry for prio */
+	if (task_has_idle_policy(p)) return;
+
+	struct sched_entity *se = &p->se;
+	unsigned long weight = scale_load(sched_prio_to_weight[prio]);
+
+	if (se->on_rq) {
+		p->bore.stop_update = true; /* update_curr_bore() returns early while this is set */
+		reweight_entity(cfs_rq_of(se), se, weight);
+		p->bore.stop_update = false;
+	} else
+		se->load.weight = weight;
+	se->load.inv_weight = sched_prio_to_wmult[prio];
+}
+
+u8 effective_prio_bore(struct task_struct *p) { /* weight-table index for p: static priority plus the BORE score when enabled */
+	int prio = p->static_prio - MAX_RT_PRIO;
+	if (likely(sched_bore))
+		prio += p->bore.score;
+	return (u8)clamp(prio, 0, maxval_prio); /* maxval_prio is 39 */
+}
+
+static void update_penalty(struct task_struct *p) { /* recompute ctx->penalty and reweight p if its effective priority changed */
+	struct bore_ctx *ctx = &p->bore;
+
+	u8  prev_prio = effective_prio_bore(p);
+	u32 penalty = 0; /* kernel threads keep a zero penalty */
+
+	if (!(p->flags & PF_KTHREAD)) {
+		u32 curr_penalty = ctx->curr_penalty;
+		penalty = ctx->prev_penalty;
+		if (penalty < curr_penalty)
+			penalty = curr_penalty; /* i.e. max(prev_penalty, curr_penalty) */
+	}
+	ctx->penalty = penalty;
+
+	u8 new_prio = effective_prio_bore(p);
+	if (new_prio != prev_prio)
+		reweight_task_by_prio(p, new_prio);
+}
+
+void update_curr_bore(struct task_struct *p, u64 delta_exec) { /* add delta_exec to the current burst and refresh the penalty */
+	struct bore_ctx *ctx = &p->bore;
+	if (ctx->stop_update) return; /* set by reweight_task_by_prio() around reweight_entity() */
+
+	ctx->burst_time += delta_exec;
+	u32 curr_penalty = ctx->curr_penalty = calc_burst_penalty(ctx->burst_time);
+
+	if (curr_penalty <= ctx->prev_penalty) return; /* max(prev, curr) is still prev_penalty */
+	update_penalty(p);
+}
+
+void restart_burst_bore(struct task_struct *p) { /* end the current burst: fold its penalty into prev_penalty, then start from zero */
+	struct bore_ctx *ctx = &p->bore;
+	u32 new_penalty = binary_smooth(ctx->curr_penalty, ctx->prev_penalty);
+	ctx->prev_penalty = new_penalty;
+	ctx->curr_penalty = 0;
+	ctx->burst_time = 0;
+	update_penalty(p); /* may reweight p if the effective priority changed */
+}
+
+void restart_burst_rescale_deadline_bore(struct task_struct *p) { /* restart the burst; if the priority index dropped, rescale deadline - vruntime */
+	struct sched_entity *se = &p->se;
+	s64 vscaled, vremain = se->deadline - se->vruntime;
+
+	u8 old_prio = effective_prio_bore(p);
+	restart_burst_bore(p);
+	u8 new_prio = effective_prio_bore(p);
+
+	if (old_prio > new_prio) { /* index went down, i.e. the penalty shrank */
+		vscaled = rescale_slice(abs(vremain), old_prio, new_prio);
+		if (unlikely(vremain < 0))
+			vscaled = -vscaled; /* keep the sign of the original remainder */
+		se->deadline = se->vruntime + vscaled;
+	}
+}
+
+static inline bool task_is_bore_eligible(struct task_struct *p) /* non-NULL, in the fair class, and exit_state not set */
+{return p && p->sched_class == &fair_sched_class && !p->exit_state;}
+
+#ifndef for_each_child_task
+#define for_each_child_task(p, t) \
+	list_for_each_entry(t, &(p)->children, sibling)
+#endif
+
+static inline u32 count_children_upto2(struct task_struct *p) { /* number of entries on p->children, saturating at 2 */
+	struct list_head *head = &p->children;
+	struct list_head *next = head->next;
+	return (next != head) + (next->next != head); /* empty list: both terms are 0 */
+}
+
+static inline bool burst_cache_expired(struct bore_bc *bc, u64 now) { /* true once the sample is older than sched_burst_cache_lifetime */
+	u64 timestamp = (u64)bc->timestamp << BORE_BC_TIMESTAMP_SHIFT; /* widen first so the shift cannot run at the bit-field's 48-bit width */
+	return now - timestamp > sched_burst_cache_lifetime;
+}
+
+static void update_burst_cache(struct bore_bc *bc,
+		struct task_struct *p, u32 count, u32 total, u64 now) { /* refresh bc from count samples summing to total */
+	u32 average = count ? total / count : 0;
+	bc->penalty = max(average, p->bore.penalty); /* never below p's own penalty */
+	bc->timestamp = now >> BORE_BC_TIMESTAMP_SHIFT; /* low BORE_BC_TIMESTAMP_SHIFT bits of now are dropped */
+}
+
+static u32 inherit_none(struct task_struct *parent,
+									u64 clone_flags, u64 now) /* inherit_penalty_fn choice that inherits nothing */
+{ return 0; }
+
+static u32 inherit_from_parent(struct task_struct *parent,
+									u64 clone_flags, u64 now) { /* penalty for a new child, from the cache over parent's eligible children */
+	if (clone_flags & CLONE_PARENT)
+		parent = parent->real_parent; /* CLONE_PARENT re-uses the caller's parent */
+
+	struct bore_bc *bc = &parent->bore.subtree;
+
+	if (burst_cache_expired(bc, now)) {
+		struct task_struct *child;
+		u32 count = 0, total = 0;
+		for_each_child_task(parent, child) {
+			if (count >= BURST_CACHE_STOP_COUNT) break; /* sample at most BURST_CACHE_STOP_COUNT children */
+
+			if (!task_is_bore_eligible(child)) continue;
+			count++;
+			total += child->bore.penalty;
+		}
+
+		update_burst_cache(bc, parent, count, total, now);
+	}
+
+	return bc->penalty;
+}
+
+static u32 inherit_from_ancestor_hub(struct task_struct *parent,
+										u64 clone_flags, u64 now) { /* like inherit_from_parent(), but first climbs past ancestors with too few children */
+	struct task_struct *ancestor = parent;
+	u32 sole_child_count = 0; /* an ancestor with at most this many children is climbed past */
+
+	if (clone_flags & CLONE_PARENT) {
+		ancestor = ancestor->real_parent;
+		sole_child_count = 1;
+	}
+
+	for (struct task_struct *next;
+			(next = ancestor->real_parent) != ancestor &&
+			count_children_upto2(ancestor) <= sole_child_count;
+			ancestor = next, sole_child_count = 1) {} /* stops at a task with more children, or one that is its own real_parent */
+
+	struct bore_bc *bc = &ancestor->bore.subtree;
+
+	if (burst_cache_expired(bc, now)) {
+		struct task_struct *direct_child;
+		u32 count = 0, total = 0;
+		for_each_child_task(ancestor, direct_child) {
+			if (count >= BURST_CACHE_STOP_COUNT) break; /* sample at most BURST_CACHE_STOP_COUNT branches */
+
+			struct task_struct *descendant = direct_child;
+			while (count_children_upto2(descendant) == 1) /* follow a single-child chain to its end */
+				descendant = list_first_entry(&descendant->children,
+												struct task_struct, sibling);
+
+			if (!task_is_bore_eligible(descendant)) continue;
+			count++;
+			total += descendant->bore.penalty;
+		}
+
+		update_burst_cache(bc, ancestor, count, total, now);
+	}
+
+	return bc->penalty;
+}
+
+static u32 inherit_from_thread_group(struct task_struct *p, u64 now) { /* penalty for a new thread, from the cache kept on p's group leader */
+	struct task_struct *leader = p->group_leader;
+	struct bore_bc *bc = &leader->bore.group;
+
+	if (burst_cache_expired(bc, now)) {
+		struct task_struct *sibling;
+		u32 count = 0, total = 0;
+
+		for_each_thread(leader, sibling) {
+			if (count >= BURST_CACHE_STOP_COUNT) break; /* sample at most BURST_CACHE_STOP_COUNT threads */
+
+			if (!task_is_bore_eligible(sibling)) continue;
+			count++;
+			total += sibling->bore.penalty;
+		}
+
+		update_burst_cache(bc, leader, count, total, now);
+	}
+
+	return bc->penalty;
+}
+
+void task_fork_bore(struct task_struct *p,
+	               struct task_struct *parent, u64 clone_flags, u64 now) { /* seed new task p's BORE state; called from copy_process() */
+	if (!task_is_bore_eligible(p) || unlikely(!sched_bore)) return;
+
+	struct bore_ctx *ctx = &p->bore;
+	u32 inherited_penalty = (clone_flags & CLONE_THREAD)?
+		inherit_from_thread_group(parent, now): /* new thread: use the thread-group cache */
+		inherit_penalty_fn(parent, clone_flags, now); /* new process: policy chosen by update_inherit_type() */
+
+	if (ctx->prev_penalty < inherited_penalty)
+		ctx->prev_penalty = inherited_penalty; /* only ever raises prev_penalty */
+	ctx->curr_penalty  = 0;
+	ctx->burst_time    = 0;
+	ctx->stop_update   = false;
+	ctx->futex_waiting = false;
+	update_penalty(p);
+}
+
+void reset_task_bore(struct task_struct *p) /* zero all of p's BORE state */
+{ memset(&p->bore, 0, sizeof(struct bore_ctx)); }
+
+static void update_inherit_type(void) { /* point inherit_penalty_fn at the policy selected by sched_burst_inherit_type */
+	switch(sched_burst_inherit_type) {
+	case 1:
+		inherit_penalty_fn = inherit_from_parent;
+		break;
+	case 2:
+		inherit_penalty_fn = inherit_from_ancestor_hub;
+		break;
+	default: /* 0 and any other value: no inheritance */
+		inherit_penalty_fn = inherit_none;
+	}
+}
+
+void __init sched_init_bore(void) { /* boot-time setup, called from sched_init() */
+	printk(KERN_INFO "%s %s by %s\n",
+		SCHED_BORE_PROGNAME, SCHED_BORE_VERSION, SCHED_BORE_AUTHOR);
+
+	reset_task_bore(&init_task);
+	update_inherit_type(); /* sets inherit_penalty_fn, which task_fork_bore() calls */
+}
+
+static void readjust_all_task_weights(void) { /* re-derive the weight of every BORE-eligible task; run after sched_bore is written */
+	struct task_struct *leader, *task;
+	struct rq *rq;
+	struct rq_flags rf;
+
+	scoped_guard(write_lock_irq, &tasklist_lock)
+	for_each_process_thread(leader, task) { /* every thread: for_each_process() alone visits only group leaders */
+		if (!task_is_bore_eligible(task)) continue;
+		rq = task_rq_lock(task, &rf);
+		update_rq_clock(rq);
+		reweight_task_by_prio(task, effective_prio_bore(task));
+		task_rq_unlock(rq, task, &rf);
+	}
+}
+
+int sched_bore_update_handler(const struct ctl_table *table,
+		int write, void __user *buffer, size_t *lenp, loff_t *ppos) { /* sysctl handler for sched_bore */
+	int ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos);
+	if (ret || !write)
+		return ret; /* reads and failed writes need no follow-up */
+
+	readjust_all_task_weights(); /* effective_prio_bore() depends on sched_bore, so weights must be re-derived */
+
+	return 0;
+}
+
+int sched_burst_inherit_type_update_handler(const struct ctl_table *table,
+		int write, void __user *buffer, size_t *lenp, loff_t *ppos) { /* sysctl handler for sched_burst_inherit_type */
+	int ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos);
+	if (ret || !write)
+		return ret; /* reads and failed writes need no follow-up */
+
+	update_inherit_type(); /* re-select inherit_penalty_fn for the new value */
+
+	return 0;
+}
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table sched_bore_sysctls[] = { /* BORE tunables, registered under "kernel" by sched_bore_sysctl_init() */
+	{ /* 0..1; a write re-derives all task weights */
+		.procname	= "sched_bore",
+		.data		= &sched_bore,
+		.maxlen		= sizeof(u8),
+		.mode		= 0644,
+		.proc_handler = sched_bore_update_handler,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{ /* 0..2; a write re-selects the inheritance function */
+		.procname	= "sched_burst_inherit_type",
+		.data		= &sched_burst_inherit_type,
+		.maxlen		= sizeof(u8),
+		.mode		= 0644,
+		.proc_handler = sched_burst_inherit_type_update_handler,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_TWO,
+	},
+	{ /* 0..3; used as a shift in binary_smooth() */
+		.procname	= "sched_burst_smoothness",
+		.data		= &sched_burst_smoothness,
+		.maxlen		= sizeof(u8),
+		.mode		= 0644,
+		.proc_handler = proc_dou8vec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_THREE,
+	},
+	{ /* 0..63; tolerance in calc_burst_penalty() */
+		.procname	= "sched_burst_penalty_offset",
+		.data		= &sched_burst_penalty_offset,
+		.maxlen		= sizeof(u8),
+		.mode		= 0644,
+		.proc_handler = proc_dou8vec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &maxval_6_bits,
+	},
+	{ /* 0..4095; multiplier in calc_burst_penalty() */
+		.procname	= "sched_burst_penalty_scale",
+		.data		= &sched_burst_penalty_scale,
+		.maxlen		= sizeof(uint),
+		.mode		= 0644,
+		.proc_handler = proc_douintvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &maxval_12_bits,
+	},
+	{ /* no bounds set; compared against cache age in burst_cache_expired() */
+		.procname	= "sched_burst_cache_lifetime",
+		.data		= &sched_burst_cache_lifetime,
+		.maxlen		= sizeof(uint),
+		.mode		= 0644,
+		.proc_handler = proc_douintvec,
+	},
+};
+
+static int __init sched_bore_sysctl_init(void) { /* registers the BORE sysctl table; run via late_initcall() */
+	register_sysctl_init("kernel", sched_bore_sysctls);
+	return 0;
+}
+late_initcall(sched_bore_sysctl_init);
+
+#endif // CONFIG_SYSCTL
+#endif /* CONFIG_SCHED_BORE */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4b1953b6c..3d78a3c07 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -97,6 +97,10 @@
 #include "../../io_uring/io-wq.h"
 #include "../smpboot.h"
 
+#ifdef CONFIG_SCHED_BORE
+#include <linux/sched/bore.h>
+#endif /* CONFIG_SCHED_BORE */
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpu);
 EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpumask);
 
@@ -1370,7 +1374,11 @@ int tg_nop(struct task_group *tg, void *data)
 
 void set_load_weight(struct task_struct *p, bool update_load)
 {
+#ifdef CONFIG_SCHED_BORE
+	int prio = effective_prio_bore(p);
+#else /* !CONFIG_SCHED_BORE */
 	int prio = p->static_prio - MAX_RT_PRIO;
+#endif /* CONFIG_SCHED_BORE */
 	struct load_weight lw;
 
 	if (task_has_idle_policy(p)) {
@@ -8406,6 +8414,10 @@ void __init sched_init(void)
 	BUG_ON(!sched_class_above(&ext_sched_class, &idle_sched_class));
 #endif
 
+#ifdef CONFIG_SCHED_BORE
+	sched_init_bore();
+#endif /* CONFIG_SCHED_BORE */
+
 	wait_bit_init();
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 9815f9a0c..88bafe9f9 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -167,7 +167,53 @@ static const struct file_operations sched_feat_fops = {
 };
 
 #ifdef CONFIG_SMP
+#ifdef CONFIG_SCHED_BORE
+#define DEFINE_SYSCTL_SCHED_FUNC(name, update_func) /* emits write/show/open handlers and fops for sysctl_sched_<name> */ \
+static ssize_t sched_##name##_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) \
+{ \
+	char buf[16]; \
+	unsigned int value; \
+\
+	if (cnt > 15) /* leave room for the terminating NUL */ \
+		cnt = 15; \
+\
+	if (copy_from_user(&buf, ubuf, cnt)) \
+		return -EFAULT; \
+	buf[cnt] = '\0'; \
+\
+	if (kstrtouint(buf, 10, &value)) \
+		return -EINVAL; \
+\
+	sysctl_sched_##name = value; \
+	sched_update_##update_func(); /* recompute whatever is derived from the new value */ \
+\
+	*ppos += cnt; \
+	return cnt; \
+} \
+\
+static int sched_##name##_show(struct seq_file *m, void *v) \
+{ \
+	seq_printf(m, "%u\n", sysctl_sched_##name); /* unsigned, matching what kstrtouint() accepted */ \
+	return 0; \
+} \
+\
+static int sched_##name##_open(struct inode *inode, struct file *filp) \
+{ \
+	return single_open(filp, sched_##name##_show, NULL); \
+} \
+\
+static const struct file_operations sched_##name##_fops = { \
+	.open		= sched_##name##_open, \
+	.write		= sched_##name##_write, \
+	.read		= seq_read, \
+	.llseek		= seq_lseek, \
+	.release	= single_release, \
+};
 
+DEFINE_SYSCTL_SCHED_FUNC(min_base_slice, min_base_slice)
+
+#undef DEFINE_SYSCTL_SCHED_FUNC
+#else /* !CONFIG_SCHED_BORE */
 static ssize_t sched_scaling_write(struct file *filp, const char __user *ubuf,
 				   size_t cnt, loff_t *ppos)
 {
@@ -213,7 +259,7 @@ static const struct file_operations sched_scaling_fops = {
 	.llseek		= seq_lseek,
 	.release	= single_release,
 };
-
+#endif /* CONFIG_SCHED_BORE */
 #endif /* SMP */
 
 #ifdef CONFIG_PREEMPT_DYNAMIC
@@ -504,13 +550,20 @@ static __init int sched_init_debug(void)
 	debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops);
 #endif
 
+#ifdef CONFIG_SCHED_BORE
+	debugfs_create_file("min_base_slice_ns", 0644, debugfs_sched, NULL, &sched_min_base_slice_fops);
+	debugfs_create_u32("base_slice_ns", 0444, debugfs_sched, &sysctl_sched_base_slice);
+#else /* !CONFIG_SCHED_BORE */
 	debugfs_create_u32("base_slice_ns", 0644, debugfs_sched, &sysctl_sched_base_slice);
+#endif /* CONFIG_SCHED_BORE */
 
 	debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms);
 	debugfs_create_u32("latency_warn_once", 0644, debugfs_sched, &sysctl_resched_latency_warn_once);
 
 #ifdef CONFIG_SMP
+#if !defined(CONFIG_SCHED_BORE)
 	debugfs_create_file("tunable_scaling", 0644, debugfs_sched, NULL, &sched_scaling_fops);
+#endif /* CONFIG_SCHED_BORE */
 	debugfs_create_u32("migration_cost_ns", 0644, debugfs_sched, &sysctl_sched_migration_cost);
 	debugfs_create_u32("nr_migrate", 0644, debugfs_sched, &sysctl_sched_nr_migrate);
 
@@ -755,6 +808,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 		SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)),
 		SPLIT_NS(schedstat_val_or_zero(p->stats.sum_block_runtime)));
 
+#ifdef CONFIG_SCHED_BORE
+	SEQ_printf(m, " %2d", p->bore.score);
+#endif /* CONFIG_SCHED_BORE */
 #ifdef CONFIG_NUMA_BALANCING
 	SEQ_printf(m, "   %d      %d", task_node(p), task_numa_group_id(p));
 #endif
@@ -1245,6 +1301,9 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
 
 	P(se.load.weight);
 #ifdef CONFIG_SMP
+#ifdef CONFIG_SCHED_BORE
+	P(bore.score);
+#endif /* CONFIG_SCHED_BORE */
 	P(se.avg.load_sum);
 	P(se.avg.runnable_sum);
 	P(se.avg.util_sum);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index af61769b1..a1e9ce4dc 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -55,6 +55,10 @@
 #include "stats.h"
 #include "autogroup.h"
 
+#ifdef CONFIG_SCHED_BORE
+#include <linux/sched/bore.h>
+#endif /* CONFIG_SCHED_BORE */
+
 /*
  * The initial- and re-scaling of tunables is configurable
  *
@@ -64,17 +68,30 @@
  *   SCHED_TUNABLESCALING_LOG - scaled logarithmically, *1+ilog(ncpus)
  *   SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus
  *
- * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
+ * BORE : default SCHED_TUNABLESCALING_NONE = *1 constant
+ * EEVDF: default SCHED_TUNABLESCALING_LOG  = *(1+ilog(ncpus))
  */
+#ifdef CONFIG_SCHED_BORE
+unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
+#else /* !CONFIG_SCHED_BORE */
 unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG;
+#endif /* CONFIG_SCHED_BORE */
 
 /*
  * Minimal preemption granularity for CPU-bound tasks:
  *
- * (default: 0.70 msec * (1 + ilog(ncpus)), units: nanoseconds)
+ * BORE : base_slice = minimum multiple of nsecs_per_tick >= min_base_slice
+ * (default min_base_slice = 2000000 constant, units: nanoseconds)
+ * EEVDF: default 0.70 msec * (1 + ilog(ncpus)), units: nanoseconds
  */
+#ifdef CONFIG_SCHED_BORE
+static const unsigned int nsecs_per_tick       = 1000000000ULL / HZ;
+unsigned int sysctl_sched_min_base_slice       = CONFIG_MIN_BASE_SLICE_NS;
+__read_mostly uint sysctl_sched_base_slice     = nsecs_per_tick;
+#else /* !CONFIG_SCHED_BORE */
 unsigned int sysctl_sched_base_slice			= 700000ULL;
 static unsigned int normalized_sysctl_sched_base_slice	= 700000ULL;
+#endif /* CONFIG_SCHED_BORE */
 
 const_debug unsigned int sysctl_sched_migration_cost	= 500000UL;
 
@@ -188,6 +205,13 @@ static inline void update_load_set(struct load_weight *lw, unsigned long w)
  *
  * This idea comes from the SD scheduler of Con Kolivas:
  */
+#ifdef CONFIG_SCHED_BORE
+static void update_sysctl(void) { /* base slice = min_base_slice rounded up to whole ticks, and at least one tick */
+	sysctl_sched_base_slice = nsecs_per_tick *
+		max(1UL, DIV_ROUND_UP(sysctl_sched_min_base_slice, nsecs_per_tick));
+}
+void sched_update_min_base_slice(void) { update_sysctl(); } /* called after a write to the min_base_slice_ns debugfs file */
+#else /* !CONFIG_SCHED_BORE */
 static unsigned int get_update_sysctl_factor(void)
 {
 	unsigned int cpus = min_t(unsigned int, num_online_cpus(), 8);
@@ -218,6 +242,7 @@ static void update_sysctl(void)
 	SET_SYSCTL(sched_base_slice);
 #undef SET_SYSCTL
 }
+#endif /* CONFIG_SCHED_BORE */
 
 void __init sched_init_granularity(void)
 {
@@ -695,6 +720,9 @@ static s64 entity_lag(u64 avruntime, struct sched_entity *se)
 
 	vlag = avruntime - se->vruntime;
 	limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
+#ifdef CONFIG_SCHED_BORE
+	limit >>= !!sched_bore;
+#endif /* CONFIG_SCHED_BORE */
 
 	return clamp(vlag, -limit, limit);
 }
@@ -941,7 +969,16 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
 	if (curr && (!curr->on_rq || !entity_eligible(cfs_rq, curr)))
 		curr = NULL;
 
+#if !defined(CONFIG_SCHED_BORE)
 	if (sched_feat(RUN_TO_PARITY) && curr && protect_slice(curr))
+#else /* CONFIG_SCHED_BORE */
+	bool run_to_parity = likely(sched_bore) ?
+		sched_feat(RUN_TO_PARITY_BORE) : sched_feat(RUN_TO_PARITY);
+	if (run_to_parity && curr && protect_slice(curr) &&
+		(!entity_is_task(curr) ||
+		 !task_of(curr)->bore.futex_waiting ||
+		 unlikely(!sched_bore)))
+#endif /* CONFIG_SCHED_BORE */
 		return curr;
 
 	/* Pick the leftmost entity if it's eligible */
@@ -1000,6 +1037,7 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
  * Scheduling class statistics methods:
  */
 #ifdef CONFIG_SMP
+#if !defined(CONFIG_SCHED_BORE)
 int sched_update_scaling(void)
 {
 	unsigned int factor = get_update_sysctl_factor();
@@ -1011,6 +1049,7 @@ int sched_update_scaling(void)
 
 	return 0;
 }
+#endif /* CONFIG_SCHED_BORE */
 #endif
 #endif
 
@@ -1248,6 +1287,9 @@ static void update_curr(struct cfs_rq *cfs_rq)
 	if (entity_is_task(curr)) {
 		struct task_struct *p = task_of(curr);
 
+#ifdef CONFIG_SCHED_BORE
+		update_curr_bore(p, delta_exec);
+#endif /* CONFIG_SCHED_BORE */
 		update_curr_task(p, delta_exec);
 
 		/*
@@ -3896,7 +3938,7 @@ static void reweight_eevdf(struct sched_entity *se, u64 avruntime,
 	se->deadline = avruntime + vslice;
 }
 
-static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
+void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 			    unsigned long weight)
 {
 	bool curr = cfs_rq->curr == se;
@@ -5296,12 +5338,11 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {}
 static void
 place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
-	u64 vslice, vruntime = avg_vruntime(cfs_rq);
+	u64 vslice = 0, vruntime = avg_vruntime(cfs_rq);
 	s64 lag = 0;
 
 	if (!se->custom_slice)
 		se->slice = sysctl_sched_base_slice;
-	vslice = calc_delta_fair(se->slice, se);
 
 	/*
 	 * Due to how V is constructed as the weighted average of entities,
@@ -5386,7 +5427,18 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 		se->rel_deadline = 0;
 		return;
 	}
-
+#ifdef CONFIG_SCHED_BORE
+	if (entity_is_task(se) &&
+			likely(sched_bore) &&
+			task_of(se)->bore.futex_waiting)
+		goto vslice_found;
+#endif /* CONFIG_SCHED_BORE */
+	vslice = calc_delta_fair(se->slice, se);
+#ifdef CONFIG_SCHED_BORE
+	if (likely(sched_bore))
+		vslice >>= !!(flags & (ENQUEUE_INITIAL | ENQUEUE_WAKEUP));
+	else
+#endif /* CONFIG_SCHED_BORE */
 	/*
 	 * When joining the competition; the existing tasks will be,
 	 * on average, halfway through their slice, as such start tasks
@@ -5395,6 +5447,9 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	if (sched_feat(PLACE_DEADLINE_INITIAL) && (flags & ENQUEUE_INITIAL))
 		vslice /= 2;
 
+#ifdef CONFIG_SCHED_BORE
+vslice_found:
+#endif /* CONFIG_SCHED_BORE */
 	/*
 	 * EEVDF: vd_i = ve_i + r_i/w_i
 	 */
@@ -5407,7 +5462,7 @@ static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq);
 static inline bool cfs_bandwidth_used(void);
 
 static void
-requeue_delayed_entity(struct sched_entity *se);
+requeue_delayed_entity(struct sched_entity *se, int flags);
 
 static void
 enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
@@ -5572,6 +5627,10 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 		if (sched_feat(DELAY_DEQUEUE) && delay &&
 		    !entity_eligible(cfs_rq, se)) {
 			update_load_avg(cfs_rq, se, 0);
+#ifdef CONFIG_SCHED_BORE
+			if (sched_feat(DELAY_ZERO) && likely(sched_bore))
+				update_entity_lag(cfs_rq, se);
+#endif /* CONFIG_SCHED_BORE */
 			set_delayed(se);
 			return false;
 		}
@@ -6998,7 +7057,7 @@ static int sched_idle_cpu(int cpu)
 #endif
 
 static void
-requeue_delayed_entity(struct sched_entity *se)
+requeue_delayed_entity(struct sched_entity *se, int flags)
 {
 	struct cfs_rq *cfs_rq = cfs_rq_of(se);
 
@@ -7011,13 +7070,22 @@ requeue_delayed_entity(struct sched_entity *se)
 	SCHED_WARN_ON(!se->on_rq);
 
 	if (sched_feat(DELAY_ZERO)) {
+#ifdef CONFIG_SCHED_BORE
+		if (likely(sched_bore))
+			flags |= ENQUEUE_WAKEUP;
+		else {
+#endif /* CONFIG_SCHED_BORE */
+		flags = 0;
 		update_entity_lag(cfs_rq, se);
+#ifdef CONFIG_SCHED_BORE
+		}
+#endif /* CONFIG_SCHED_BORE */
 		if (se->vlag > 0) {
 			cfs_rq->nr_running--;
 			if (se != cfs_rq->curr)
 				__dequeue_entity(cfs_rq, se);
 			se->vlag = 0;
-			place_entity(cfs_rq, se, 0);
+			place_entity(cfs_rq, se, flags);
 			if (se != cfs_rq->curr)
 				__enqueue_entity(cfs_rq, se);
 			cfs_rq->nr_running++;
@@ -7054,7 +7122,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 		util_est_enqueue(&rq->cfs, p);
 
 	if (flags & ENQUEUE_DELAYED) {
-		requeue_delayed_entity(se);
+		requeue_delayed_entity(se, flags);
 		return;
 	}
 
@@ -7072,7 +7140,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	for_each_sched_entity(se) {
 		if (se->on_rq) {
 			if (se->sched_delayed)
-				requeue_delayed_entity(se);
+				requeue_delayed_entity(se, flags);
 			break;
 		}
 		cfs_rq = cfs_rq_of(se);
@@ -7302,6 +7370,15 @@ static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 		util_est_dequeue(&rq->cfs, p);
 
 	util_est_update(&rq->cfs, p, flags & DEQUEUE_SLEEP);
+#ifdef CONFIG_SCHED_BORE
+	struct cfs_rq *cfs_rq = &rq->cfs;
+	struct sched_entity *se = &p->se;
+	if ((flags & DEQUEUE_SLEEP) && entity_is_task(se)) {
+		if (cfs_rq->curr == se)
+			update_curr(cfs_rq_of(&p->se));
+		restart_burst_bore(p);
+	}
+#endif /* CONFIG_SCHED_BORE */
 	if (dequeue_entities(rq, &p->se, flags) < 0)
 		return false;
 
@@ -9131,16 +9208,25 @@ static void yield_task_fair(struct rq *rq)
 	/*
 	 * Are we the only task in the tree?
 	 */
+#if !defined(CONFIG_SCHED_BORE)
 	if (unlikely(rq->nr_running == 1))
 		return;
 
 	clear_buddies(cfs_rq, se);
+#endif /* CONFIG_SCHED_BORE */
 
 	update_rq_clock(rq);
 	/*
 	 * Update run-time statistics of the 'current'.
 	 */
 	update_curr(cfs_rq);
+#ifdef CONFIG_SCHED_BORE
+	restart_burst_rescale_deadline_bore(curr);
+	if (unlikely(rq->nr_running == 1))
+		return;
+
+	clear_buddies(cfs_rq, se);
+#endif /* CONFIG_SCHED_BORE */
 	/*
 	 * Tell update_rq_clock() that we've just updated,
 	 * so we don't do microscopic update in schedule()
@@ -13332,6 +13418,9 @@ static void switched_to_fair(struct rq *rq, struct task_struct *p)
 	SCHED_WARN_ON(p->se.sched_delayed);
 
 	attach_task_cfs_rq(p);
+#ifdef CONFIG_SCHED_BORE
+	reset_task_bore(p);
+#endif /* CONFIG_SCHED_BORE */
 
 	set_task_max_allowed_capacity(p);
 
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 050d75030..b81b5a147 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -18,6 +18,9 @@ SCHED_FEAT(PLACE_REL_DEADLINE, true)
  * 0-lag point or until is has exhausted it's slice.
  */
 SCHED_FEAT(RUN_TO_PARITY, true)
+#ifdef CONFIG_SCHED_BORE
+SCHED_FEAT(RUN_TO_PARITY_BORE, false)
+#endif /* CONFIG_SCHED_BORE */
 /*
  * Allow wakeup of tasks with a shorter slice to cancel RESPECT_SLICE for
  * current.
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index a441990fe..54138cc91 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2085,7 +2085,11 @@ static inline void update_sched_domain_debugfs(void) { }
 static inline void dirty_sched_domain_sysctl(int cpu) { }
 #endif
 
+#ifdef CONFIG_SCHED_BORE
+extern void sched_update_min_base_slice(void);
+#else /* !CONFIG_SCHED_BORE */
 extern int sched_update_scaling(void);
+#endif /* CONFIG_SCHED_BORE */
 
 static inline const struct cpumask *task_user_cpus(struct task_struct *p)
 {
@@ -2807,7 +2811,12 @@ extern void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags);
 extern const_debug unsigned int sysctl_sched_nr_migrate;
 extern const_debug unsigned int sysctl_sched_migration_cost;
 
+#ifdef CONFIG_SCHED_BORE
+extern unsigned int sysctl_sched_min_base_slice;
+extern __read_mostly uint sysctl_sched_base_slice;
+#else /* !CONFIG_SCHED_BORE */
 extern unsigned int sysctl_sched_base_slice;
+#endif /* CONFIG_SCHED_BORE */
 
 #ifdef CONFIG_SCHED_DEBUG
 extern int sysctl_resched_latency_warn_ms;
-- 
2.51.0

