SlideShare ist ein Scribd-Unternehmen logo
1 von 32
Linux Kernel在多核机器上的负载均衡机制 董昊 (三百) 源代码以 2.6.18.8 为准 1
现在的服务器CPU架构 2 每个CPU含多个核 每个核有自己的一级cache 同一CPU内的多个核共享同一个二级cache (下图是最常见的体系结构,但不代表所有的CPU)
Kernel的调度数据结构 每个CPU(核)上有一个运行队列 (struct rq) 每个运行队列上有两个优先级队列(struct prio_array) active expire(已经用完时间片的进程) 每个优先级队列里有分级队列,分别挂载不同优先级的进程 3
structrq 4
prio_array 5 内核从active队列里按优先级挑进程出来运行,如果进程用完了时间片,就将其放入expire队列 当 active 队列的进程都用完了时间片,则把指向active和expire的指针对换,开始新一轮的优先级调度
任务负载均衡 问题:把任务(进程)分配到多个core上去,保证各个core的负载均衡,且要考虑有些core之间共享cache而有些core之间没有关系。 所有负载均衡需要解决的问题: 怎样衡量负载? 什么时间检查负载是否均衡? 怎么调整为均衡? 6
先想些笨办法 怎样衡量负载? 在运行队列上的running的进程数 什么时候检查任务是否均衡? 每1秒检查一次 怎么调整为均衡? 让各个核上的任务数相等——把任务数最多的核上的任务挪一个(或几个)到任务数最少的核上 7
开始细化这些笨办法 怎样衡量负载? 用进程数衡量CPU负载的缺点——进程的优先级不一样 会造成有的核跑着10个低优先级的任务,而另一个核上跑着10个高优先级的任务,任务响应偏慢 可以用核上正在跑的进程的优先级来衡量负载。高优先级的我们当他负载高,低优先级的我们当他负载低 8
Kernel的做法 进程本身有“静态优先级”(current->static_prio), 值范围是100~139 进程的static_prio在运行时不变 用nice系统调用可以更改 进程的负载计算方法 如果static_prio小于120(高优先级) p->load_weight = (140-p->static_prio) * 128 / 5 如果static_prio等于或大于120(低优先级) p->load_weight = (140-p->static_prio) * 128 / 20
各种静态优先级进程的负载 10 可以看出,低优先级进程的负载衰减的很厉害
Kernel的做法 核(CPU)的负载:正在其上运行的进程的load_weight相加 rq->raw_weighted_load 问:为什么用“静态优先级”来衡量进程的负载,而不是动态优先级? 动态优先级包含了进程运行时的特性,比如sleep_avg(平均睡眠时间) 而衡量负载主要考虑:进程一旦开始使用CPU会造成什么影响。进程之前睡得多少并不重要。 11
继续细化这些笨办法 什么时候检查任务是否均衡? 每秒调整一次负载慢吗? 可以在CPU负载变化的时候调整 进程睡眠或醒来时是负载变化的时候 12
Kernel的做法 进程睡去…… 13   3294 asmlinkage void __sched schedule(void)   3295 { …   3354     switch_count = &prev->nivcsw;   3355     if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {   3356         switch_count = &prev->nvcsw;   3357         if (unlikely((prev->state & TASK_INTERRUPTIBLE) &&   3358                 unlikely(signal_pending(prev))))   3359             prev->state = TASK_RUNNING;   3360         else {   3361             if (prev->state == TASK_UNINTERRUPTIBLE)   3362                 rq->nr_uninterruptible++;   3363             deactivate_task(prev, rq); //将进程拿出rq,进程负载也会被减掉   3364         }   3365     } …
schedule() 14 … schedule()函数继续   3367     cpu = smp_processor_id();   3368     if (unlikely(!rq->nr_running)) {   3369         idle_balance(cpu, rq);   3370         if (!rq->nr_running) {   3371             next = rq->idle;   3372             rq->expired_timestamp = 0;   3373             wake_sleeping_dependent(cpu);   3374             goto switch_tasks;   3375         }   3376     } … 这种情况仅发生在rq上没有其它进程的时候
idle_balance() 15 idle_balance()函数   2733 static void idle_balance(int this_cpu, struct rq *this_rq)   2734 {   2735     struct sched_domain *sd;   2736   2737     for_each_domain(this_cpu, sd) {   2738         if (sd->flags & SD_BALANCE_NEWIDLE) {   2739             /* If we've pulled tasks over stop searching: */   2740             if (load_balance_newidle(this_cpu, this_rq, sd))   2741                 break;   2742         }   2743     }   2744 }
sched_domain与sched_group 16 sched_domain和sched_group的知识可参考(本图来源):http://www.ibm.com/developerworks/cn/linux/l-cn-schldom/index.html
8核机器 17 从子sched_domain到父sched_domain  287 #define for_each_domain(cpu, __sd) \  288     for (__sd = rcu_dereference(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent)
8核机器(父调度域) 18
通用的调度特性 19   119 #define SD_CPU_INIT (structsched_domain) {     br />  120     .span           = CPU_MASK_NONE,    br />  121     .parent         = NULL,         br />  122     .groups         = NULL,         br />  123     .min_interval       = 1,            br />  124     .max_interval       = 4,            br />  125     .busy_factor        = 64,           br />  126     .imbalance_pct      = 125,          br />  127     .cache_nice_tries   = 1,            br />  128     .per_cpu_gain       = 100,          br />  129     .busy_idx       = 2,            br />  130     .idle_idx       = 1,            br />  131     .newidle_idx        = 2,            br />  132     .wake_idx       = 1,            br />  133     .forkexec_idx       = 1,            br />  134     .flags          = SD_LOAD_BALANCE   br />  135                 | SD_BALANCE_NEWIDLE    br />  136                 | SD_BALANCE_EXEC   br />  137                 | SD_WAKE_AFFINE    br />  138                 | BALANCE_FOR_POWER,    br />  139     .last_balance       = jiffies,      br />  140     .balance_interval   = 1,            br />  141     .nr_balance_failed  = 0,            br />  142 } 在我们常用的Intel架构上,可以认为: SD_NODE_INIT用来初始化每个CPU SD_CPU_INIT用来初始化CPU上的每个核 正因为同一CPU上的核共享L2 cache,所以SD_CPU_INIT有SD_WAKE_AFFINE(亲和)选项
不同体系结构的不同调度特性 20 include/asm-x86_64/topolopy.h    31 #define SD_NODE_INIT (structsched_domain) {        br />   32     .span           = CPU_MASK_NONE,    br />   33     .parent         = NULL,         br />   34     .groups         = NULL,         br />   35     .min_interval       = 8,            br />   36     .max_interval       = 32,           br />   37     .busy_factor        = 32,           br />   38     .imbalance_pct      = 125,          br />   39     .cache_nice_tries   = 2,            br />   40     .busy_idx       = 3,            br />   41     .idle_idx       = 2,            br />   42     .newidle_idx        = 0,            br />   43     .wake_idx       = 1,            br />   44     .forkexec_idx       = 1,            br />   45     .per_cpu_gain       = 100,          br />   46     .flags          = SD_LOAD_BALANCE   br />   47                 | SD_BALANCE_FORK   br />   48                 | SD_BALANCE_EXEC   br />   49                 | SD_WAKE_BALANCE,  br />   50     .last_balance       = jiffies,      br />   51     .balance_interval   = 1,            br />   52     .nr_balance_failed  = 0,            br />   53 } include/asm-powerpc/topolopy.h    43 #define SD_NODE_INIT (structsched_domain) {        br />   44     .span           = CPU_MASK_NONE,    br />   45     .parent         = NULL,         br />   46     .groups         = NULL,         br />   47     .min_interval       = 8,            br />   48     .max_interval       = 32,           br />   49     .busy_factor        = 32,           br />   50     .imbalance_pct      = 125,          br />   51     .cache_nice_tries   = 1,            br />   52     .per_cpu_gain       = 100,          br />   53     .busy_idx       = 3,            br />   54     .idle_idx       = 1,            br />   55     .newidle_idx        = 2,            br />   56     .wake_idx       = 1,            br />   57     .flags          = SD_LOAD_BALANCE   br />   58                 | SD_BALANCE_EXEC   br />   59     
            | SD_BALANCE_NEWIDLE    br />   60                 | SD_WAKE_IDLE      br />   61                 | SD_WAKE_BALANCE,  br />   62     .last_balance       = jiffies,      br />   63     .balance_interval   = 1,            br />   64     .nr_balance_failed  = 0,            br />   65 }
schedule() 21   3378     array = rq->active;   3379     if (unlikely(!array->nr_active)) {   3380         /*   3381          * Switch the active and expired arrays.   3382          */   3383         schedstat_inc(rq, sched_switch);   3384         rq->active = rq->expired;   3385         rq->expired = array;   3386         array = rq->active;   3387         rq->expired_timestamp = 0;   3388         rq->best_expired_prio = MAX_PRIO;   3389     } 这里可以看到active和expire优先级队列互换的操作
Kernel的做法 22 进程醒来…… try_to_wake_up
try_to_wake_up()   1422         intidx = this_sd->wake_idx;   1423         unsigned int imbalance;   1424   1425         imbalance = 100 + (this_sd->imbalance_pct - 100) / 2;   1426   1427         load = source_load(cpu, idx);   1428         this_load = target_load(this_cpu, idx);   1429   1430         new_cpu = this_cpu; /* Wake to this CPU if we can */   1431   1432         if (this_sd->flags & SD_WAKE_AFFINE) { 	//对于一个CPU内的两个核   1433             unsigned long tl = this_load;   1434             unsigned long tl_per_task = cpu_avg_load_per_task(this_cpu);   1435 ……   1441             if (sync)   1442                 tl -= current->load_weight;   1443   1444             if ((tl <= load &&   1445                 tl + target_load(cpu, idx) <= tl_per_task) ||   1446                 100*(tl + p->load_weight) <= imbalance*load) {	//如果是同一CPU里的两个核,只要我这个核的负载不大,就把本来该另一个核跑的进程揽过来,AFFINE,亲兄弟嘛    1447                 /*   1448                  * This domain has SD_WAKE_AFFINE and   1449                  * p is cache cold in this domain, and   1450                  * there is no bad imbalance.   1451                  */   1452                 schedstat_inc(this_sd, ttwu_move_affine);   1453                 gotoout_set_cpu;   1454             }   1455         }
try_to_wake_up()   1461         if (this_sd->flags & SD_WAKE_BALANCE) {    1462             if (imbalance*this_load <= 100*load) {	//如果是两个不同CPU上的核,则只有在我这个核的负载很小时,才揽进程   1463                 schedstat_inc(this_sd, ttwu_move_balance);   1464                 gotoout_set_cpu;   1465             }   1466         }   1467     }   1468   1469     new_cpu = cpu; /* Could not wake to this_cpu. Wake to cpu instead */   1470 out_set_cpu:   1471     new_cpu = wake_idle(new_cpu, p);		//有没有空闲的核?   1472     if (new_cpu != cpu) {   1473         set_task_cpu(p, new_cpu);   1474         task_rq_unlock(rq, &flags);   1475         /* might preempt at this point */   1476         rq = task_rq_lock(p, &flags);   1477         old_state = p->state;   1478         if (!(old_state & state))   1479             goto out;   1480         if (p->array)   1481             gotoout_running;   1482   1483         this_cpu = smp_processor_id();   1484         cpu = task_cpu(p);   1485     }
Kernel的做法 只是进程睡去和醒来的时候负载均衡就够了吗? 如果某个进程调用nice修改了static_prio 每个核每10ms调用一次scheduler_tick scheduler_tick调用rebalance_tick rebalance_tick根据核所在的sched_domain的特性来决定多久调一次load_balance 根据本CPU是否繁忙 25
每个core都有一个APIC 26 每个核都有自己的APIC,有自己的时钟中断
rebalance_tick() 27   2811 static void   2812 rebalance_tick(intthis_cpu, structrq *this_rq, enumidle_type idle)   2813 {   2814     unsigned long this_load, interval, j = cpu_offset(this_cpu);   2815     structsched_domain *sd;   2816     inti, scale;   2817   2818     this_load = this_rq->raw_weighted_load;   2819   2820     /* Update our load: */   2821     for (i = 0, scale = 1; i < 3; i++, scale <<= 1) {   2822         unsigned long old_load, new_load;   2823   2824         old_load = this_rq->cpu_load[i];   2825         new_load = this_load;   2826         /*   2827          * Round up the averaging division if load is increasing. This   2828          * prevents us from getting stuck on 9 if the load is 10, for   2829          * example.   2830          */   2831         if (new_load > old_load)   2832             new_load += scale-1;   2833         this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) / scale;   2834     }
rebalance_tick() 28   2836     for_each_domain(this_cpu, sd) {   2837         if (!(sd->flags & SD_LOAD_BALANCE))   2838             continue;   2839   2840         interval = sd->balance_interval;   2841         if (idle != SCHED_IDLE)   2842             interval *= sd->busy_factor;   2843   2844         /* scale ms to jiffies */   2845         interval = msecs_to_jiffies(interval);   2846         if (unlikely(!interval))   2847             interval = 1;   2848   2849         if (j - sd->last_balance >= interval) {   2850             if (load_balance(this_cpu, this_rq, sd, idle)) {   2851                 /*   2852                  * We've pulled tasks over so either we're no   2853                  * longer idle, or one of our SMT siblings is   2854                  * not idle.   2855                  */   2856                 idle = NOT_IDLE;   2857             }   2858             sd->last_balance += interval;   2859         }   2860     }   2861 }
load_balance() 29   2527 static intload_balance(intthis_cpu, structrq *this_rq,   2528             structsched_domain *sd, enumidle_type idle)   2529 { ……   2542 redo:   2543     group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle,   2544                             &cpus);   2545     if (!group) {   2546         schedstat_inc(sd, lb_nobusyg[idle]);   2547         gotoout_balanced;   2548     }   2549   2550     busiest = find_busiest_queue(group, idle, imbalance, &cpus);   2551     if (!busiest) {   2552         schedstat_inc(sd, lb_nobusyq[idle]);   2553         gotoout_balanced;   2554     }   2555   2556     BUG_ON(busiest == this_rq);   2557   2558     schedstat_add(sd, lb_imbalance[idle], imbalance);   2559   2560     nr_moved = 0;   2561     if (busiest->nr_running > 1) { ……   2568         double_rq_lock(this_rq, busiest);   2569         nr_moved = move_tasks(this_rq, this_cpu, busiest,   2570                       minus_1_or_zero(busiest->nr_running),   2571                       imbalance, sd, idle, &all_pinned);   2572         double_rq_unlock(this_rq, busiest);
load_balance()的考量 find_busiest_group从本调度域找出负载最高的sched_group,find_busiest_queue从sched_group中找出最繁忙的rq 可能负载已经均衡,找不出,则返回NULL 要考虑imbalance_pct move_tasks把任务从最繁忙的rq里挪到本CPU来 Linux的负载均衡是“拉模式”:我(本core)定时从别处最繁忙的地方拉任务过来 一边预计算负载一边挪动,如果负载已经均衡,就不要再挪了 30
Kernel的任务负载均衡 31 怎样衡量负载? 运行队列上的running进程的load_weight之和 什么时间检查负载是否均衡? 进程睡去、醒来 每个核每10ms 怎么调整为均衡? 根据imbalance程度从最忙的队列里抽出running进程放入其他核
参考资料 http://www.ibm.com/developerworks/cn/linux/l-cn-schldom/index.html http://book.opensourceproject.org.cn/kernel/kernel3rd/opensource/0596005652/understandlk-chp-7-sect-5.html Q&A 32

Weitere ähnliche Inhalte

Ähnlich wie Linux内核多核负载均衡机制

망고100 보드로 놀아보자 15
망고100 보드로 놀아보자 15망고100 보드로 놀아보자 15
망고100 보드로 놀아보자 15종인 전
 
Show innodb status
Show innodb statusShow innodb status
Show innodb statusjustlooks
 
Exploiting the Linux Kernel via Intel's SYSRET Implementation
Exploiting the Linux Kernel via Intel's SYSRET ImplementationExploiting the Linux Kernel via Intel's SYSRET Implementation
Exploiting the Linux Kernel via Intel's SYSRET Implementationnkslides
 
Wait queue
Wait queueWait queue
Wait queueRoy Lee
 
Using ngx_lua in UPYUN
Using ngx_lua in UPYUNUsing ngx_lua in UPYUN
Using ngx_lua in UPYUNCong Zhang
 
Varnish presentation for the Symfony Zaragoza user group
Varnish presentation for the Symfony Zaragoza user groupVarnish presentation for the Symfony Zaragoza user group
Varnish presentation for the Symfony Zaragoza user groupJorge Nerín
 
BKK16-317 How to generate power models for EAS and IPA
BKK16-317 How to generate power models for EAS and IPABKK16-317 How to generate power models for EAS and IPA
BKK16-317 How to generate power models for EAS and IPALinaro
 
BKK16-TR08 How to generate power models for EAS and IPA
BKK16-TR08 How to generate power models for EAS and IPABKK16-TR08 How to generate power models for EAS and IPA
BKK16-TR08 How to generate power models for EAS and IPALinaro
 
OSTEP Chapter2 Introduction
OSTEP Chapter2 IntroductionOSTEP Chapter2 Introduction
OSTEP Chapter2 IntroductionShuya Osaki
 
-----------------------------------------------------CPU.java------.pdf
 -----------------------------------------------------CPU.java------.pdf -----------------------------------------------------CPU.java------.pdf
-----------------------------------------------------CPU.java------.pdfannikasarees
 
New features in Performance Schema 5.7 in action
New features in Performance Schema 5.7 in actionNew features in Performance Schema 5.7 in action
New features in Performance Schema 5.7 in actionSveta Smirnova
 
TMPA-2017: Predicate Abstraction Based Configurable Method for Data Race Dete...
TMPA-2017: Predicate Abstraction Based Configurable Method for Data Race Dete...TMPA-2017: Predicate Abstraction Based Configurable Method for Data Race Dete...
TMPA-2017: Predicate Abstraction Based Configurable Method for Data Race Dete...Iosif Itkin
 
TLPI - 6 Process
TLPI - 6 ProcessTLPI - 6 Process
TLPI - 6 ProcessShu-Yu Fu
 
Drizzles Approach To Improving Performance Of The Server
Drizzles  Approach To  Improving  Performance Of The  ServerDrizzles  Approach To  Improving  Performance Of The  Server
Drizzles Approach To Improving Performance Of The ServerPerconaPerformance
 
Linux Ethernet device driver
Linux Ethernet device driverLinux Ethernet device driver
Linux Ethernet device driver艾鍗科技
 
Bare metal performance in Elixir
Bare metal performance in ElixirBare metal performance in Elixir
Bare metal performance in ElixirAaron Seigo
 
Analyzing the Performance Effects of Meltdown + Spectre on Apache Spark Workl...
Analyzing the Performance Effects of Meltdown + Spectre on Apache Spark Workl...Analyzing the Performance Effects of Meltdown + Spectre on Apache Spark Workl...
Analyzing the Performance Effects of Meltdown + Spectre on Apache Spark Workl...Databricks
 
Open daylight and Openstack
Open daylight and OpenstackOpen daylight and Openstack
Open daylight and OpenstackDave Neary
 
Using ngx_lua in UPYUN 2
Using ngx_lua in UPYUN 2Using ngx_lua in UPYUN 2
Using ngx_lua in UPYUN 2Cong Zhang
 

Ähnlich wie Linux内核多核负载均衡机制 (20)

망고100 보드로 놀아보자 15
망고100 보드로 놀아보자 15망고100 보드로 놀아보자 15
망고100 보드로 놀아보자 15
 
Show innodb status
Show innodb statusShow innodb status
Show innodb status
 
Exploiting the Linux Kernel via Intel's SYSRET Implementation
Exploiting the Linux Kernel via Intel's SYSRET ImplementationExploiting the Linux Kernel via Intel's SYSRET Implementation
Exploiting the Linux Kernel via Intel's SYSRET Implementation
 
Wait queue
Wait queueWait queue
Wait queue
 
Using ngx_lua in UPYUN
Using ngx_lua in UPYUNUsing ngx_lua in UPYUN
Using ngx_lua in UPYUN
 
Varnish presentation for the Symfony Zaragoza user group
Varnish presentation for the Symfony Zaragoza user groupVarnish presentation for the Symfony Zaragoza user group
Varnish presentation for the Symfony Zaragoza user group
 
BKK16-317 How to generate power models for EAS and IPA
BKK16-317 How to generate power models for EAS and IPABKK16-317 How to generate power models for EAS and IPA
BKK16-317 How to generate power models for EAS and IPA
 
BKK16-TR08 How to generate power models for EAS and IPA
BKK16-TR08 How to generate power models for EAS and IPABKK16-TR08 How to generate power models for EAS and IPA
BKK16-TR08 How to generate power models for EAS and IPA
 
OSTEP Chapter2 Introduction
OSTEP Chapter2 IntroductionOSTEP Chapter2 Introduction
OSTEP Chapter2 Introduction
 
-----------------------------------------------------CPU.java------.pdf
 -----------------------------------------------------CPU.java------.pdf -----------------------------------------------------CPU.java------.pdf
-----------------------------------------------------CPU.java------.pdf
 
New features in Performance Schema 5.7 in action
New features in Performance Schema 5.7 in actionNew features in Performance Schema 5.7 in action
New features in Performance Schema 5.7 in action
 
TMPA-2017: Predicate Abstraction Based Configurable Method for Data Race Dete...
TMPA-2017: Predicate Abstraction Based Configurable Method for Data Race Dete...TMPA-2017: Predicate Abstraction Based Configurable Method for Data Race Dete...
TMPA-2017: Predicate Abstraction Based Configurable Method for Data Race Dete...
 
TLPI - 6 Process
TLPI - 6 ProcessTLPI - 6 Process
TLPI - 6 Process
 
Drizzles Approach To Improving Performance Of The Server
Drizzles  Approach To  Improving  Performance Of The  ServerDrizzles  Approach To  Improving  Performance Of The  Server
Drizzles Approach To Improving Performance Of The Server
 
Linux Ethernet device driver
Linux Ethernet device driverLinux Ethernet device driver
Linux Ethernet device driver
 
Bare metal performance in Elixir
Bare metal performance in ElixirBare metal performance in Elixir
Bare metal performance in Elixir
 
Analyzing the Performance Effects of Meltdown + Spectre on Apache Spark Workl...
Analyzing the Performance Effects of Meltdown + Spectre on Apache Spark Workl...Analyzing the Performance Effects of Meltdown + Spectre on Apache Spark Workl...
Analyzing the Performance Effects of Meltdown + Spectre on Apache Spark Workl...
 
Open daylight and Openstack
Open daylight and OpenstackOpen daylight and Openstack
Open daylight and Openstack
 
Using ngx_lua in UPYUN 2
Using ngx_lua in UPYUN 2Using ngx_lua in UPYUN 2
Using ngx_lua in UPYUN 2
 
Nubilus Perl
Nubilus PerlNubilus Perl
Nubilus Perl
 

Mehr von Hao(Robin) Dong

Mehr von Hao(Robin) Dong (9)

Transformer and BERT
Transformer and BERTTransformer and BERT
Transformer and BERT
 
Google TPU
Google TPUGoogle TPU
Google TPU
 
flashcache原理及改造
flashcache原理及改造flashcache原理及改造
flashcache原理及改造
 
ext2-110628041727-phpapp02
ext2-110628041727-phpapp02ext2-110628041727-phpapp02
ext2-110628041727-phpapp02
 
Ext4 Bigalloc report public
Ext4 Bigalloc report publicExt4 Bigalloc report public
Ext4 Bigalloc report public
 
Overlayfs and VFS
Overlayfs and VFSOverlayfs and VFS
Overlayfs and VFS
 
Ext4 new feature - bigalloc
Ext4 new feature - bigallocExt4 new feature - bigalloc
Ext4 new feature - bigalloc
 
why we need ext4
why we need ext4why we need ext4
why we need ext4
 
Linux下Poll和Epoll内核源码剖析
Linux下Poll和Epoll内核源码剖析Linux下Poll和Epoll内核源码剖析
Linux下Poll和Epoll内核源码剖析
 

Kürzlich hochgeladen

Developing An App To Navigate The Roads of Brazil
Developing An App To Navigate The Roads of BrazilDeveloping An App To Navigate The Roads of Brazil
Developing An App To Navigate The Roads of BrazilV3cube
 
The Codex of Business Writing Software for Real-World Solutions 2.pptx
The Codex of Business Writing Software for Real-World Solutions 2.pptxThe Codex of Business Writing Software for Real-World Solutions 2.pptx
The Codex of Business Writing Software for Real-World Solutions 2.pptxMalak Abu Hammad
 
Tata AIG General Insurance Company - Insurer Innovation Award 2024
Tata AIG General Insurance Company - Insurer Innovation Award 2024Tata AIG General Insurance Company - Insurer Innovation Award 2024
Tata AIG General Insurance Company - Insurer Innovation Award 2024The Digital Insurer
 
04-2024-HHUG-Sales-and-Marketing-Alignment.pptx
04-2024-HHUG-Sales-and-Marketing-Alignment.pptx04-2024-HHUG-Sales-and-Marketing-Alignment.pptx
04-2024-HHUG-Sales-and-Marketing-Alignment.pptxHampshireHUG
 
Axa Assurance Maroc - Insurer Innovation Award 2024
Axa Assurance Maroc - Insurer Innovation Award 2024Axa Assurance Maroc - Insurer Innovation Award 2024
Axa Assurance Maroc - Insurer Innovation Award 2024The Digital Insurer
 
A Domino Admins Adventures (Engage 2024)
A Domino Admins Adventures (Engage 2024)A Domino Admins Adventures (Engage 2024)
A Domino Admins Adventures (Engage 2024)Gabriella Davis
 
08448380779 Call Girls In Civil Lines Women Seeking Men
08448380779 Call Girls In Civil Lines Women Seeking Men08448380779 Call Girls In Civil Lines Women Seeking Men
08448380779 Call Girls In Civil Lines Women Seeking MenDelhi Call girls
 
CNv6 Instructor Chapter 6 Quality of Service
CNv6 Instructor Chapter 6 Quality of ServiceCNv6 Instructor Chapter 6 Quality of Service
CNv6 Instructor Chapter 6 Quality of Servicegiselly40
 
Slack Application Development 101 Slides
Slack Application Development 101 SlidesSlack Application Development 101 Slides
Slack Application Development 101 Slidespraypatel2
 
2024: Domino Containers - The Next Step. News from the Domino Container commu...
2024: Domino Containers - The Next Step. News from the Domino Container commu...2024: Domino Containers - The Next Step. News from the Domino Container commu...
2024: Domino Containers - The Next Step. News from the Domino Container commu...Martijn de Jong
 
Top 5 Benefits OF Using Muvi Live Paywall For Live Streams
Top 5 Benefits OF Using Muvi Live Paywall For Live StreamsTop 5 Benefits OF Using Muvi Live Paywall For Live Streams
Top 5 Benefits OF Using Muvi Live Paywall For Live StreamsRoshan Dwivedi
 
How to Troubleshoot Apps for the Modern Connected Worker
How to Troubleshoot Apps for the Modern Connected WorkerHow to Troubleshoot Apps for the Modern Connected Worker
How to Troubleshoot Apps for the Modern Connected WorkerThousandEyes
 
Automating Google Workspace (GWS) & more with Apps Script
Automating Google Workspace (GWS) & more with Apps ScriptAutomating Google Workspace (GWS) & more with Apps Script
Automating Google Workspace (GWS) & more with Apps Scriptwesley chun
 
Exploring the Future Potential of AI-Enabled Smartphone Processors
Exploring the Future Potential of AI-Enabled Smartphone ProcessorsExploring the Future Potential of AI-Enabled Smartphone Processors
Exploring the Future Potential of AI-Enabled Smartphone Processorsdebabhi2
 
Histor y of HAM Radio presentation slide
Histor y of HAM Radio presentation slideHistor y of HAM Radio presentation slide
Histor y of HAM Radio presentation slidevu2urc
 
The 7 Things I Know About Cyber Security After 25 Years | April 2024
The 7 Things I Know About Cyber Security After 25 Years | April 2024The 7 Things I Know About Cyber Security After 25 Years | April 2024
The 7 Things I Know About Cyber Security After 25 Years | April 2024Rafal Los
 
Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...
Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...
Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...apidays
 
Driving Behavioral Change for Information Management through Data-Driven Gree...
Driving Behavioral Change for Information Management through Data-Driven Gree...Driving Behavioral Change for Information Management through Data-Driven Gree...
Driving Behavioral Change for Information Management through Data-Driven Gree...Enterprise Knowledge
 
Presentation on how to chat with PDF using ChatGPT code interpreter
Presentation on how to chat with PDF using ChatGPT code interpreterPresentation on how to chat with PDF using ChatGPT code interpreter
Presentation on how to chat with PDF using ChatGPT code interpreternaman860154
 
Kalyanpur ) Call Girls in Lucknow Finest Escorts Service 🍸 8923113531 🎰 Avail...
Kalyanpur ) Call Girls in Lucknow Finest Escorts Service 🍸 8923113531 🎰 Avail...Kalyanpur ) Call Girls in Lucknow Finest Escorts Service 🍸 8923113531 🎰 Avail...
Kalyanpur ) Call Girls in Lucknow Finest Escorts Service 🍸 8923113531 🎰 Avail...gurkirankumar98700
 

Kürzlich hochgeladen (20)

Developing An App To Navigate The Roads of Brazil
Developing An App To Navigate The Roads of BrazilDeveloping An App To Navigate The Roads of Brazil
Developing An App To Navigate The Roads of Brazil
 
The Codex of Business Writing Software for Real-World Solutions 2.pptx
The Codex of Business Writing Software for Real-World Solutions 2.pptxThe Codex of Business Writing Software for Real-World Solutions 2.pptx
The Codex of Business Writing Software for Real-World Solutions 2.pptx
 
Tata AIG General Insurance Company - Insurer Innovation Award 2024
Tata AIG General Insurance Company - Insurer Innovation Award 2024Tata AIG General Insurance Company - Insurer Innovation Award 2024
Tata AIG General Insurance Company - Insurer Innovation Award 2024
 
04-2024-HHUG-Sales-and-Marketing-Alignment.pptx
04-2024-HHUG-Sales-and-Marketing-Alignment.pptx04-2024-HHUG-Sales-and-Marketing-Alignment.pptx
04-2024-HHUG-Sales-and-Marketing-Alignment.pptx
 
Axa Assurance Maroc - Insurer Innovation Award 2024
Axa Assurance Maroc - Insurer Innovation Award 2024Axa Assurance Maroc - Insurer Innovation Award 2024
Axa Assurance Maroc - Insurer Innovation Award 2024
 
A Domino Admins Adventures (Engage 2024)
A Domino Admins Adventures (Engage 2024)A Domino Admins Adventures (Engage 2024)
A Domino Admins Adventures (Engage 2024)
 
08448380779 Call Girls In Civil Lines Women Seeking Men
08448380779 Call Girls In Civil Lines Women Seeking Men08448380779 Call Girls In Civil Lines Women Seeking Men
08448380779 Call Girls In Civil Lines Women Seeking Men
 
CNv6 Instructor Chapter 6 Quality of Service
CNv6 Instructor Chapter 6 Quality of ServiceCNv6 Instructor Chapter 6 Quality of Service
CNv6 Instructor Chapter 6 Quality of Service
 
Slack Application Development 101 Slides
Slack Application Development 101 SlidesSlack Application Development 101 Slides
Slack Application Development 101 Slides
 
2024: Domino Containers - The Next Step. News from the Domino Container commu...
2024: Domino Containers - The Next Step. News from the Domino Container commu...2024: Domino Containers - The Next Step. News from the Domino Container commu...
2024: Domino Containers - The Next Step. News from the Domino Container commu...
 
Top 5 Benefits OF Using Muvi Live Paywall For Live Streams
Top 5 Benefits OF Using Muvi Live Paywall For Live StreamsTop 5 Benefits OF Using Muvi Live Paywall For Live Streams
Top 5 Benefits OF Using Muvi Live Paywall For Live Streams
 
How to Troubleshoot Apps for the Modern Connected Worker
How to Troubleshoot Apps for the Modern Connected WorkerHow to Troubleshoot Apps for the Modern Connected Worker
How to Troubleshoot Apps for the Modern Connected Worker
 
Automating Google Workspace (GWS) & more with Apps Script
Automating Google Workspace (GWS) & more with Apps ScriptAutomating Google Workspace (GWS) & more with Apps Script
Automating Google Workspace (GWS) & more with Apps Script
 
Exploring the Future Potential of AI-Enabled Smartphone Processors
Exploring the Future Potential of AI-Enabled Smartphone ProcessorsExploring the Future Potential of AI-Enabled Smartphone Processors
Exploring the Future Potential of AI-Enabled Smartphone Processors
 
Histor y of HAM Radio presentation slide
Histor y of HAM Radio presentation slideHistor y of HAM Radio presentation slide
Histor y of HAM Radio presentation slide
 
The 7 Things I Know About Cyber Security After 25 Years | April 2024
The 7 Things I Know About Cyber Security After 25 Years | April 2024The 7 Things I Know About Cyber Security After 25 Years | April 2024
The 7 Things I Know About Cyber Security After 25 Years | April 2024
 
Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...
Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...
Apidays Singapore 2024 - Building Digital Trust in a Digital Economy by Veron...
 
Driving Behavioral Change for Information Management through Data-Driven Gree...
Driving Behavioral Change for Information Management through Data-Driven Gree...Driving Behavioral Change for Information Management through Data-Driven Gree...
Driving Behavioral Change for Information Management through Data-Driven Gree...
 
Presentation on how to chat with PDF using ChatGPT code interpreter
Presentation on how to chat with PDF using ChatGPT code interpreterPresentation on how to chat with PDF using ChatGPT code interpreter
Presentation on how to chat with PDF using ChatGPT code interpreter
 
Kalyanpur ) Call Girls in Lucknow Finest Escorts Service 🍸 8923113531 🎰 Avail...
Kalyanpur ) Call Girls in Lucknow Finest Escorts Service 🍸 8923113531 🎰 Avail...Kalyanpur ) Call Girls in Lucknow Finest Escorts Service 🍸 8923113531 🎰 Avail...
Kalyanpur ) Call Girls in Lucknow Finest Escorts Service 🍸 8923113531 🎰 Avail...
 

Linux内核多核负载均衡机制

  • 1. Linux Kernel在多核机器上的负载均衡机制 董昊 (三百) 源代码以 2.6.18.8 为准 1
  • 2. 现在的服务器CPU架构 2 每个CPU含多个核 每个核有自己的一级cache 同一CPU内的多个核共享同一个二级cache (下图是最常见的体系结构,但不代表所有的CPU)
  • 3. Kernel的调度数据结构 每个CPU(核)上有一个运行队列 (struct rq) 每个运行队列上有两个优先级队列(struct prio_array) active expire(已经用完时间片的进程) 每个优先级队列里有分级队列,分别挂载不同优先级的进程 3
  • 5. prio_array 5 内核从active队列里按优先级挑进程出来运行,如果进程用完了时间片,就将其放入expire队列 当 active 队列的进程都用完了时间片,则把指向active和expire的指针对换,开始新一轮的优先级调度
  • 7. 先想些笨办法 怎样衡量负载? 在运行队列上的running的进程数 什么时候检查任务是否均衡? 每1秒检查一次 怎么调整为均衡? 让各个核上的任务数相等——把任务数最多的核上的任务挪一个(或几个)到任务数最少的核上 7
  • 8. 开始细化这些笨办法 怎样衡量负载? 用进程数衡量CPU负载的缺点——进程的优先级不一样 会造成有的核跑着10个低优先级的任务,而另一个核上跑着10个高优先级的任务,任务响应偏慢 可以用核上正在跑的进程的优先级来衡量负载。高优先级的我们当他负载高,低优先级的我们当他负载低 8
  • 9. Kernel的做法 进程本身有“静态优先级”(current->static_prio), 值范围是100~139 进程的static_prio在运行时不变 用nice系统调用可以更改 进程的负载计算方法 如果static_prio小于120(高优先级) p->load_weight = (140-p->static_prio) * 128 / 5 如果static_prio等于或大于120(低优先级) p->load_weight = (140-p->static_prio) * 128 / 20
  • 11. Kernel的做法 核(CPU)的负载:正在其上运行的进程的load_weight相加 rq->raw_weighted_load 问:为什么用“静态优先级”来衡量进程的负载,而不是动态优先级? 动态优先级包含了进程运行时的特性,比如sleep_avg(平均睡眠时间) 而衡量负载主要考虑:进程一旦开始使用CPU会造成什么影响。进程之前睡得多少并不重要。 11
  • 12. 继续细化这些笨办法 什么时候检查任务是否均衡? 每秒调整一次负载慢吗? 可以在CPU负载变化的时候调整 进程睡眠或醒来时是负载变化的时候 12
  • 13. Kernel的做法 进程睡去…… 13 3294 asmlinkage void __sched schedule(void) 3295 { … 3354 switch_count = &prev->nivcsw; 3355 if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { 3356 switch_count = &prev->nvcsw; 3357 if (unlikely((prev->state & TASK_INTERRUPTIBLE) && 3358 unlikely(signal_pending(prev)))) 3359 prev->state = TASK_RUNNING; 3360 else { 3361 if (prev->state == TASK_UNINTERRUPTIBLE) 3362 rq->nr_uninterruptible++; 3363 deactivate_task(prev, rq); //将进程拿出rq,进程负载也会被减掉 3364 } 3365 } …
  • 14. schedule() 14 … schedule()函数继续 3367 cpu = smp_processor_id(); 3368 if (unlikely(!rq->nr_running)) { 3369 idle_balance(cpu, rq); 3370 if (!rq->nr_running) { 3371 next = rq->idle; 3372 rq->expired_timestamp = 0; 3373 wake_sleeping_dependent(cpu); 3374 goto switch_tasks; 3375 } 3376 } … 这种情况仅发生在rq上没有其它进程的时候
  • 15. idle_balance() 15 idle_balance()函数 2733 static void idle_balance(int this_cpu, struct rq *this_rq) 2734 { 2735 struct sched_domain *sd; 2736 2737 for_each_domain(this_cpu, sd) { 2738 if (sd->flags & SD_BALANCE_NEWIDLE) { 2739 /* If we've pulled tasks over stop searching: */ 2740 if (load_balance_newidle(this_cpu, this_rq, sd)) 2741 break; 2742 } 2743 } 2744 }
  • 17. 8核机器 17 从子sched_domain到父sched_domain 287 #define for_each_domain(cpu, __sd) \ 288 for (__sd = rcu_dereference(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent)
  • 19. 通用的调度特性 19 119 #define SD_CPU_INIT (struct sched_domain) { \ 120 .span = CPU_MASK_NONE, \ 121 .parent = NULL, \ 122 .groups = NULL, \ 123 .min_interval = 1, \ 124 .max_interval = 4, \ 125 .busy_factor = 64, \ 126 .imbalance_pct = 125, \ 127 .cache_nice_tries = 1, \ 128 .per_cpu_gain = 100, \ 129 .busy_idx = 2, \ 130 .idle_idx = 1, \ 131 .newidle_idx = 2, \ 132 .wake_idx = 1, \ 133 .forkexec_idx = 1, \ 134 .flags = SD_LOAD_BALANCE \ 135 | SD_BALANCE_NEWIDLE \ 136 | SD_BALANCE_EXEC \ 137 | SD_WAKE_AFFINE \ 138 | BALANCE_FOR_POWER, \ 139 .last_balance = jiffies, \ 140 .balance_interval = 1, \ 141 .nr_balance_failed = 0, \ 142 } 在我们常用的Intel架构上,可以认为: SD_NODE_INIT用来初始化每个CPU SD_CPU_INIT用来初始化CPU上的每个核 正因为同一CPU上的核共享L2 cache,所以SD_CPU_INIT有SD_WAKE_AFFINE(亲和)选项
  • 20. 不同体系结构的不同调度特性 20 include/asm-x86_64/topology.h 31 #define SD_NODE_INIT (struct sched_domain) { \ 32 .span = CPU_MASK_NONE, \ 33 .parent = NULL, \ 34 .groups = NULL, \ 35 .min_interval = 8, \ 36 .max_interval = 32, \ 37 .busy_factor = 32, \ 38 .imbalance_pct = 125, \ 39 .cache_nice_tries = 2, \ 40 .busy_idx = 3, \ 41 .idle_idx = 2, \ 42 .newidle_idx = 0, \ 43 .wake_idx = 1, \ 44 .forkexec_idx = 1, \ 45 .per_cpu_gain = 100, \ 46 .flags = SD_LOAD_BALANCE \ 47 | SD_BALANCE_FORK \ 48 | SD_BALANCE_EXEC \ 49 | SD_WAKE_BALANCE, \ 50 .last_balance = jiffies, \ 51 .balance_interval = 1, \ 52 .nr_balance_failed = 0, \ 53 } include/asm-powerpc/topology.h 43 #define SD_NODE_INIT (struct sched_domain) { \ 44 .span = CPU_MASK_NONE, \ 45 .parent = NULL, \ 46 .groups = NULL, \ 47 .min_interval = 8, \ 48 .max_interval = 32, \ 49 .busy_factor = 32, \ 50 .imbalance_pct = 125, \ 51 .cache_nice_tries = 1, \ 52 .per_cpu_gain = 100, \ 53 .busy_idx = 3, \ 54 .idle_idx = 1, \ 55 .newidle_idx = 2, \ 56 .wake_idx = 1, \ 57 .flags = SD_LOAD_BALANCE \ 58 | SD_BALANCE_EXEC \ 59 | SD_BALANCE_NEWIDLE \ 60 | SD_WAKE_IDLE \ 61 | SD_WAKE_BALANCE, \ 62 .last_balance = jiffies, \ 63 .balance_interval = 1, \ 64 .nr_balance_failed = 0, \ 65 }
  • 21. schedule() 21 3378 array = rq->active; 3379 if (unlikely(!array->nr_active)) { 3380 /* 3381 * Switch the active and expired arrays. 3382 */ 3383 schedstat_inc(rq, sched_switch); 3384 rq->active = rq->expired; 3385 rq->expired = array; 3386 array = rq->active; 3387 rq->expired_timestamp = 0; 3388 rq->best_expired_prio = MAX_PRIO; 3389 } 这里可以看到active和expire优先级队列互换的操作
  • 23. try_to_wake_up() 1422 int idx = this_sd->wake_idx; 1423 unsigned int imbalance; 1424 1425 imbalance = 100 + (this_sd->imbalance_pct - 100) / 2; 1426 1427 load = source_load(cpu, idx); 1428 this_load = target_load(this_cpu, idx); 1429 1430 new_cpu = this_cpu; /* Wake to this CPU if we can */ 1431 1432 if (this_sd->flags & SD_WAKE_AFFINE) { //对于一个CPU内的两个核 1433 unsigned long tl = this_load; 1434 unsigned long tl_per_task = cpu_avg_load_per_task(this_cpu); 1435 …… 1441 if (sync) 1442 tl -= current->load_weight; 1443 1444 if ((tl <= load && 1445 tl + target_load(cpu, idx) <= tl_per_task) || 1446 100*(tl + p->load_weight) <= imbalance*load) { //如果是同一CPU里的两个核,只要我这个核的负载不大,就把本来该另一个核跑的进程揽过来,AFFINE,亲兄弟嘛 1447 /* 1448 * This domain has SD_WAKE_AFFINE and 1449 * p is cache cold in this domain, and 1450 * there is no bad imbalance. 1451 */ 1452 schedstat_inc(this_sd, ttwu_move_affine); 1453 goto out_set_cpu; 1454 } 1455 }
  • 24. try_to_wake_up() 1461 if (this_sd->flags & SD_WAKE_BALANCE) { 1462 if (imbalance*this_load <= 100*load) { //如果是两个不同CPU上的核,则只有在我这个核的负载很小时,才揽进程 1463 schedstat_inc(this_sd, ttwu_move_balance); 1464 goto out_set_cpu; 1465 } 1466 } 1467 } 1468 1469 new_cpu = cpu; /* Could not wake to this_cpu. Wake to cpu instead */ 1470 out_set_cpu: 1471 new_cpu = wake_idle(new_cpu, p); //有没有空闲的核? 1472 if (new_cpu != cpu) { 1473 set_task_cpu(p, new_cpu); 1474 task_rq_unlock(rq, &flags); 1475 /* might preempt at this point */ 1476 rq = task_rq_lock(p, &flags); 1477 old_state = p->state; 1478 if (!(old_state & state)) 1479 goto out; 1480 if (p->array) 1481 goto out_running; 1482 1483 this_cpu = smp_processor_id(); 1484 cpu = task_cpu(p); 1485 }
  • 25. Kernel的做法 只是进程睡去和醒来的时候负载均衡就够了吗? 如果某个进程调用nice修改了static_prio 每个核每10ms调用一次scheduler_tick scheduler_tick调用rebalance_tick rebalance_tick根据核所在的sched_domain的特性来决定多久调一次load_balance 根据本CPU是否繁忙 25
  • 27. rebalance_tick() 27 2811 static void 2812 rebalance_tick(int this_cpu, struct rq *this_rq, enum idle_type idle) 2813 { 2814 unsigned long this_load, interval, j = cpu_offset(this_cpu); 2815 struct sched_domain *sd; 2816 int i, scale; 2817 2818 this_load = this_rq->raw_weighted_load; 2819 2820 /* Update our load: */ 2821 for (i = 0, scale = 1; i < 3; i++, scale <<= 1) { 2822 unsigned long old_load, new_load; 2823 2824 old_load = this_rq->cpu_load[i]; 2825 new_load = this_load; 2826 /* 2827 * Round up the averaging division if load is increasing. This 2828 * prevents us from getting stuck on 9 if the load is 10, for 2829 * example. 2830 */ 2831 if (new_load > old_load) 2832 new_load += scale-1; 2833 this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) / scale; 2834 }
  • 28. rebalance_tick() 28 2836 for_each_domain(this_cpu, sd) { 2837 if (!(sd->flags & SD_LOAD_BALANCE)) 2838 continue; 2839 2840 interval = sd->balance_interval; 2841 if (idle != SCHED_IDLE) 2842 interval *= sd->busy_factor; 2843 2844 /* scale ms to jiffies */ 2845 interval = msecs_to_jiffies(interval); 2846 if (unlikely(!interval)) 2847 interval = 1; 2848 2849 if (j - sd->last_balance >= interval) { 2850 if (load_balance(this_cpu, this_rq, sd, idle)) { 2851 /* 2852 * We've pulled tasks over so either we're no 2853 * longer idle, or one of our SMT siblings is 2854 * not idle. 2855 */ 2856 idle = NOT_IDLE; 2857 } 2858 sd->last_balance += interval; 2859 } 2860 } 2861 }
  • 29. load_balance() 29 2527 static int load_balance(int this_cpu, struct rq *this_rq, 2528 struct sched_domain *sd, enum idle_type idle) 2529 { …… 2542 redo: 2543 group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle, 2544 &cpus); 2545 if (!group) { 2546 schedstat_inc(sd, lb_nobusyg[idle]); 2547 goto out_balanced; 2548 } 2549 2550 busiest = find_busiest_queue(group, idle, imbalance, &cpus); 2551 if (!busiest) { 2552 schedstat_inc(sd, lb_nobusyq[idle]); 2553 goto out_balanced; 2554 } 2555 2556 BUG_ON(busiest == this_rq); 2557 2558 schedstat_add(sd, lb_imbalance[idle], imbalance); 2559 2560 nr_moved = 0; 2561 if (busiest->nr_running > 1) { …… 2568 double_rq_lock(this_rq, busiest); 2569 nr_moved = move_tasks(this_rq, this_cpu, busiest, 2570 minus_1_or_zero(busiest->nr_running), 2571 imbalance, sd, idle, &all_pinned); 2572 double_rq_unlock(this_rq, busiest);
  • 30. load_balance()的考量 find_busiest_group从本调度域找出负载最高的sched_group,find_busiest_queue从sched_group中找出最繁忙的rq 可能负载已经均衡,找不出,则返回NULL 要考虑imbalance_pct move_tasks把任务从最繁忙的rq里挪到本CPU来 Linux的负载均衡是“拉模式”:我(本core)定时从别处最繁忙的地方拉任务过来 一边预计算负载一边挪动,如果负载已经均衡,就不要再挪了 30
  • 31. Kernel的任务负载均衡 31 怎样衡量负载? 运行队列上的running进程的load_weight之和 什么时间检查负载是否均衡? 进程睡去、醒来 每个核每10ms 怎么调整为均衡? 根据imbalance程度从最忙的队列里抽出running进程放入其他核