接着继续探讨 BRIN.我们建立两个 table ,都使用 BRIN,来做比较.create table brin1 ( id int not null, val int not null);create table brin2 ( id int not null, val int not null);insert into brin1select generate_series(1,1e6) , round(random() * 10);insert into brin2select id , round(random() * 10) from generate_series(1,1e6) as t(id) order by random();analyze brin1;analyze brin2;接下来要来介绍 table 统计资讯中,栏位的物理储存顺序与逻辑顺序相关性.select correlation from pg_stats where schemaname = 'aki' and tablename = 'brin1' and attname = 'id';+-------------+| correlation |+-------------+| 1 |+-------------+select correlation from pg_stats where schemaname = 'aki' and tablename = 'brin2' and attname = 'id';+--------------+| correlation |+--------------+| -0.009371842 |+--------------+看一下两个table 的内部实际分布情况,使用了ctid 系统栏位.select one.id as "logical" , one.ctid as "physical in one" , two.ctid as "physical in two" from brin1 as one join brin2 as two using (id) where one.id between 1 and 10 order by 1; +---------+-----------------+-----------------+| logical | physical in one | physical in two |+---------+-----------------+-----------------+| 1 | (0,1) | (2836,26) || 2 | (0,2) | (3310,6) || 3 | (0,3) | (1423,168) || 4 | (0,4) | (4042,58) || 5 | (0,5) | (4117,27) || 6 | (0,6) | (2960,165) || 7 | (0,7) | (4052,80) || 8 | (0,8) | (2377,40) || 9 | (0,9) | (1249,151) || 10 | (0,10) | (2153,220) |+---------+-----------------+-----------------+(10 rows)由上面的观察,可以得知 brin1 与 brin2 的资料相关情况.接下来建立 BRIN indexcreate index brin1_idx on brin1 using brin (id);create index brin2_idx on brin2 using brin (id);with t as (select pg_relation_size('brin1') as tableSize , pg_relation_size('brin1_idx') as indexSize)select pg_size_pretty(t.tableSize) as tableSize , pg_size_pretty(t.indexSize) as indexSize , (t.indexSize / t.tableSize::real) as "Index / Table" from t;+-----------+-----------+-----------------------+| tablesize | indexsize | Index / Table |+-----------+-----------+-----------------------+| 35 MB | 24 kB | 0.0006779661016949153 |+-----------+-----------+-----------------------+analyze brin1;analyze brin2;explain analyze select sum(val) from brin1 where id >= 10000 and id < 20000; +--------------------------------------------------------------------------------------------------------------------------------+| QUERY PLAN |+--------------------------------------------------------------------------------------------------------------------------------+| Aggregate (cost=4892.16..4892.17 rows=1 width=8) (actual time=8.992..8.993 rows=1 loops=1) || -> Bitmap Heap Scan on brin1 (cost=14.45..4868.01 rows=9660 width=4) (actual time=2.180..6.886 rows=10000 loops=1) || Recheck Cond: ((id >= 10000) AND (id < 20000)) || Rows Removed by Index Recheck: 18928 || Heap Blocks: lossy=128 || -> Bitmap Index Scan on brin1_idx (cost=0.00..12.03 rows=28571 width=0) (actual time=0.067..0.067 rows=1280 loops=1) || Index Cond: ((id >= 10000) AND (id < 20000)) || Planning Time: 0.152 ms || Execution Time: 9.071 ms |+--------------------------------------------------------------------------------------------------------------------------------+相对 brin2 的情况explain analyze select sum(val) from brin2 where id >= 10000 and id < 20000; +-------------------------------------------------------------------------------------------------------------------------------------+| QUERY PLAN |+-------------------------------------------------------------------------------------------------------------------------------------+| Finalize Aggregate (cost=11686.19..11686.20 rows=1 width=8) (actual time=81.731..82.950 rows=1 loops=1) || -> Gather (cost=11685.97..11686.18 rows=2 width=8) (actual time=79.185..82.941 rows=3 loops=1) || Workers Planned: 2 || Workers Launched: 2 || -> Partial Aggregate (cost=10685.97..10685.98 rows=1 width=8) (actual time=53.211..53.212 rows=1 loops=3) || -> Parallel Seq Scan on brin2 (cost=0.00..10675.00 rows=4389 width=4) (actual time=0.261..52.585 rows=3333 loops=3) || Filter: ((id >= 10000) AND (id < 20000)) || Rows Removed by Filter: 330000 || Planning Time: 0.370 ms || Execution Time: 85.732 ms |+-------------------------------------------------------------------------------------------------------------------------------------+观察以上两个查询, brin2 是选择做 Seq Scan, 不使用 brin index!PostgreSQL 有提供一个 cluster 指令,可以依照指定的 index 来做资料排序.但目前 brin index 还未提供 access method. 我们先把 brin 形态 index 删除,并且建立 btree desc index.drop index brin2_idx;create index brin2_btree_idx on brin2 using btree (id desc);cluster brin2 using brin2_btree_idx;CLUSTERTime: 4536.090 ms (00:04.536)commit;analyze brin2;花了4点多秒 重整百万笔的table.接着来观察相关性.select correlation from pg_stats where schemaname = 'aki' and tablename = 'brin2' and attname = 'id';+-------------+| correlation |+-------------+| -1 |+-------------+相关性的值域由 -1 ~ +1, 负就是反向, 越接近 0 相关性越差, 绝对值越接近 1 相关性越好.接着观察物理与逻辑顺序select one.id as "logical" , one.ctid as "physical in one" , two.ctid as "physical in two" from brin1 as one join brin2 as two using (id) where one.id between 1 and 5 order by 1;+---------+-----------------+-----------------+| logical | physical in one | physical in two |+---------+-----------------+-----------------+| 1 | (0,1) | (4424,176) || 2 | (0,2) | (4424,175) || 3 | (0,3) | (4424,174) || 4 | (0,4) | (4424,173) || 5 | (0,5) | (4424,172) |+---------+-----------------+-----------------+select one.id as "logical" , one.ctid as "physical in one" , two.ctid as "physical in two" from brin1 as one join brin2 as two using (id) where one.id between 999996 and 1000000 order by 1 desc;+---------+-----------------+-----------------+| logical | physical in one | physical in two |+---------+-----------------+-----------------+| 1000000 | (4424,176) | (0,1) || 999999 | (4424,175) | (0,2) || 999998 | (4424,174) | (0,3) || 999997 | (4424,173) | (0,4) || 999996 | (4424,172) | (0,5) |+---------+-----------------+-----------------+接着建立 brin2 的 brin index create index brin2_brin_idx on brin2 using brin (id);commit;analyze brin2;explain analyze select sum(val) from brin2 where id >= 10000 and id < 20000; 这时候会选 btree 形态的 index!+----------------------------------------------------------------------------------------------------------------------------------------+| QUERY PLAN |+----------------------------------------------------------------------------------------------------------------------------------------+| Aggregate (cost=395.44..395.45 rows=1 width=8) (actual time=17.802..17.803 rows=1 loops=1) || -> Index Scan using brin2_btree_idx on brin2 (cost=0.42..369.89 rows=10223 width=4) (actual time=2.160..15.673 rows=10000 loops=1) || Index Cond: ((id >= 10000) AND (id < 20000)) || Planning Time: 0.518 ms || Execution Time: 17.882 ms |+----------------------------------------------------------------------------------------------------------------------------------------+第二次 Planning Time: 0.214 ms Execution Time: 7.785 ms 将 btree index 删除.drop index brin2_btree_idx;commit;analyze brin2;explain analyze select sum(val) from brin2 where id >= 10000 and id < 20000; | Planning Time: 0.221 ms || Execution Time: 11.685 ms | Planning Time: 0.116 ms || Execution Time: 11.652 ms 我们可以观察到 brin index 对 相关性为负值的栏位,效果也不佳!了解其限制及其适合的情境,我们就可以适当的使用.今天先介绍到此!