合并两个结果集同时从一组结果中排除相同结果的最有效方法
我有一个包含当前值和默认值的表。
create table temp_se (
id varchar2(1),
default_value varchar2(1),
sequence number
)
/
insert into temp_se values ('A', 'N', 1);
insert into temp_se values ('B', 'N', 2);
insert into temp_se values ('C', 'N', 3);
insert into temp_se values ('D', 'N', 4);
insert into temp_se values ('E', 'N', null);
insert into temp_se values ('A', 'Y', null);
insert into temp_se values ('B', 'Y', null);
insert into temp_se values ('C', 'Y', null);
insert into temp_se values ('D', 'Y', null);
insert into temp_se values ('E', 'Y', 5);
/
任务是返回所有当前值并根据 ID 从默认值中添加缺失值。
我想知道最有效的方法是什么,我目前的方法是:
with data as
(select 1 rank, t.*
from temp_se t
where default_value = 'N'
and nvl(sequence, -1) < 3
union
select 2 rank, t.*
from temp_se t
where default_value = 'Y'),
ranked as
(select rank() over(partition by id order by rank) rank_no, t.* from data t)
select id, default_value, sequence from ranked where rank_no = 1;
/
我已经尝试了一些其他方法,包括下面的方法,但有些方法没有产生预期的结果,或者我不确定它是否在性能方面更好。也许有更好的方法来解决这个我不知道/没有尝试过的问题?
当前蚂蚁尝试的解决方案:
-- Create and populate table, to reproduce problem
create table temp_se (
id varchar2(1),
default_value varchar2(1),
sequence number
)
/
insert into temp_se values ('A', 'N', 1);
insert into temp_se values ('B', 'N', 2);
insert into temp_se values ('C', 'N', 3);
insert into temp_se values ('D', 'N', 4);
insert into temp_se values ('E', 'N', null);
insert into temp_se values ('A', 'Y', null);
insert into temp_se values ('B', 'Y', null);
insert into temp_se values ('C', 'Y', null);
insert into temp_se values ('D', 'Y', null);
insert into temp_se values ('E', 'Y', 5);
/
--Current approach
-- produces expected result
-- not happy with performance
with data as
(select 1 rank, t.*
from temp_se t
where default_value = 'N'
and nvl(sequence, -1) < 3
union
select 2 rank, t.*
from temp_se t
where default_value = 'Y'),
ranked as
(select rank() over(partition by id order by rank) rank_no, t.* from data t)
select id, default_value, sequence from ranked where rank_no = 1;
/
/*
* What i've tried:
* */
--union -> default_value flag always differ so everything is included
-- would be nice if you could specify on which columns should be compared
select *
from (select *
from temp_se
where default_value = 'N'
and nvl(sequence, -1) < 3
union
select *
from temp_se
where default_value = 'Y')
order by id;
--full outer join, using(id) with coalesce, null values are overwritten with default non null values
select id,
coalesce(current_values.default_value, default_values.default_value) default_value,
coalesce(current_values.sequence, default_values.sequence) sequence
from (select *
from temp_se
where default_value = 'N'
and nvl(sequence, -1) < 3) current_values
full outer join (select * from temp_se where default_value = 'Y') default_values
using (id);
--exlude default_data values with not in clause
-- produces expected result
-- better cost, IO cost, and bytes read in explain plan compared to current solution
-- however select times are longer
with current_data as
(select *
from temp_se
where default_value = 'N'
and nvl(sequence, -1) < 3),
default_data as
(select *
from temp_se
where default_value = 'Y'
and id not in (select id from current_data))
select *
from current_data
union
select * from default_data;
/
drop table temp_se;
/
- 编辑 -
预期结果:
| ID | 默认值 | 序列 |
|---|---|---|
| 一种 | N | 1 |
| 乙 | N | 2 |
| C | 是 | |
| D | 是 | |
| 乙 | N |
回答
重新表述你目前的方法
select id, default_value, sequence
from (
select t.* ,
row_number() over(partition by id order by case when default_value = 'N' then 1 else 2 end, nvl(sequence, -1) desc) rn
from temp_se t
where default_value = 'N' and (sequence is null or sequence < 3)
or default_value = 'Y'
) t
where rn = 1;