合并两个结果集同时从一组结果中排除相同结果的最有效方法

我有一个包含当前值和默认值的表。

create table temp_se (
  id varchar2(1),
  default_value varchar2(1),
  sequence number
)
/

insert into temp_se values ('A', 'N', 1);
insert into temp_se values ('B', 'N', 2);
insert into temp_se values ('C', 'N', 3);
insert into temp_se values ('D', 'N', 4);
insert into temp_se values ('E', 'N', null);
insert into temp_se values ('A', 'Y', null);
insert into temp_se values ('B', 'Y', null);
insert into temp_se values ('C', 'Y', null);
insert into temp_se values ('D', 'Y', null);
insert into temp_se values ('E', 'Y', 5);
/

任务是返回所有当前值并根据 ID 从默认值中添加缺失值。

我想知道最有效的方法是什么,我目前的方法是:

with data as
 (select 1 rank, t.*
    from temp_se t
   where default_value = 'N'
     and nvl(sequence, -1) < 3
  union
  select 2 rank, t.*
    from temp_se t
   where default_value = 'Y'),
ranked as
 (select rank() over(partition by id order by rank) rank_no, t.* from data t)
select id, default_value, sequence from ranked where rank_no = 1;
/

我已经尝试了一些其他方法,包括下面的方法,但有些方法没有产生预期的结果,或者我不确定它是否在性能方面更好。也许有更好的方法来解决这个我不知道/没有尝试过的问题?

当前蚂蚁尝试的解决方案:

-- Create and populate table, to reproduce problem
create table temp_se (
  id varchar2(1),
  default_value varchar2(1),
  sequence number
)
/

insert into temp_se values ('A', 'N', 1);
insert into temp_se values ('B', 'N', 2);
insert into temp_se values ('C', 'N', 3);
insert into temp_se values ('D', 'N', 4);
insert into temp_se values ('E', 'N', null);
insert into temp_se values ('A', 'Y', null);
insert into temp_se values ('B', 'Y', null);
insert into temp_se values ('C', 'Y', null);
insert into temp_se values ('D', 'Y', null);
insert into temp_se values ('E', 'Y', 5);
/

--Current approach
-- produces expected result
-- not happy with performance
with data as
 (select 1 rank, t.*
    from temp_se t
   where default_value = 'N'
     and nvl(sequence, -1) < 3
  union
  select 2 rank, t.*
    from temp_se t
   where default_value = 'Y'),
ranked as
 (select rank() over(partition by id order by rank) rank_no, t.* from data t)
select id, default_value, sequence from ranked where rank_no = 1;
/

/*
 * What i've tried:
 * */

--union -> default_value flag always differ so everything is included
-- would be nice if you could specify on which columns should be compared
  select *
    from (select *
            from temp_se
           where default_value = 'N'
             and nvl(sequence, -1) < 3
          union
          select *
            from temp_se
           where default_value = 'Y')
   order by id;
--full outer join, using(id) with coalesce, null values are overwritten with default non null values
select id,
       coalesce(current_values.default_value, default_values.default_value) default_value,
       coalesce(current_values.sequence, default_values.sequence) sequence
  from (select *
          from temp_se
         where default_value = 'N'
           and nvl(sequence, -1) < 3) current_values
  full outer join (select * from temp_se where default_value = 'Y') default_values
 using (id);

--exlude default_data values with not in clause
-- produces expected result
-- better cost, IO cost, and bytes read in explain plan compared to current solution
-- however select times are longer
with current_data as
 (select *
    from temp_se
   where default_value = 'N'
     and nvl(sequence, -1) < 3),
default_data as
 (select *
    from temp_se
   where default_value = 'Y'
     and id not in (select id from current_data))
select *
  from current_data
union
select * from default_data;
/

drop table temp_se;
/

- 编辑 -

预期结果:

ID 默认值 序列
一种 N 1
N 2
C
D
N

回答

重新表述你目前的方法

select id, default_value, sequence 
from (
    select t.* , 
       row_number() over(partition by id order by case when default_value = 'N' then 1 else 2 end, nvl(sequence, -1) desc) rn
    from temp_se t
    where default_value = 'N' and (sequence is null or sequence < 3)
       or default_value = 'Y'
) t
where rn = 1;


以上是合并两个结果集同时从一组结果中排除相同结果的最有效方法的全部内容。
THE END
分享
二维码
< <上一篇
下一篇>>