Групировка согласно порядку

vyaslav
Дата: 15.03.2011 18:28:02
Здраствуйте ув. форумчане,

Дано случайные данные:
DT                ID      DAT
--------- ---------- ---------
15-MAR-11          1 16-MAR-11
16-MAR-11          1 17-MAR-11
17-MAR-11          1 18-MAR-11
18-MAR-11          2 19-MAR-11
19-MAR-11          2 20-MAR-11
20-MAR-11          1 21-MAR-11
21-MAR-11          2 22-MAR-11
22-MAR-11          1 23-MAR-11
23-MAR-11          1 24-MAR-11
24-MAR-11          1 25-MAR-11
25-MAR-11          2 26-MAR-11

11 rows selected.

Групой считается одинаковые значения поля ID которое следуют по порядку друг за другом.
Тоесть для значений ID = 1, получаются такие групы:
1я: записи с DT = 15-MAR-11 по 17-MAR-11
2я: записи с DT = 22-MAR-11 по 24-MAR-11
3я: запись с DT = 20-MAR-11

Для всех таких груп нужно вывести Min(dt) и Max(dat)

Пришел к такому решению задачи:
with tb as(
select sysdate dt, 1 id, sysdate + 1 dat from dual union
select sysdate + 1 dt, 1 id, sysdate + 2 dat from dual union
select sysdate + 2 dt, 1 id, sysdate + 3 dat from dual union
select sysdate + 3 dt, 2 id, sysdate + 4 dat from dual union
select sysdate + 4 dt, 2 id, sysdate + 5 dat from dual union
select sysdate + 5 dt, 1 id, sysdate + 6 dat from dual union
select sysdate + 6 dt, 2 id, sysdate + 7 dat from dual union
select sysdate + 7 dt, 1 id, sysdate + 8 dat from dual union
select sysdate + 8 dt, 1 id, sysdate + 9 dat from dual union
select sysdate + 9 dt, 1 id, sysdate + 10 dat from dual union
select sysdate + 10 dt, 2 id, sysdate + 11 dat from dual)
select id, start_date,end_date from
    (select id, dt start_date, case x when 'Z' then dat 
                                      when 'B' then LEAD(dat,1) over (order by dt,x) 
                                 end end_date, x 
       from
        (select tbl.*,
            case 
            WHEN LG IS NULL AND LD IS NULL 
            THEN 'Z' 
            WHEN LG IS NULL AND LD <> id
            THEN 'Z' 
            WHEN LG IS NULL 
            THEN 'B' 
            WHEN LD <> id AND LG <> id
            THEN 'Z' 
            WHEN LD = id AND LD <> LG
            THEN 'B' 
            WHEN LD IS NULL AND LG <> id
            THEN 'Z' 
            WHEN LD IS NULL 
            THEN 'E' 
            WHEN LG = id AND LD <> LG
            THEN 'E' 
            ELSE 'X' END x 
        from(select tb.dt, tb.id, tb.dat, 
                   LAG(id,1) over (order by dt) lg,
                   LEAD(id,1) over (order by dt) ld
               from tb) tbl) otbl
    where otbl.x<>'X')
where x = 'B' or x = 'Z'
order by start_date

Но видно, что такое решение не эфективно и не красиво.

Если у кого то есть мысли как решить эту задачу покрасивее(только на SQL), прошу поделится.
AmKad
Дата: 15.03.2011 18:49:52
+ start_of_group
with tb as(
select sysdate dt, 1 id, sysdate + 1 dat from dual union
select sysdate + 1 dt, 1 id, sysdate + 2 dat from dual union all
select sysdate + 2 dt, 1 id, sysdate + 3 dat from dual union all
select sysdate + 3 dt, 2 id, sysdate + 4 dat from dual union all
select sysdate + 4 dt, 2 id, sysdate + 5 dat from dual union all
select sysdate + 5 dt, 1 id, sysdate + 6 dat from dual union all
select sysdate + 6 dt, 2 id, sysdate + 7 dat from dual union all
select sysdate + 7 dt, 1 id, sysdate + 8 dat from dual union all
select sysdate + 8 dt, 1 id, sysdate + 9 dat from dual union all
select sysdate + 9 dt, 1 id, sysdate + 10 dat from dual union all
select sysdate + 10 dt, 2 id, sysdate + 11 dat from dual)
select id, min(dt) d1, max(dat) d2
from
   (select dt, id, dat,
    row_number() over (order by dt) -
    row_number() over (partition by id order by dt) rn
    from tb
   )
group by id, rn
order by 2;

        ID D1         D2
---------- ---------- ----------
         1 15.03.2011 18.03.2011
         2 18.03.2011 20.03.2011
         1 20.03.2011 21.03.2011
         2 21.03.2011 22.03.2011
         1 22.03.2011 25.03.2011
         2 25.03.2011 26.03.2011

6 строк выбрано.

SQL>
vyaslav
Дата: 15.03.2011 19:11:17
AmKad,

гениально, спасибо большое!
Maxim Demenko
Дата: 15.03.2011 19:21:49
На приведенных тестовых данных не имеет значения, но в общем случае имхо лучше использовать в данной конструкции dense_rank() вместо row_number() - последняя сбивается со счету при наличии дубликатов.

Best regards

Maxim
AmKad
Дата: 15.03.2011 21:36:58
+ А теперь выкинем group by и заменим один window sort на window buffer
SQL> set autot on explain stat
SQL> alter session set nls_date_format='dd.mm.yyyy';

Сеанс изменен.

SQL> with tb as(
  2  select sysdate dt, 1 id, sysdate + 1 dat from dual union all
  3  select sysdate + 1 dt, 1 id, sysdate + 2 dat from dual union all
  4  select sysdate + 2 dt, 1 id, sysdate + 3 dat from dual union all
  5  select sysdate + 3 dt, 2 id, sysdate + 4 dat from dual union all
  6  select sysdate + 4 dt, 2 id, sysdate + 5 dat from dual union all
  7  select sysdate + 5 dt, 1 id, sysdate + 6 dat from dual union all
  8  select sysdate + 6 dt, 2 id, sysdate + 7 dat from dual union all
  9  select sysdate + 7 dt, 1 id, sysdate + 8 dat from dual union all
 10  select sysdate + 8 dt, 1 id, sysdate + 9 dat from dual union all
 11  select sysdate + 9 dt, 1 id, sysdate + 10 dat from dual union all
 12  select sysdate + 10 dt, 2 id, sysdate + 11 dat from dual)
 13  select id, min(dt) d1, max(dat) d2
 14  from
 15     (select dt, id, dat,
 16      row_number() over (order by dt) -
 17      row_number() over (partition by id order by dt) rn
 18      from tb
 19     )
 20  group by id, rn;

        ID D1         D2
---------- ---------- ----------
         1 15.03.2011 18.03.2011
         2 21.03.2011 22.03.2011
         2 25.03.2011 26.03.2011
         2 18.03.2011 20.03.2011
         1 20.03.2011 21.03.2011
         1 22.03.2011 25.03.2011

6 строк выбрано.


План выполнения
----------------------------------------------------------
Plan hash value: 803537873

-------------------------------------------------------------------------
| Id  | Operation        | Name | Rows  | Bytes | Cost (%CPU)| Time     |
-------------------------------------------------------------------------
|   0 | SELECT STATEMENT |      |    11 |   374 |    25  (12)| 00:00:01 |
|   1 |  HASH GROUP BY   |      |    11 |   374 |    25  (12)| 00:00:01 |
|   2 |   VIEW           |      |    11 |   374 |    24   (9)| 00:00:01 |
|   3 |    WINDOW SORT   |      |    11 |   231 |    24   (9)| 00:00:01 |
|   4 |     WINDOW SORT  |      |    11 |   231 |    24   (9)| 00:00:01 |
|   5 |      VIEW        |      |    11 |   231 |    22   (0)| 00:00:01 |
|   6 |       UNION-ALL  |      |       |       |            |          |
|   7 |        FAST DUAL |      |     1 |       |     2   (0)| 00:00:01 |
|   8 |        FAST DUAL |      |     1 |       |     2   (0)| 00:00:01 |
|   9 |        FAST DUAL |      |     1 |       |     2   (0)| 00:00:01 |
|  10 |        FAST DUAL |      |     1 |       |     2   (0)| 00:00:01 |
|  11 |        FAST DUAL |      |     1 |       |     2   (0)| 00:00:01 |
|  12 |        FAST DUAL |      |     1 |       |     2   (0)| 00:00:01 |
|  13 |        FAST DUAL |      |     1 |       |     2   (0)| 00:00:01 |
|  14 |        FAST DUAL |      |     1 |       |     2   (0)| 00:00:01 |
|  15 |        FAST DUAL |      |     1 |       |     2   (0)| 00:00:01 |
|  16 |        FAST DUAL |      |     1 |       |     2   (0)| 00:00:01 |
|  17 |        FAST DUAL |      |     1 |       |     2   (0)| 00:00:01 |
-------------------------------------------------------------------------


Статистика
----------------------------------------------------------
          0  recursive calls
          0  db block gets
          0  consistent gets
          0  physical reads
          0  redo size
        689  bytes sent via SQL*Net to client
        419  bytes received via SQL*Net from client
          2  SQL*Net roundtrips to/from client
          2  sorts (memory)
          0  sorts (disk)
          6  rows processed

------------------------------------------------------------------------------

SQL> with tb as(
  2  select sysdate     dt,  1 id, sysdate + 1 dat from dual union all
  3  select sysdate + 1 dt,  1 id, sysdate + 2 dat from dual union all
  4  select sysdate + 2 dt,  1 id, sysdate + 3 dat from dual union all
  5  select sysdate + 3 dt,  2 id, sysdate + 4 dat from dual union all
  6  select sysdate + 4 dt,  2 id, sysdate + 5 dat from dual union all
  7  select sysdate + 5 dt,  1 id, sysdate + 6 dat from dual union all
  8  select sysdate + 6 dt,  2 id, sysdate + 7 dat from dual union all
  9  select sysdate + 7 dt,  1 id, sysdate + 8 dat from dual union all
 10  select sysdate + 8 dt,  1 id, sysdate + 9 dat from dual union all
 11  select sysdate + 9 dt,  1 id, sysdate + 10 dat from dual union all
 12  select sysdate + 10 dt, 2 id, sysdate + 11 dat from dual)
 13  select id, dt, d2
 14  from
 15   (select id, dt, dcd,
 16    nvl(first_value(decode(dcd, 1, lg) ignore nulls) over (order by dt rows between 1 following and unbounded following),
 17        dat
 18      ) d2
 19    from
 20     (select dt, id, dat,
 21      lag(dat)       over (order by dt)            lg,
 22      decode(lag(id) over (order by dt), id, 0, 1) dcd
 23      from tb
 24     )
 25   )
 26  where dcd = 1;

        ID DT         D2
---------- ---------- ----------
         1 15.03.2011 18.03.2011
         2 18.03.2011 20.03.2011
         1 20.03.2011 21.03.2011
         2 21.03.2011 22.03.2011
         1 22.03.2011 25.03.2011
         2 25.03.2011 26.03.2011

6 строк выбрано.


План выполнения
----------------------------------------------------------
Plan hash value: 3539063114

-------------------------------------------------------------------------
| Id  | Operation        | Name | Rows  | Bytes | Cost (%CPU)| Time     |
-------------------------------------------------------------------------
|   0 | SELECT STATEMENT |      |    11 |   264 |    23   (5)| 00:00:01 |
|*  1 |  VIEW            |      |    11 |   264 |    23   (5)| 00:00:01 |
|   2 |   WINDOW BUFFER  |      |    11 |   363 |    23   (5)| 00:00:01 |
|   3 |    VIEW          |      |    11 |   363 |    23   (5)| 00:00:01 |
|   4 |     WINDOW SORT  |      |    11 |   231 |    23   (5)| 00:00:01 |
|   5 |      VIEW        |      |    11 |   231 |    22   (0)| 00:00:01 |
|   6 |       UNION-ALL  |      |       |       |            |          |
|   7 |        FAST DUAL |      |     1 |       |     2   (0)| 00:00:01 |
|   8 |        FAST DUAL |      |     1 |       |     2   (0)| 00:00:01 |
|   9 |        FAST DUAL |      |     1 |       |     2   (0)| 00:00:01 |
|  10 |        FAST DUAL |      |     1 |       |     2   (0)| 00:00:01 |
|  11 |        FAST DUAL |      |     1 |       |     2   (0)| 00:00:01 |
|  12 |        FAST DUAL |      |     1 |       |     2   (0)| 00:00:01 |
|  13 |        FAST DUAL |      |     1 |       |     2   (0)| 00:00:01 |
|  14 |        FAST DUAL |      |     1 |       |     2   (0)| 00:00:01 |
|  15 |        FAST DUAL |      |     1 |       |     2   (0)| 00:00:01 |
|  16 |        FAST DUAL |      |     1 |       |     2   (0)| 00:00:01 |
|  17 |        FAST DUAL |      |     1 |       |     2   (0)| 00:00:01 |
-------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   1 - filter("DCD"=1)


Статистика
----------------------------------------------------------
          1  recursive calls
          0  db block gets
          0  consistent gets
          0  physical reads
          0  redo size
        698  bytes sent via SQL*Net to client
        419  bytes received via SQL*Net from client
          2  SQL*Net roundtrips to/from client
          2  sorts (memory)
          0  sorts (disk)
          6  rows processed

SQL>
andreymx
Дата: 15.03.2011 22:23:38
не корысти ради :)
with tb as
(
    select sysdate      dt, 1 id, sysdate + 1 dat from dual union
    select sysdate +  1 dt, 1 id, sysdate + 2 dat from dual union
    select sysdate +  2 dt, 1 id, sysdate + 3 dat from dual union
    select sysdate +  3 dt, 2 id, sysdate + 4 dat from dual union
    select sysdate +  4 dt, 2 id, sysdate + 5 dat from dual union
    select sysdate +  5 dt, 1 id, sysdate + 6 dat from dual union
    select sysdate +  6 dt, 2 id, sysdate + 7 dat from dual union
    select sysdate +  7 dt, 1 id, sysdate + 8 dat from dual union
    select sysdate +  8 dt, 1 id, sysdate + 9 dat from dual union
    select sysdate +  9 dt, 1 id, sysdate + 10 dat from dual union
    select sysdate + 10 dt, 2 id, sysdate + 11 dat from dual
)
select min(dt), max(dt), id
from
(
    select id, dt, trim(sys_connect_by_path(case when level = 1 then dt end, ' ')) grp
      from tb
     start with not exists(select  null from tb tb1 where tb.dt = tb1.dat and tb.id = tb1.id)
     connect by dt=prior dat
           and id = prior id
)
group by id, grp
parkan4k
Дата: 15.04.2011 10:02:52
with t as(
select sysdate dt, 1 id, sysdate + 1 dat from dual union
select sysdate + 1 dt, 1 id, sysdate + 2 dat from dual union
select sysdate + 2 dt, 1 id, sysdate + 3 dat from dual union
select sysdate + 3 dt, 2 id, sysdate + 4 dat from dual union
select sysdate + 4 dt, 2 id, sysdate + 5 dat from dual union
select sysdate + 5 dt, 1 id, sysdate + 6 dat from dual union
select sysdate + 6 dt, 2 id, sysdate + 7 dat from dual union
select sysdate + 7 dt, 1 id, sysdate + 8 dat from dual union
select sysdate + 8 dt, 1 id, sysdate + 9 dat from dual union
select sysdate + 9 dt, 1 id, sysdate + 10 dat from dual union
select sysdate + 10 dt, 2 id, sysdate + 11 dat from dual), t2 as
(
select r,dt,id, dat,part from t
model
dimension by (row_number() over (order by dt) r)
measures(dt,id, dat, 0 part)
rules(
part[r>1]=case when id[cv()-1]=id[cv()] then part[cv()-1] else part[cv()-1]+1  end
)
)
select max(id), min(dt), max(dat) from t2 group by part
order by part