文章归档

在Oracle中手工对任务进行分区的方法

1. 根据dba_extents中的rowid分布,来得到需要处理的表的rowid范围以进行手工分区.
摘自 http://www.dautkhanov.com/2010/04/splitting-big-tables-for-parallel.html

CREATE OR REPLACE PROCEDURE  get_table_chunk_rowids2 (p_schema VARCHAR2, p_table VARCHAR2,
   p_chunks NUMBER, p_cur_chunk NUMBER,
   p_min_rowid OUT VARCHAR2, p_max_rowid OUT VARCHAR2
   )
AS
BEGIN
 SELECT q.r1, q.r2
   INTO p_min_rowid, p_max_rowid
 FROM
 (
  SELECT rownum rn
      ,  sys.DBMS_ROWID.rowid_create (1, d.oid, c.fid1, c.bid1, 0) r1
      ,  sys.DBMS_ROWID.rowid_create (1, d.oid, c.fid2, c.bid2, 9999) r2
  FROM (SELECT DISTINCT
         b.rn,
         FIRST_VALUE (a.fid)
          OVER ( PARTITION BY b.rn
          ORDER BY a.fid, a.bid
          ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) fid1,
         LAST_VALUE (a.fid)
          OVER ( PARTITION BY b.rn
          ORDER BY a.fid, a.bid
          ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) fid2,
         FIRST_VALUE (
     DECODE (SIGN (range2 - range1),
      1, a.bid + ( (b.rn - a.range1) * a.chunks1),
      a.bid) )
          OVER (
      PARTITION BY b.rn
      ORDER BY a.fid, a.bid
      ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) bid1,
         LAST_VALUE (
     DECODE (
        SIGN (range2 - range1),
        1, a.bid + ( (b.rn - a.range1 + 1) * a.chunks1) - 1,
        (a.bid + a.blocks - 1)))
          OVER (
      PARTITION BY b.rn
      ORDER BY a.fid, a.bid
      ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) bid2
    FROM (SELECT fid,
          bid,
          blocks,
          chunks1,
          TRUNC ( (sum2 - blocks + 1 - 0.1) / chunks1) range1,
          TRUNC ( (sum2 - 0.1) / chunks1) range2
     FROM (SELECT /*+ rule */
           relative_fno fid,
           block_id bid,
           blocks,
           SUM (blocks) OVER () sum1,
           TRUNC ( (SUM (blocks) OVER ()) / p_chunks) chunks1,
           SUM (blocks) OVER (ORDER BY relative_fno, block_id) sum2
      FROM dba_extents
     WHERE segment_name = UPPER (p_table) AND owner = UPPER(p_schema))
    WHERE sum1 > p_chunks) a,
         (    SELECT ROWNUM - 1 rn
         FROM DUAL
   CONNECT BY LEVEL <= p_chunks) b
   WHERE b.rn BETWEEN a.range1 AND a.range2) c,
  (SELECT MAX (data_object_id) oid
    FROM dba_objects
   WHERE     object_name = UPPER (p_table) AND owner = UPPER (p_schema)
         AND data_object_id IS NOT NULL) d
  ORDER BY d.oid, c.fid1, c.bid1
 ) q
 WHERE q.rn = p_cur_chunk;
END;

Jonathan Lewis也有类似的想法.
http://jonathanlewis.wordpress.com/2010/01/03/pseudo-parallel/

create table t1
pctfree 95
pctused  5
as
with generator as (
	select	--+ materialize
		rownum id
	from dual
	connect by
		rownum <= 10000
)
select
	rownum			id,
	lpad(rownum,10,'0')	small_vc,
	rpad('x',100)		padding
from
	generator	v1,
	generator	v2
where
	rownum <= 10000
;

select
	data_object_id
from 	user_objects
where	object_name = 'T1'
;

select
	extent_id, file_id, block_id, blocks
from
	dba_extents
where
	owner = 'TEST_USER'
and	segment_name = 'T1'
order by
	extent_id
;

set serveroutput off
set linesize 180

with rowid_range as (
	select
		/*+ materialize */
		dbms_rowid.rowid_create(
			1,
			&m_object,
			file_id,
			block_id,0
		)	low_rowid,
		dbms_rowid.rowid_create(
			1,
			&m_object,
			file_id,
			block_id+blocks-1,
			4095
		)	high_rowid
	from
		dba_extents
	where
		owner		= 'TEST_USER'
	and	segment_name	= 'T1'
	and	extent_id	= 8
)
select
	/*+
		gather_plan_statistics
		ordered
		use_nl(t1)
		rowid(t1)
	*/
	t1.rowid,
	t1.small_vc
from
	rowid_range rr,
	t1
where
	t1.rowid between rr.low_rowid and rr.high_rowid
;

select * from table(dbms_xplan.display_cursor(null,null,'allstats last'));

2. 我自己在工作为更新做的手工分任务的方法.

create table batch_job tablespace xxx as
select mod(rownum,50/* batch size*/) batch_id,id pk_id
from target_table
order by rowid;

create table batch_status tablespace xxx as
select batch_id,,0 flag/*imply is this batch processed or not processed*/
from (
  select distinct batch_id from batch_job
);

create batch_job_bid_ind on batch_job(batch_id) tablespace xxx;
create batch_status_pk on batch_status(batch_id) tablespace xxx;

create or replace procedure process_batch as
  v_batch_id number;
begin
  select batch_id into v_batch_id from batch_status where flag = 0 and rownum <= 1 skip locked;
  for rs in (select xxxx from batch_job a,target_table b where a.pk_id = b.id and a.batch_id = v_batch_id)
  loop
     statement processing;
  end loop;
  commit;
end;
/

--对此procedure 稍作修改, 就可以实现使用此procedure 来实现多个进程并发修改数据了. 不过前提是这个数据可以并行去处理,相互之间不要有什么依赖.

3. 如果你的数据库版本为11g, 可以使用DBMS_PARALLEL_EXECUTE来拆分任务.

No related posts.

1 comment to 在Oracle中手工对任务进行分区的方法

Leave a Reply

 

 

 

You can use these HTML tags

<a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <strike> <strong>