Как оптимизировать время выполнения скрипта?

its_me
Дата: 27.11.2014 14:20:14
Один и тот же скрипт с разным условием выбирает разные методы выполнения и как следствие разное время на выходе, как это исправить оставив метод с минимальным временем выполнения?

Инфа:

Скрипт 1:

explain analyze SELECT count(DISTINCT c.card_id) 
FROM  card c 

		 JOIN attribute_value av99 ON ( 
			 		(av99.card_id = c.card_id) 
					AND ( av99.attribute_code IN ('NAME'))
					AND ( av99.string_value IS NOT NULL)
					AND UPPER(av99.string_value) like UPPER('%пись%') 
		 ) 
	INNER JOIN card_access as ca ON ( 
		ca.object_id = c.status_id AND 
		ca.template_id = c.template_id AND 
		ca.permission_type in (2) 
	) 
	LEFT JOIN attribute_value as avUsr ON ( 
		avUsr.card_id = c.card_id AND 
		avUsr.attribute_code = ca.person_attribute_code AND 
		avUsr.number_value = 7820  
	) 
WHERE (1=1) 
AND( 
		(ca.role_code IS NULL AND ca.person_attribute_code IS NULL) 
		OR (  
			(  
				ca.role_code is NULL  
				OR EXISTS(  
						select 1  
						from person_role pr left join person_role_template prt  
							on pr.prole_id = prt.prole_id  
						where   
							COALESCE(prt.template_id, ca.template_id) = ca.template_id  
							and pr.role_code = ca.role_code  
							and pr.person_id = 7820  
				) -- OR EXISTS  
			) --   
			AND (  
				ca.person_attribute_code IS NULL  
				OR avUsr.attribute_code is not NULL  
			) -- AND  
		) -- OR  
) -- AND 

		 AND c.template_id = (2300) 

		 AND c.status_id IN (4,7);


Результат:

"Aggregate  (cost=1511.95..1511.96 rows=1 width=6) (actual time=7435.362..7435.362 rows=1 loops=1)"
" -> Nested Loop Left Join (cost=51.90..1511.95 rows=1 width=6) (actual time=6291.725..7434.568 rows=1 loops=1)"
" Filter: (((ca.role_code IS NULL) AND (ca.person_attribute_code IS NULL)) OR (((ca.role_code IS NULL) OR (SubPlan 1)) AND ((ca.person_attribute_code IS NULL) OR (avusr.attribute_code IS NOT NULL))))"
" -> Nested Loop (cost=51.33..1417.82 rows=1 width=33) (actual time=6291.717..7434.560 rows=1 loops=1)"
" -> Nested Loop (cost=51.05..1416.65 rows=1 width=16) (actual time=6291.684..7434.523 rows=1 loops=1)"
" -> Bitmap Heap Scan on attribute_value av99 (cost=50.62..1213.49 rows=24 width=6) (actual time=1260.930..3970.662 rows=582949 loops=1)"
" Recheck Cond: ((upper((string_value)::text) ~~ '%ПИСЬ%'::text) AND ((attribute_code)::text = 'NAME'::text))"
" Rows Removed by Index Recheck: 10"
" Filter: (string_value IS NOT NULL)"
" -> Bitmap Index Scan on test_pg_trgm_string_value_idx (cost=0.00..50.61 rows=293 width=0) (actual time=1098.093..1098.093 rows=584965 loops=1)"
" Index Cond: (upper((string_value)::text) ~~ '%ПИСЬ%'::text)"
" -> Index Scan using test_card2_id_idx on card c (cost=0.43..8.46 rows=1 width=16) (actual time=0.005..0.005 rows=0 loops=582949)"
" Index Cond: (card_id = av99.card_id)"
" Filter: ((status_id = ANY ('{4,7}'::numeric[])) AND (template_id = 2300::numeric))"
" Rows Removed by Filter: 1"
" -> Index Scan using card_access_idx_tmpl_objid on card_access ca (cost=0.29..1.17 rows=1 width=32) (actual time=0.019..0.023 rows=1 loops=1)"
" Index Cond: ((object_id = c.status_id) AND (template_id = 2300::numeric))"
" Filter: (permission_type = 2::numeric)"
" Rows Removed by Filter: 3"
" -> Index Scan using xif6attribute_value on attribute_value avusr (cost=0.57..16.36 rows=1 width=21) (actual time=0.001..0.001 rows=0 loops=1)"
" Index Cond: ((card_id = c.card_id) AND ((attribute_code)::text = (ca.person_attribute_code)::text))"
" Filter: (number_value = 7820::numeric)"
" SubPlan 1"
" -> Nested Loop Left Join (cost=0.70..77.76 rows=1 width=0) (never executed)"
" Filter: (COALESCE(prt.template_id, ca.template_id) = ca.template_id)"
" -> Index Scan using xif1person_role on person_role pr (cost=0.28..12.76 rows=1 width=6) (never executed)"
" Index Cond: (person_id = 7820::numeric)"
" Filter: ((role_code)::text = (ca.role_code)::text)"
" -> Index Scan using xif2person_role_template on person_role_template prt (cost=0.42..64.60 rows=32 width=12) (never executed)"
" Index Cond: (pr.prole_id = prole_id)"
"Total runtime: 7435.589 ms"

Скрипт 2:

explain analyze SELECT count(DISTINCT c.card_id) 
FROM  card c 

		 JOIN attribute_value av99 ON ( 
			 		(av99.card_id = c.card_id) 
					AND ( av99.attribute_code IN ('NAME'))
					AND ( av99.string_value IS NOT NULL)
					AND UPPER(av99.string_value) like UPPER('%пис%') 
		 ) 
	INNER JOIN card_access as ca ON ( 
		ca.object_id = c.status_id AND 
		ca.template_id = c.template_id AND 
		ca.permission_type in (2) 
	) 
	LEFT JOIN attribute_value as avUsr ON ( 
		avUsr.card_id = c.card_id AND 
		avUsr.attribute_code = ca.person_attribute_code AND 
		avUsr.number_value = 7820  
	) 
WHERE (1=1) 
AND( 
		(ca.role_code IS NULL AND ca.person_attribute_code IS NULL) 
		OR (  
			(  
				ca.role_code is NULL  
				OR EXISTS(  
						select 1  
						from person_role pr left join person_role_template prt  
							on pr.prole_id = prt.prole_id  
						where   
							COALESCE(prt.template_id, ca.template_id) = ca.template_id  
							and pr.role_code = ca.role_code  
							and pr.person_id = 7820  
				) -- OR EXISTS  
			) --   
			AND (  
				ca.person_attribute_code IS NULL  
				OR avUsr.attribute_code is not NULL  
			) -- AND  
		) -- OR  
) -- AND 

		 AND c.template_id = (2300) 

		 AND c.status_id IN (4,7);


Результат:

"Aggregate  (cost=3300.11..3300.12 rows=1 width=6) (actual time=2.329..2.329 rows=1 loops=1)"
" -> Nested Loop Left Join (cost=14.68..3300.10 rows=1 width=6) (actual time=0.257..2.292 rows=6 loops=1)"
" Filter: (((ca.role_code IS NULL) AND (ca.person_attribute_code IS NULL)) OR (((ca.role_code IS NULL) OR (SubPlan 1)) AND ((ca.person_attribute_code IS NULL) OR (avusr.attribute_code IS NOT NULL))))"
" -> Nested Loop (cost=14.12..3205.98 rows=1 width=33) (actual time=0.250..2.280 rows=6 loops=1)"
" -> Nested Loop (cost=13.44..887.25 rows=184 width=33) (actual time=0.114..0.555 rows=214 loops=1)"
" -> Bitmap Heap Scan on card_access ca (cost=4.45..43.07 rows=16 width=32) (actual time=0.022..0.027 rows=29 loops=1)"
" Recheck Cond: ((permission_type = 2::numeric) AND (template_id = 2300::numeric))"
" -> Bitmap Index Scan on card_access_idx_tmpl_perm_objid (cost=0.00..4.45 rows=16 width=0) (actual time=0.016..0.016 rows=29 loops=1)"
" Index Cond: ((permission_type = 2::numeric) AND (template_id = 2300::numeric))"
" -> Bitmap Heap Scan on card c (cost=8.99..52.65 rows=11 width=16) (actual time=0.011..0.015 rows=7 loops=29)"
" Recheck Cond: ((template_id = 2300::numeric) AND (status_id = ca.object_id) AND (status_id = ANY ('{4,7}'::numeric[])))"
" -> Bitmap Index Scan on template_and_status_idx (cost=0.00..8.98 rows=11 width=0) (actual time=0.006..0.006 rows=7 loops=29)"
" Index Cond: ((template_id = 2300::numeric) AND (status_id = ca.object_id) AND (status_id = ANY ('{4,7}'::numeric[])))"
" -> Index Scan using attribute_value_name_string_value_idx on attribute_value av99 (cost=0.68..12.59 rows=1 width=6) (actual time=0.008..0.008 rows=0 loops=214)"
" Index Cond: ((card_id = c.card_id) AND (string_value IS NOT NULL))"
" Filter: (upper((string_value)::text) ~~ '%ПИС%'::text)"
" Rows Removed by Filter: 1"
" -> Index Scan using xif6attribute_value on attribute_value avusr (cost=0.57..16.36 rows=1 width=21) (actual time=0.001..0.001 rows=0 loops=6)"
" Index Cond: ((card_id = c.card_id) AND ((attribute_code)::text = (ca.person_attribute_code)::text))"
" Filter: (number_value = 7820::numeric)"
" SubPlan 1"
" -> Nested Loop Left Join (cost=0.70..77.76 rows=1 width=0) (never executed)"
" Filter: (COALESCE(prt.template_id, ca.template_id) = ca.template_id)"
" -> Index Scan using xif1person_role on person_role pr (cost=0.28..12.76 rows=1 width=6) (never executed)"
" Index Cond: (person_id = 7820::numeric)"
" Filter: ((role_code)::text = (ca.role_code)::text)"
" -> Index Scan using xif2person_role_template on person_role_template prt (cost=0.42..64.60 rows=32 width=12) (never executed)"
" Index Cond: (pr.prole_id = prole_id)"
"Total runtime: 2.462 ms"

Индексы:

(данный индекс создавался для другого похожего запроса и оправдал себя в нём на 2000%, в отличие от данного)
CREATE INDEX test_pg_trgm_string_value_idx
  ON attribute_value
  USING gist
  (upper(string_value::text) COLLATE pg_catalog."default" gist_trgm_ops)
  WHERE attribute_code::text = 'NAME'::text;


CREATE INDEX attribute_value_name_string_value_idx
  ON attribute_value
  USING btree
  (card_id, string_value COLLATE pg_catalog."default")
  WHERE attribute_code::text = 'NAME'::text;


Разница в скриптах в отличие от времени их выполнения смешная:
%пись%
и
%пис%
.
Maxim Boguk
Дата: 27.11.2014 15:00:12
its_me,

тут явно неудачно оценивается в первом запросе селективность триграмного индекса
можно отключить его использование костылем через замену
AND UPPER(av99.string_value) like UPPER('%пись%') 

на например
UPPER(av99.string_value)||'' like UPPER('%пись%')


--Maxim Boguk
www.postgresql-consulting.ru
its_me
Дата: 27.11.2014 15:02:32
Забыл про один момент - скрипт неизменен(вшит в приложение безвозвратно).
Maxim Boguk
Дата: 27.11.2014 15:11:07
its_me
Забыл про один момент - скрипт неизменен(вшит в приложение безвозвратно).


никак или удалить trigram index

--Maxim Boguk
www.postgresql-consulting.ru
its_me
Дата: 27.11.2014 15:19:01
Maxim Boguk
its_me
Забыл про один момент - скрипт неизменен(вшит в приложение безвозвратно).


никак или удалить trigram index

--Maxim Boguk
www.postgresql-consulting.ru


а причину почему он так меняет своё настроение от одной буквы(варианты, предположения)?
Maxim Boguk
Дата: 27.11.2014 15:59:22
its_me
Maxim Boguk
пропущено...


никак или удалить trigram index

--Maxim Boguk
www.postgresql-consulting.ru


а причину почему он так меняет своё настроение от одной буквы(варианты, предположения)?


прогоните:

explain analyze select count(*) from attribute_value where UPPER(string_value) like UPPER('%пис%');
explain analyze select count(*) from attribute_value where UPPER(string_value) like UPPER('%пись%');

explain analyze select count(*) from attribute_value where UPPER(string_value) like UPPER('%пис%') and attribute_code IN ('NAME');
explain analyze select count(*) from attribute_value where UPPER(string_value) like UPPER('%пись%') and attribute_code IN ('NAME');



тогда я что то смогу сказать.

Но на практике использование attribute_value таблиц - к проблемам на любой базе ВСЕГДА И БЕЗ ВАРИАНТОВ.
Это самый худший известный мне дизайн-антипаттерн для базы.

--Maxim Boguk
www.postgresql-consulting.ru
its_me
Дата: 27.11.2014 16:26:13
explain analyze select count(*) from attribute_value where UPPER(string_value) like UPPER('%пис%');

"Aggregate  (cost=2658611.39..2658611.40 rows=1 width=0) (actual time=60314.300..60314.300 rows=1 loops=1)"
" -> Seq Scan on attribute_value (cost=0.00..2658529.12 rows=32908 width=0) (actual time=0.064..60107.549 rows=1327281 loops=1)"
" Filter: (upper((string_value)::text) ~~ '%ПИС%'::text)"
" Rows Removed by Filter: 97970140"
"Total runtime: 60314.370 ms"
explain analyze select count(*) from attribute_value where UPPER(string_value) like UPPER('%пись%');

"Aggregate  (cost=2658902.64..2658902.65 rows=1 width=0) (actual time=34218.475..34218.475 rows=1 loops=1)"
" -> Seq Scan on attribute_value (cost=0.00..2658876.92 rows=10285 width=0) (actual time=0.097..34053.918 rows=1171865 loops=1)"
" Filter: (upper((string_value)::text) ~~ '%ПИСЬ%'::text)"
" Rows Removed by Filter: 98126052"
"Total runtime: 34218.552 ms"
explain analyze select count(*) from attribute_value where UPPER(string_value) like UPPER('%пис%') and attribute_code IN ('NAME');

"Aggregate  (cost=3844.76..3844.77 rows=1 width=0) (actual time=4449.045..4449.046 rows=1 loops=1)"
" -> Bitmap Heap Scan on attribute_value (cost=151.69..3842.42 rows=939 width=0) (actual time=1354.105..4364.992 rows=657892 loops=1)"
" Recheck Cond: ((upper((string_value)::text) ~~ '%ПИС%'::text) AND ((attribute_code)::text = 'NAME'::text))"
" -> Bitmap Index Scan on test_pg_trgm_string_value_idx (cost=0.00..151.46 rows=939 width=0) (actual time=1180.696..1180.696 rows=660266 loops=1)"
" Index Cond: (upper((string_value)::text) ~~ '%ПИС%'::text)"
"Total runtime: 4450.540 ms"
explain analyze select count(*) from attribute_value where UPPER(string_value) like UPPER('%пись%') and attribute_code IN ('NAME');

"Aggregate  (cost=1214.30..1214.31 rows=1 width=0) (actual time=4087.474..4087.474 rows=1 loops=1)"
" -> Bitmap Heap Scan on attribute_value (cost=50.69..1213.57 rows=293 width=0) (actual time=1561.460..4022.354 rows=583112 loops=1)"
" Recheck Cond: ((upper((string_value)::text) ~~ '%ПИСЬ%'::text) AND ((attribute_code)::text = 'NAME'::text))"
" Rows Removed by Index Recheck: 10"
" -> Bitmap Index Scan on test_pg_trgm_string_value_idx (cost=0.00..50.61 rows=293 width=0) (actual time=1404.071..1404.071 rows=585265 loops=1)"
" Index Cond: (upper((string_value)::text) ~~ '%ПИСЬ%'::text)"
"Total runtime: 4088.617 ms"
Maxim Boguk
Дата: 27.11.2014 16:50:29
its_me,

сложно сказать
но основная проблема в том что база ожидает 10000 строк отUPPER(string_value) like UPPER('%пись%')
а получает миллион строк...
какой уж тут эффективный план угадать.

без переписывания запросы вы эту задачу не решите.
its_me
Дата: 27.11.2014 16:59:39
можно ли создать другой индекс, чтобы на него клюнул, например типа:

CREATE INDEX test_pg_trgm_card_id_string_value_idx
  ON attribute_value
  USING gist
  (card_id, upper(string_value::text) gist_trgm_ops)
  WHERE attribute_code = 'NAME';

но такой выдает ошибку...
Maxim Boguk
Дата: 27.11.2014 17:09:19
its_me,

для такого индекса надо btree_gin contrib доставить (и то без гарантий)