I am trying to create a view using a table have more than 5 billion rows.
using the following query:-
with
date_month as
(select distinct as_on_month from prod_edp_db_trusted.keyword_metrics_serp_dataforseo_v2_details
where as_on_date >= date_add('Month',-2,current_date)
),
sim_data as
(select *,date as sim_date,substring(date,1,4)||substring(date,6,2) as sim_yr_month
from ( select tenant,locale,search_engine,device_type,url,keyword,traffic,cast(date as varchar(255)) as
date,cast(organic_rank as decimal(38,0)) as organic_rank,create_date
,row_number() over(partition by tenant,locale,search_engine,device_type,url,keyword order by date
desc,create_date) as rn
from prod_edp_db_trusted.simulated_score_v3_4 WHERE save_type='simulation' ) where rn=1
),
serp_data as
(
select * from(
select sim.tenant,sim.locale,sim.search_engine,sim.device_type,sim.url,sim.keyword,sim.traffic,sim.organic_rank
as sim_rank,sim.sim_date as simulation_date,sim.sim_yr_month,srp.position as rank,
srp_mn.as_on_month as serp_year_month, srp.as_on_date as serp_as_on_date,
row_number() over(partition by
sim.tenant,sim.locale,sim.search_engine,sim.device_type,sim.url,sim.keyword,srp_mn.as_on_month
order by srp.as_on_date desc,srp.created_date) as rn
from sim_data sim
join date_month as srp_mn
on srp_mn.as_on_month<=sim.sim_yr_month or srp_mn.as_on_month>=sim.sim_yr_month
left join prod_edp_db_trusted.keyword_metrics_serp_dataforseo_v2_details srp
on srp.locale=sim.locale and srp.search_engine=sim.search_engine and srp.device_type=sim.device_type
and srp.url=sim.url and srp.keyword=sim.keyword --and srp.as_on_date>=sim.sim_date
and srp.as_on_month=srp_mn.as_on_month and
srp.as_on_date>=date_add('Month',-2,date_trunc('Month',current_date))
) where rn=1
),
sv_data AS
(
select tenant,locale,url,search_engine,keyword,search_volume,sv_yr_month
from
(
select
LKP.tenant
,MAIN.locale
,lkp.url
,MAIN.search_engine
,MAIN.keyword
,MAIN.count as search_volume
,MAIN.as_on_month as sv_yr_month
,row_number() over(partition by lkp.tenant,main.locale,lkp.url,main.search_engine,main.keyword
,as_on_month order by as_on_date desc,created_date) rn
from prod_edp_db_trusted.keyword_metrics_search_volume_dataforseo_v2 MAIN
JOIN (SELECT tenant,locale,url,search_engine,keyword,Min(SIM_YR_MONTH) AS SIM_YR_MONTH FROM
sim_data GROUP BY 1,2,3,4,5) LKP
ON MAIN.locale=LKP.locale AND MAIN.search_engine=LKP.search_engine and main.keyword=LKP.keyword
where MAIN.as_on_month IN(SELECT * FROM date_month )
) where rn=1
),
base_dataset AS
(
select
srp.tenant,srp.locale,srp.search_engine,srp.device_type,srp.url,srp.keyword,srp.sim_rank,srp.rank,srp.serp_year_
month,srp.traffic
,srp.serp_as_on_date,srp.simulation_date,srp.SIM_YR_MONTH
,sv.search_volume
from serp_data srp
left join sv_data sv on srp.tenant=sv.tenant and srp.locale=sv.locale and
srp.search_engine=sv.search_engine and srp.keyword=sv.keyword and srp.url=sv.url
and srp.serp_year_month=sv.sv_yr_month
),
sim_wavg as
(
(select * from base_dataset where sim_yr_month=serp_year_month)
union
(select tenant,locale,search_engine,device_type,url,keyword,sim_rank,rank,serp_year_month,traffic
,serp_as_on_date,simulation_date,SIM_YR_MONTH
,search_volume from (select *,row_number() over(partition by
tenant,locale,search_engine,device_type,url,keyword,simulation_date, SIM_YR_MONTH order by
serp_year_month) base_rn from base_dataset where sim_yr_month<(select min(as_on_month) from
date_month))t3 where base_rn=1 )
),
sim_latest_mnth as
(
select *
FROM
(
select *,row_number() over(partition by tenant,locale,search_engine,device_type,url,keyword ,simulation_date,
SIM_YR_MONTH order by serp_year_month desc) base_rn
from base_dataset
) t3 where base_rn=1
),
final_base AS
(
select
sim.tenant,sim.locale,sim.search_engine,sim.device_type,sim.url,sim.keyword,sim.sim_rank,sim.search_volume
,lkp.serp_year_month,lkp.serp_as_on_date,lkp.RANK as SERP_Rank,lkp.traffic
,sim.simulation_date,sim.SIM_YR_MONTH,lkp.search_volume as month_sv ,lst.search_volume as
latest_month_sv,sim.sim_rank-lst.rank as rank_diff
from sim_wavg sim
left join base_dataset lkp
on sim.tenant=lkp.tenant and sim.locale=lkp.locale and sim.search_engine=lkp.search_engine
and sim.device_type=lkp.device_type and sim.url=lkp.url and sim.keyword=lkp.keyword
left join sim_latest_mnth lst
on sim.tenant=lst.tenant and sim.locale=lst.locale and sim.search_engine=lst.search_engine
and sim.device_type=lst.device_type and sim.url=lst.url and sim.keyword=lst.keyword
)
select
tenant,locale,search_engine,device_type,url,keyword
,search_volume as "Simulation Month Search Volume",simulation_date,latest_month_sv,traffic as
"SIMULATION MONTH TRAFFIC"
,sim_rank as "Rank as on Last Simulation",rank_diff as "Rank Difference With Latest Month"
,kw_imp['202204'] as Current_Month
,kw_imp['202203'] as Last_Month
,kw_imp['202202'] as "2nd_Last_Month"
,kw_imp['202201'] as "3rd_Last_Month"
FROM
(
select
tenant,locale,search_engine,device_type,url,keyword,search_volume,latest_month_sv,simulation_date,traffic,si
m_rank ,rank_diff
,map_agg(serp_year_month,SERP_Rank) as kw_imp
from final_base
group by
tenant,locale,search_engine,device_type,url,keyword,search_volume,latest_month_sv,simulation_date,traffic,si
m_rank ,rank_diff
)
order by tenant,locale,search_engine,device_type,url,keyword;
Please make the effort to make your query readable here.