Hi!
I am trying to incorporate media "burn-in" effects by having `n_media_times = n_times + max_lag`, as described in the API reference (`media` attribute). I suspect there is an issue in the code, though, because the `DataFrameInputDataBuilder.with_media` method sets `time_col`, whereas `DataFrameInputDataBuilder.with_organic_media` sets `media_time_col`.
Thus, when my input dataframe has `max_lag` more rows at the beginning for my media data than for my KPI, I get the following error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[107], line 13
1 data_loader = DataFrameDataLoader(
2 df=df,
3 coord_to_columns=coord_to_columns,
(...)
11 #organic_frequency_to_channel=organic_frequency_to_channel,
12 )
---> 13 data = data_loader.load()
File /opt/conda/lib/python3.10/site-packages/meridian/data/load.py:759, in DataFrameDataLoader.load(self)
754 # So now we can use one of the channel mapper dicts to get the canonical
755 # channel names for each column.
756 media_channel_names = [
757 self.media_to_channel[c] for c in self.coord_to_columns.media
758 ]
--> 759 builder.with_media(
760 self.df,
761 media_execution_columns,
762 media_spend_columns,
763 media_channel_names,
764 self.coord_to_columns.time,
765 self.coord_to_columns.geo,
766 )
768 if (
769 self.reach_to_channel is not None
770 and self.frequency_to_channel is not None
(...)
774 # listed in `reach`, `frequency`, and `rf_spend` are already validated
775 # to correspond to the same channels, in user-given order.
776 reach_columns = list(self.coord_to_columns.reach)
File /opt/conda/lib/python3.10/site-packages/meridian/data/data_frame_input_data_builder.py:381, in DataFrameInputDataBuilder.with_media(self, df, media_cols, media_spend_cols, media_channels, time_col, geo_col)
379 media_spend_data = media_df.set_index([geo_col, time_col])[media_spend_cols]
380 media_spend_data.columns = media_channels
--> 381 self.media_spend = (
382 media_spend_data.stack()
383 .rename(constants.MEDIA_SPEND)
384 .rename_axis([
385 constants.GEO,
386 constants.TIME,
387 constants.MEDIA_CHANNEL,
388 ])
389 .to_xarray()
390 )
391 return self
File /opt/conda/lib/python3.10/site-packages/meridian/data/input_data_builder.py:338, in InputDataBuilder.media_spend(self, media_spend)
336 self._media_spend = self._normalize_coords(media_spend, constants.TIME)
337 self.geos = self.media_spend.coords[constants.GEO].values.tolist()
--> 338 self.time_coords = self.media_spend.coords[constants.TIME].values.tolist()
File /opt/conda/lib/python3.10/site-packages/meridian/data/input_data_builder.py:85, in InputDataBuilder.time_coords(self, value)
83 raise ValueError('`times` coords must be unique.')
84 if self.time_coords is not None and set(self.time_coords) != set(value):
---> 85 raise ValueError(f'`times` coords already set to {self.time_coords}.')
86 if self.media_time_coords is not None and not set(value).issubset(
87 self.media_time_coords
88 ):
89 raise ValueError(
90 '`times` coords must be subset of previously set `media_times`'
91 ' coords.'
92 )
ValueError: `times` coords already set to ['2022-07-21', '2022-07-22', ...
It seems that there is a mismatch in the time coordinates, which stems from the fact that my media data has more rows than the rest of my data.
Hi!
I am trying to incorporate media "burn-in" effects by having
n_media_times = n_times + max_lag, as described in the API reference (media attribute). I suspect that there is an issue in the code though as the DataFrameInputDataBuilder.with_media method sets time_col. In contrast, the DataFrameInputDataBuilder.with_organic_media sets media_time_col. Thus when I have my input dataframe that has
max_lag more rows in the beginning for my media data than for my kpi I get the following error:
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In[107], line 13 1 data_loader = DataFrameDataLoader( 2 df=df, 3 coord_to_columns=coord_to_columns, (...) 11 #organic_frequency_to_channel=organic_frequency_to_channel, 12 ) ---> 13 data = data_loader.load() File /opt/conda/lib/python3.10/site-packages/meridian/data/load.py:759, in DataFrameDataLoader.load(self) 754 # So now we can use one of the channel mapper dicts to get the canonical 755 # channel names for each column. 756 media_channel_names = [ 757 self.media_to_channel[c] for c in self.coord_to_columns.media 758 ] --> 759 builder.with_media( 760 self.df, 761 media_execution_columns, 762 media_spend_columns, 763 media_channel_names, 764 self.coord_to_columns.time, 765 self.coord_to_columns.geo, 766 ) 768 if ( 769 self.reach_to_channel is not None 770 and self.frequency_to_channel is not None (...) 774 # listed in `reach`, `frequency`, and `rf_spend` are already validated 775 # to correspond to the same channels, in user-given order.
776 reach_columns = list(self.coord_to_columns.reach) File /opt/conda/lib/python3.10/site-packages/meridian/data/data_frame_input_data_builder.py:381, in DataFrameInputDataBuilder.with_media(self, df, media_cols, media_spend_cols, media_channels, time_col, geo_col) 379 media_spend_data = media_df.set_index([geo_col, time_col])[media_spend_cols] 380 media_spend_data.columns = media_channels --> 381 self.media_spend = ( 382 media_spend_data.stack() 383 .rename(constants.MEDIA_SPEND) 384 .rename_axis([ 385 constants.GEO, 386 constants.TIME, 387 constants.MEDIA_CHANNEL, 388 ]) 389 .to_xarray() 390 ) 391 return self File /opt/conda/lib/python3.10/site-packages/meridian/data/input_data_builder.py:338, in InputDataBuilder.media_spend(self, media_spend) 336 self._media_spend = self._normalize_coords(media_spend, constants.TIME) 337 self.geos = self.media_spend.coords[constants.GEO].values.tolist() --> 338 self.time_coords = self.media_spend.coords[constants.TIME].values.tolist() File /opt/conda/lib/python3.10/site-packages/meridian/data/input_data_builder.py:85, in InputDataBuilder.time_coords(self, value) 83 raise ValueError('`times` coords must be unique.') 84 if self.time_coords is not None and set(self.time_coords) != set(value): ---> 85 raise ValueError(f'`times` coords already set to {self.time_coords}.') 86 if self.media_time_coords is not None and not set(value).issubset( 87 self.media_time_coords 88 ): 89 raise ValueError( 90 '`times` coords must be subset of previously set `media_times`' 91 ' coords.' 92 ) ValueError: `times` coords already set to ['2022-07-21', '2022-07-22', ...
It seems that there is a mismatch in the time value, which stems from the fact that my media data has more rows than my remaining data.