feat: linear imputation in Resample (#19393)
This commit is contained in:
parent
6b9113a17b
commit
a39dd4493e
|
|
@ -170,6 +170,7 @@ export const advancedAnalyticsControls: ControlPanelSectionConfig = {
|
|||
choices: [
|
||||
['asfreq', 'Null imputation'],
|
||||
['zerofill', 'Zero imputation'],
|
||||
['linear', 'Linear interpolation'],
|
||||
['ffill', 'Forward values'],
|
||||
['bfill', 'Backward values'],
|
||||
['median', 'Median values'],
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ import pandas as pd
|
|||
from flask_babel import gettext as _
|
||||
|
||||
from superset.exceptions import InvalidPostProcessingError
|
||||
from superset.utils.pandas_postprocessing.utils import RESAMPLE_METHOD
|
||||
|
||||
|
||||
def resample(
|
||||
|
|
@ -40,9 +41,15 @@ def resample(
|
|||
"""
|
||||
if not isinstance(df.index, pd.DatetimeIndex):
|
||||
raise InvalidPostProcessingError(_("Resample operation requires DatetimeIndex"))
|
||||
if method not in RESAMPLE_METHOD:
|
||||
raise InvalidPostProcessingError(
|
||||
_("Resample method should in ") + ", ".join(RESAMPLE_METHOD) + "."
|
||||
)
|
||||
|
||||
if method == "asfreq" and fill_value is not None:
|
||||
_df = df.resample(rule).asfreq(fill_value=fill_value)
|
||||
elif method == "linear":
|
||||
_df = df.resample(rule).interpolate()
|
||||
else:
|
||||
_df = getattr(df.resample(rule), method)()
|
||||
return _df
|
||||
|
|
|
|||
|
|
@ -92,6 +92,8 @@ PROPHET_TIME_GRAIN_MAP = {
|
|||
"P1W/1970-01-04T00:00:00Z": "W",
|
||||
}
|
||||
|
||||
RESAMPLE_METHOD = ("asfreq", "bfill", "ffill", "linear", "median", "mean", "sum")
|
||||
|
||||
FLAT_COLUMN_SEPARATOR = ", "
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -14,8 +14,10 @@
|
|||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pytest
|
||||
from pandas import to_datetime
|
||||
|
||||
from superset.exceptions import InvalidPostProcessingError
|
||||
from superset.utils import pandas_postprocessing as pp
|
||||
|
|
@ -151,3 +153,47 @@ def test_resample_should_raise_ex():
|
|||
pp.resample(
|
||||
df=categories_df, rule="1D", method="asfreq",
|
||||
)
|
||||
|
||||
with pytest.raises(InvalidPostProcessingError):
|
||||
pp.resample(
|
||||
df=timeseries_df, rule="1D", method="foobar",
|
||||
)
|
||||
|
||||
|
||||
def test_resample_linear():
    """Check that ``method="linear"`` fills numeric gaps by linear interpolation.

    The input has three observations spread over eight days. After resampling
    to a daily rule, the numeric ``y`` column is expected to be interpolated
    while the string ``label`` column stays NaN on the inserted rows.
    """
    df = pd.DataFrame(
        index=to_datetime(["2019-01-01", "2019-01-05", "2019-01-08"]),
        data={"label": ["a", "e", "j"], "y": [1.0, 5.0, 8.0]},
    )
    post_df = pp.resample(df=df, rule="1D", method="linear")

    # Expected result:
    #
    #            label    y
    # 2019-01-01     a  1.0
    # 2019-01-02   NaN  2.0
    # 2019-01-03   NaN  3.0
    # 2019-01-04   NaN  4.0
    # 2019-01-05     e  5.0
    # 2019-01-06   NaN  6.0
    # 2019-01-07   NaN  7.0
    # 2019-01-08     j  8.0
    expected_df = pd.DataFrame(
        index=pd.to_datetime(
            [
                "2019-01-01",
                "2019-01-02",
                "2019-01-03",
                "2019-01-04",
                "2019-01-05",
                "2019-01-06",
                "2019-01-07",
                "2019-01-08",
            ]
        ),
        data={
            # Use np.nan: the np.NaN alias was removed in NumPy 2.0.
            "label": ["a", np.nan, np.nan, np.nan, "e", np.nan, np.nan, "j"],
            "y": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
        },
    )
    assert post_df.equals(expected_df)
|
||||
|
|
|
|||
Loading…
Reference in New Issue