问题

预测模型执行代码过慢,想要快速预测。
每次都是将历史数据作为训练数据喂给模型,然后让模型输出预测数据。这个模型的训练过程太慢。

解决

(1)将训练好的模型存储起来,每次预测时直接加载已存储的模型进行预测,跳过训练环节;
(2)有新数据的时候,可以在不紧急的时候训练模型并存储起来;依照执行速度,可以设定更新频率,比如2个小时更新一次模型,1天更新一次模型。

实现

建立路径:
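(原图缺失)目录结构示意:包目录 fbprophet_persistent_prodict/ 下包含 quikly_predict.py 和 data/ 目录;data/ 用于存放序列化后的模型文件,首次实例化 data_quikly_predict 时会自动创建。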
quikly_predict.py 内容

import os

from fbprophet import Prophet
import pandas as pd
import matplotlib.pyplot as plt
import math
import pickle



class data_quikly_predict:
    """Train Prophet models once, persist them with pickle, and reuse them.

    Each model is stored as one pickle file inside ``data_dir``; the file
    name is the model name. Prediction loads the stored model instead of
    re-fitting, which skips the slow training step.
    """

    # Directory that contains this source file.
    pwd = os.path.dirname(os.path.abspath(__file__))
    # Directory where the pickled models live.
    data_dir = os.path.join(pwd, "data")

    def __init__(self):
        """Create the model directory if needed and index the stored models."""
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(self.data_dir, exist_ok=True)
        self.model_list = self.find_all_file(self.data_dir)

    def update_model_list(self):
        """Re-scan ``data_dir`` and refresh ``self.model_list``."""
        self.model_list = self.find_all_file(self.data_dir)

    def find_all_file(self, dirname):
        """Return the paths of all files found under *dirname*, recursively.

        :param dirname: directory to walk
        :return: list of file paths, each built by joining the walked
                 directory with the file name
        """
        return [
            os.path.join(maindir, filename)
            for maindir, _subdirs, file_name_list in os.walk(dirname)
            for filename in file_name_list
        ]

    def _load_model(self, model_name: str):
        """Load and return the pickled model stored under *model_name*.

        :param model_name: name of the model file inside ``data_dir``
        :return: the unpickled model object
        :raises FileNotFoundError: if no such model file is indexed, even
                 after re-scanning ``data_dir``
        """
        model_file = os.path.join(self.data_dir, model_name)
        if model_file not in self.model_list:
            # The file may have been written (e.g. by a separate training
            # process) after this instance indexed the directory.
            self.update_model_list()
        if model_file not in self.model_list:
            raise FileNotFoundError("没有在data里找到这个模型文件,请确保用数据训练出了可使用的模型\r\n")
        # NOTE(security): pickle.load can execute arbitrary code. Only load
        # model files that this application wrote itself.
        with open(model_file, 'rb') as f:
            return pickle.load(f)

    def create_new_model(self, df: pd.DataFrame, new_model_name: str):
        """Fit a new Prophet model on *df* and store it under *new_model_name*.

        An existing model with the same name is overwritten.

        :param df: training data; must contain the columns 'ds' and 'y'
        :param new_model_name: name of the model (used as the file name)
        :return: None
        :raises ValueError: if 'ds' or 'y' is missing from *df*
        """
        missing = [col for col in ("ds", "y") if col not in df.columns]
        if missing:
            # Explicit raise instead of assert: asserts vanish under -O.
            raise ValueError(
                "df is missing required column(s): " + ", ".join(missing)
            )
        # assign() returns a new frame, so the caller's df is not modified.
        train_df = df.assign(ds=df['ds'].astype('datetime64[ns]'))
        model = Prophet(yearly_seasonality=True, weekly_seasonality=True)
        model.fit(train_df)

        model_file = os.path.join(self.data_dir, new_model_name)
        if os.path.isfile(model_file):
            print("模型名称已经被使用,但即将被置换\r\n")

        with open(model_file, 'wb') as f:
            pickle.dump(model, f)

        self.update_model_list()

    def predict(self, future_time: pd.DataFrame, model_name: str):
        """Predict values for the given timestamps with a stored model.

        :param future_time: DataFrame of the timestamps to predict, holding
                            a single 'ds' column
        :param model_name: name of the stored model to use
        :return: DataFrame with the columns 'ds' and 'yhat'
        :raises FileNotFoundError: if the model file does not exist
        """
        model = self._load_model(model_name)
        forecast = model.predict(future_time)
        return forecast[['ds', 'yhat']]

    def dummy_predict(self, periods, freq: str, model_name: str):
        """Predict *periods* steps past the end of the model's training data.

        Convenience wrapper that builds the future timestamps itself, so the
        caller does not have to construct a DataFrame.

        :param periods: number of future points to predict
        :param freq: spacing of the future points, as a frequency string
                     (the demo code in this file uses 'H')
        :param model_name: name of the stored model to use
        :return: DataFrame with the columns 'ds' and 'yhat'
        :raises FileNotFoundError: if the model file does not exist
        """
        model = self._load_model(model_name)
        # include_history=False: only the future timestamps are generated.
        future_time = model.make_future_dataframe(
            periods=periods, freq=freq, include_history=False
        )
        forecast = model.predict(future_time)
        return forecast[['ds', 'yhat']]

    def quikly_plot_forecast(self, forecast: pd.DataFrame):
        """Plot 'yhat' against 'ds' as a blue line and show the figure.

        :param forecast: DataFrame with the columns 'ds' and 'yhat'
        """
        plt.plot(list(forecast['ds']), list(forecast['yhat']), color='b')
        plt.show()



if __name__ == '__main__':
    # Demo: train a model on one year of hourly synthetic data, store it as
    # "camera1", then predict a later time window with it and plot the result.
    predictor = data_quikly_predict()

    hourly_stamps = list(
        pd.date_range(start='2021-01-01 00:00:00', end='2022-01-01 00:00:00', freq='H')
    )
    # Target value is the sine of each timestamp's hour of day.
    training_df = pd.DataFrame({
        'ds': hourly_stamps,
        'y': [math.sin(stamp.hour) for stamp in hourly_stamps],
    })
    predictor.create_new_model(training_df, "camera1")

    # Timestamps to predict; nothing is predicted until predict() is called.
    horizon = pd.date_range(start='2022-05-01 00:00:00',
                            end='2022-05-05 01:00:00',
                            freq='H')
    future = pd.DataFrame({'ds': list(horizon)})
    forecast_res = predictor.predict(future, model_name="camera1")
    predictor.quikly_plot_forecast(forecast_res)

如何使用

下面代码是正常过程,先执行训练,然后预测数据。

import math

import pandas as pd

from fbprophet_persistent_prodict.quikly_predict import data_quikly_predict

# Normal flow: train and store the model first, then predict with it.
predictor = data_quikly_predict()

# One year of hourly timestamps; the target is the sine of the hour of day.
hourly_stamps = list(
    pd.date_range(start='2021-01-01 00:00:00', end='2022-01-01 00:00:00', freq='H')
)
training_df = pd.DataFrame({
    'ds': hourly_stamps,
    'y': [math.sin(stamp.hour) for stamp in hourly_stamps],
})
predictor.create_new_model(training_df, "camera6")

# Timestamps to predict; nothing is predicted until predict() is called.
horizon = pd.date_range(start='2022-05-01 00:00:00',
                        end='2022-05-05 01:00:00',
                        freq='H')
future = pd.DataFrame({'ds': list(horizon)})
forecast_res = predictor.predict(future, model_name="camera6")
predictor.quikly_plot_forecast(forecast_res)

先预测,后用训练更新模型:

import math
import pandas as pd
from fbprophet_persistent_prodict.quikly_predict import data_quikly_predict

# Predict first with the already-stored "camera6" model ...
predictor = data_quikly_predict()
horizon = pd.date_range(start='2022-05-01 00:00:00',
                        end='2022-05-05 01:00:00',
                        freq='H')
future = pd.DataFrame({'ds': list(horizon)})
forecast_res = predictor.predict(future, model_name="camera6")

# ... then retrain and overwrite the stored model afterwards.
hourly_stamps = list(
    pd.date_range(start='2021-01-01 00:00:00', end='2022-01-01 00:00:00', freq='H')
)
training_df = pd.DataFrame({
    'ds': hourly_stamps,
    'y': [math.sin(stamp.hour) for stamp in hourly_stamps],
})
predictor.create_new_model(training_df, "camera6")
Logo

技术共进,成长同行——讯飞AI开发者社区

更多推荐