first commit
This commit is contained in:
888
андрюха1.ipynb
Normal file
888
андрюха1.ipynb
Normal file
@@ -0,0 +1,888 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Лабораторная работа 1\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# The path is relative to the notebook's working directory so the cell runs on any machine;\n",
|
||||
"# point DATA_PATH elsewhere if the workbook is stored in another folder.\n",
|
||||
"DATA_PATH = \"daily-weather-dataset_chronological-order.xlsx\"\n",
|
||||
"data = pd.read_excel(DATA_PATH, sheet_name=\"daily\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data.tail()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data.shape # Параметр .shape показывает размерность датафрейма"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data.size # Параметр .size показывает количество элементов в датафрейме"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data.count() # Метод count считает сколько всего непустых записей в каждом столбце"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data.info() # Метод .info() показывает тип каждого столбца и занимаемую память"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data.dtypes # Параметр .dtypes показывает просто тип каждого столбца"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data.isna().head() # Метод .isna() вместо каждого значения подставит True (значение NaN) или False (действительное значение)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data.isna().sum() # Подсчитаем количество пропусков в каждом столбце с помощью метода .sum()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"proc = data.isna().sum().sum() # Подсчитаем сколько всего пропусков (во всех столбцах) в нашем датафрейме\n",
|
||||
"print(proc) # Отобразим количество посчитанных пропусков"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"proc = data.isna().sum().sum() / data.size\n",
|
||||
"print(round(100*proc,1), '%', sep='')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df = pd.DataFrame(data)\n",
|
||||
"# Функция для удаления выбросов по IQR\n",
|
||||
"def remove_outliers(df, column):\n",
|
||||
"    \"\"\"Return the rows of ``df`` whose ``column`` value lies inside the 1.5*IQR fences.\n",
|
||||
"\n",
|
||||
"    The fences are Q1 - 1.5*IQR and Q3 + 1.5*IQR, both inclusive. Rows where the\n",
|
||||
"    value is NaN fail the range check and are therefore dropped as well.\n",
|
||||
"    \"\"\"\n",
|
||||
"    values = df[column]\n",
|
||||
"    q_low, q_high = values.quantile(0.25), values.quantile(0.75)\n",
|
||||
"    spread = q_high - q_low\n",
|
||||
"    keep = values.between(q_low - 1.5 * spread, q_high + 1.5 * spread)\n",
|
||||
"    return df[keep]\n",
|
||||
"\n",
|
||||
"# Удаляем выбросы из указанных столбцов\n",
|
||||
"columns_to_clean = [\"Cloud coverage\"]\n",
|
||||
"for col in columns_to_clean:\n",
|
||||
" df = remove_outliers(df, col)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df = df.dropna(axis=1, how='all')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 3. For forecasting solar generation the data is grouped by month / season so that the seasonality of output is taken into account.\n",
|
||||
"df[\"Date\"] = pd.to_datetime(df[\"Date\"])  # convert the 'Date' column to datetime\n",
|
||||
"df[\"Month\"] = df[\"Date\"].dt.month  # add a 'Month' column to group by\n",
|
||||
"# numeric_only=True keeps the datetime 'Date' column (and any text columns) out of the monthly averages\n",
|
||||
"df_monthly = df.groupby(\"Month\").mean(numeric_only=True)\n",
|
||||
"df_monthly"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#5\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import seaborn as sns\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 5.1\n",
|
||||
"# Numeric columns only, with the derived \"Month\" column left out\n",
|
||||
"numeric_features = df.select_dtypes(include=[\"number\"]).columns.drop(\"Month\")\n",
|
||||
"\n",
|
||||
"# One figure per numeric feature: density (KDE) on the left, box plot on the right\n",
|
||||
"for col in numeric_features:\n",
|
||||
"    fig, (ax_kde, ax_box) = plt.subplots(1, 2, figsize=(14, 4))\n",
|
||||
"\n",
|
||||
"    sns.kdeplot(data=df, x=col, ax=ax_kde)\n",
|
||||
"    ax_kde.set_title(f\"Распределение: {col}\")\n",
|
||||
"\n",
|
||||
"    sns.boxplot(data=df, x=col, ax=ax_box)\n",
|
||||
"    ax_box.set_title(f\"Boxplot: {col}\")\n",
|
||||
"\n",
|
||||
"    plt.show()\n",
|
||||
"\n",
|
||||
"#plt.figure(figsize=(8, 5))\n",
|
||||
"#sns.histplot(df['Visibility'], bins=20, kde=True)\n",
|
||||
"#plt.title(\"Гистограмма распределения параметра видимости\")\n",
|
||||
"#plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 5.3. Матрица корреляции\n",
|
||||
"# Позволяет увидеть, как связаны между собой числовые переменные и для поиска зависимостей между погодными параметрами\n",
|
||||
"# Чем ближе значение к 1 или -1, тем сильнее положительная или отрицательная корреляция\n",
|
||||
"plt.figure(figsize=(8, 5))\n",
|
||||
"sns.heatmap(df_monthly.corr(), annot=True, cmap='coolwarm', fmt=\".2f\")\n",
|
||||
"plt.title(\"Матрица корреляции погодных параметров\")\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 5.4. Диаграмма рассеяния облачности и солнечной энергии\n",
|
||||
"# Показывает взаимосвязь между облачностью и уровнем солнечной энергии.\n",
|
||||
"plt.figure(figsize=(8, 5))\n",
|
||||
"sns.scatterplot(x=df['Cloud coverage'], y=df['Solar energy'])\n",
|
||||
"plt.title(\"Диаграмма рассеяния: Облачность vs Солнечная энергия\")\n",
|
||||
"plt.show()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 5.5. Mean cloud coverage by month\n",
|
||||
"# Shows the seasonal trend in cloudiness and makes it easy to see\n",
|
||||
"# in which months cloud coverage is higher or lower.\n",
|
||||
"plt.figure(figsize=(8, 5))\n",
|
||||
"# The chart is titled as a monthly mean, so aggregate with the mean rather than the sum\n",
|
||||
"sns.barplot(x=df['Month'], y=df['Cloud coverage'], estimator=np.mean)\n",
|
||||
"plt.title(\"Среднее значение облачности по месяцам\")\n",
|
||||
"plt.show()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.feature_selection import VarianceThreshold\n",
|
||||
"\n",
|
||||
"# Оставляем только числовые признаки\n",
|
||||
"numeric_features = df.select_dtypes(include=[\"number\"])\n",
|
||||
"\n",
|
||||
"# Удаляем признаки с дисперсией ниже 0.01\n",
|
||||
"selector = VarianceThreshold(threshold=0.01)\n",
|
||||
"df_var = selector.fit_transform(numeric_features)\n",
|
||||
"\n",
|
||||
"# Получаем оставшиеся названия признаков\n",
|
||||
"selected_features = numeric_features.columns[selector.get_support()]\n",
|
||||
"df_selected = df[selected_features]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.feature_selection import SelectKBest, f_regression\n",
|
||||
"\n",
|
||||
"# Убираем столбец с датами и выбираем только числовые признаки\n",
|
||||
"X = df.select_dtypes(include=[\"number\"]).drop(columns=[\"Solar energy\"]) \n",
|
||||
"y = df[\"Solar energy\"] # Целевая переменная\n",
|
||||
"\n",
|
||||
"# Отбор 5 лучших признаков\n",
|
||||
"selector = SelectKBest(score_func=f_regression, k=5)\n",
|
||||
"X_new = selector.fit_transform(X, y)\n",
|
||||
"\n",
|
||||
"# Выводим выбранные признаки\n",
|
||||
"selected_features = X.columns[selector.get_support()]\n",
|
||||
"print(\"Выбранные признаки:\", selected_features)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.feature_selection import RFE\n",
|
||||
"from sklearn.linear_model import LinearRegression\n",
|
||||
"\n",
|
||||
"model = LinearRegression()\n",
|
||||
"rfe = RFE(model, n_features_to_select=5)\n",
|
||||
"X_rfe = rfe.fit_transform(X, y)\n",
|
||||
"\n",
|
||||
"selected_features = X.columns[rfe.support_]\n",
|
||||
"print(\"Новые лучшие признаки:\", selected_features)\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df[\"Temp_Cloud\"] = df[\"Temperature\"] * df[\"Cloud coverage\"]\n",
|
||||
"print(\"\\nDataFrame с добавленным признаком 'Temp_Cloud':\")\n",
|
||||
"df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 7\n",
|
||||
"plt.figure(figsize=(8, 5))\n",
|
||||
"sns.histplot(df[\"Solar energy\"], bins=30, kde=True)\n",
|
||||
"plt.title(\"Распределение целевой переменной (Solar Energy)\")\n",
|
||||
"plt.xlabel(\"Solar Energy\")\n",
|
||||
"plt.ylabel(\"Частота\")\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"plt.figure(figsize=(12, 6))\n",
|
||||
"sns.boxplot(x=df[\"Month\"], y=df[\"Solar energy\"])\n",
|
||||
"plt.title(\"Распределение Solar Energy по месяцам\")\n",
|
||||
"plt.show()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#8\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"# Drop the non-feature columns in one call; errors=\"ignore\" keeps the cell re-runnable\n",
|
||||
"# after df has already been trimmed by a previous execution.\n",
|
||||
"df = df.drop(columns=[\"Date\", \"Month\"], errors=\"ignore\")\n",
|
||||
"y = df[\"Solar energy\"]\n",
|
||||
"# Features are every remaining column except the target\n",
|
||||
"X = df.drop(columns=[\"Solar energy\"])\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Разбиение (80% train, 20% test)\n",
|
||||
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
|
||||
"# Вывод размеров получившихся выборок\n",
|
||||
"print(f\"Размер X_train: {X_train.shape}\")\n",
|
||||
"print(f\"Размер X_test: {X_test.shape}\")\n",
|
||||
"print(f\"Размер y_train: {y_train.shape}\")\n",
|
||||
"print(f\"Размер y_test: {y_test.shape}\")\n",
|
||||
"print(\"Обучающая выборка X:\")\n",
|
||||
"print(X_train)\n",
|
||||
"print(\"\\nТестовая выборка X:\")\n",
|
||||
"print(X_test)\n",
|
||||
"print(\"\\nОбучающая выборка y:\")\n",
|
||||
"print(y_train)\n",
|
||||
"print(\"\\nТестовая выборка y:\")\n",
|
||||
"print(y_test)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Лабораторная работа 2\n",
|
||||
"from sklearn.preprocessing import StandardScaler\n",
|
||||
"from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n",
|
||||
"\n",
|
||||
"# Модели машинного обучения\n",
|
||||
"from sklearn.neighbors import KNeighborsRegressor\n",
|
||||
"from sklearn.tree import DecisionTreeRegressor\n",
|
||||
"from sklearn.ensemble import RandomForestRegressor\n",
|
||||
"from catboost import CatBoostRegressor\n",
|
||||
"from tensorflow import keras\n",
|
||||
"from tensorflow.keras import layers\n",
|
||||
"from tensorflow.keras.callbacks import EarlyStopping\n",
|
||||
"from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, KFold\n",
|
||||
"from sklearn.neural_network import MLPRegressor\n",
|
||||
"cv = KFold(n_splits=5, shuffle=True, random_state=42)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"scaler = StandardScaler()\n",
|
||||
"X_train = scaler.fit_transform(X_train)\n",
|
||||
"X_test = scaler.transform(X_test)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Knn\n",
|
||||
"knn_params = {\n",
|
||||
" 'n_neighbors': [3, 5, 7, 9],\n",
|
||||
" 'weights': ['uniform', 'distance'],\n",
|
||||
" 'metric': ['euclidean', 'manhattan']\n",
|
||||
"}\n",
|
||||
"knn_grid = GridSearchCV(KNeighborsRegressor(), knn_params, cv=cv, scoring='r2', n_jobs=-1)\n",
|
||||
"knn_grid.fit(X_train, y_train)\n",
|
||||
"print(\"Best KNN:\", knn_grid.best_params_, \"Best R²:\", knn_grid.best_score_)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Дерево решений\n",
|
||||
"dt_params = {\n",
|
||||
" 'max_depth': [3, 5, 10, 15],\n",
|
||||
" 'min_samples_split': [2, 5, 10],\n",
|
||||
" 'min_samples_leaf': [1, 2, 5]\n",
|
||||
"}\n",
|
||||
"dt_grid = GridSearchCV(DecisionTreeRegressor(random_state=42), dt_params, cv=cv, scoring='r2', n_jobs=-1)\n",
|
||||
"dt_grid.fit(X_train, y_train)\n",
|
||||
"print(\"Best Decision Tree:\", dt_grid.best_params_, \"Best R²:\", dt_grid.best_score_)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Случайный лес\n",
|
||||
"rf_params = {\n",
|
||||
" 'n_estimators': [100, 200, 300],\n",
|
||||
" 'max_depth': [5, 10, 15],\n",
|
||||
" 'min_samples_split': [2, 5, 10]\n",
|
||||
"}\n",
|
||||
"rf_grid = GridSearchCV(RandomForestRegressor(random_state=42), rf_params, cv=cv, scoring='r2', n_jobs=-1)\n",
|
||||
"rf_grid.fit(X_train, y_train)\n",
|
||||
"print(\"Best Random Forest:\", rf_grid.best_params_, \"Best R²:\", rf_grid.best_score_)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Градиентный бустинг\n",
|
||||
"cat_params = {\n",
|
||||
" 'iterations': [100, 500, 1000],\n",
|
||||
" 'learning_rate': [0.01, 0.05, 0.1],\n",
|
||||
" 'depth': [4, 6, 8]\n",
|
||||
"}\n",
|
||||
"# random_state fixes which 10 parameter combinations are sampled, so the search is repeatable\n",
|
||||
"cat_grid = RandomizedSearchCV(CatBoostRegressor(verbose=0, random_state=42), cat_params, cv=cv, scoring='r2', n_jobs=-1, n_iter=10, random_state=42)\n",
|
||||
"cat_grid.fit(X_train, y_train)\n",
|
||||
"print(\"Best CatBoost:\", cat_grid.best_params_, \"Best R²:\", cat_grid.best_score_)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Нейронная сеть\n",
|
||||
"mlp_params = {\n",
|
||||
" 'hidden_layer_sizes': [(50,), (100,), (50, 50)],\n",
|
||||
" 'activation': ['relu', 'tanh'],\n",
|
||||
" 'alpha': [0.0001, 0.001, 0.01]\n",
|
||||
"}\n",
|
||||
"# random_state fixes which 10 parameter combinations are sampled, so the search is repeatable\n",
|
||||
"mlp_grid = RandomizedSearchCV(MLPRegressor(max_iter=5500, random_state=42), mlp_params, cv=cv, scoring='r2', n_jobs=-1, n_iter=10, random_state=42)\n",
|
||||
"mlp_grid.fit(X_train, y_train)\n",
|
||||
"print(\"Best MLP:\", mlp_grid.best_params_, \"Best R²:\", mlp_grid.best_score_)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Сравнение моделей\n",
|
||||
"# Best cross-validated R2 of each search, keyed by the label shown on the chart\n",
|
||||
"cv_r2_by_model = {\n",
|
||||
"    \"KNN\": knn_grid.best_score_,\n",
|
||||
"    \"Decision Tree\": dt_grid.best_score_,\n",
|
||||
"    \"Random Forest\": rf_grid.best_score_,\n",
|
||||
"    \"CatBoost\": cat_grid.best_score_,\n",
|
||||
"    \"Neural Network\": mlp_grid.best_score_,\n",
|
||||
"}\n",
|
||||
"models = list(cv_r2_by_model)\n",
|
||||
"scores = list(cv_r2_by_model.values())\n",
|
||||
"\n",
|
||||
"plt.figure(figsize=(10, 5))\n",
|
||||
"sns.barplot(x=models, y=scores)\n",
|
||||
"plt.ylabel(\"R2 Score\")\n",
|
||||
"plt.title(\"Сравнение моделей машинного обучения\")\n",
|
||||
"plt.show()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Лабораторная работа №3 - Оценка моделей\n",
|
||||
"knn_best = knn_grid.best_estimator_\n",
|
||||
"dt_best = dt_grid.best_estimator_\n",
|
||||
"rf_best = rf_grid.best_estimator_\n",
|
||||
"cat_best = cat_grid.best_estimator_\n",
|
||||
"mlp_best = mlp_grid.best_estimator_\n",
|
||||
"def evaluate_model(model, X_test, y_test):\n",
|
||||
"    \"\"\"Score a fitted regressor on hold-out data.\n",
|
||||
"\n",
|
||||
"    Args:\n",
|
||||
"        model: fitted estimator exposing ``predict``.\n",
|
||||
"        X_test: feature matrix passed to ``model.predict``.\n",
|
||||
"        y_test: true target values aligned with ``X_test``.\n",
|
||||
"\n",
|
||||
"    Returns:\n",
|
||||
"        dict with keys 'MAE', 'MSE', 'RMSE', 'MAPE' (in percent) and 'R2'.\n",
|
||||
"        MAPE is averaged over samples whose true value is non-zero, because the\n",
|
||||
"        relative error is undefined at zero; it is NaN if every true value is zero.\n",
|
||||
"    \"\"\"\n",
|
||||
"    y_true = np.asarray(y_test, dtype=float)\n",
|
||||
"    y_pred = np.asarray(model.predict(X_test), dtype=float)\n",
|
||||
"    mae = mean_absolute_error(y_true, y_pred)\n",
|
||||
"    mse = mean_squared_error(y_true, y_pred)\n",
|
||||
"    rmse = np.sqrt(mse)\n",
|
||||
"    # Dividing by a zero target would turn MAPE into inf/NaN, so those samples are masked out\n",
|
||||
"    nonzero = y_true != 0\n",
|
||||
"    if nonzero.any():\n",
|
||||
"        mape = np.mean(np.abs((y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero])) * 100\n",
|
||||
"    else:\n",
|
||||
"        mape = np.nan\n",
|
||||
"    r2 = r2_score(y_true, y_pred)\n",
|
||||
"    return {'MAE': mae, 'MSE': mse, 'RMSE': rmse, 'MAPE': mape, 'R2': r2}\n",
|
||||
"\n",
|
||||
"models = {'KNN': knn_best, 'Decision Tree': dt_best, 'Random Forest': rf_best, 'CatBoost': cat_best, 'MLP': mlp_best}\n",
|
||||
"\n",
|
||||
"for name, model in models.items():\n",
|
||||
" results = evaluate_model(model, X_test, y_test)\n",
|
||||
" print(f\"{name} Evaluation: {results}\")\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Visualising the results\n",
|
||||
"# Score every tuned model on the hold-out set: one record per model\n",
|
||||
"metric_rows = [\n",
|
||||
"    {'Model': name, **evaluate_model(model, X_test, y_test)}\n",
|
||||
"    for name, model in models.items()\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"# Table indexed by model name, with one column per metric\n",
|
||||
"metrics_df = pd.DataFrame(metric_rows, columns=['Model', 'MAE', 'MSE', 'RMSE', 'MAPE', 'R2']).set_index('Model')\n",
|
||||
"# Column-wise dict of the same results ('Model' plus one list per metric)\n",
|
||||
"metrics = metrics_df.reset_index().to_dict(orient='list')\n",
|
||||
"\n",
|
||||
"# Названия метрик и их описания для графиков\n",
|
||||
"metric_labels = {\n",
|
||||
" 'MAE': 'Средняя абсолютная ошибка (MAE)',\n",
|
||||
" 'MSE': 'Среднеквадратическая ошибка (MSE)',\n",
|
||||
" 'RMSE': 'Корень из MSE (RMSE)',\n",
|
||||
" 'MAPE': 'Средняя абсолютная процентная ошибка (MAPE)',\n",
|
||||
" 'R2': 'Коэффициент детерминации (R²)'\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"# Отображение каждого графика отдельно\n",
|
||||
"for metric in metrics_df.columns:\n",
|
||||
" plt.figure(figsize=(8, 5))\n",
|
||||
" sns.barplot(\n",
|
||||
" x=metrics_df.index, \n",
|
||||
" y=metrics_df[metric], \n",
|
||||
" hue=metrics_df.index, # Добавляем hue\n",
|
||||
" palette='viridis', \n",
|
||||
" edgecolor='black',\n",
|
||||
" legend=False # Отключаем легенду, так как цвета соответствуют x\n",
|
||||
" )\n",
|
||||
" plt.title(f'Сравнение моделей по {metric_labels[metric]}', fontsize=14)\n",
|
||||
" plt.xlabel(\"Модель\", fontsize=12)\n",
|
||||
" plt.ylabel(metric_labels[metric], fontsize=12)\n",
|
||||
" plt.xticks(rotation=45)\n",
|
||||
" plt.grid(axis='y', linestyle='--', alpha=0.7)\n",
|
||||
" plt.show()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Визуализация фактических vs предсказанных значений для лучшей модели (по R²)\n",
|
||||
"best_model_name = metrics_df.sort_values(by='R2', ascending=False).index[0]\n",
|
||||
"best_model = models[best_model_name]\n",
|
||||
"\n",
|
||||
"y_pred_best = best_model.predict(X_test)\n",
|
||||
"\n",
|
||||
"plt.figure(figsize=(8, 8))\n",
|
||||
"sns.scatterplot(x=y_test, y=y_pred_best, alpha=0.6)\n",
|
||||
"plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], '--r', label=\"Идеальное предсказание\")\n",
|
||||
"plt.xlabel(\"Фактические значения\", fontsize=12)\n",
|
||||
"plt.ylabel(\"Предсказанные значения\", fontsize=12)\n",
|
||||
"plt.title(f\"Фактические vs. Предсказанные ({best_model_name})\", fontsize=14)\n",
|
||||
"plt.legend()\n",
|
||||
"plt.grid(True)\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Лабораторная работа №4\n",
|
||||
"# Drop the two features excluded for this lab in a single call; errors=\"ignore\" lets the cell be\n",
|
||||
"# re-run after the columns are already gone (at the cost of not flagging a misspelled column name).\n",
|
||||
"df = df.drop(columns=[\"Altimeter\", \"Temp_Cloud\"], errors=\"ignore\")\n",
|
||||
"y = df[\"Solar energy\"]\n",
|
||||
"X = df.drop(columns=[\"Solar energy\"])\n",
|
||||
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
|
||||
"scaler = StandardScaler()\n",
|
||||
"X_train = scaler.fit_transform(X_train)\n",
|
||||
"X_test = scaler.transform(X_test)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Knn\n",
|
||||
"\n",
|
||||
"knn_params = {\n",
|
||||
"\n",
|
||||
" 'n_neighbors': [3, 5, 7, 9],\n",
|
||||
"\n",
|
||||
" 'weights': ['uniform', 'distance'],\n",
|
||||
"\n",
|
||||
" 'metric': ['euclidean', 'manhattan']\n",
|
||||
"\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"knn_grid = GridSearchCV(KNeighborsRegressor(), knn_params, cv=cv, scoring='r2', n_jobs=-1)\n",
|
||||
"\n",
|
||||
"knn_grid.fit(X_train, y_train)\n",
|
||||
"\n",
|
||||
"print(\"Best KNN:\", knn_grid.best_params_, \"Best R²:\", knn_grid.best_score_)\n",
|
||||
"\n",
|
||||
"# Дерево решений\n",
|
||||
"\n",
|
||||
"dt_params = {\n",
|
||||
"\n",
|
||||
" 'max_depth': [3, 5, 10, 15],\n",
|
||||
"\n",
|
||||
" 'min_samples_split': [2, 5, 10],\n",
|
||||
"\n",
|
||||
" 'min_samples_leaf': [1, 2, 5]\n",
|
||||
"\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"dt_grid = GridSearchCV(DecisionTreeRegressor(random_state=42), dt_params, cv=cv, scoring='r2', n_jobs=-1)\n",
|
||||
"\n",
|
||||
"dt_grid.fit(X_train, y_train)\n",
|
||||
"\n",
|
||||
"print(\"Best Decision Tree:\", dt_grid.best_params_, \"Best R²:\", dt_grid.best_score_)\n",
|
||||
"\n",
|
||||
"# Случайный лес\n",
|
||||
"\n",
|
||||
"rf_params = {\n",
|
||||
"\n",
|
||||
" 'n_estimators': [100, 200, 300],\n",
|
||||
"\n",
|
||||
" 'max_depth': [5, 10, 15],\n",
|
||||
"\n",
|
||||
" 'min_samples_split': [2, 5, 10]\n",
|
||||
"\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"rf_grid = GridSearchCV(RandomForestRegressor(random_state=42), rf_params, cv=cv, scoring='r2', n_jobs=-1)\n",
|
||||
"\n",
|
||||
"rf_grid.fit(X_train, y_train)\n",
|
||||
"\n",
|
||||
"print(\"Best Random Forest:\", rf_grid.best_params_, \"Best R²:\", rf_grid.best_score_)\n",
|
||||
"\n",
|
||||
"# Градиентный бустинг\n",
|
||||
"\n",
|
||||
"cat_params = {\n",
|
||||
"\n",
|
||||
" 'iterations': [100, 500, 1000],\n",
|
||||
"\n",
|
||||
" 'learning_rate': [0.01, 0.05, 0.1],\n",
|
||||
"\n",
|
||||
" 'depth': [4, 6, 8]\n",
|
||||
"\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"cat_grid = RandomizedSearchCV(CatBoostRegressor(verbose=0, random_state=42), cat_params, cv=cv, scoring='r2', n_jobs=-1, n_iter=10)\n",
|
||||
"\n",
|
||||
"cat_grid.fit(X_train, y_train)\n",
|
||||
"\n",
|
||||
"print(\"Best CatBoost:\", cat_grid.best_params_, \"Best R²:\", cat_grid.best_score_)\n",
|
||||
"\n",
|
||||
"# Нейронная сеть\n",
|
||||
"\n",
|
||||
"mlp_params = {\n",
|
||||
"\n",
|
||||
" 'hidden_layer_sizes': [(50,), (100,), (50, 50)],\n",
|
||||
"\n",
|
||||
" 'activation': ['relu', 'tanh'],\n",
|
||||
"\n",
|
||||
" 'alpha': [0.0001, 0.001, 0.01]\n",
|
||||
"\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"mlp_grid = RandomizedSearchCV(MLPRegressor(max_iter=5500, random_state=42), mlp_params, cv=cv, scoring='r2', n_jobs=-1, n_iter=10)\n",
|
||||
"\n",
|
||||
"mlp_grid.fit(X_train, y_train)\n",
|
||||
"\n",
|
||||
"print(\"Best MLP:\", mlp_grid.best_params_, \"Best R²:\", mlp_grid.best_score_)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"knn_best = knn_grid.best_estimator_\n",
|
||||
"dt_best = dt_grid.best_estimator_\n",
|
||||
"rf_best = rf_grid.best_estimator_\n",
|
||||
"cat_best = cat_grid.best_estimator_\n",
|
||||
"mlp_best = mlp_grid.best_estimator_\n",
|
||||
"def evaluate_model(model, X_test, y_test):\n",
|
||||
" y_pred = model.predict(X_test)\n",
|
||||
" mae = mean_absolute_error(y_test, y_pred)\n",
|
||||
" mse = mean_squared_error(y_test, y_pred)\n",
|
||||
" rmse = np.sqrt(mse)\n",
|
||||
" mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100\n",
|
||||
" r2 = r2_score(y_test, y_pred)\n",
|
||||
" return {'MAE': mae, 'MSE': mse, 'RMSE': rmse, 'MAPE': mape, 'R2': r2}\n",
|
||||
"\n",
|
||||
"models = {'KNN': knn_best, 'Decision Tree': dt_best, 'Random Forest': rf_best, 'CatBoost': cat_best, 'MLP': mlp_best}\n",
|
||||
"\n",
|
||||
"for name, model in models.items():\n",
|
||||
" results = evaluate_model(model, X_test, y_test)\n",
|
||||
" print(f\"{name} Evaluation: {results}\")\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"\n",
|
||||
"# Список моделей и их названий\n",
|
||||
"models = {\n",
|
||||
" \"KNN\": knn_best,\n",
|
||||
" \"Decision Tree\": dt_best,\n",
|
||||
" \"Random Forest\": rf_best,\n",
|
||||
" \"CatBoost\": cat_best,\n",
|
||||
" \"MLP\": mlp_best\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"# Создаем DataFrame для предсказанных значений\n",
|
||||
"predictions_df = pd.DataFrame({\"Actual\": y_test})\n",
|
||||
"\n",
|
||||
"# Генерируем предсказания для каждой модели\n",
|
||||
"for name, model in models.items():\n",
|
||||
" predictions_df[name] = model.predict(X_test)\n",
|
||||
"\n",
|
||||
"# Выводим первые 10 строк предсказаний\n",
|
||||
"print(predictions_df.head(10))\n",
|
||||
"\n",
|
||||
"# Визуализация предсказаний\n",
|
||||
"plt.figure(figsize=(12, 6))\n",
|
||||
"plt.plot(predictions_df[\"Actual\"].values, label=\"Actual\", color=\"black\", linewidth=2)\n",
|
||||
"\n",
|
||||
"for name in models.keys():\n",
|
||||
" plt.plot(predictions_df[name].values, label=name, linestyle=\"--\")\n",
|
||||
"\n",
|
||||
"plt.legend()\n",
|
||||
"plt.title(\"Actual vs Predicted Values\")\n",
|
||||
"plt.xlabel(\"Samples\")\n",
|
||||
"plt.ylabel(\"Solar Energy Output\")\n",
|
||||
"plt.show()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pickle\n",
|
||||
"\n",
|
||||
"# Список моделей и их названий\n",
|
||||
"models = {\n",
|
||||
" \"KNN\": knn_best,\n",
|
||||
" \"Decision_Tree\": dt_best,\n",
|
||||
" \"Random_Forest\": rf_best,\n",
|
||||
" \"CatBoost\": cat_best,\n",
|
||||
" \"MLP\": mlp_best\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"# Сохраняем каждую модель в файл .pkl\n",
|
||||
"for name, model in models.items():\n",
|
||||
" with open(f\"{name}.pkl\", \"wb\") as file:\n",
|
||||
" pickle.dump(model, file)\n",
|
||||
"\n",
|
||||
"print(\"Все модели сохранены в формате .pkl!\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
Reference in New Issue
Block a user