first commit

2025-04-04 13:28:56 +03:00
commit 2adb419f83
89 changed files with 113532 additions and 0 deletions
--- a/андрюха1.ipynb
+++ b/андрюха1.ipynb
@@ -0,0 +1,888 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Лабораторная работа 1\n",
+    "import pandas as pd\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "FileNotFoundError",
+     "evalue": "[Errno 2] No such file or directory: 'C:\\\\Users\\\\Cyber\\\\Downloads\\\\daily-weather-dataset_chronological-order.xlsx'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[2], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_excel\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43mr\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mC:\u001b[39;49m\u001b[38;5;124;43m\\\u001b[39;49m\u001b[38;5;124;43mUsers\u001b[39;49m\u001b[38;5;124;43m\\\u001b[39;49m\u001b[38;5;124;43mCyber\u001b[39;49m\u001b[38;5;124;43m\\\u001b[39;49m\u001b[38;5;124;43mDownloads\u001b[39;49m\u001b[38;5;124;43m\\\u001b[39;49m\u001b[38;5;124;43mdaily-weather-dataset_chronological-order.xlsx\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msheet_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdaily\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/Nextcloud/#Учёба/институт/#4 Курс/Системы искусственного интеллекта/Jupyter/venv/lib/python3.13/site-packages/pandas/io/excel/_base.py:495\u001b[0m, in \u001b[0;36mread_excel\u001b[0;34m(io, sheet_name, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, parse_dates, date_parser, date_format, thousands, decimal, comment, skipfooter, storage_options, dtype_backend, engine_kwargs)\u001b[0m\n\u001b[1;32m    493\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(io, ExcelFile):\n\u001b[1;32m    494\u001b[0m     should_close \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m--> 495\u001b[0m     io \u001b[38;5;241m=\u001b[39m \u001b[43mExcelFile\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    496\u001b[0m \u001b[43m        \u001b[49m\u001b[43mio\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    497\u001b[0m \u001b[43m        \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstorage_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    498\u001b[0m \u001b[43m        \u001b[49m\u001b[43mengine\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mengine\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    499\u001b[0m \u001b[43m        \u001b[49m\u001b[43mengine_kwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mengine_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    500\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    501\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m engine \u001b[38;5;129;01mand\u001b[39;00m engine \u001b[38;5;241m!=\u001b[39m io\u001b[38;5;241m.\u001b[39mengine:\n\u001b[1;32m    502\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m    503\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mEngine should 
not be specified when passing \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    504\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124man ExcelFile - ExcelFile already has the engine set\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    505\u001b[0m     )\n",
+      "File \u001b[0;32m~/Nextcloud/#Учёба/институт/#4 Курс/Системы искусственного интеллекта/Jupyter/venv/lib/python3.13/site-packages/pandas/io/excel/_base.py:1550\u001b[0m, in \u001b[0;36mExcelFile.__init__\u001b[0;34m(self, path_or_buffer, engine, storage_options, engine_kwargs)\u001b[0m\n\u001b[1;32m   1548\u001b[0m     ext \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mxls\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   1549\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1550\u001b[0m     ext \u001b[38;5;241m=\u001b[39m \u001b[43minspect_excel_format\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1551\u001b[0m \u001b[43m        \u001b[49m\u001b[43mcontent_or_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstorage_options\u001b[49m\n\u001b[1;32m   1552\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1553\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m ext \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m   1554\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m   1555\u001b[0m             \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mExcel file format cannot be determined, you must specify \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   1556\u001b[0m             \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124man engine manually.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   1557\u001b[0m         )\n",
+      "File \u001b[0;32m~/Nextcloud/#Учёба/институт/#4 Курс/Системы искусственного интеллекта/Jupyter/venv/lib/python3.13/site-packages/pandas/io/excel/_base.py:1402\u001b[0m, in \u001b[0;36minspect_excel_format\u001b[0;34m(content_or_path, storage_options)\u001b[0m\n\u001b[1;32m   1399\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(content_or_path, \u001b[38;5;28mbytes\u001b[39m):\n\u001b[1;32m   1400\u001b[0m     content_or_path \u001b[38;5;241m=\u001b[39m BytesIO(content_or_path)\n\u001b[0;32m-> 1402\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[43mget_handle\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1403\u001b[0m \u001b[43m    \u001b[49m\u001b[43mcontent_or_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrb\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstorage_options\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mis_text\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\n\u001b[1;32m   1404\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m handle:\n\u001b[1;32m   1405\u001b[0m     stream \u001b[38;5;241m=\u001b[39m handle\u001b[38;5;241m.\u001b[39mhandle\n\u001b[1;32m   1406\u001b[0m     stream\u001b[38;5;241m.\u001b[39mseek(\u001b[38;5;241m0\u001b[39m)\n",
+      "File \u001b[0;32m~/Nextcloud/#Учёба/институт/#4 Курс/Системы искусственного интеллекта/Jupyter/venv/lib/python3.13/site-packages/pandas/io/common.py:882\u001b[0m, in \u001b[0;36mget_handle\u001b[0;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[1;32m    873\u001b[0m         handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(\n\u001b[1;32m    874\u001b[0m             handle,\n\u001b[1;32m    875\u001b[0m             ioargs\u001b[38;5;241m.\u001b[39mmode,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    878\u001b[0m             newline\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m    879\u001b[0m         )\n\u001b[1;32m    880\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    881\u001b[0m         \u001b[38;5;66;03m# Binary mode\u001b[39;00m\n\u001b[0;32m--> 882\u001b[0m         handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mhandle\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mioargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmode\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    883\u001b[0m     handles\u001b[38;5;241m.\u001b[39mappend(handle)\n\u001b[1;32m    885\u001b[0m \u001b[38;5;66;03m# Convert BytesIO or file objects passed with an encoding\u001b[39;00m\n",
+      "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'C:\\\\Users\\\\Cyber\\\\Downloads\\\\daily-weather-dataset_chronological-order.xlsx'"
+     ]
+    }
+   ],
+   "source": [
+    "from pathlib import Path\n",
+    "\n",
+    "# Load the workbook via a path relative to the notebook's working directory instead of a\n",
+    "# machine-specific absolute Windows path, which fails with FileNotFoundError on any other\n",
+    "# machine. Adjust DATA_PATH if the file is stored somewhere else.\n",
+    "DATA_PATH = Path(\"daily-weather-dataset_chronological-order.xlsx\")\n",
+    "data = pd.read_excel(DATA_PATH, sheet_name=\"daily\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data.tail()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data.shape # Параметр .shape показывает размерность датафрейма"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data.size  # Параметр .size показывает количество элементов в датафрейме"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data.count() # Метод count считает сколько всего непустых записей в каждом столбце"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data.info() # Метод .info() показывает тип каждого столбца и занимаемую память"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data.dtypes # Параметр .dtypes показывает просто тип каждого столбца"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data.isna().head() # Метод .isna() вместо каждого значения подставит True (значение NaN) или False (действительное значение)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data.isna().sum() # Подсчитаем количество пропусков в каждом столбце с помощью метода .sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "proc = data.isna().sum().sum() # Подсчитаем сколько всего пропусков (во всех столбцах) в нашем датафрейме\n",
+    "print(proc) # Отобразим количество посчитанных пропусков"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "proc = data.isna().sum().sum() / data.size\n",
+    "print(round(100*proc,1), '%', sep='')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.DataFrame(data)\n",
+    "# Функция для удаления выбросов по IQR\n",
+    "def remove_outliers(df, column):\n",
+    "    \"\"\"Return the rows of ``df`` whose ``column`` value lies within the IQR fences.\n",
+    "\n",
+    "    The fences are Q1 - 1.5 * IQR and Q3 + 1.5 * IQR (both inclusive), computed from\n",
+    "    the 25th and 75th percentiles of ``df[column]``. Rows where ``column`` is NaN fail\n",
+    "    the range test and are therefore dropped as well.\n",
+    "    \"\"\"\n",
+    "    values = df[column]\n",
+    "    q_low, q_high = values.quantile(0.25), values.quantile(0.75)\n",
+    "    whisker = 1.5 * (q_high - q_low)\n",
+    "    inside = values.between(q_low - whisker, q_high + whisker)\n",
+    "    return df[inside]\n",
+    "\n",
+    "# Удаляем выбросы из указанных столбцов\n",
+    "columns_to_clean = [\"Cloud coverage\"]\n",
+    "for col in columns_to_clean:\n",
+    "    df = remove_outliers(df, col)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = df.dropna(axis=1, how='all')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 3. Group by calendar month so that the seasonality of solar generation is captured.\n",
+    "df[\"Date\"] = pd.to_datetime(df[\"Date\"])  # parse the 'Date' column as datetime\n",
+    "df[\"Month\"] = df[\"Date\"].dt.month  # month of each record, used as the grouping key\n",
+    "# numeric_only=True restricts the monthly means to numeric columns: on pandas >= 2.0\n",
+    "# mean() raises TypeError for non-numeric columns, and an averaged 'Date' column would\n",
+    "# otherwise end up in df_monthly (and in the correlation matrix built from it).\n",
+    "df_monthly = df.groupby(\"Month\").mean(numeric_only=True)\n",
+    "df_monthly"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#5\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 5.1 \n",
+    "# Выбираем числовые столбцы, исключая \"Month\"\n",
+    "numeric_features = df.select_dtypes(include=[\"number\"]).columns\n",
+    "numeric_features = numeric_features.drop(\"Month\")  # Убираем \"Month\"\n",
+    "\n",
+    "# Строим графики для всех числовых признаков (кроме \"Month\")\n",
+    "for col in numeric_features:\n",
+    "    plt.figure(figsize=(14, 4))\n",
+    "\n",
+    "    # График плотности (KDE)\n",
+    "    plt.subplot(121)\n",
+    "    sns.kdeplot(data=df, x=col)\n",
+    "    plt.title(f\"Распределение: {col}\")\n",
+    "\n",
+    "    # Boxplot (ящик с усами)\n",
+    "    plt.subplot(122)\n",
+    "    sns.boxplot(data=df, x=col)\n",
+    "    plt.title(f\"Boxplot: {col}\")\n",
+    "\n",
+    "    plt.show()\n",
+    "\n",
+    "#plt.figure(figsize=(8, 5))\n",
+    "#sns.histplot(df['Visibility'], bins=20, kde=True)\n",
+    "#plt.title(\"Гистограмма распределения параметра видимости\")\n",
+    "#plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 5.3. Матрица корреляции\n",
+    "# Позволяет увидеть, как связаны между собой числовые переменные и для поиска зависимостей между погодными параметрами\n",
+    "# Чем ближе значение к 1 или -1, тем сильнее положительная или отрицательная корреляция\n",
+    "plt.figure(figsize=(8, 5))\n",
+    "sns.heatmap(df_monthly.corr(), annot=True, cmap='coolwarm', fmt=\".2f\")\n",
+    "plt.title(\"Матрица корреляции погодных параметров\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 5.4. Диаграмма рассеяния облачности и солнечной энергии\n",
+    "# Показывает взаимосвязь между облачностью и уровнем солнечной энергии.\n",
+    "plt.figure(figsize=(8, 5))\n",
+    "sns.scatterplot(x=df['Cloud coverage'], y=df['Solar energy'])\n",
+    "plt.title(\"Диаграмма рассеяния: Облачность vs Солнечная энергия\")\n",
+    "plt.show()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 5.5. Mean cloud coverage per month\n",
+    "# Shows the seasonal trend of cloud coverage, i.e. in which months it is higher or lower.\n",
+    "plt.figure(figsize=(8, 5))\n",
+    "# Rely on barplot's default estimator (the mean) so the bar heights are monthly means, as\n",
+    "# the title states; summing instead would scale with the number of rows in each month.\n",
+    "sns.barplot(x=df['Month'], y=df['Cloud coverage'])\n",
+    "plt.title(\"Среднее значение облачности по месяцам\")\n",
+    "plt.show()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.feature_selection import VarianceThreshold\n",
+    "\n",
+    "# Оставляем только числовые признаки\n",
+    "numeric_features = df.select_dtypes(include=[\"number\"])\n",
+    "\n",
+    "# Удаляем признаки с дисперсией ниже 0.01\n",
+    "selector = VarianceThreshold(threshold=0.01)\n",
+    "df_var = selector.fit_transform(numeric_features)\n",
+    "\n",
+    "# Получаем оставшиеся названия признаков\n",
+    "selected_features = numeric_features.columns[selector.get_support()]\n",
+    "df_selected = df[selected_features]\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.feature_selection import SelectKBest, f_regression\n",
+    "\n",
+    "# Убираем столбец с датами и выбираем только числовые признаки\n",
+    "X = df.select_dtypes(include=[\"number\"]).drop(columns=[\"Solar energy\"])  \n",
+    "y = df[\"Solar energy\"]  # Целевая переменная\n",
+    "\n",
+    "# Отбор 5 лучших признаков\n",
+    "selector = SelectKBest(score_func=f_regression, k=5)\n",
+    "X_new = selector.fit_transform(X, y)\n",
+    "\n",
+    "# Выводим выбранные признаки\n",
+    "selected_features = X.columns[selector.get_support()]\n",
+    "print(\"Выбранные признаки:\", selected_features)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.feature_selection import RFE\n",
+    "from sklearn.linear_model import LinearRegression\n",
+    "\n",
+    "model = LinearRegression()\n",
+    "rfe = RFE(model, n_features_to_select=5)\n",
+    "X_rfe = rfe.fit_transform(X, y)\n",
+    "\n",
+    "selected_features = X.columns[rfe.support_]\n",
+    "print(\"Новые лучшие признаки:\", selected_features)\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df[\"Temp_Cloud\"] = df[\"Temperature\"] * df[\"Cloud coverage\"]\n",
+    "print(\"\\nDataFrame с добавленным признаком 'Temp_Cloud':\")\n",
+    "df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 7\n",
+    "plt.figure(figsize=(8, 5))\n",
+    "sns.histplot(df[\"Solar energy\"], bins=30, kde=True)\n",
+    "plt.title(\"Распределение целевой переменной (Solar Energy)\")\n",
+    "plt.xlabel(\"Solar Energy\")\n",
+    "plt.ylabel(\"Частота\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.figure(figsize=(12, 6))\n",
+    "sns.boxplot(x=df[\"Month\"], y=df[\"Solar energy\"])\n",
+    "plt.title(\"Распределение Solar Energy по месяцам\")\n",
+    "plt.show()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#8\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "df = df.drop(columns=[\"Date\"])\n",
+    "df = df.drop(columns=[\"Month\"])\n",
+    "y = df[\"Solar energy\"]\n",
+    "# Удалим целевую переменную\n",
+    "X = df.drop(columns=[\"Solar energy\"])\n",
+    "\n",
+    "\n",
+    "# Разбиение (80% train, 20% test)\n",
+    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
+    "# Вывод размеров получившихся выборок\n",
+    "print(f\"Размер X_train: {X_train.shape}\")\n",
+    "print(f\"Размер X_test: {X_test.shape}\")\n",
+    "print(f\"Размер y_train: {y_train.shape}\")\n",
+    "print(f\"Размер y_test: {y_test.shape}\")\n",
+    "print(\"Обучающая выборка X:\")\n",
+    "print(X_train)\n",
+    "print(\"\\nТестовая выборка X:\")\n",
+    "print(X_test)\n",
+    "print(\"\\nОбучающая выборка y:\")\n",
+    "print(y_train)\n",
+    "print(\"\\nТестовая выборка y:\")\n",
+    "print(y_test)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Лабораторная работа 2\n",
+    "from sklearn.preprocessing import StandardScaler\n",
+    "from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n",
+    "\n",
+    "# Модели машинного обучения\n",
+    "from sklearn.neighbors import KNeighborsRegressor\n",
+    "from sklearn.tree import DecisionTreeRegressor\n",
+    "from sklearn.ensemble import RandomForestRegressor\n",
+    "from catboost import CatBoostRegressor\n",
+    "from tensorflow import keras\n",
+    "from tensorflow.keras import layers\n",
+    "from tensorflow.keras.callbacks import EarlyStopping\n",
+    "from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, KFold\n",
+    "from sklearn.neural_network import MLPRegressor\n",
+    "cv = KFold(n_splits=5, shuffle=True, random_state=42)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "scaler = StandardScaler()\n",
+    "X_train = scaler.fit_transform(X_train)\n",
+    "X_test = scaler.transform(X_test)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Knn\n",
+    "knn_params = {\n",
+    "    'n_neighbors': [3, 5, 7, 9],\n",
+    "    'weights': ['uniform', 'distance'],\n",
+    "    'metric': ['euclidean', 'manhattan']\n",
+    "}\n",
+    "knn_grid = GridSearchCV(KNeighborsRegressor(), knn_params, cv=cv, scoring='r2', n_jobs=-1)\n",
+    "knn_grid.fit(X_train, y_train)\n",
+    "print(\"Best KNN:\", knn_grid.best_params_, \"Best R²:\", knn_grid.best_score_)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Дерево решений\n",
+    "dt_params = {\n",
+    "    'max_depth': [3, 5, 10, 15],\n",
+    "    'min_samples_split': [2, 5, 10],\n",
+    "    'min_samples_leaf': [1, 2, 5]\n",
+    "}\n",
+    "dt_grid = GridSearchCV(DecisionTreeRegressor(random_state=42), dt_params, cv=cv, scoring='r2', n_jobs=-1)\n",
+    "dt_grid.fit(X_train, y_train)\n",
+    "print(\"Best Decision Tree:\", dt_grid.best_params_, \"Best R²:\", dt_grid.best_score_)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Случайный лес\n",
+    "rf_params = {\n",
+    "    'n_estimators': [100, 200, 300],\n",
+    "    'max_depth': [5, 10, 15],\n",
+    "    'min_samples_split': [2, 5, 10]\n",
+    "}\n",
+    "rf_grid = GridSearchCV(RandomForestRegressor(random_state=42), rf_params, cv=cv, scoring='r2', n_jobs=-1)\n",
+    "rf_grid.fit(X_train, y_train)\n",
+    "print(\"Best Random Forest:\", rf_grid.best_params_, \"Best R²:\", rf_grid.best_score_)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Gradient boosting (CatBoost)\n",
+    "cat_params = {\n",
+    "    'iterations': [100, 500, 1000],\n",
+    "    'learning_rate': [0.01, 0.05, 0.1],\n",
+    "    'depth': [4, 6, 8]\n",
+    "}\n",
+    "# random_state fixes which 10 of the 27 parameter combinations are sampled, so the search\n",
+    "# result is reproducible across runs (same seed as the estimator itself).\n",
+    "cat_grid = RandomizedSearchCV(CatBoostRegressor(verbose=0, random_state=42), cat_params, cv=cv, scoring='r2', n_jobs=-1, n_iter=10, random_state=42)\n",
+    "cat_grid.fit(X_train, y_train)\n",
+    "print(\"Best CatBoost:\", cat_grid.best_params_, \"Best R²:\", cat_grid.best_score_)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Neural network (multi-layer perceptron)\n",
+    "mlp_params = {\n",
+    "    'hidden_layer_sizes': [(50,), (100,), (50, 50)],\n",
+    "    'activation': ['relu', 'tanh'],\n",
+    "    'alpha': [0.0001, 0.001, 0.01]\n",
+    "}\n",
+    "# random_state fixes which 10 of the 18 parameter combinations are sampled, so the search\n",
+    "# result is reproducible across runs (same seed as the estimator itself).\n",
+    "mlp_grid = RandomizedSearchCV(MLPRegressor(max_iter=5500, random_state=42), mlp_params, cv=cv, scoring='r2', n_jobs=-1, n_iter=10, random_state=42)\n",
+    "mlp_grid.fit(X_train, y_train)\n",
+    "print(\"Best MLP:\", mlp_grid.best_params_, \"Best R²:\", mlp_grid.best_score_)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Сравнение моделей\n",
+    "models = [\"KNN\", \"Decision Tree\", \"Random Forest\", \"CatBoost\", \"Neural Network\"]\n",
+    "scores = [\n",
+    "    knn_grid.best_score_,\n",
+    "    dt_grid.best_score_,\n",
+    "    rf_grid.best_score_,\n",
+    "    cat_grid.best_score_,\n",
+    "    mlp_grid.best_score_\n",
+    "]\n",
+    "\n",
+    "plt.figure(figsize=(10, 5))\n",
+    "sns.barplot(x=models, y=scores)\n",
+    "plt.ylabel(\"R2 Score\")\n",
+    "plt.title(\"Сравнение моделей машинного обучения\")\n",
+    "plt.show()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Лабораторная работа №3 - Оценка моделей\n",
+    "knn_best = knn_grid.best_estimator_\n",
+    "dt_best = dt_grid.best_estimator_\n",
+    "rf_best = rf_grid.best_estimator_\n",
+    "cat_best = cat_grid.best_estimator_\n",
+    "mlp_best = mlp_grid.best_estimator_\n",
+    "def evaluate_model(model, X_test, y_test):\n",
+    "    \"\"\"Score a fitted regressor on held-out data.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    model : object exposing ``predict(X)``.\n",
+    "    X_test : feature matrix passed to ``model.predict``.\n",
+    "    y_test : true target values, one per row of ``X_test``.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    dict with the keys 'MAE', 'MSE', 'RMSE', 'MAPE' (in percent) and 'R2'.\n",
+    "    MAPE is averaged over the samples whose true value is non-zero and is NaN\n",
+    "    when every true value is zero.\n",
+    "    \"\"\"\n",
+    "    y_pred = model.predict(X_test)\n",
+    "    mae = mean_absolute_error(y_test, y_pred)\n",
+    "    mse = mean_squared_error(y_test, y_pred)\n",
+    "    rmse = np.sqrt(mse)\n",
+    "    # The relative error is undefined where the true value is 0 (division by zero gives\n",
+    "    # inf/NaN and poisons the mean), so those samples are left out of the MAPE.\n",
+    "    y_true = np.asarray(y_test, dtype=float).ravel()\n",
+    "    y_hat = np.asarray(y_pred, dtype=float).ravel()\n",
+    "    nonzero = y_true != 0\n",
+    "    if nonzero.any():\n",
+    "        mape = np.mean(np.abs((y_true[nonzero] - y_hat[nonzero]) / y_true[nonzero])) * 100\n",
+    "    else:\n",
+    "        mape = np.nan\n",
+    "    r2 = r2_score(y_test, y_pred)\n",
+    "    return {'MAE': mae, 'MSE': mse, 'RMSE': rmse, 'MAPE': mape, 'R2': r2}\n",
+    "\n",
+    "models = {'KNN': knn_best, 'Decision Tree': dt_best, 'Random Forest': rf_best, 'CatBoost': cat_best, 'MLP': mlp_best}\n",
+    "\n",
+    "for name, model in models.items():\n",
+    "    results = evaluate_model(model, X_test, y_test)\n",
+    "    print(f\"{name} Evaluation: {results}\")\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Визуализация результатов\n",
+    "# Словарь для хранения результатов\n",
+    "metrics = {'Model': [], 'MAE': [], 'MSE': [], 'RMSE': [], 'MAPE': [], 'R2': []}\n",
+    "\n",
+    "# Оценка каждой модели\n",
+    "for name, model in models.items():\n",
+    "    results = evaluate_model(model, X_test, y_test)\n",
+    "    metrics['Model'].append(name)\n",
+    "    for key in results:\n",
+    "        metrics[key].append(results[key])\n",
+    "\n",
+    "# Преобразование в DataFrame\n",
+    "metrics_df = pd.DataFrame(metrics)\n",
+    "metrics_df.set_index('Model', inplace=True)\n",
+    "\n",
+    "# Названия метрик и их описания для графиков\n",
+    "metric_labels = {\n",
+    "    'MAE': 'Средняя абсолютная ошибка (MAE)',\n",
+    "    'MSE': 'Среднеквадратическая ошибка (MSE)',\n",
+    "    'RMSE': 'Корень из MSE (RMSE)',\n",
+    "    'MAPE': 'Средняя абсолютная процентная ошибка (MAPE)',\n",
+    "    'R2': 'Коэффициент детерминации (R²)'\n",
+    "}\n",
+    "\n",
+    "# Отображение каждого графика отдельно\n",
+    "for metric in metrics_df.columns:\n",
+    "    plt.figure(figsize=(8, 5))\n",
+    "    sns.barplot(\n",
+    "        x=metrics_df.index, \n",
+    "        y=metrics_df[metric], \n",
+    "        hue=metrics_df.index,  # Добавляем hue\n",
+    "        palette='viridis', \n",
+    "        edgecolor='black',\n",
+    "        legend=False  # Отключаем легенду, так как цвета соответствуют x\n",
+    "    )\n",
+    "    plt.title(f'Сравнение моделей по {metric_labels[metric]}', fontsize=14)\n",
+    "    plt.xlabel(\"Модель\", fontsize=12)\n",
+    "    plt.ylabel(metric_labels[metric], fontsize=12)\n",
+    "    plt.xticks(rotation=45)\n",
+    "    plt.grid(axis='y', linestyle='--', alpha=0.7)\n",
+    "    plt.show()\n",
+    "\n",
+    "\n",
+    "# Визуализация фактических vs предсказанных значений для лучшей модели (по R²)\n",
+    "best_model_name = metrics_df.sort_values(by='R2', ascending=False).index[0]\n",
+    "best_model = models[best_model_name]\n",
+    "\n",
+    "y_pred_best = best_model.predict(X_test)\n",
+    "\n",
+    "plt.figure(figsize=(8, 8))\n",
+    "sns.scatterplot(x=y_test, y=y_pred_best, alpha=0.6)\n",
+    "plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], '--r', label=\"Идеальное предсказание\")\n",
+    "plt.xlabel(\"Фактические значения\", fontsize=12)\n",
+    "plt.ylabel(\"Предсказанные значения\", fontsize=12)\n",
+    "plt.title(f\"Фактические vs. Предсказанные ({best_model_name})\", fontsize=14)\n",
+    "plt.legend()\n",
+    "plt.grid(True)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Лабораторная работа №4\n",
+    "df = df.drop(columns=[\"Altimeter\"])\n",
+    "df = df.drop(columns=[\"Temp_Cloud\"])\n",
+    "y = df[\"Solar energy\"]\n",
+    "X = df.drop(columns=[\"Solar energy\"])\n",
+    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
+    "scaler = StandardScaler()\n",
+    "X_train = scaler.fit_transform(X_train)\n",
+    "X_test = scaler.transform(X_test)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# KNN\n",
+    "knn_params = {\n",
+    "    'n_neighbors': [3, 5, 7, 9],\n",
+    "    'weights': ['uniform', 'distance'],\n",
+    "    'metric': ['euclidean', 'manhattan']\n",
+    "}\n",
+    "knn_grid = GridSearchCV(KNeighborsRegressor(), knn_params, cv=cv, scoring='r2', n_jobs=-1)\n",
+    "knn_grid.fit(X_train, y_train)\n",
+    "print(\"Best KNN:\", knn_grid.best_params_, \"Best R²:\", knn_grid.best_score_)\n",
+    "\n",
+    "# Decision tree\n",
+    "dt_params = {\n",
+    "    'max_depth': [3, 5, 10, 15],\n",
+    "    'min_samples_split': [2, 5, 10],\n",
+    "    'min_samples_leaf': [1, 2, 5]\n",
+    "}\n",
+    "dt_grid = GridSearchCV(DecisionTreeRegressor(random_state=42), dt_params, cv=cv, scoring='r2', n_jobs=-1)\n",
+    "dt_grid.fit(X_train, y_train)\n",
+    "print(\"Best Decision Tree:\", dt_grid.best_params_, \"Best R²:\", dt_grid.best_score_)\n",
+    "\n",
+    "# Random forest\n",
+    "rf_params = {\n",
+    "    'n_estimators': [100, 200, 300],\n",
+    "    'max_depth': [5, 10, 15],\n",
+    "    'min_samples_split': [2, 5, 10]\n",
+    "}\n",
+    "rf_grid = GridSearchCV(RandomForestRegressor(random_state=42), rf_params, cv=cv, scoring='r2', n_jobs=-1)\n",
+    "rf_grid.fit(X_train, y_train)\n",
+    "print(\"Best Random Forest:\", rf_grid.best_params_, \"Best R²:\", rf_grid.best_score_)\n",
+    "\n",
+    "# Gradient boosting (CatBoost)\n",
+    "cat_params = {\n",
+    "    'iterations': [100, 500, 1000],\n",
+    "    'learning_rate': [0.01, 0.05, 0.1],\n",
+    "    'depth': [4, 6, 8]\n",
+    "}\n",
+    "# random_state fixes which 10 of the 27 parameter combinations are sampled, so the\n",
+    "# randomized search is reproducible across runs.\n",
+    "cat_grid = RandomizedSearchCV(CatBoostRegressor(verbose=0, random_state=42), cat_params, cv=cv, scoring='r2', n_jobs=-1, n_iter=10, random_state=42)\n",
+    "cat_grid.fit(X_train, y_train)\n",
+    "print(\"Best CatBoost:\", cat_grid.best_params_, \"Best R²:\", cat_grid.best_score_)\n",
+    "\n",
+    "# Neural network (multi-layer perceptron)\n",
+    "mlp_params = {\n",
+    "    'hidden_layer_sizes': [(50,), (100,), (50, 50)],\n",
+    "    'activation': ['relu', 'tanh'],\n",
+    "    'alpha': [0.0001, 0.001, 0.01]\n",
+    "}\n",
+    "# random_state fixes which 10 of the 18 parameter combinations are sampled.\n",
+    "mlp_grid = RandomizedSearchCV(MLPRegressor(max_iter=5500, random_state=42), mlp_params, cv=cv, scoring='r2', n_jobs=-1, n_iter=10, random_state=42)\n",
+    "mlp_grid.fit(X_train, y_train)\n",
+    "print(\"Best MLP:\", mlp_grid.best_params_, \"Best R²:\", mlp_grid.best_score_)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "knn_best = knn_grid.best_estimator_\n",
+    "dt_best = dt_grid.best_estimator_\n",
+    "rf_best = rf_grid.best_estimator_\n",
+    "cat_best = cat_grid.best_estimator_\n",
+    "mlp_best = mlp_grid.best_estimator_\n",
+    "def evaluate_model(model, X_test, y_test):\n",
+    "    \"\"\"Score a fitted regressor on held-out data.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    model : object exposing ``predict(X)``.\n",
+    "    X_test : feature matrix passed to ``model.predict``.\n",
+    "    y_test : true target values, one per row of ``X_test``.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    dict with the keys 'MAE', 'MSE', 'RMSE', 'MAPE' (in percent) and 'R2'.\n",
+    "    MAPE is averaged over the samples whose true value is non-zero and is NaN\n",
+    "    when every true value is zero.\n",
+    "    \"\"\"\n",
+    "    y_pred = model.predict(X_test)\n",
+    "    mae = mean_absolute_error(y_test, y_pred)\n",
+    "    mse = mean_squared_error(y_test, y_pred)\n",
+    "    rmse = np.sqrt(mse)\n",
+    "    # The relative error is undefined where the true value is 0 (division by zero gives\n",
+    "    # inf/NaN and poisons the mean), so those samples are left out of the MAPE.\n",
+    "    y_true = np.asarray(y_test, dtype=float).ravel()\n",
+    "    y_hat = np.asarray(y_pred, dtype=float).ravel()\n",
+    "    nonzero = y_true != 0\n",
+    "    if nonzero.any():\n",
+    "        mape = np.mean(np.abs((y_true[nonzero] - y_hat[nonzero]) / y_true[nonzero])) * 100\n",
+    "    else:\n",
+    "        mape = np.nan\n",
+    "    r2 = r2_score(y_test, y_pred)\n",
+    "    return {'MAE': mae, 'MSE': mse, 'RMSE': rmse, 'MAPE': mape, 'R2': r2}\n",
+    "\n",
+    "models = {'KNN': knn_best, 'Decision Tree': dt_best, 'Random Forest': rf_best, 'CatBoost': cat_best, 'MLP': mlp_best}\n",
+    "\n",
+    "for name, model in models.items():\n",
+    "    results = evaluate_model(model, X_test, y_test)\n",
+    "    print(f\"{name} Evaluation: {results}\")\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "# Список моделей и их названий\n",
+    "models = {\n",
+    "    \"KNN\": knn_best,\n",
+    "    \"Decision Tree\": dt_best,\n",
+    "    \"Random Forest\": rf_best,\n",
+    "    \"CatBoost\": cat_best,\n",
+    "    \"MLP\": mlp_best\n",
+    "}\n",
+    "\n",
+    "# Создаем DataFrame для предсказанных значений\n",
+    "predictions_df = pd.DataFrame({\"Actual\": y_test})\n",
+    "\n",
+    "# Генерируем предсказания для каждой модели\n",
+    "for name, model in models.items():\n",
+    "    predictions_df[name] = model.predict(X_test)\n",
+    "\n",
+    "# Выводим первые 10 строк предсказаний\n",
+    "print(predictions_df.head(10))\n",
+    "\n",
+    "# Визуализация предсказаний\n",
+    "plt.figure(figsize=(12, 6))\n",
+    "plt.plot(predictions_df[\"Actual\"].values, label=\"Actual\", color=\"black\", linewidth=2)\n",
+    "\n",
+    "for name in models.keys():\n",
+    "    plt.plot(predictions_df[name].values, label=name, linestyle=\"--\")\n",
+    "\n",
+    "plt.legend()\n",
+    "plt.title(\"Actual vs Predicted Values\")\n",
+    "plt.xlabel(\"Samples\")\n",
+    "plt.ylabel(\"Solar Energy Output\")\n",
+    "plt.show()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pickle\n",
+    "\n",
+    "# Список моделей и их названий\n",
+    "models = {\n",
+    "    \"KNN\": knn_best,\n",
+    "    \"Decision_Tree\": dt_best,\n",
+    "    \"Random_Forest\": rf_best,\n",
+    "    \"CatBoost\": cat_best,\n",
+    "    \"MLP\": mlp_best\n",
+    "}\n",
+    "\n",
+    "# Сохраняем каждую модель в файл .pkl\n",
+    "for name, model in models.items():\n",
+    "    with open(f\"{name}.pkl\", \"wb\") as file:\n",
+    "        pickle.dump(model, file)\n",
+    "\n",
+    "print(\"Все модели сохранены в формате .pkl!\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}