Files
artificial_intelligence_sys…/Архив/4 лекция (Pandas ч.2).ipynb
2025-04-04 13:28:56 +03:00

4821 lines
191 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "ojlhGzdxhkwR"
},
"source": [
"## Pandas. Загрузка библиотек"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "DqYWosnHhkwU"
},
"outputs": [],
"source": [
"import pandas as pd # Загружаем библиотеку Pandas\n",
"import numpy as np # Загружаем библиотеку numpy"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"id": "vX8TZMleKHiw"
},
"outputs": [],
"source": [
"data = pd.read_csv('wine_base.csv', index_col = 0) # С помощью функции pd.read_csv загружаем файл wine_base.csv (index_col = 0 — первый столбец файла становится индексом) и записываем данные в data"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"id": "ggbR-UGu7wJZ",
"outputId": "735190d5-000c-479d-fb04-d7973bd58dd4"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>country</th>\n",
" <th>description</th>\n",
" <th>designation</th>\n",
" <th>points</th>\n",
" <th>price</th>\n",
" <th>province</th>\n",
" <th>region_1</th>\n",
" <th>region_2</th>\n",
" <th>variety</th>\n",
" <th>winery</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>US</td>\n",
" <td>This tremendous 100% varietal wine hails from ...</td>\n",
" <td>Martha's Vineyard</td>\n",
" <td>96</td>\n",
" <td>235.0</td>\n",
" <td>California</td>\n",
" <td>Napa Valley</td>\n",
" <td>Napa</td>\n",
" <td>Cabernet Sauvignon</td>\n",
" <td>Heitz</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Spain</td>\n",
" <td>Ripe aromas of fig, blackberry and cassis are ...</td>\n",
" <td>Carodorum Selección Especial Reserva</td>\n",
" <td>96</td>\n",
" <td>110.0</td>\n",
" <td>Northern Spain</td>\n",
" <td>Toro</td>\n",
" <td>NaN</td>\n",
" <td>Tinta de Toro</td>\n",
" <td>Bodega Carmen Rodríguez</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>US</td>\n",
" <td>Mac Watson honors the memory of a wine once ma...</td>\n",
" <td>Special Selected Late Harvest</td>\n",
" <td>96</td>\n",
" <td>90.0</td>\n",
" <td>California</td>\n",
" <td>Knights Valley</td>\n",
" <td>Sonoma</td>\n",
" <td>Sauvignon Blanc</td>\n",
" <td>Macauley</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>US</td>\n",
" <td>This spent 20 months in 30% new French oak, an...</td>\n",
" <td>Reserve</td>\n",
" <td>96</td>\n",
" <td>65.0</td>\n",
" <td>Oregon</td>\n",
" <td>Willamette Valley</td>\n",
" <td>Willamette Valley</td>\n",
" <td>Pinot Noir</td>\n",
" <td>Ponzi</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>France</td>\n",
" <td>This is the top wine from La Bégude, named aft...</td>\n",
" <td>La Brûlade</td>\n",
" <td>95</td>\n",
" <td>66.0</td>\n",
" <td>Provence</td>\n",
" <td>Bandol</td>\n",
" <td>NaN</td>\n",
" <td>Provence red blend</td>\n",
" <td>Domaine de la Bégude</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" country description \\\n",
"0 US This tremendous 100% varietal wine hails from ... \n",
"1 Spain Ripe aromas of fig, blackberry and cassis are ... \n",
"2 US Mac Watson honors the memory of a wine once ma... \n",
"3 US This spent 20 months in 30% new French oak, an... \n",
"4 France This is the top wine from La Bégude, named aft... \n",
"\n",
" designation points price province \\\n",
"0 Martha's Vineyard 96 235.0 California \n",
"1 Carodorum Selección Especial Reserva 96 110.0 Northern Spain \n",
"2 Special Selected Late Harvest 96 90.0 California \n",
"3 Reserve 96 65.0 Oregon \n",
"4 La Brûlade 95 66.0 Provence \n",
"\n",
" region_1 region_2 variety \\\n",
"0 Napa Valley Napa Cabernet Sauvignon \n",
"1 Toro NaN Tinta de Toro \n",
"2 Knights Valley Sonoma Sauvignon Blanc \n",
"3 Willamette Valley Willamette Valley Pinot Noir \n",
"4 Bandol NaN Provence red blend \n",
"\n",
" winery \n",
"0 Heitz \n",
"1 Bodega Carmen Rodríguez \n",
"2 Macauley \n",
"3 Ponzi \n",
"4 Domaine de la Bégude "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "_mybfuLlhkzU"
},
"source": [
"## 1. Конкатенация\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"id": "vUn3NnurhkzV",
"outputId": "11da5650-2592-40f3-c0d6-59a503352985"
},
"outputs": [
{
"data": {
"text/plain": [
"[4, 5, 6, 7, 234, 23, 0, 1, 2, 3, 4, 5, 6, 7, 234, 23, 0, 1, 2, 3, 0, 1, 2, 3]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a = [0, 1, 2, 3] # Создаем список a\n",
"b = [4, 5, 6, 7, 234, 23] # Создаем список b\n",
"b + a + b + a + a # Делаем конкатенацию (объединение) списков"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"id": "DzVWtFnXhkzX"
},
"outputs": [],
"source": [
"data_1 = data[0:15].copy() # Берем первые 15 строк из data и делаем их копию в data_1\n",
"data_2 = data[15:30].copy() # Берем строки с 15 по 29 (правая граница среза 30 не включается) из data и делаем их копию в data_2"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"id": "T2Nq4UnUhkza",
"outputId": "0d7c713f-8eae-4e25-878c-c7e1cf26f8d5"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>country</th>\n",
" <th>description</th>\n",
" <th>designation</th>\n",
" <th>points</th>\n",
" <th>price</th>\n",
" <th>province</th>\n",
" <th>region_1</th>\n",
" <th>region_2</th>\n",
" <th>variety</th>\n",
" <th>winery</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>US</td>\n",
" <td>First made in 2006, this succulent luscious Ch...</td>\n",
" <td>Sigrid</td>\n",
" <td>95</td>\n",
" <td>90.0</td>\n",
" <td>Oregon</td>\n",
" <td>Willamette Valley</td>\n",
" <td>Willamette Valley</td>\n",
" <td>Chardonnay</td>\n",
" <td>Bergström</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>US</td>\n",
" <td>This blockbuster, powerhouse of a wine suggest...</td>\n",
" <td>Rainin Vineyard</td>\n",
" <td>95</td>\n",
" <td>325.0</td>\n",
" <td>California</td>\n",
" <td>Diamond Mountain District</td>\n",
" <td>Napa</td>\n",
" <td>Cabernet Sauvignon</td>\n",
" <td>Hall</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>Spain</td>\n",
" <td>Nicely oaked blackberry, licorice, vanilla and...</td>\n",
" <td>6 Años Reserva Premium</td>\n",
" <td>95</td>\n",
" <td>80.0</td>\n",
" <td>Northern Spain</td>\n",
" <td>Ribera del Duero</td>\n",
" <td>NaN</td>\n",
" <td>Tempranillo</td>\n",
" <td>Valduero</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>France</td>\n",
" <td>Coming from a seven-acre vineyard named after ...</td>\n",
" <td>Le Pigeonnier</td>\n",
" <td>95</td>\n",
" <td>290.0</td>\n",
" <td>Southwest France</td>\n",
" <td>Cahors</td>\n",
" <td>NaN</td>\n",
" <td>Malbec</td>\n",
" <td>Château Lagrézette</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>US</td>\n",
" <td>This fresh and lively medium-bodied wine is be...</td>\n",
" <td>Gap's Crown Vineyard</td>\n",
" <td>95</td>\n",
" <td>75.0</td>\n",
" <td>California</td>\n",
" <td>Sonoma Coast</td>\n",
" <td>Sonoma</td>\n",
" <td>Pinot Noir</td>\n",
" <td>Gary Farrell</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" country description \\\n",
"15 US First made in 2006, this succulent luscious Ch... \n",
"16 US This blockbuster, powerhouse of a wine suggest... \n",
"17 Spain Nicely oaked blackberry, licorice, vanilla and... \n",
"18 France Coming from a seven-acre vineyard named after ... \n",
"19 US This fresh and lively medium-bodied wine is be... \n",
"\n",
" designation points price province \\\n",
"15 Sigrid 95 90.0 Oregon \n",
"16 Rainin Vineyard 95 325.0 California \n",
"17 6 Años Reserva Premium 95 80.0 Northern Spain \n",
"18 Le Pigeonnier 95 290.0 Southwest France \n",
"19 Gap's Crown Vineyard 95 75.0 California \n",
"\n",
" region_1 region_2 variety \\\n",
"15 Willamette Valley Willamette Valley Chardonnay \n",
"16 Diamond Mountain District Napa Cabernet Sauvignon \n",
"17 Ribera del Duero NaN Tempranillo \n",
"18 Cahors NaN Malbec \n",
"19 Sonoma Coast Sonoma Pinot Noir \n",
"\n",
" winery \n",
"15 Bergström \n",
"16 Hall \n",
"17 Valduero \n",
"18 Château Lagrézette \n",
"19 Gary Farrell "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data_2.head() # Выводим первые 5 строк data_2"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"id": "hQn7iScFhkzb",
"outputId": "1dfa87c8-4278-49f5-c1d4-8507d2d13667"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>country</th>\n",
" <th>description</th>\n",
" <th>designation</th>\n",
" <th>points</th>\n",
" <th>price</th>\n",
" <th>province</th>\n",
" <th>region_1</th>\n",
" <th>region_2</th>\n",
" <th>variety</th>\n",
" <th>winery</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>US</td>\n",
" <td>This tremendous 100% varietal wine hails from ...</td>\n",
" <td>Martha's Vineyard</td>\n",
" <td>96</td>\n",
" <td>235.0</td>\n",
" <td>California</td>\n",
" <td>Napa Valley</td>\n",
" <td>Napa</td>\n",
" <td>Cabernet Sauvignon</td>\n",
" <td>Heitz</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Spain</td>\n",
" <td>Ripe aromas of fig, blackberry and cassis are ...</td>\n",
" <td>Carodorum Selección Especial Reserva</td>\n",
" <td>96</td>\n",
" <td>110.0</td>\n",
" <td>Northern Spain</td>\n",
" <td>Toro</td>\n",
" <td>NaN</td>\n",
" <td>Tinta de Toro</td>\n",
" <td>Bodega Carmen Rodríguez</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>US</td>\n",
" <td>Mac Watson honors the memory of a wine once ma...</td>\n",
" <td>Special Selected Late Harvest</td>\n",
" <td>96</td>\n",
" <td>90.0</td>\n",
" <td>California</td>\n",
" <td>Knights Valley</td>\n",
" <td>Sonoma</td>\n",
" <td>Sauvignon Blanc</td>\n",
" <td>Macauley</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>US</td>\n",
" <td>This spent 20 months in 30% new French oak, an...</td>\n",
" <td>Reserve</td>\n",
" <td>96</td>\n",
" <td>65.0</td>\n",
" <td>Oregon</td>\n",
" <td>Willamette Valley</td>\n",
" <td>Willamette Valley</td>\n",
" <td>Pinot Noir</td>\n",
" <td>Ponzi</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>France</td>\n",
" <td>This is the top wine from La Bégude, named aft...</td>\n",
" <td>La Brûlade</td>\n",
" <td>95</td>\n",
" <td>66.0</td>\n",
" <td>Provence</td>\n",
" <td>Bandol</td>\n",
" <td>NaN</td>\n",
" <td>Provence red blend</td>\n",
" <td>Domaine de la Bégude</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Spain</td>\n",
" <td>Deep, dense and pure from the opening bell, th...</td>\n",
" <td>Numanthia</td>\n",
" <td>95</td>\n",
" <td>73.0</td>\n",
" <td>Northern Spain</td>\n",
" <td>Toro</td>\n",
" <td>NaN</td>\n",
" <td>Tinta de Toro</td>\n",
" <td>Numanthia</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Spain</td>\n",
" <td>Slightly gritty black-fruit aromas include a s...</td>\n",
" <td>San Román</td>\n",
" <td>95</td>\n",
" <td>65.0</td>\n",
" <td>Northern Spain</td>\n",
" <td>Toro</td>\n",
" <td>NaN</td>\n",
" <td>Tinta de Toro</td>\n",
" <td>Maurodos</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Spain</td>\n",
" <td>Lush cedary black-fruit aromas are luxe and of...</td>\n",
" <td>Carodorum Único Crianza</td>\n",
" <td>95</td>\n",
" <td>110.0</td>\n",
" <td>Northern Spain</td>\n",
" <td>Toro</td>\n",
" <td>NaN</td>\n",
" <td>Tinta de Toro</td>\n",
" <td>Bodega Carmen Rodríguez</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>US</td>\n",
" <td>This re-named vineyard was formerly bottled as...</td>\n",
" <td>Silice</td>\n",
" <td>95</td>\n",
" <td>65.0</td>\n",
" <td>Oregon</td>\n",
" <td>Chehalem Mountains</td>\n",
" <td>Willamette Valley</td>\n",
" <td>Pinot Noir</td>\n",
" <td>Bergström</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>US</td>\n",
" <td>The producer sources from two blocks of the vi...</td>\n",
" <td>Gap's Crown Vineyard</td>\n",
" <td>95</td>\n",
" <td>60.0</td>\n",
" <td>California</td>\n",
" <td>Sonoma Coast</td>\n",
" <td>Sonoma</td>\n",
" <td>Pinot Noir</td>\n",
" <td>Blue Farm</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Italy</td>\n",
" <td>Elegance, complexity and structure come togeth...</td>\n",
" <td>Ronco della Chiesa</td>\n",
" <td>95</td>\n",
" <td>80.0</td>\n",
" <td>Northeastern Italy</td>\n",
" <td>Collio</td>\n",
" <td>NaN</td>\n",
" <td>Friulano</td>\n",
" <td>Borgo del Tiglio</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>US</td>\n",
" <td>From 18-year-old vines, this supple well-balan...</td>\n",
" <td>Estate Vineyard Wadensvil Block</td>\n",
" <td>95</td>\n",
" <td>48.0</td>\n",
" <td>Oregon</td>\n",
" <td>Ribbon Ridge</td>\n",
" <td>Willamette Valley</td>\n",
" <td>Pinot Noir</td>\n",
" <td>Patricia Green Cellars</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>US</td>\n",
" <td>A standout even in this terrific lineup of 201...</td>\n",
" <td>Weber Vineyard</td>\n",
" <td>95</td>\n",
" <td>48.0</td>\n",
" <td>Oregon</td>\n",
" <td>Dundee Hills</td>\n",
" <td>Willamette Valley</td>\n",
" <td>Pinot Noir</td>\n",
" <td>Patricia Green Cellars</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>France</td>\n",
" <td>This wine is in peak condition. The tannins an...</td>\n",
" <td>Château Montus Prestige</td>\n",
" <td>95</td>\n",
" <td>90.0</td>\n",
" <td>Southwest France</td>\n",
" <td>Madiran</td>\n",
" <td>NaN</td>\n",
" <td>Tannat</td>\n",
" <td>Vignobles Brumont</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>US</td>\n",
" <td>With its sophisticated mix of mineral, acid an...</td>\n",
" <td>Grace Vineyard</td>\n",
" <td>95</td>\n",
" <td>185.0</td>\n",
" <td>Oregon</td>\n",
" <td>Dundee Hills</td>\n",
" <td>Willamette Valley</td>\n",
" <td>Pinot Noir</td>\n",
" <td>Domaine Serene</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>US</td>\n",
" <td>First made in 2006, this succulent luscious Ch...</td>\n",
" <td>Sigrid</td>\n",
" <td>95</td>\n",
" <td>90.0</td>\n",
" <td>Oregon</td>\n",
" <td>Willamette Valley</td>\n",
" <td>Willamette Valley</td>\n",
" <td>Chardonnay</td>\n",
" <td>Bergström</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>US</td>\n",
" <td>This blockbuster, powerhouse of a wine suggest...</td>\n",
" <td>Rainin Vineyard</td>\n",
" <td>95</td>\n",
" <td>325.0</td>\n",
" <td>California</td>\n",
" <td>Diamond Mountain District</td>\n",
" <td>Napa</td>\n",
" <td>Cabernet Sauvignon</td>\n",
" <td>Hall</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>Spain</td>\n",
" <td>Nicely oaked blackberry, licorice, vanilla and...</td>\n",
" <td>6 Años Reserva Premium</td>\n",
" <td>95</td>\n",
" <td>80.0</td>\n",
" <td>Northern Spain</td>\n",
" <td>Ribera del Duero</td>\n",
" <td>NaN</td>\n",
" <td>Tempranillo</td>\n",
" <td>Valduero</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>France</td>\n",
" <td>Coming from a seven-acre vineyard named after ...</td>\n",
" <td>Le Pigeonnier</td>\n",
" <td>95</td>\n",
" <td>290.0</td>\n",
" <td>Southwest France</td>\n",
" <td>Cahors</td>\n",
" <td>NaN</td>\n",
" <td>Malbec</td>\n",
" <td>Château Lagrézette</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>US</td>\n",
" <td>This fresh and lively medium-bodied wine is be...</td>\n",
" <td>Gap's Crown Vineyard</td>\n",
" <td>95</td>\n",
" <td>75.0</td>\n",
" <td>California</td>\n",
" <td>Sonoma Coast</td>\n",
" <td>Sonoma</td>\n",
" <td>Pinot Noir</td>\n",
" <td>Gary Farrell</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>US</td>\n",
" <td>Heitz has made this stellar rosé from the rare...</td>\n",
" <td>Grignolino</td>\n",
" <td>95</td>\n",
" <td>24.0</td>\n",
" <td>California</td>\n",
" <td>Napa Valley</td>\n",
" <td>Napa</td>\n",
" <td>Rosé</td>\n",
" <td>Heitz</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>Spain</td>\n",
" <td>Alluring, complex and powerful aromas of grill...</td>\n",
" <td>Prado Enea Gran Reserva</td>\n",
" <td>95</td>\n",
" <td>79.0</td>\n",
" <td>Northern Spain</td>\n",
" <td>Rioja</td>\n",
" <td>NaN</td>\n",
" <td>Tempranillo Blend</td>\n",
" <td>Muga</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>Spain</td>\n",
" <td>Tarry blackberry and cheesy oak aromas are app...</td>\n",
" <td>Termanthia</td>\n",
" <td>95</td>\n",
" <td>220.0</td>\n",
" <td>Northern Spain</td>\n",
" <td>Toro</td>\n",
" <td>NaN</td>\n",
" <td>Tinta de Toro</td>\n",
" <td>Numanthia</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>US</td>\n",
" <td>The apogee of this ambitious winery's white wi...</td>\n",
" <td>Giallo Solare</td>\n",
" <td>95</td>\n",
" <td>60.0</td>\n",
" <td>California</td>\n",
" <td>Edna Valley</td>\n",
" <td>Central Coast</td>\n",
" <td>Chardonnay</td>\n",
" <td>Center of Effort</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>US</td>\n",
" <td>San Jose-based producer Adam Comartin heads 1,...</td>\n",
" <td>R-Bar-R Ranch</td>\n",
" <td>95</td>\n",
" <td>45.0</td>\n",
" <td>California</td>\n",
" <td>Santa Cruz Mountains</td>\n",
" <td>Central Coast</td>\n",
" <td>Pinot Noir</td>\n",
" <td>Comartin</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>New Zealand</td>\n",
" <td>Yields were down in 2015, but intensity is up,...</td>\n",
" <td>Maté's Vineyard</td>\n",
" <td>94</td>\n",
" <td>57.0</td>\n",
" <td>Kumeu</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Chardonnay</td>\n",
" <td>Kumeu River</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>US</td>\n",
" <td>Bergström has made a Shea designate since 2003...</td>\n",
" <td>Shea Vineyard</td>\n",
" <td>94</td>\n",
" <td>62.0</td>\n",
" <td>Oregon</td>\n",
" <td>Willamette Valley</td>\n",
" <td>NaN</td>\n",
" <td>Pinot Noir</td>\n",
" <td>Bergström</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>US</td>\n",
" <td>Focused and dense, this intense wine captures ...</td>\n",
" <td>Abetina</td>\n",
" <td>94</td>\n",
" <td>105.0</td>\n",
" <td>Oregon</td>\n",
" <td>Willamette Valley</td>\n",
" <td>Willamette Valley</td>\n",
" <td>Pinot Noir</td>\n",
" <td>Ponzi</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>US</td>\n",
" <td>Cranberry, baked rhubarb, anise and crushed sl...</td>\n",
" <td>Garys' Vineyard</td>\n",
" <td>94</td>\n",
" <td>60.0</td>\n",
" <td>California</td>\n",
" <td>Santa Lucia Highlands</td>\n",
" <td>Central Coast</td>\n",
" <td>Pinot Noir</td>\n",
" <td>Roar</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>US</td>\n",
" <td>This standout Rocks District wine brings earth...</td>\n",
" <td>The Funk Estate</td>\n",
" <td>94</td>\n",
" <td>60.0</td>\n",
" <td>Washington</td>\n",
" <td>Walla Walla Valley (WA)</td>\n",
" <td>Columbia Valley</td>\n",
" <td>Syrah</td>\n",
" <td>Saviah</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" country description \\\n",
"0 US This tremendous 100% varietal wine hails from ... \n",
"1 Spain Ripe aromas of fig, blackberry and cassis are ... \n",
"2 US Mac Watson honors the memory of a wine once ma... \n",
"3 US This spent 20 months in 30% new French oak, an... \n",
"4 France This is the top wine from La Bégude, named aft... \n",
"5 Spain Deep, dense and pure from the opening bell, th... \n",
"6 Spain Slightly gritty black-fruit aromas include a s... \n",
"7 Spain Lush cedary black-fruit aromas are luxe and of... \n",
"8 US This re-named vineyard was formerly bottled as... \n",
"9 US The producer sources from two blocks of the vi... \n",
"10 Italy Elegance, complexity and structure come togeth... \n",
"11 US From 18-year-old vines, this supple well-balan... \n",
"12 US A standout even in this terrific lineup of 201... \n",
"13 France This wine is in peak condition. The tannins an... \n",
"14 US With its sophisticated mix of mineral, acid an... \n",
"15 US First made in 2006, this succulent luscious Ch... \n",
"16 US This blockbuster, powerhouse of a wine suggest... \n",
"17 Spain Nicely oaked blackberry, licorice, vanilla and... \n",
"18 France Coming from a seven-acre vineyard named after ... \n",
"19 US This fresh and lively medium-bodied wine is be... \n",
"20 US Heitz has made this stellar rosé from the rare... \n",
"21 Spain Alluring, complex and powerful aromas of grill... \n",
"22 Spain Tarry blackberry and cheesy oak aromas are app... \n",
"23 US The apogee of this ambitious winery's white wi... \n",
"24 US San Jose-based producer Adam Comartin heads 1,... \n",
"25 New Zealand Yields were down in 2015, but intensity is up,... \n",
"26 US Bergström has made a Shea designate since 2003... \n",
"27 US Focused and dense, this intense wine captures ... \n",
"28 US Cranberry, baked rhubarb, anise and crushed sl... \n",
"29 US This standout Rocks District wine brings earth... \n",
"\n",
" designation points price province \\\n",
"0 Martha's Vineyard 96 235.0 California \n",
"1 Carodorum Selección Especial Reserva 96 110.0 Northern Spain \n",
"2 Special Selected Late Harvest 96 90.0 California \n",
"3 Reserve 96 65.0 Oregon \n",
"4 La Brûlade 95 66.0 Provence \n",
"5 Numanthia 95 73.0 Northern Spain \n",
"6 San Román 95 65.0 Northern Spain \n",
"7 Carodorum Único Crianza 95 110.0 Northern Spain \n",
"8 Silice 95 65.0 Oregon \n",
"9 Gap's Crown Vineyard 95 60.0 California \n",
"10 Ronco della Chiesa 95 80.0 Northeastern Italy \n",
"11 Estate Vineyard Wadensvil Block 95 48.0 Oregon \n",
"12 Weber Vineyard 95 48.0 Oregon \n",
"13 Château Montus Prestige 95 90.0 Southwest France \n",
"14 Grace Vineyard 95 185.0 Oregon \n",
"15 Sigrid 95 90.0 Oregon \n",
"16 Rainin Vineyard 95 325.0 California \n",
"17 6 Años Reserva Premium 95 80.0 Northern Spain \n",
"18 Le Pigeonnier 95 290.0 Southwest France \n",
"19 Gap's Crown Vineyard 95 75.0 California \n",
"20 Grignolino 95 24.0 California \n",
"21 Prado Enea Gran Reserva 95 79.0 Northern Spain \n",
"22 Termanthia 95 220.0 Northern Spain \n",
"23 Giallo Solare 95 60.0 California \n",
"24 R-Bar-R Ranch 95 45.0 California \n",
"25 Maté's Vineyard 94 57.0 Kumeu \n",
"26 Shea Vineyard 94 62.0 Oregon \n",
"27 Abetina 94 105.0 Oregon \n",
"28 Garys' Vineyard 94 60.0 California \n",
"29 The Funk Estate 94 60.0 Washington \n",
"\n",
" region_1 region_2 variety \\\n",
"0 Napa Valley Napa Cabernet Sauvignon \n",
"1 Toro NaN Tinta de Toro \n",
"2 Knights Valley Sonoma Sauvignon Blanc \n",
"3 Willamette Valley Willamette Valley Pinot Noir \n",
"4 Bandol NaN Provence red blend \n",
"5 Toro NaN Tinta de Toro \n",
"6 Toro NaN Tinta de Toro \n",
"7 Toro NaN Tinta de Toro \n",
"8 Chehalem Mountains Willamette Valley Pinot Noir \n",
"9 Sonoma Coast Sonoma Pinot Noir \n",
"10 Collio NaN Friulano \n",
"11 Ribbon Ridge Willamette Valley Pinot Noir \n",
"12 Dundee Hills Willamette Valley Pinot Noir \n",
"13 Madiran NaN Tannat \n",
"14 Dundee Hills Willamette Valley Pinot Noir \n",
"15 Willamette Valley Willamette Valley Chardonnay \n",
"16 Diamond Mountain District Napa Cabernet Sauvignon \n",
"17 Ribera del Duero NaN Tempranillo \n",
"18 Cahors NaN Malbec \n",
"19 Sonoma Coast Sonoma Pinot Noir \n",
"20 Napa Valley Napa Rosé \n",
"21 Rioja NaN Tempranillo Blend \n",
"22 Toro NaN Tinta de Toro \n",
"23 Edna Valley Central Coast Chardonnay \n",
"24 Santa Cruz Mountains Central Coast Pinot Noir \n",
"25 NaN NaN Chardonnay \n",
"26 Willamette Valley NaN Pinot Noir \n",
"27 Willamette Valley Willamette Valley Pinot Noir \n",
"28 Santa Lucia Highlands Central Coast Pinot Noir \n",
"29 Walla Walla Valley (WA) Columbia Valley Syrah \n",
"\n",
" winery \n",
"0 Heitz \n",
"1 Bodega Carmen Rodríguez \n",
"2 Macauley \n",
"3 Ponzi \n",
"4 Domaine de la Bégude \n",
"5 Numanthia \n",
"6 Maurodos \n",
"7 Bodega Carmen Rodríguez \n",
"8 Bergström \n",
"9 Blue Farm \n",
"10 Borgo del Tiglio \n",
"11 Patricia Green Cellars \n",
"12 Patricia Green Cellars \n",
"13 Vignobles Brumont \n",
"14 Domaine Serene \n",
"15 Bergström \n",
"16 Hall \n",
"17 Valduero \n",
"18 Château Lagrézette \n",
"19 Gary Farrell \n",
"20 Heitz \n",
"21 Muga \n",
"22 Numanthia \n",
"23 Center of Effort \n",
"24 Comartin \n",
"25 Kumeu River \n",
"26 Bergström \n",
"27 Ponzi \n",
"28 Roar \n",
"29 Saviah "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_concat = pd.concat([data_1, data_2]) # Объединяем два датафрейма\n",
"df_concat # Отображаем объединенный датафрейм"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"id": "Y36JPmmyhkzd",
"outputId": "3c39bb6e-2353-4360-a4fa-c9dc0af22d51"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>country</th>\n",
" <th>description</th>\n",
" <th>designation</th>\n",
" <th>points</th>\n",
" <th>price</th>\n",
" <th>province</th>\n",
" <th>region_1</th>\n",
" <th>region_2</th>\n",
" <th>variety</th>\n",
" <th>winery</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>US</td>\n",
" <td>This tremendous 100% varietal wine hails from ...</td>\n",
" <td>Martha's Vineyard</td>\n",
" <td>96</td>\n",
" <td>235.0</td>\n",
" <td>California</td>\n",
" <td>Napa Valley</td>\n",
" <td>Napa</td>\n",
" <td>Cabernet Sauvignon</td>\n",
" <td>Heitz</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Spain</td>\n",
" <td>Ripe aromas of fig, blackberry and cassis are ...</td>\n",
" <td>Carodorum Selección Especial Reserva</td>\n",
" <td>96</td>\n",
" <td>110.0</td>\n",
" <td>Northern Spain</td>\n",
" <td>Toro</td>\n",
" <td>NaN</td>\n",
" <td>Tinta de Toro</td>\n",
" <td>Bodega Carmen Rodríguez</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>US</td>\n",
" <td>Mac Watson honors the memory of a wine once ma...</td>\n",
" <td>Special Selected Late Harvest</td>\n",
" <td>96</td>\n",
" <td>90.0</td>\n",
" <td>California</td>\n",
" <td>Knights Valley</td>\n",
" <td>Sonoma</td>\n",
" <td>Sauvignon Blanc</td>\n",
" <td>Macauley</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>US</td>\n",
" <td>This spent 20 months in 30% new French oak, an...</td>\n",
" <td>Reserve</td>\n",
" <td>96</td>\n",
" <td>65.0</td>\n",
" <td>Oregon</td>\n",
" <td>Willamette Valley</td>\n",
" <td>Willamette Valley</td>\n",
" <td>Pinot Noir</td>\n",
" <td>Ponzi</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>France</td>\n",
" <td>This is the top wine from La Bégude, named aft...</td>\n",
" <td>La Brûlade</td>\n",
" <td>95</td>\n",
" <td>66.0</td>\n",
" <td>Provence</td>\n",
" <td>Bandol</td>\n",
" <td>NaN</td>\n",
" <td>Provence red blend</td>\n",
" <td>Domaine de la Bégude</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" country description \\\n",
"0 US This tremendous 100% varietal wine hails from ... \n",
"1 Spain Ripe aromas of fig, blackberry and cassis are ... \n",
"2 US Mac Watson honors the memory of a wine once ma... \n",
"3 US This spent 20 months in 30% new French oak, an... \n",
"4 France This is the top wine from La Bégude, named aft... \n",
"\n",
" designation points price province \\\n",
"0 Martha's Vineyard 96 235.0 California \n",
"1 Carodorum Selección Especial Reserva 96 110.0 Northern Spain \n",
"2 Special Selected Late Harvest 96 90.0 California \n",
"3 Reserve 96 65.0 Oregon \n",
"4 La Brûlade 95 66.0 Provence \n",
"\n",
" region_1 region_2 variety \\\n",
"0 Napa Valley Napa Cabernet Sauvignon \n",
"1 Toro NaN Tinta de Toro \n",
"2 Knights Valley Sonoma Sauvignon Blanc \n",
"3 Willamette Valley Willamette Valley Pinot Noir \n",
"4 Bandol NaN Provence red blend \n",
"\n",
" winery \n",
"0 Heitz \n",
"1 Bodega Carmen Rodríguez \n",
"2 Macauley \n",
"3 Ponzi \n",
"4 Domaine de la Bégude "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data_1.head() # Выводим первые 5 строк data_1"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 111
},
"id": "2H9dAxpQhkze",
"outputId": "06098996-d31b-4409-ddfa-773a3fa8bef2"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>country</th>\n",
" <th>description</th>\n",
" <th>designation</th>\n",
" <th>points</th>\n",
" <th>price</th>\n",
" <th>province</th>\n",
" <th>region_1</th>\n",
" <th>region_2</th>\n",
" <th>variety</th>\n",
" <th>winery</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>US</td>\n",
" <td>Keras</td>\n",
" <td>Martha's Vineyard</td>\n",
" <td>96</td>\n",
" <td>235.0</td>\n",
" <td>California</td>\n",
" <td>Napa Valley</td>\n",
" <td>Napa</td>\n",
" <td>Cabernet Sauvignon</td>\n",
" <td>Heitz</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Spain</td>\n",
" <td>Ripe aromas of fig, blackberry and cassis are ...</td>\n",
" <td>Carodorum Selección Especial Reserva</td>\n",
" <td>96</td>\n",
" <td>110.0</td>\n",
" <td>Northern Spain</td>\n",
" <td>Toro</td>\n",
" <td>NaN</td>\n",
" <td>Tinta de Toro</td>\n",
" <td>Bodega Carmen Rodríguez</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" country description \\\n",
"0 US Keras \n",
"1 Spain Ripe aromas of fig, blackberry and cassis are ... \n",
"\n",
" designation points price province \\\n",
"0 Martha's Vineyard 96 235.0 California \n",
"1 Carodorum Selección Especial Reserva 96 110.0 Northern Spain \n",
"\n",
" region_1 region_2 variety winery \n",
"0 Napa Valley Napa Cabernet Sauvignon Heitz \n",
"1 Toro NaN Tinta de Toro Bodega Carmen Rodríguez "
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data_1.iloc[0,1] = 'Keras' # Изменяем значение в ячейке data_1\n",
"data_1.head(2) # Выводим на экран 2 первые строки data_1"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 111
},
"id": "z_98pMQ9MHFm",
"outputId": "671b4223-d59a-4230-c0d2-7b3e33dc1ff4"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>country</th>\n",
" <th>description</th>\n",
" <th>designation</th>\n",
" <th>points</th>\n",
" <th>price</th>\n",
" <th>province</th>\n",
" <th>region_1</th>\n",
" <th>region_2</th>\n",
" <th>variety</th>\n",
" <th>winery</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>US</td>\n",
" <td>This tremendous 100% varietal wine hails from ...</td>\n",
" <td>Martha's Vineyard</td>\n",
" <td>96</td>\n",
" <td>235.0</td>\n",
" <td>California</td>\n",
" <td>Napa Valley</td>\n",
" <td>Napa</td>\n",
" <td>Cabernet Sauvignon</td>\n",
" <td>Heitz</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Spain</td>\n",
" <td>Ripe aromas of fig, blackberry and cassis are ...</td>\n",
" <td>Carodorum Selección Especial Reserva</td>\n",
" <td>96</td>\n",
" <td>110.0</td>\n",
" <td>Northern Spain</td>\n",
" <td>Toro</td>\n",
" <td>NaN</td>\n",
" <td>Tinta de Toro</td>\n",
" <td>Bodega Carmen Rodríguez</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" country description \\\n",
"0 US This tremendous 100% varietal wine hails from ... \n",
"1 Spain Ripe aromas of fig, blackberry and cassis are ... \n",
"\n",
" designation points price province \\\n",
"0 Martha's Vineyard 96 235.0 California \n",
"1 Carodorum Selección Especial Reserva 96 110.0 Northern Spain \n",
"\n",
" region_1 region_2 variety winery \n",
"0 Napa Valley Napa Cabernet Sauvignon Heitz \n",
"1 Toro NaN Tinta de Toro Bodega Carmen Rodríguez "
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_concat.head(n=2)  # Show the first two rows of df_concat"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"id": "0Chntu25WIF2"
},
"outputs": [],
"source": [
"## 2. Добавление столбцов и строк"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 173
},
"id": "niLdtd4xWMb0",
"outputId": "48bda55e-a9ef-4026-fbd1-70f9021f2b84"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" <th>D</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>9</td>\n",
" <td>10</td>\n",
" <td>11</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13</td>\n",
" <td>14</td>\n",
" <td>15</td>\n",
" <td>16</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A B C D\n",
"0 1 2 3 4\n",
"1 5 6 7 8\n",
"2 9 10 11 12\n",
"3 13 14 15 16"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Column labels for the demo frame.\n",
"columns = list('ABCD')\n",
"# Cell values, one inner list per row.\n",
"values = [\n",
"    [1, 2, 3, 4],\n",
"    [5, 6, 7, 8],\n",
"    [9, 10, 11, 12],\n",
"    [13, 14, 15, 16],\n",
"]\n",
"# Assemble the DataFrame from the rows and labels defined above.\n",
"df_for_add = pd.DataFrame(data=values, columns=columns)\n",
"# Display the resulting DataFrame.\n",
"df_for_add"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 173
},
"id": "NGK40s7PWhcK",
"outputId": "2507bf8f-a895-4adf-e43c-a245664eac7d"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" <th>D</th>\n",
" <th>E</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>9</td>\n",
" <td>10</td>\n",
" <td>11</td>\n",
" <td>12</td>\n",
" <td>19</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13</td>\n",
" <td>14</td>\n",
" <td>15</td>\n",
" <td>16</td>\n",
" <td>20</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A B C D E\n",
"0 1 2 3 4 17\n",
"1 5 6 7 8 18\n",
"2 9 10 11 12 19\n",
"3 13 14 15 16 20"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Assign column 'E' (created if it does not exist yet), one value per row.\n",
"df_for_add['E'] = list(range(17, 21))\n",
"# Display the updated DataFrame.\n",
"df_for_add"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 173
},
"id": "WVt6bsnC9l90",
"outputId": "29cb192e-bb63-4128-b191-7ff683162b54"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" <th>D</th>\n",
" <th>E</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>9</td>\n",
" <td>10</td>\n",
" <td>11</td>\n",
" <td>12</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13</td>\n",
" <td>14</td>\n",
" <td>15</td>\n",
" <td>16</td>\n",
" <td>27</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A B C D E\n",
"0 1 2 3 4 17\n",
"1 5 6 7 8 17\n",
"2 9 10 11 12 17\n",
"3 13 14 15 16 27"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Assign column 'E' again; if 'E' already exists its values are replaced.\n",
"df_for_add['E'] = [17] * 3 + [27]\n",
"# Display the updated DataFrame.\n",
"df_for_add"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"id": "T2UEW9MfW0pj",
"outputId": "de7a0376-366f-4621-f8fc-5828e76ac008"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" <th>D</th>\n",
" <th>E</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>9</td>\n",
" <td>10</td>\n",
" <td>11</td>\n",
" <td>12</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13</td>\n",
" <td>14</td>\n",
" <td>15</td>\n",
" <td>16</td>\n",
" <td>27</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A B C D E\n",
"0 1 2 3 4 17\n",
"1 5 6 7 8 17\n",
"2 9 10 11 12 17\n",
"3 13 14 15 16 27\n",
"4 1 2 3 4 5"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Assign a row under index label 4; a label that is not present yet is appended as a new row.\n",
"df_for_add.loc[4] = list(range(1, 6))\n",
"# Display the updated DataFrame.\n",
"df_for_add"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 235
},
"id": "ZFRI_F9L-CGs",
"outputId": "57289a3c-a004-4503-f353-5a3bc2cefb19"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" <th>D</th>\n",
" <th>E</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>9</td>\n",
" <td>10</td>\n",
" <td>11</td>\n",
" <td>12</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13</td>\n",
" <td>14</td>\n",
" <td>15</td>\n",
" <td>16</td>\n",
" <td>27</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A B C D E\n",
"0 1 2 3 4 17\n",
"1 5 6 7 8 17\n",
"2 9 10 11 12 17\n",
"3 13 14 15 16 27\n",
"4 1 2 3 4 5\n",
"8 1 2 3 4 5"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Assign a row under index label 8; the new label does not have to follow the existing ones.\n",
"df_for_add.loc[8] = list(range(1, 6))\n",
"# Display the updated DataFrame.\n",
"df_for_add"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"id": "Af1_T2N8jAfF"
},
"outputs": [],
"source": [
"## 3. Groupby"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"id": "3QFEHZdYjAew",
"outputId": "c51e1577-23d8-4707-cf6c-42fc94d0fd86"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>country</th>\n",
" <th>description</th>\n",
" <th>designation</th>\n",
" <th>points</th>\n",
" <th>price</th>\n",
" <th>province</th>\n",
" <th>region_1</th>\n",
" <th>region_2</th>\n",
" <th>variety</th>\n",
" <th>winery</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>US</td>\n",
" <td>This tremendous 100% varietal wine hails from ...</td>\n",
" <td>Martha's Vineyard</td>\n",
" <td>96</td>\n",
" <td>235.0</td>\n",
" <td>California</td>\n",
" <td>Napa Valley</td>\n",
" <td>Napa</td>\n",
" <td>Cabernet Sauvignon</td>\n",
" <td>Heitz</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Spain</td>\n",
" <td>Ripe aromas of fig, blackberry and cassis are ...</td>\n",
" <td>Carodorum Selección Especial Reserva</td>\n",
" <td>96</td>\n",
" <td>110.0</td>\n",
" <td>Northern Spain</td>\n",
" <td>Toro</td>\n",
" <td>NaN</td>\n",
" <td>Tinta de Toro</td>\n",
" <td>Bodega Carmen Rodríguez</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>US</td>\n",
" <td>Mac Watson honors the memory of a wine once ma...</td>\n",
" <td>Special Selected Late Harvest</td>\n",
" <td>96</td>\n",
" <td>90.0</td>\n",
" <td>California</td>\n",
" <td>Knights Valley</td>\n",
" <td>Sonoma</td>\n",
" <td>Sauvignon Blanc</td>\n",
" <td>Macauley</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>US</td>\n",
" <td>This spent 20 months in 30% new French oak, an...</td>\n",
" <td>Reserve</td>\n",
" <td>96</td>\n",
" <td>65.0</td>\n",
" <td>Oregon</td>\n",
" <td>Willamette Valley</td>\n",
" <td>Willamette Valley</td>\n",
" <td>Pinot Noir</td>\n",
" <td>Ponzi</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>France</td>\n",
" <td>This is the top wine from La Bégude, named aft...</td>\n",
" <td>La Brûlade</td>\n",
" <td>95</td>\n",
" <td>66.0</td>\n",
" <td>Provence</td>\n",
" <td>Bandol</td>\n",
" <td>NaN</td>\n",
" <td>Provence red blend</td>\n",
" <td>Domaine de la Bégude</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" country description \\\n",
"0 US This tremendous 100% varietal wine hails from ... \n",
"1 Spain Ripe aromas of fig, blackberry and cassis are ... \n",
"2 US Mac Watson honors the memory of a wine once ma... \n",
"3 US This spent 20 months in 30% new French oak, an... \n",
"4 France This is the top wine from La Bégude, named aft... \n",
"\n",
" designation points price province \\\n",
"0 Martha's Vineyard 96 235.0 California \n",
"1 Carodorum Selección Especial Reserva 96 110.0 Northern Spain \n",
"2 Special Selected Late Harvest 96 90.0 California \n",
"3 Reserve 96 65.0 Oregon \n",
"4 La Brûlade 95 66.0 Provence \n",
"\n",
" region_1 region_2 variety \\\n",
"0 Napa Valley Napa Cabernet Sauvignon \n",
"1 Toro NaN Tinta de Toro \n",
"2 Knights Valley Sonoma Sauvignon Blanc \n",
"3 Willamette Valley Willamette Valley Pinot Noir \n",
"4 Bandol NaN Provence red blend \n",
"\n",
" winery \n",
"0 Heitz \n",
"1 Bodega Carmen Rodríguez \n",
"2 Macauley \n",
"3 Ponzi \n",
"4 Domaine de la Bégude "
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head(n=5)  # Show the first 5 rows of data"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ItPspZfLjAfL"
},
"source": [
"Groupby - это очень важный и широко используемый метод. Позволяет делать группировку данных по какому-либо столбцу. \n",
"\n",
"Пример:\n",
"* посчитать средние баллы и цену в разрезе по странам и провинциям"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 235
},
"id": "UM89p_pcjAfO",
"outputId": "dee3be1a-1cf3-44cb-8af2-4390811c9cce"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/wx/b8w5bvpx5hjdgyvqlfp38x940000gn/T/ipykernel_15726/32687945.py:1: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
" df_mean = data.groupby(['country']).mean() # Группируем данные по странам и считаем среднее значение (считается для числовых столбцов)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>points</th>\n",
" <th>price</th>\n",
" </tr>\n",
" <tr>\n",
" <th>country</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Albania</th>\n",
" <td>88.000000</td>\n",
" <td>20.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Argentina</th>\n",
" <td>85.996093</td>\n",
" <td>20.794881</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Australia</th>\n",
" <td>87.892475</td>\n",
" <td>31.258480</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Austria</th>\n",
" <td>89.276742</td>\n",
" <td>31.192106</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Bosnia and Herzegovina</th>\n",
" <td>84.750000</td>\n",
" <td>12.750000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" points price\n",
"country \n",
"Albania 88.000000 20.000000\n",
"Argentina 85.996093 20.794881\n",
"Australia 87.892475 31.258480\n",
"Austria 89.276742 31.192106\n",
"Bosnia and Herzegovina 84.750000 12.750000"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Group the rows by country and average the numeric columns of each group.\n",
"# numeric_only=True is passed explicitly: the frame also holds text columns, and relying on the\n",
"# implicit default emits a FutureWarning on pandas 1.5 and raises a TypeError on pandas >= 2.0.\n",
"df_mean = data.groupby(['country']).mean(numeric_only=True)\n",
"# Show the first 5 rows of df_mean.\n",
"df_mean.head()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 269
},
"id": "xLhe1562suR1",
"outputId": "26e6853b-cb62-464f-9f4e-48f912c84429"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/wx/b8w5bvpx5hjdgyvqlfp38x940000gn/T/ipykernel_15726/30053901.py:1: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.\n",
" df_max = data.groupby('country')['points','price'].max() # Группируем данные по странам и считаем максимальное значение\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>points</th>\n",
" <th>price</th>\n",
" </tr>\n",
" <tr>\n",
" <th>country</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Albania</th>\n",
" <td>88</td>\n",
" <td>20.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Argentina</th>\n",
" <td>97</td>\n",
" <td>250.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Australia</th>\n",
" <td>100</td>\n",
" <td>850.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Austria</th>\n",
" <td>98</td>\n",
" <td>1100.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Bosnia and Herzegovina</th>\n",
" <td>88</td>\n",
" <td>13.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" points price\n",
"country \n",
"Albania 88 20.0\n",
"Argentina 97 250.0\n",
"Australia 100 850.0\n",
"Austria 98 1100.0\n",
"Bosnia and Herzegovina 88 13.0"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Group the rows by country and take the per-country maximum of points and price.\n",
"# The columns are selected with a list (double brackets): indexing a groupby with several bare\n",
"# keys (an implicit tuple) is deprecated in pandas 1.x and raises an error on pandas >= 2.0.\n",
"df_max = data.groupby('country')[['points', 'price']].max()\n",
"# Show the first 5 rows of df_max.\n",
"df_max.head()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 390
},
"id": "6RifUrsujAfY",
"outputId": "ca2d153c-a2bc-4491-b6cd-c5ac1fa1f588"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>points</th>\n",
" <th>price</th>\n",
" </tr>\n",
" <tr>\n",
" <th>country</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>England</th>\n",
" <td>92.888889</td>\n",
" <td>47.500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Austria</th>\n",
" <td>89.276742</td>\n",
" <td>31.192106</td>\n",
" </tr>\n",
" <tr>\n",
" <th>France</th>\n",
" <td>88.925870</td>\n",
" <td>45.619885</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Germany</th>\n",
" <td>88.626427</td>\n",
" <td>39.011078</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Italy</th>\n",
" <td>88.413664</td>\n",
" <td>37.547913</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Canada</th>\n",
" <td>88.239796</td>\n",
" <td>34.628866</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Slovenia</th>\n",
" <td>88.234043</td>\n",
" <td>28.061728</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Morocco</th>\n",
" <td>88.166667</td>\n",
" <td>18.833333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Turkey</th>\n",
" <td>88.096154</td>\n",
" <td>25.800000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Portugal</th>\n",
" <td>88.057685</td>\n",
" <td>26.332615</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" points price\n",
"country \n",
"England 92.888889 47.500000\n",
"Austria 89.276742 31.192106\n",
"France 88.925870 45.619885\n",
"Germany 88.626427 39.011078\n",
"Italy 88.413664 37.547913\n",
"Canada 88.239796 34.628866\n",
"Slovenia 88.234043 28.061728\n",
"Morocco 88.166667 18.833333\n",
"Turkey 88.096154 25.800000\n",
"Portugal 88.057685 26.332615"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Order the per-country averages by the points column, highest first.\n",
"sorted_data = df_mean.sort_values(by='points', ascending=False)\n",
"# Show the first 10 rows of sorted_data.\n",
"sorted_data.head(n=10)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 390
},
"id": "_7_n-qrOjAfq",
"outputId": "ce1da65e-7c1b-4ad1-9634-96b814600ea1"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/wx/b8w5bvpx5hjdgyvqlfp38x940000gn/T/ipykernel_15726/1962610003.py:2: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
" data.groupby(['country', 'region_1', 'region_2']).mean().head(10) # Выполняем группировку по трем столбцам (считаем среднее значение) и выводим первые 10 строк\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th>points</th>\n",
" <th>price</th>\n",
" </tr>\n",
" <tr>\n",
" <th>country</th>\n",
" <th>region_1</th>\n",
" <th>region_2</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"10\" valign=\"top\">US</th>\n",
" <th>Adelaida District</th>\n",
" <th>Central Coast</th>\n",
" <td>90.120000</td>\n",
" <td>48.560000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Alexander Valley</th>\n",
" <th>Sonoma</th>\n",
" <td>87.773486</td>\n",
" <td>35.901582</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Alta Mesa</th>\n",
" <th>Central Valley</th>\n",
" <td>87.181818</td>\n",
" <td>15.636364</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Amador County</th>\n",
" <th>Sierra Foothills</th>\n",
" <td>86.676790</td>\n",
" <td>24.341304</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Amador-Napa</th>\n",
" <th>California Other</th>\n",
" <td>84.000000</td>\n",
" <td>12.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Ancient Lakes</th>\n",
" <th>Columbia Valley</th>\n",
" <td>86.555556</td>\n",
" <td>18.777778</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Anderson Valley</th>\n",
" <th>Mendocino/Lake Counties</th>\n",
" <td>89.617470</td>\n",
" <td>40.626506</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Anderson Valley-Sonoma County-Cole Ranch</th>\n",
" <th>North Coast</th>\n",
" <td>87.000000</td>\n",
" <td>18.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Antelope Valley of the California High Desert</th>\n",
" <th>South Coast</th>\n",
" <td>89.000000</td>\n",
" <td>21.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Applegate Valley</th>\n",
" <th>Southern Oregon</th>\n",
" <td>87.392405</td>\n",
" <td>31.253165</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" points \\\n",
"country region_1 region_2 \n",
"US Adelaida District Central Coast 90.120000 \n",
" Alexander Valley Sonoma 87.773486 \n",
" Alta Mesa Central Valley 87.181818 \n",
" Amador County Sierra Foothills 86.676790 \n",
" Amador-Napa California Other 84.000000 \n",
" Ancient Lakes Columbia Valley 86.555556 \n",
" Anderson Valley Mendocino/Lake Counties 89.617470 \n",
" Anderson Valley-Sonoma County-Cole Ranch North Coast 87.000000 \n",
" Antelope Valley of the California High Desert South Coast 89.000000 \n",
" Applegate Valley Southern Oregon 87.392405 \n",
"\n",
" price \n",
"country region_1 region_2 \n",
"US Adelaida District Central Coast 48.560000 \n",
" Alexander Valley Sonoma 35.901582 \n",
" Alta Mesa Central Valley 15.636364 \n",
" Amador County Sierra Foothills 24.341304 \n",
" Amador-Napa California Other 12.000000 \n",
" Ancient Lakes Columbia Valley 18.777778 \n",
" Anderson Valley Mendocino/Lake Counties 40.626506 \n",
" Anderson Valley-Sonoma County-Cole Ranch North Coast 18.000000 \n",
" Antelope Valley of the California High Desert South Coast 21.000000 \n",
" Applegate Valley Southern Oregon 31.253165 "
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Group by three columns at once, average the numeric columns of each group and show the first 10 rows.\n",
"# numeric_only=True is passed explicitly: the frame also holds text columns, and relying on the\n",
"# implicit default emits a FutureWarning on pandas 1.5 and raises a TypeError on pandas >= 2.0.\n",
"data.groupby(['country', 'region_1', 'region_2']).mean(numeric_only=True).head(10)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "X6KDR8HPjAf3"
},
"source": [
"## 4. Join\n",
"\n",
"Join позволяет объединять несколько таблиц в единую по ключам (одинаковым столбцам).\n",
"\n",
"Join бывает разный:\n",
"* Left\n",
"* Right\n",
"* Inner\n",
"* Outer (Full)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "fI0-DwH7jAf-"
},
"source": [
"Посмотрим на примерах, как это работает.\n",
"\n",
"Допустим, у нас есть два датафрейма:\n",
"* первый содержит информацию о студентах и ID курсов, которые они посещают,\n",
"* второй - описание курсов."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 235
},
"id": "8LQUr6fBjAgB",
"outputId": "a39750b8-76dc-4be6-da45-239a8690d30a"
},
"outputs": [],
"source": [
"# First DataFrame: students together with the id of the course each one attends.\n",
"# NOTE(review): data_1 is rebound here to a NumPy array; earlier cells use the same name for a DataFrame.\n",
"data_1 = np.array([\n",
"    ['100500', '200600', '100500', '300700', '200600', '500900'],\n",
"    ['Анастасия', 'Екатерина', 'Светлана', 'Максим', 'Станислав', 'Данила'],\n",
"    ['Андреева', 'Петрова', 'Иванова', 'Егоров', 'Лесницкий', 'Кудряшев'],\n",
"])\n",
"\n",
"# Transpose so that each inner list becomes a column, then label the columns.\n",
"names = pd.DataFrame(data_1.transpose(), columns=['subject_id', 'first_name', 'last_name'])\n",
"# Display the names DataFrame.\n",
"names"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 173
},
"id": "gIFHRi2QjAgS",
"outputId": "5e70e778-0d82-4903-924e-51c52d56dce8"
},
"outputs": [],
"source": [
"# Second DataFrame: course descriptions keyed by subject_id.\n",
"data_2 = np.array([\n",
"    ['100500', '200600', '300700', '400700'],\n",
"    ['Математика', 'Программирование', 'Анализ данных', 'Мат. анализ'],\n",
"    ['продвинутые методы математической статистики', 'расширенный курс по с++',\n",
"     'краткий курс для чайников', 'многомерный анализ'],\n",
"])\n",
"# Transpose so that each inner list becomes a column, then label the columns.\n",
"subjects = pd.DataFrame(data_2.transpose(), columns=['subject_id', 'course_name', 'description'])\n",
"# Display the subjects DataFrame.\n",
"subjects"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ME_fbI0FjAge"
},
"source": [
"#### Делаем Join:\n",
"\n",
"Left Join"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 235
},
"id": "BgXi9IbYjAgg",
"outputId": "8718fb50-57cc-46a6-d258-cafc2806e61f"
},
"outputs": [],
"source": [
"# Left join on subject_id: every row of names is kept; course columns are NaN where no subject matches.\n",
"names.merge(subjects, how='left', on='subject_id')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 235
},
"id": "oG6gcJB0_n8U",
"outputId": "9892e965-72d8-4c2f-aea9-2549271b3b8f"
},
"outputs": [],
"source": [
"# Right join on subject_id: every row of subjects is kept; student columns are NaN where no student matches.\n",
"names.merge(subjects, how='right', on='subject_id')"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "H4M4MQfsjAgm"
},
"source": [
"Inner Join"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"id": "Fr9FeDknjAgq",
"outputId": "eae99595-f9f3-4106-b81b-c6171911d70d"
},
"outputs": [],
"source": [
"# Inner join on subject_id: only rows whose key is present in both frames are kept.\n",
"names.merge(subjects, how='inner', on='subject_id')"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "vi9i7xeFjAhH"
},
"source": [
"Outer Join"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 266
},
"id": "2oCn_zOrjAhJ",
"outputId": "b8c898ff-c6b8-41e7-b6d6-fb8dce7210e5"
},
"outputs": [],
"source": [
"# Outer join on subject_id: rows from both frames are kept, with NaN where one side has no match.\n",
"df = pd.merge(names, subjects, how='outer', on='subject_id')\n",
"# Display the joined DataFrame.\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "8YCox3IxjAhW"
},
"source": [
"Join при разных названиях столбцов с ключом:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 235
},
"id": "oxIBiXBLjAhY",
"outputId": "78fedded-83a6-47e6-81cf-3e3866f11685"
},
"outputs": [],
"source": [
"# Column labels of names with the first one replaced by 'ID'.\n",
"cols = ['ID', *names.columns[1:]]\n",
"# Work on a copy so that names itself keeps its original labels.\n",
"names_2 = names.copy()\n",
"names_2.columns = cols\n",
"# Display the names_2 DataFrame.\n",
"names_2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"id": "T68tbKU0jAhj",
"outputId": "0bd367c2-ad2f-4317-d815-7162fbddf35a"
},
"outputs": [],
"source": [
"# Inner join when the key column is named differently on each side:\n",
"# 'ID' in names_2 is matched against 'subject_id' in subjects.\n",
"names_2.merge(subjects, how='inner', left_on='ID', right_on='subject_id')"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Mg6fQ0UwjAhn"
},
"source": [
"Join по двум ключам:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "s_BZm0fbjAhq"
},
"outputs": [],
"source": [
"# Column labels: both frames share the key columns A and B but carry different value columns.\n",
"cols_a = ['A', 'B', 'value_1']\n",
"cols_b = ['A', 'B', 'value_2']\n",
"# Row values for df_a.\n",
"data_a = [\n",
"    [1, 1, 23],\n",
"    [1, 2, 34],\n",
"    [2, 1, 2342],\n",
"    [2, 2, 333],\n",
"]\n",
"# Row values for df_b.\n",
"data_b = [\n",
"    [1, 1, 0.1],\n",
"    [1, 2, 0.2],\n",
"    [2, 2, 0.13],\n",
"    [2, 3, 0.33],\n",
"]\n",
"# Build the two DataFrames from the values and labels above.\n",
"df_a = pd.DataFrame(data=data_a, columns=cols_a)\n",
"df_b = pd.DataFrame(data=data_b, columns=cols_b)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 173
},
"id": "I1bHptln_BWL",
"outputId": "dc229fc9-30dc-4cbf-910d-2f78b983d51f"
},
"outputs": [],
"source": [
"df_a # Выводим на экран датафрейм df_a"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 173
},
"id": "UKsQpT1vjAh3",
"outputId": "8200e7dd-b5bb-4e7d-e1b5-5d4c12d404f1"
},
"outputs": [],
"source": [
"df_b # Выводим на экран датафрейм df_b"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"id": "5MiuPPQZjAiE",
"outputId": "7a5164c9-b899-412a-ff05-1ab874d8dfe0"
},
"outputs": [],
"source": [
"df_a.merge(df_b, on=['A', 'B'], how='outer') # Объединяем два датафрейма по столбцам A и B (join-outer)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "6cRasbOXjAiS"
},
"source": [
"А join по одному ключу в данном случае выглядел бы так:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 297
},
"id": "GrTN-pM9jAiT",
"outputId": "03c0f18d-fdc7-48db-c6ec-d1d50b11f7f8"
},
"outputs": [],
"source": [
"df_a.merge(df_b, on='A', how='inner') # Объединяем два датафрейма по столбцу A (join-inner)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "9V7ArTkzjAia"
},
"source": [
"Или так:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 235
},
"id": "2S02OpysjAic",
"outputId": "74be38cf-e447-45bc-b926-5886a3753860"
},
"outputs": [],
"source": [
"df_a.merge(df_b, on='B', how='inner') # Объединяем два датафрейма по столбцу B (join-inner)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "reSNH-05jAil"
},
"outputs": [],
"source": [
"Или вот так по разным ключам:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 235
},
"id": "vwici4RdjAim",
"outputId": "72122abd-ea12-4fc3-bf40-ce65ed8b376b"
},
"outputs": [],
"source": [
"pd.merge(df_a, df_b, left_on='A', right_on='B', how='inner') # Объединяем два датафрейма по столбцам A и B (join-inner)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "6tg5a_WOjAit"
},
"source": [
"Заполнение пропусков в одном из столбцов:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 266
},
"id": "RsK9n3vVAgK-",
"outputId": "1b5bd072-4650-41e6-cf04-e270d5ac3cc6"
},
"outputs": [],
"source": [
"df # Выведем на экран датафрейм df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 266
},
"id": "eolkgC-0jAi2",
"outputId": "9bdae116-fbd9-4181-9042-674be4fe0a9b"
},
"outputs": [],
"source": [
"df.first_name = df.first_name.fillna('Не заполнено') # Заменяем пропуски в столбце first_name на значение 'Не заполнено'\n",
"df # Выводим на экран датафрейм df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 266
},
"id": "pWhjhgeBjAi9",
"outputId": "b9d16d37-9b18-4db5-d27f-a32e176743d3"
},
"outputs": [],
"source": [
"df.last_name = df.last_name.fillna('Пропущено') # Заменяем пропуски в столбце last_name на значение 'Пропущено'\n",
"df # Выводим на экран датафрейм df"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "XCtORa5gjAjK"
},
"source": [
"Во всей табличке:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 266
},
"id": "NC3cBEusjAjL",
"outputId": "241287e4-9036-43bd-f633-89ad814504a9"
},
"outputs": [],
"source": [
"df = df.fillna('Неизвестно') # Заменяем пропуски во всем датафрейме на значение 'Неизвестно'\n",
"df # Выводим на экран датафрейм df"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Zau_QVx8jAjV"
},
"source": [
"Разными значениями в разных столбцах:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 266
},
"id": "QFNCSrSGjAjV",
"outputId": "fb3975c1-e389-47a1-b591-6fc1f0cdaf08"
},
"outputs": [],
"source": [
"df = names.merge(subjects, on='subject_id', how='outer') # Объединяем два датафрейма по столбцу subject_id (join-outer)\n",
"df # Выводим на экран ДатаФрейм df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"id": "-Vc-1RmSjAje",
"outputId": "0c800ab1-2ae5-4768-8fda-f1a288221c33"
},
"outputs": [],
"source": [
"val = ['Не заполнено', 'Пропущено', 'Неизвестный курс', 'Неизвестное описание'] # Создаем список со значениями, которые будут использоваться вместо пропусков\n",
"cols = df.columns.tolist() # Получаем названия столбцов датафрейма df\n",
"cols.pop(0) # Удаляем 0 столбец (соответствует subject_id)\n",
"print(cols) # Выводим на экран список cols"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 85
},
"id": "sS1LJWsrjAjm",
"outputId": "a01124db-f1f9-4d25-8f28-aabbb3617424"
},
"outputs": [],
"source": [
"# создаем словарь с меппингом названий столбцов к значениям, какими нужно заполнить в них пропуски\n",
"dict_to_fill = dict(zip(cols, val)) # Создаем словарь, в котором в качестве ключей будут значения списка cols, а в качестве соответствующих значений - значения списка val\n",
"dict_to_fill # Выводим на экран словарь dict_to_fill"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 266
},
"id": "9JythrBSjAjp",
"outputId": "32c04fc8-b218-40b4-bfd4-ea79cbe893b8"
},
"outputs": [],
"source": [
"df = df.fillna(dict_to_fill) # Заменяем пропуски в соответствии со словарем dict_to_fill\n",
"df # Выводим на экран датафрейм df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "5_aJnqtHjAjs"
},
"outputs": [],
"source": [
"## 5. DateTime index\n",
"\n",
"DateTime Index - это особый тип индекса в pandas, предназначенный для работы с временными рядами. \n",
"Давайте познакомимся с ним поближе. Для этой цели мы подгрузим известный стандартный датасет \"Occupancy Detection Data Set\", \n",
"он содержит следующие данные:\n",
"* дата и время замера\n",
"* температура в градусах по Цельсию\n",
"* относительная влажность в % \n",
"* освещенность в Люксах\n",
"* CO2 в ppm \n",
"* коэффициент влажности\n",
"* заполненность (1 - заполнено, 0 - не заполнено)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"id": "xICkBqa4jAjt",
"outputId": "37737fb0-d0e6-4bff-89b4-8c83c179886c"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>Temperature</th>\n",
" <th>Humidity</th>\n",
" <th>Light</th>\n",
" <th>CO2</th>\n",
" <th>HumidityRatio</th>\n",
" <th>Occupancy</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2015-02-04 17:51:00</td>\n",
" <td>23.18</td>\n",
" <td>27.2720</td>\n",
" <td>426.0</td>\n",
" <td>721.25</td>\n",
" <td>0.004793</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2015-02-04 17:51:59</td>\n",
" <td>23.15</td>\n",
" <td>27.2675</td>\n",
" <td>429.5</td>\n",
" <td>714.00</td>\n",
" <td>0.004783</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2015-02-04 17:53:00</td>\n",
" <td>23.15</td>\n",
" <td>27.2450</td>\n",
" <td>426.0</td>\n",
" <td>713.50</td>\n",
" <td>0.004779</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2015-02-04 17:54:00</td>\n",
" <td>23.15</td>\n",
" <td>27.2000</td>\n",
" <td>426.0</td>\n",
" <td>708.25</td>\n",
" <td>0.004772</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>2015-02-04 17:55:00</td>\n",
" <td>23.10</td>\n",
" <td>27.2000</td>\n",
" <td>426.0</td>\n",
" <td>704.50</td>\n",
" <td>0.004757</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" date Temperature Humidity Light CO2 HumidityRatio \\\n",
"1 2015-02-04 17:51:00 23.18 27.2720 426.0 721.25 0.004793 \n",
"2 2015-02-04 17:51:59 23.15 27.2675 429.5 714.00 0.004783 \n",
"3 2015-02-04 17:53:00 23.15 27.2450 426.0 713.50 0.004779 \n",
"4 2015-02-04 17:54:00 23.15 27.2000 426.0 708.25 0.004772 \n",
"5 2015-02-04 17:55:00 23.10 27.2000 426.0 704.50 0.004757 \n",
"\n",
" Occupancy \n",
"1 1 \n",
"2 1 \n",
"3 1 \n",
"4 1 \n",
"5 1 "
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataTraining = pd.read_csv('datatraining.txt') # Создаем датафрейм из файла datatraining.txt\n",
"dataTraining.head() # Отображаем первые 5 строк датафрейма dataTraining"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "eQHTSHF-jAjw"
},
"source": [
"Давайте превратим колонку с датой в datetime index:"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 235
},
"id": "zcJ96Gz4jAjx",
"outputId": "6929acd1-b62d-401b-c95f-994f43bc9af9"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>Temperature</th>\n",
" <th>Humidity</th>\n",
" <th>Light</th>\n",
" <th>CO2</th>\n",
" <th>HumidityRatio</th>\n",
" <th>Occupancy</th>\n",
" </tr>\n",
" <tr>\n",
" <th>date</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2015-02-04 17:51:00</th>\n",
" <td>2015-02-04 17:51:00</td>\n",
" <td>23.18</td>\n",
" <td>27.2720</td>\n",
" <td>426.0</td>\n",
" <td>721.25</td>\n",
" <td>0.004793</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-04 17:51:59</th>\n",
" <td>2015-02-04 17:51:59</td>\n",
" <td>23.15</td>\n",
" <td>27.2675</td>\n",
" <td>429.5</td>\n",
" <td>714.00</td>\n",
" <td>0.004783</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-04 17:53:00</th>\n",
" <td>2015-02-04 17:53:00</td>\n",
" <td>23.15</td>\n",
" <td>27.2450</td>\n",
" <td>426.0</td>\n",
" <td>713.50</td>\n",
" <td>0.004779</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-04 17:54:00</th>\n",
" <td>2015-02-04 17:54:00</td>\n",
" <td>23.15</td>\n",
" <td>27.2000</td>\n",
" <td>426.0</td>\n",
" <td>708.25</td>\n",
" <td>0.004772</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-04 17:55:00</th>\n",
" <td>2015-02-04 17:55:00</td>\n",
" <td>23.10</td>\n",
" <td>27.2000</td>\n",
" <td>426.0</td>\n",
" <td>704.50</td>\n",
" <td>0.004757</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" date Temperature Humidity Light \\\n",
"date \n",
"2015-02-04 17:51:00 2015-02-04 17:51:00 23.18 27.2720 426.0 \n",
"2015-02-04 17:51:59 2015-02-04 17:51:59 23.15 27.2675 429.5 \n",
"2015-02-04 17:53:00 2015-02-04 17:53:00 23.15 27.2450 426.0 \n",
"2015-02-04 17:54:00 2015-02-04 17:54:00 23.15 27.2000 426.0 \n",
"2015-02-04 17:55:00 2015-02-04 17:55:00 23.10 27.2000 426.0 \n",
"\n",
" CO2 HumidityRatio Occupancy \n",
"date \n",
"2015-02-04 17:51:00 721.25 0.004793 1 \n",
"2015-02-04 17:51:59 714.00 0.004783 1 \n",
"2015-02-04 17:53:00 713.50 0.004779 1 \n",
"2015-02-04 17:54:00 708.25 0.004772 1 \n",
"2015-02-04 17:55:00 704.50 0.004757 1 "
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataTraining.index = pd.to_datetime(dataTraining.date) # Превращаем колонку date в индекс\n",
"dataTraining.head() # Отображаем первые 5 строк датафрейма dataTraining"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 235
},
"id": "1Yb4I5iSEyag",
"outputId": "c00a1497-f528-4ab2-b267-5b7d5d315491"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Temperature</th>\n",
" <th>Humidity</th>\n",
" <th>Light</th>\n",
" <th>CO2</th>\n",
" <th>HumidityRatio</th>\n",
" <th>Occupancy</th>\n",
" </tr>\n",
" <tr>\n",
" <th>date</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2015-02-04 17:51:00</th>\n",
" <td>23.18</td>\n",
" <td>27.2720</td>\n",
" <td>426.0</td>\n",
" <td>721.25</td>\n",
" <td>0.004793</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-04 17:51:59</th>\n",
" <td>23.15</td>\n",
" <td>27.2675</td>\n",
" <td>429.5</td>\n",
" <td>714.00</td>\n",
" <td>0.004783</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-04 17:53:00</th>\n",
" <td>23.15</td>\n",
" <td>27.2450</td>\n",
" <td>426.0</td>\n",
" <td>713.50</td>\n",
" <td>0.004779</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-04 17:54:00</th>\n",
" <td>23.15</td>\n",
" <td>27.2000</td>\n",
" <td>426.0</td>\n",
" <td>708.25</td>\n",
" <td>0.004772</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-04 17:55:00</th>\n",
" <td>23.10</td>\n",
" <td>27.2000</td>\n",
" <td>426.0</td>\n",
" <td>704.50</td>\n",
" <td>0.004757</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Temperature Humidity Light CO2 HumidityRatio \\\n",
"date \n",
"2015-02-04 17:51:00 23.18 27.2720 426.0 721.25 0.004793 \n",
"2015-02-04 17:51:59 23.15 27.2675 429.5 714.00 0.004783 \n",
"2015-02-04 17:53:00 23.15 27.2450 426.0 713.50 0.004779 \n",
"2015-02-04 17:54:00 23.15 27.2000 426.0 708.25 0.004772 \n",
"2015-02-04 17:55:00 23.10 27.2000 426.0 704.50 0.004757 \n",
"\n",
" Occupancy \n",
"date \n",
"2015-02-04 17:51:00 1 \n",
"2015-02-04 17:51:59 1 \n",
"2015-02-04 17:53:00 1 \n",
"2015-02-04 17:54:00 1 \n",
"2015-02-04 17:55:00 1 "
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataTraining.drop('date', axis=1, inplace=True) # Удаляем колонку date из датафрейма\n",
"dataTraining.head() # Отображаем первые 5 строк датафрейма dataTraining"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "9xZ3hSlDjAj7"
},
"source": [
"Давайте посмотрим, что можно делать с данными, имеющими временной индекс."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "3_YBy_F0jAj8"
},
"source": [
"### 1. Подвыборки"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "1aYyxuXUjAj-",
"outputId": "f6584641-0f32-461a-f20b-3e7372f340bc"
},
"outputs": [
{
"data": {
"text/plain": [
"DatetimeIndex(['2015-02-04 17:51:00', '2015-02-04 17:51:59',\n",
" '2015-02-04 17:53:00', '2015-02-04 17:54:00',\n",
" '2015-02-04 17:55:00', '2015-02-04 17:55:59',\n",
" '2015-02-04 17:57:00', '2015-02-04 17:57:59',\n",
" '2015-02-04 17:58:59', '2015-02-04 18:00:00',\n",
" ...\n",
" '2015-02-10 09:23:59', '2015-02-10 09:24:59',\n",
" '2015-02-10 09:26:00', '2015-02-10 09:27:00',\n",
" '2015-02-10 09:28:00', '2015-02-10 09:29:00',\n",
" '2015-02-10 09:29:59', '2015-02-10 09:30:59',\n",
" '2015-02-10 09:32:00', '2015-02-10 09:33:00'],\n",
" dtype='datetime64[ns]', name='date', length=8143, freq=None)"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataTraining.index # Выведем на экран колонку индексов"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 450
},
"id": "aFOWuFuojAkC",
"outputId": "b611086c-2f28-4d4f-f0f4-d177f971fa71"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Temperature</th>\n",
" <th>Humidity</th>\n",
" <th>Light</th>\n",
" <th>CO2</th>\n",
" <th>HumidityRatio</th>\n",
" <th>Occupancy</th>\n",
" </tr>\n",
" <tr>\n",
" <th>date</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2015-02-04 17:51:00</th>\n",
" <td>23.18</td>\n",
" <td>27.2720</td>\n",
" <td>426.0</td>\n",
" <td>721.25</td>\n",
" <td>0.004793</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-04 17:51:59</th>\n",
" <td>23.15</td>\n",
" <td>27.2675</td>\n",
" <td>429.5</td>\n",
" <td>714.00</td>\n",
" <td>0.004783</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-04 17:53:00</th>\n",
" <td>23.15</td>\n",
" <td>27.2450</td>\n",
" <td>426.0</td>\n",
" <td>713.50</td>\n",
" <td>0.004779</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-04 17:54:00</th>\n",
" <td>23.15</td>\n",
" <td>27.2000</td>\n",
" <td>426.0</td>\n",
" <td>708.25</td>\n",
" <td>0.004772</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-04 17:55:00</th>\n",
" <td>23.10</td>\n",
" <td>27.2000</td>\n",
" <td>426.0</td>\n",
" <td>704.50</td>\n",
" <td>0.004757</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-06 23:55:59</th>\n",
" <td>20.00</td>\n",
" <td>18.7450</td>\n",
" <td>0.0</td>\n",
" <td>435.00</td>\n",
" <td>0.002703</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-06 23:57:00</th>\n",
" <td>20.00</td>\n",
" <td>18.7000</td>\n",
" <td>0.0</td>\n",
" <td>441.00</td>\n",
" <td>0.002696</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-06 23:57:59</th>\n",
" <td>20.00</td>\n",
" <td>18.7000</td>\n",
" <td>0.0</td>\n",
" <td>441.00</td>\n",
" <td>0.002696</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-06 23:58:59</th>\n",
" <td>20.00</td>\n",
" <td>18.7000</td>\n",
" <td>0.0</td>\n",
" <td>440.00</td>\n",
" <td>0.002696</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-07 00:00:00</th>\n",
" <td>20.00</td>\n",
" <td>18.7000</td>\n",
" <td>0.0</td>\n",
" <td>438.00</td>\n",
" <td>0.002696</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3250 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" Temperature Humidity Light CO2 HumidityRatio \\\n",
"date \n",
"2015-02-04 17:51:00 23.18 27.2720 426.0 721.25 0.004793 \n",
"2015-02-04 17:51:59 23.15 27.2675 429.5 714.00 0.004783 \n",
"2015-02-04 17:53:00 23.15 27.2450 426.0 713.50 0.004779 \n",
"2015-02-04 17:54:00 23.15 27.2000 426.0 708.25 0.004772 \n",
"2015-02-04 17:55:00 23.10 27.2000 426.0 704.50 0.004757 \n",
"... ... ... ... ... ... \n",
"2015-02-06 23:55:59 20.00 18.7450 0.0 435.00 0.002703 \n",
"2015-02-06 23:57:00 20.00 18.7000 0.0 441.00 0.002696 \n",
"2015-02-06 23:57:59 20.00 18.7000 0.0 441.00 0.002696 \n",
"2015-02-06 23:58:59 20.00 18.7000 0.0 440.00 0.002696 \n",
"2015-02-07 00:00:00 20.00 18.7000 0.0 438.00 0.002696 \n",
"\n",
" Occupancy \n",
"date \n",
"2015-02-04 17:51:00 1 \n",
"2015-02-04 17:51:59 1 \n",
"2015-02-04 17:53:00 1 \n",
"2015-02-04 17:54:00 1 \n",
"2015-02-04 17:55:00 1 \n",
"... ... \n",
"2015-02-06 23:55:59 0 \n",
"2015-02-06 23:57:00 0 \n",
"2015-02-06 23:57:59 0 \n",
"2015-02-06 23:58:59 0 \n",
"2015-02-07 00:00:00 0 \n",
"\n",
"[3250 rows x 6 columns]"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"period = dataTraining[(dataTraining.index>'2015-02-04') & (dataTraining.index<='2015-02-07')] # Формируем новый датафрейм period, в который запишем все данные с 4 по 7 февраля 2015 года\n",
"period # Выведем на экран датафрейм period"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 450
},
"id": "rcz2NirgjAkI",
"outputId": "5d4d6f64-5ac9-442e-cbc0-27ae3e6d67c6"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Temperature</th>\n",
" <th>Humidity</th>\n",
" <th>Light</th>\n",
" <th>CO2</th>\n",
" <th>HumidityRatio</th>\n",
" <th>Occupancy</th>\n",
" </tr>\n",
" <tr>\n",
" <th>date</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2015-02-04 17:51:00</th>\n",
" <td>23.18</td>\n",
" <td>27.2720</td>\n",
" <td>426.0</td>\n",
" <td>721.250000</td>\n",
" <td>0.004793</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-04 17:51:59</th>\n",
" <td>23.15</td>\n",
" <td>27.2675</td>\n",
" <td>429.5</td>\n",
" <td>714.000000</td>\n",
" <td>0.004783</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-04 17:53:00</th>\n",
" <td>23.15</td>\n",
" <td>27.2450</td>\n",
" <td>426.0</td>\n",
" <td>713.500000</td>\n",
" <td>0.004779</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-04 17:54:00</th>\n",
" <td>23.15</td>\n",
" <td>27.2000</td>\n",
" <td>426.0</td>\n",
" <td>708.250000</td>\n",
" <td>0.004772</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-04 17:55:00</th>\n",
" <td>23.10</td>\n",
" <td>27.2000</td>\n",
" <td>426.0</td>\n",
" <td>704.500000</td>\n",
" <td>0.004757</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-10 09:29:00</th>\n",
" <td>21.05</td>\n",
" <td>36.0975</td>\n",
" <td>433.0</td>\n",
" <td>787.250000</td>\n",
" <td>0.005579</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-10 09:29:59</th>\n",
" <td>21.05</td>\n",
" <td>35.9950</td>\n",
" <td>433.0</td>\n",
" <td>789.500000</td>\n",
" <td>0.005563</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-10 09:30:59</th>\n",
" <td>21.10</td>\n",
" <td>36.0950</td>\n",
" <td>433.0</td>\n",
" <td>798.500000</td>\n",
" <td>0.005596</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-10 09:32:00</th>\n",
" <td>21.10</td>\n",
" <td>36.2600</td>\n",
" <td>433.0</td>\n",
" <td>820.333333</td>\n",
" <td>0.005621</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-10 09:33:00</th>\n",
" <td>21.10</td>\n",
" <td>36.2000</td>\n",
" <td>447.0</td>\n",
" <td>821.000000</td>\n",
" <td>0.005612</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>8143 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" Temperature Humidity Light CO2 HumidityRatio \\\n",
"date \n",
"2015-02-04 17:51:00 23.18 27.2720 426.0 721.250000 0.004793 \n",
"2015-02-04 17:51:59 23.15 27.2675 429.5 714.000000 0.004783 \n",
"2015-02-04 17:53:00 23.15 27.2450 426.0 713.500000 0.004779 \n",
"2015-02-04 17:54:00 23.15 27.2000 426.0 708.250000 0.004772 \n",
"2015-02-04 17:55:00 23.10 27.2000 426.0 704.500000 0.004757 \n",
"... ... ... ... ... ... \n",
"2015-02-10 09:29:00 21.05 36.0975 433.0 787.250000 0.005579 \n",
"2015-02-10 09:29:59 21.05 35.9950 433.0 789.500000 0.005563 \n",
"2015-02-10 09:30:59 21.10 36.0950 433.0 798.500000 0.005596 \n",
"2015-02-10 09:32:00 21.10 36.2600 433.0 820.333333 0.005621 \n",
"2015-02-10 09:33:00 21.10 36.2000 447.0 821.000000 0.005612 \n",
"\n",
" Occupancy \n",
"date \n",
"2015-02-04 17:51:00 1 \n",
"2015-02-04 17:51:59 1 \n",
"2015-02-04 17:53:00 1 \n",
"2015-02-04 17:54:00 1 \n",
"2015-02-04 17:55:00 1 \n",
"... ... \n",
"2015-02-10 09:29:00 1 \n",
"2015-02-10 09:29:59 1 \n",
"2015-02-10 09:30:59 1 \n",
"2015-02-10 09:32:00 1 \n",
"2015-02-10 09:33:00 1 \n",
"\n",
"[8143 rows x 6 columns]"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"month_set = dataTraining[dataTraining.index.month == 2] # Формируем новый датафрейм month_set, в который запишем все данные по февралю любого года\n",
"month_set # Выведем на экран датафрейм month_set"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 450
},
"id": "QjMdNFlHjAkM",
"outputId": "6bbb6eee-1c3e-4309-ba55-77fa0ef0cb12"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Temperature</th>\n",
" <th>Humidity</th>\n",
" <th>Light</th>\n",
" <th>CO2</th>\n",
" <th>HumidityRatio</th>\n",
" <th>Occupancy</th>\n",
" </tr>\n",
" <tr>\n",
" <th>date</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2015-02-07 13:00:00</th>\n",
" <td>23.10</td>\n",
" <td>17.060000</td>\n",
" <td>268.000000</td>\n",
" <td>445.250000</td>\n",
" <td>0.002975</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-07 13:01:00</th>\n",
" <td>23.10</td>\n",
" <td>17.033333</td>\n",
" <td>268.250000</td>\n",
" <td>447.500000</td>\n",
" <td>0.002970</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-07 13:01:59</th>\n",
" <td>23.10</td>\n",
" <td>17.066667</td>\n",
" <td>266.000000</td>\n",
" <td>446.333333</td>\n",
" <td>0.002976</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-07 13:02:59</th>\n",
" <td>23.10</td>\n",
" <td>17.000000</td>\n",
" <td>270.250000</td>\n",
" <td>453.500000</td>\n",
" <td>0.002965</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-07 13:04:00</th>\n",
" <td>23.10</td>\n",
" <td>16.972500</td>\n",
" <td>268.500000</td>\n",
" <td>449.250000</td>\n",
" <td>0.002960</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-07 13:56:00</th>\n",
" <td>22.70</td>\n",
" <td>17.050000</td>\n",
" <td>144.750000</td>\n",
" <td>442.500000</td>\n",
" <td>0.002902</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-07 13:57:00</th>\n",
" <td>22.70</td>\n",
" <td>17.100000</td>\n",
" <td>115.000000</td>\n",
" <td>443.750000</td>\n",
" <td>0.002910</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-07 13:58:00</th>\n",
" <td>22.65</td>\n",
" <td>17.100000</td>\n",
" <td>131.000000</td>\n",
" <td>445.000000</td>\n",
" <td>0.002902</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-07 13:59:00</th>\n",
" <td>22.60</td>\n",
" <td>17.100000</td>\n",
" <td>140.000000</td>\n",
" <td>443.250000</td>\n",
" <td>0.002893</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-07 13:59:59</th>\n",
" <td>22.60</td>\n",
" <td>17.100000</td>\n",
" <td>167.666667</td>\n",
" <td>444.000000</td>\n",
" <td>0.002893</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>61 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" Temperature Humidity Light CO2 \\\n",
"date \n",
"2015-02-07 13:00:00 23.10 17.060000 268.000000 445.250000 \n",
"2015-02-07 13:01:00 23.10 17.033333 268.250000 447.500000 \n",
"2015-02-07 13:01:59 23.10 17.066667 266.000000 446.333333 \n",
"2015-02-07 13:02:59 23.10 17.000000 270.250000 453.500000 \n",
"2015-02-07 13:04:00 23.10 16.972500 268.500000 449.250000 \n",
"... ... ... ... ... \n",
"2015-02-07 13:56:00 22.70 17.050000 144.750000 442.500000 \n",
"2015-02-07 13:57:00 22.70 17.100000 115.000000 443.750000 \n",
"2015-02-07 13:58:00 22.65 17.100000 131.000000 445.000000 \n",
"2015-02-07 13:59:00 22.60 17.100000 140.000000 443.250000 \n",
"2015-02-07 13:59:59 22.60 17.100000 167.666667 444.000000 \n",
"\n",
" HumidityRatio Occupancy \n",
"date \n",
"2015-02-07 13:00:00 0.002975 0 \n",
"2015-02-07 13:01:00 0.002970 0 \n",
"2015-02-07 13:01:59 0.002976 0 \n",
"2015-02-07 13:02:59 0.002965 0 \n",
"2015-02-07 13:04:00 0.002960 0 \n",
"... ... ... \n",
"2015-02-07 13:56:00 0.002902 0 \n",
"2015-02-07 13:57:00 0.002910 0 \n",
"2015-02-07 13:58:00 0.002902 0 \n",
"2015-02-07 13:59:00 0.002893 0 \n",
"2015-02-07 13:59:59 0.002893 0 \n",
"\n",
"[61 rows x 6 columns]"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"day_set = dataTraining[(dataTraining.index.weekday == 5) & (dataTraining.index.hour == 13)] # Формируем новый датафрейм day_set, в который запишем данные за все субботы в промежуток с 13-00 до 13-59\n",
"day_set # Выведем на экран датафрейм day_set\n",
"# Значения индексов index.weekday может принимать значения от 0 до 6, где 0 - понедельник, 1 - вторник и т.д.\n",
"# Значения индексов index.day может принимать значения от 1 до 31, в соответствии с числом месяца."
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 450
},
"id": "TBnaueJ_jAkQ",
"outputId": "dcd979d7-c575-4b42-c172-73b294093857"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/wx/b8w5bvpx5hjdgyvqlfp38x940000gn/T/ipykernel_15726/247725088.py:1: FutureWarning: Indexing a DataFrame with a datetimelike index using a single string to slice the rows, like `frame[string]`, is deprecated and will be removed in a future version. Use `frame.loc[string]` instead.\n",
" dataTraining['2015-02-06'] # Отобразим данные за 6 февраля 2015 года\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Temperature</th>\n",
" <th>Humidity</th>\n",
" <th>Light</th>\n",
" <th>CO2</th>\n",
" <th>HumidityRatio</th>\n",
" <th>Occupancy</th>\n",
" </tr>\n",
" <tr>\n",
" <th>date</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2015-02-06 00:00:00</th>\n",
" <td>20.20</td>\n",
" <td>21.290</td>\n",
" <td>0.0</td>\n",
" <td>438.0</td>\n",
" <td>0.003110</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-06 00:01:00</th>\n",
" <td>20.20</td>\n",
" <td>21.200</td>\n",
" <td>0.0</td>\n",
" <td>439.0</td>\n",
" <td>0.003097</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-06 00:02:00</th>\n",
" <td>20.20</td>\n",
" <td>21.290</td>\n",
" <td>0.0</td>\n",
" <td>441.5</td>\n",
" <td>0.003110</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-06 00:03:00</th>\n",
" <td>20.20</td>\n",
" <td>21.290</td>\n",
" <td>0.0</td>\n",
" <td>444.0</td>\n",
" <td>0.003110</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-06 00:04:00</th>\n",
" <td>20.20</td>\n",
" <td>21.290</td>\n",
" <td>0.0</td>\n",
" <td>446.5</td>\n",
" <td>0.003110</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-06 23:55:00</th>\n",
" <td>19.89</td>\n",
" <td>18.790</td>\n",
" <td>0.0</td>\n",
" <td>441.5</td>\n",
" <td>0.002691</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-06 23:55:59</th>\n",
" <td>20.00</td>\n",
" <td>18.745</td>\n",
" <td>0.0</td>\n",
" <td>435.0</td>\n",
" <td>0.002703</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-06 23:57:00</th>\n",
" <td>20.00</td>\n",
" <td>18.700</td>\n",
" <td>0.0</td>\n",
" <td>441.0</td>\n",
" <td>0.002696</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-06 23:57:59</th>\n",
" <td>20.00</td>\n",
" <td>18.700</td>\n",
" <td>0.0</td>\n",
" <td>441.0</td>\n",
" <td>0.002696</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-06 23:58:59</th>\n",
" <td>20.00</td>\n",
" <td>18.700</td>\n",
" <td>0.0</td>\n",
" <td>440.0</td>\n",
" <td>0.002696</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1440 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" Temperature Humidity Light CO2 HumidityRatio \\\n",
"date \n",
"2015-02-06 00:00:00 20.20 21.290 0.0 438.0 0.003110 \n",
"2015-02-06 00:01:00 20.20 21.200 0.0 439.0 0.003097 \n",
"2015-02-06 00:02:00 20.20 21.290 0.0 441.5 0.003110 \n",
"2015-02-06 00:03:00 20.20 21.290 0.0 444.0 0.003110 \n",
"2015-02-06 00:04:00 20.20 21.290 0.0 446.5 0.003110 \n",
"... ... ... ... ... ... \n",
"2015-02-06 23:55:00 19.89 18.790 0.0 441.5 0.002691 \n",
"2015-02-06 23:55:59 20.00 18.745 0.0 435.0 0.002703 \n",
"2015-02-06 23:57:00 20.00 18.700 0.0 441.0 0.002696 \n",
"2015-02-06 23:57:59 20.00 18.700 0.0 441.0 0.002696 \n",
"2015-02-06 23:58:59 20.00 18.700 0.0 440.0 0.002696 \n",
"\n",
" Occupancy \n",
"date \n",
"2015-02-06 00:00:00 0 \n",
"2015-02-06 00:01:00 0 \n",
"2015-02-06 00:02:00 0 \n",
"2015-02-06 00:03:00 0 \n",
"2015-02-06 00:04:00 0 \n",
"... ... \n",
"2015-02-06 23:55:00 0 \n",
"2015-02-06 23:55:59 0 \n",
"2015-02-06 23:57:00 0 \n",
"2015-02-06 23:57:59 0 \n",
"2015-02-06 23:58:59 0 \n",
"\n",
"[1440 rows x 6 columns]"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show every row recorded on 6 February 2015.\n",
"# Row selection by a date string must go through .loc: the frame[string] form\n",
"# emits a FutureWarning on older pandas and is no longer supported in pandas 2.x.\n",
"dataTraining.loc['2015-02-06']"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 142
},
"id": "350rNvXEjAkS",
"outputId": "378a259e-82e2-4826-eac0-8db00d00c9d8"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Temperature</th>\n",
" <th>Humidity</th>\n",
" <th>Light</th>\n",
" <th>CO2</th>\n",
" <th>HumidityRatio</th>\n",
" <th>Occupancy</th>\n",
" </tr>\n",
" <tr>\n",
" <th>date</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2015-02-08 14:55:00</th>\n",
" <td>20.290</td>\n",
" <td>26.2</td>\n",
" <td>85.666667</td>\n",
" <td>418.666667</td>\n",
" <td>0.003853</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02-08 14:55:59</th>\n",
" <td>20.315</td>\n",
" <td>26.2</td>\n",
" <td>59.250000</td>\n",
" <td>422.500000</td>\n",
" <td>0.003859</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Temperature Humidity Light CO2 \\\n",
"date \n",
"2015-02-08 14:55:00 20.290 26.2 85.666667 418.666667 \n",
"2015-02-08 14:55:59 20.315 26.2 59.250000 422.500000 \n",
"\n",
" HumidityRatio Occupancy \n",
"date \n",
"2015-02-08 14:55:00 0.003853 0 \n",
"2015-02-08 14:55:59 0.003859 0 "
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the rows from 14:55 through 14:56 on 8 February 2015 (label slices include both endpoints).\n",
"dataTraining.loc['2015-02-08 14:55':'2015-02-08 14:56']"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "C8xqSqQujAmh"
},
"source": [
"## 6. Встроенные статистические функции\n",
"\n",
"Кроме уже известных и понятных нам суммы и среднего, есть и другие функции. Остановимся на них подробнее."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "hR8AlUzejAmh"
},
"source": [
"### 1. Корреляция\n"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt  # plt is the conventional alias for the pyplot module, not for the matplotlib package\n",
"import seaborn"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "M7H3eXu0jAmi"
},
"source": [
"Обычный коэффициент корреляции Пирсона"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 235
},
"id": "IJLYQkG0jAmj",
"outputId": "bb23dcea-6f7d-47d8-9c5f-90e39eb25cfa"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Temperature</th>\n",
" <th>Humidity</th>\n",
" <th>Light</th>\n",
" <th>CO2</th>\n",
" <th>HumidityRatio</th>\n",
" <th>Occupancy</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Temperature</th>\n",
" <td>1.000000</td>\n",
" <td>-0.141759</td>\n",
" <td>0.649942</td>\n",
" <td>0.559894</td>\n",
" <td>0.151762</td>\n",
" <td>0.538220</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Humidity</th>\n",
" <td>-0.141759</td>\n",
" <td>1.000000</td>\n",
" <td>0.037828</td>\n",
" <td>0.439023</td>\n",
" <td>0.955198</td>\n",
" <td>0.132964</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Light</th>\n",
" <td>0.649942</td>\n",
" <td>0.037828</td>\n",
" <td>1.000000</td>\n",
" <td>0.664022</td>\n",
" <td>0.230420</td>\n",
" <td>0.907352</td>\n",
" </tr>\n",
" <tr>\n",
" <th>CO2</th>\n",
" <td>0.559894</td>\n",
" <td>0.439023</td>\n",
" <td>0.664022</td>\n",
" <td>1.000000</td>\n",
" <td>0.626556</td>\n",
" <td>0.712235</td>\n",
" </tr>\n",
" <tr>\n",
" <th>HumidityRatio</th>\n",
" <td>0.151762</td>\n",
" <td>0.955198</td>\n",
" <td>0.230420</td>\n",
" <td>0.626556</td>\n",
" <td>1.000000</td>\n",
" <td>0.300282</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Occupancy</th>\n",
" <td>0.538220</td>\n",
" <td>0.132964</td>\n",
" <td>0.907352</td>\n",
" <td>0.712235</td>\n",
" <td>0.300282</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Temperature Humidity Light CO2 HumidityRatio \\\n",
"Temperature 1.000000 -0.141759 0.649942 0.559894 0.151762 \n",
"Humidity -0.141759 1.000000 0.037828 0.439023 0.955198 \n",
"Light 0.649942 0.037828 1.000000 0.664022 0.230420 \n",
"CO2 0.559894 0.439023 0.664022 1.000000 0.626556 \n",
"HumidityRatio 0.151762 0.955198 0.230420 0.626556 1.000000 \n",
"Occupancy 0.538220 0.132964 0.907352 0.712235 0.300282 \n",
"\n",
" Occupancy \n",
"Temperature 0.538220 \n",
"Humidity 0.132964 \n",
"Light 0.907352 \n",
"CO2 0.712235 \n",
"HumidityRatio 0.300282 \n",
"Occupancy 1.000000 "
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataTraining.corr() # Выведем матрицу корреляции для датафрейма dataTraining"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<Axes: >"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Draw the correlation matrix as a heatmap: annotate each cell with its coefficient and pin the\n",
"# colour scale to [-1, 1] so colours are comparable between runs.\n",
"# The trailing semicolon suppresses the `<Axes: >` repr in the cell output.\n",
"seaborn.heatmap(dataTraining.corr(), annot=True, fmt='.2f', vmin=-1, vmax=1);"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "qG6LB7yDjAmx"
},
"source": [
"### 2. Медиана"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 136
},
"id": "YMbWszYgjAmx",
"outputId": "d66c8b8e-f38a-43f3-cfbd-93178a035139"
},
"outputs": [],
"source": [
"dataTraining.median() # Выведем медиану для датафрейма dataTraining"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "yRYKb92DjAm5"
},
"source": [
"### 4. Уникальные значения и их количество"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 136
},
"id": "BlNFl4MxjAm9",
"outputId": "68433809-3603-42f7-cdec-aa4594cfe6f2"
},
"outputs": [],
"source": [
"dataTraining.nunique() # Выведем количество уникальных значений для всех столбцов датафрейма dataTraining"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"id": "4EKcGjQKjAnA",
"outputId": "08915e97-77d3-4b26-e040-aa301943ec15"
},
"outputs": [],
"source": [
"dataTraining.Occupancy.unique() # Выведем уникальные значения для столбца Occupancy датафрейма dataTraining"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "8F6G2AUnKFCA"
},
"source": [
"# Глоссарий\n",
"\n",
"\n",
"pd.DataFrame(данные, columns = [колонки, если есть], index = [индексы ,если есть]) - создать датафрейм\n",
"\n",
"pd.read_csv(полный адрес расположения файла) - открыть .csv файл\n",
"\n",
"------------\n",
"\n",
".head() - посмотреть верхушку датафрейма (первые n строк)\n",
"\n",
".tail() - посмотреть конец датафрейма (последние n строк)\n",
"\n",
".columns - список колонок датафрейма\n",
"\n",
".values - вывести массив всех значений датафрейма\n",
"\n",
".index - список индексов датафрейма\n",
"\n",
".tolist() - перевести в список\n",
"\n",
".count() - посчитать количество непустых (не NaN) значений в каждой колонке фрейма\n",
"\n",
".describe() - посмотреть основные статистические характеристики фрейма\n",
"\n",
".shape - форма фрейма (строки, колонки)\n",
"\n",
".size - размер фрейма строки*колонки\n",
"\n",
".info() - информация о данных каждой колонки\n",
"\n",
".dtypes - тип данных каждой колонки\n",
"\n",
".isnull() - булева маска пропущенных значений (NaN/None)\n",
"\n",
".isna() - то же самое, что .isnull() (методы-синонимы)\n",
"\n",
".dropna() - выкинуть строки/колонки с пропущенными значениями (NaN/None)\n",
"\n",
".fillna() - заполнить заданным значением ячейки, где есть пропуски (NaN/None)\n",
"\n",
".loc[] - выбрать данные по меткам (названиям) строк и колонок\n",
"\n",
".iloc[] - выбрать данные по целочисленным позициям строк и колонок\n",
"\n",
".drop() - удалить указанные строки или колонки\n",
"\n",
"--------------\n",
"\n",
"pd.to_datetime(колонка, которую переводим в формат временного ряда)\n",
"\n",
".groupby() - сгруппировать по конкретному признаку\n",
"\n",
".copy() - создать копию\n",
"\n",
".sort_values() - сортировка значений\n",
"\n",
"pd.concat([df1,df2]) - конкатенация фреймов\n",
"\n",
".merge(второй_датафрейм, on = 'общая колонка, по которой склеиваем', how = 'тип соединения: left/right/inner/outer') - объединение фреймов по общему признаку\n",
"\n",
"-------------\n",
"\n",
"\n",
".corr() - вычислить корреляцию\n",
"\n",
".median() - вычислить медиану\n",
"\n",
".cumsum() - вычислить кумулятивную сумму\n",
"\n",
".cumprod() - вычислить кумулятивное произведение\n",
"\n",
".cummax() - вычислить кумулятивный максимум\n",
"\n",
"-------------\n",
"\n",
".quantile([]) - вычислить квантили\n",
"\n",
".nunique() - количество уникальных значений в каждой колонке/строке\n",
"\n",
".unique() - массив уникальных значений конкретной колонки (Series)\n",
"\n",
"------------\n",
"\n",
".apply(функция) - применить функцию для колонки/строки\n",
"\n",
".agg(набор функций) - применить несколько функций к колонке/строке\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Ku19ab06hkRZ"
},
"outputs": [],
"source": []
}
],
"metadata": {
"colab": {
"collapsed_sections": [
"_mybfuLlhkzU",
"0Chntu25WIF2",
"Af1_T2N8jAfF",
"X6KDR8HPjAf3",
"C8xqSqQujAmh",
"hR8AlUzejAmh",
"qG6LB7yDjAmx",
"xzJAnTajjAm3",
"yRYKb92DjAm5",
"Idy8pVuvjAnD",
"G_jD-MltjAnR",
"9TdHEC0ajAng",
"3ANmDztPdeNy"
],
"provenance": []
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 1
}