3237 lines
138 KiB
Plaintext
3237 lines
138 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "5f7c9658-c285-4854-96c0-e899fc55421b",
|
||
"metadata": {},
|
||
"source": [
|
||
"# DM project: cheese"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 73,
|
||
"id": "7f4f2b89-8257-468c-9f5e-a77e11b8b8ff",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"from mlxtend.preprocessing import TransactionEncoder\n",
|
||
"from mlxtend.frequent_patterns import apriori\n",
|
||
"import geopy\n",
|
||
"import matplotlib.pyplot as plt"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "38096d49-fe67-4c60-a05e-8ad8c442b19c",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"id": "1a0afba8-692b-4377-a2ce-5114983e3bbb",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Read the cheese dataset from cheeses.csv (path is relative to the working directory)\n",
"data = pd.read_csv(\"cheeses.csv\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"id": "06f7d328-7b6a-4193-b155-3d47519a1e9a",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>cheese</th>\n",
|
||
" <th>url</th>\n",
|
||
" <th>milk</th>\n",
|
||
" <th>country</th>\n",
|
||
" <th>region</th>\n",
|
||
" <th>family</th>\n",
|
||
" <th>type</th>\n",
|
||
" <th>fat_content</th>\n",
|
||
" <th>calcium_content</th>\n",
|
||
" <th>texture</th>\n",
|
||
" <th>rind</th>\n",
|
||
" <th>color</th>\n",
|
||
" <th>flavor</th>\n",
|
||
" <th>aroma</th>\n",
|
||
" <th>vegetarian</th>\n",
|
||
" <th>vegan</th>\n",
|
||
" <th>synonyms</th>\n",
|
||
" <th>alt_spellings</th>\n",
|
||
" <th>producers</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>Aarewasser</td>\n",
|
||
" <td>https://www.cheese.com/aarewasser/</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>Switzerland</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-soft</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>buttery</td>\n",
|
||
" <td>washed</td>\n",
|
||
" <td>yellow</td>\n",
|
||
" <td>sweet</td>\n",
|
||
" <td>buttery</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Jumi</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>Abbaye de Belloc</td>\n",
|
||
" <td>https://www.cheese.com/abbaye-de-belloc/</td>\n",
|
||
" <td>sheep</td>\n",
|
||
" <td>France</td>\n",
|
||
" <td>Pays Basque</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard, artisan</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>creamy, dense, firm</td>\n",
|
||
" <td>natural</td>\n",
|
||
" <td>yellow</td>\n",
|
||
" <td>burnt caramel</td>\n",
|
||
" <td>lanoline</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>Abbaye Notre-Dame de Belloc</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>Abbaye de Belval</td>\n",
|
||
" <td>https://www.cheese.com/abbaye-de-belval/</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>France</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard</td>\n",
|
||
" <td>40-46%</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>elastic</td>\n",
|
||
" <td>washed</td>\n",
|
||
" <td>ivory</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>aromatic</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>Abbaye de Citeaux</td>\n",
|
||
" <td>https://www.cheese.com/abbaye-de-citeaux/</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>France</td>\n",
|
||
" <td>Burgundy</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-soft, artisan, brined</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>creamy, dense, smooth</td>\n",
|
||
" <td>washed</td>\n",
|
||
" <td>white</td>\n",
|
||
" <td>acidic, milky, smooth</td>\n",
|
||
" <td>barnyardy, earthy</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>Abbaye de Tamié</td>\n",
|
||
" <td>https://www.cheese.com/tamie/</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>France</td>\n",
|
||
" <td>Savoie</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>soft, artisan</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>creamy, open, smooth</td>\n",
|
||
" <td>washed</td>\n",
|
||
" <td>white</td>\n",
|
||
" <td>fruity, nutty</td>\n",
|
||
" <td>perfumed, pungent</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Tamié, Trappiste de Tamie, Abbey of Tamie</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1182</th>\n",
|
||
" <td>Sveciaost</td>\n",
|
||
" <td>https://www.cheese.com/sveciaost/</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>Sweden</td>\n",
|
||
" <td>Low-laying regions</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard, brined</td>\n",
|
||
" <td>45%</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>creamy, supple</td>\n",
|
||
" <td>rindless</td>\n",
|
||
" <td>pale yellow</td>\n",
|
||
" <td>acidic</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1183</th>\n",
|
||
" <td>Swag</td>\n",
|
||
" <td>https://www.cheese.com/swag/</td>\n",
|
||
" <td>goat</td>\n",
|
||
" <td>Australia</td>\n",
|
||
" <td>South Australia</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fresh firm, artisan</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>creamy, crumbly</td>\n",
|
||
" <td>ash coated</td>\n",
|
||
" <td>white</td>\n",
|
||
" <td>acidic, creamy</td>\n",
|
||
" <td>fresh</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Woodside Cheese Wrights</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1184</th>\n",
|
||
" <td>Swaledale</td>\n",
|
||
" <td>https://www.cheese.com/swaledale/</td>\n",
|
||
" <td>sheep</td>\n",
|
||
" <td>England</td>\n",
|
||
" <td>Swaledale, North Yorkshire</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>hard</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi firm</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>yellow</td>\n",
|
||
" <td>smooth, sweet</td>\n",
|
||
" <td>floral</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>Swaledale Sheep Cheese</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1185</th>\n",
|
||
" <td>Sweet Style Swiss</td>\n",
|
||
" <td>https://www.cheese.com/sweet-style-swiss/</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Switzerland</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard, artisan</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>firm, supple</td>\n",
|
||
" <td>waxed</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>nutty</td>\n",
|
||
" <td>nutty, sweet</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1186</th>\n",
|
||
" <td>Swiss cheese</td>\n",
|
||
" <td>https://www.cheese.com/swiss/</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>United States</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Swiss Cheese</td>\n",
|
||
" <td>hard, artisan, processed</td>\n",
|
||
" <td>7.8 g/100g</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>firm</td>\n",
|
||
" <td>rindless</td>\n",
|
||
" <td>pale yellow</td>\n",
|
||
" <td>nutty, sweet</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>American Swiss Cheese</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Various</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>1187 rows × 19 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" cheese url milk \\\n",
|
||
"0 Aarewasser https://www.cheese.com/aarewasser/ cow \n",
|
||
"1 Abbaye de Belloc https://www.cheese.com/abbaye-de-belloc/ sheep \n",
|
||
"2 Abbaye de Belval https://www.cheese.com/abbaye-de-belval/ cow \n",
|
||
"3 Abbaye de Citeaux https://www.cheese.com/abbaye-de-citeaux/ cow \n",
|
||
"4 Abbaye de Tamié https://www.cheese.com/tamie/ cow \n",
|
||
"... ... ... ... \n",
|
||
"1182 Sveciaost https://www.cheese.com/sveciaost/ cow \n",
|
||
"1183 Swag https://www.cheese.com/swag/ goat \n",
|
||
"1184 Swaledale https://www.cheese.com/swaledale/ sheep \n",
|
||
"1185 Sweet Style Swiss https://www.cheese.com/sweet-style-swiss/ NaN \n",
|
||
"1186 Swiss cheese https://www.cheese.com/swiss/ cow \n",
|
||
"\n",
|
||
" country region family \\\n",
|
||
"0 Switzerland NaN NaN \n",
|
||
"1 France Pays Basque NaN \n",
|
||
"2 France NaN NaN \n",
|
||
"3 France Burgundy NaN \n",
|
||
"4 France Savoie NaN \n",
|
||
"... ... ... ... \n",
|
||
"1182 Sweden Low-laying regions NaN \n",
|
||
"1183 Australia South Australia NaN \n",
|
||
"1184 England Swaledale, North Yorkshire NaN \n",
|
||
"1185 Switzerland NaN NaN \n",
|
||
"1186 United States NaN Swiss Cheese \n",
|
||
"\n",
|
||
" type fat_content calcium_content \\\n",
|
||
"0 semi-soft NaN NaN \n",
|
||
"1 semi-hard, artisan NaN NaN \n",
|
||
"2 semi-hard 40-46% NaN \n",
|
||
"3 semi-soft, artisan, brined NaN NaN \n",
|
||
"4 soft, artisan NaN NaN \n",
|
||
"... ... ... ... \n",
|
||
"1182 semi-hard, brined 45% NaN \n",
|
||
"1183 fresh firm, artisan NaN NaN \n",
|
||
"1184 hard NaN NaN \n",
|
||
"1185 semi-hard, artisan NaN NaN \n",
|
||
"1186 hard, artisan, processed 7.8 g/100g NaN \n",
|
||
"\n",
|
||
" texture rind color flavor \\\n",
|
||
"0 buttery washed yellow sweet \n",
|
||
"1 creamy, dense, firm natural yellow burnt caramel \n",
|
||
"2 elastic washed ivory NaN \n",
|
||
"3 creamy, dense, smooth washed white acidic, milky, smooth \n",
|
||
"4 creamy, open, smooth washed white fruity, nutty \n",
|
||
"... ... ... ... ... \n",
|
||
"1182 creamy, supple rindless pale yellow acidic \n",
|
||
"1183 creamy, crumbly ash coated white acidic, creamy \n",
|
||
"1184 semi firm NaN yellow smooth, sweet \n",
|
||
"1185 firm, supple waxed NaN nutty \n",
|
||
"1186 firm rindless pale yellow nutty, sweet \n",
|
||
"\n",
|
||
" aroma vegetarian vegan synonyms \\\n",
|
||
"0 buttery False False NaN \n",
|
||
"1 lanoline True False Abbaye Notre-Dame de Belloc \n",
|
||
"2 aromatic False False NaN \n",
|
||
"3 barnyardy, earthy False False NaN \n",
|
||
"4 perfumed, pungent False False NaN \n",
|
||
"... ... ... ... ... \n",
|
||
"1182 NaN False False NaN \n",
|
||
"1183 fresh True False NaN \n",
|
||
"1184 floral True False Swaledale Sheep Cheese \n",
|
||
"1185 nutty, sweet False False NaN \n",
|
||
"1186 NaN True False American Swiss Cheese \n",
|
||
"\n",
|
||
" alt_spellings producers \n",
|
||
"0 NaN Jumi \n",
|
||
"1 NaN NaN \n",
|
||
"2 NaN NaN \n",
|
||
"3 NaN NaN \n",
|
||
"4 Tamié, Trappiste de Tamie, Abbey of Tamie NaN \n",
|
||
"... ... ... \n",
|
||
"1182 NaN NaN \n",
|
||
"1183 NaN Woodside Cheese Wrights \n",
|
||
"1184 NaN NaN \n",
|
||
"1185 NaN NaN \n",
|
||
"1186 NaN Various \n",
|
||
"\n",
|
||
"[1187 rows x 19 columns]"
|
||
]
|
||
},
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Preview the loaded frame; pandas shows only the first and last rows of the 1187\n",
"data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "7d354e9e-3abc-4d6a-9aea-a00a92ce3c3c",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"id": "2018aac2-6f3d-489a-b5d0-90b7c7793076",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"{'blue',\n",
|
||
" 'blue-grey',\n",
|
||
" 'brown',\n",
|
||
" 'brownish yellow',\n",
|
||
" 'cream',\n",
|
||
" 'golden orange',\n",
|
||
" 'golden yellow',\n",
|
||
" 'green',\n",
|
||
" 'ivory',\n",
|
||
" nan,\n",
|
||
" 'orange',\n",
|
||
" 'pale white',\n",
|
||
" 'pale yellow',\n",
|
||
" 'pink and white',\n",
|
||
" 'red',\n",
|
||
" 'straw',\n",
|
||
" 'white',\n",
|
||
" 'yellow'}"
|
||
]
|
||
},
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Distinct entries of the color column; missing values show up as nan in the set\n",
"{*data[\"color\"]}"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"id": "71b1853e-d2a4-4afe-9ad0-4f15689f2e23",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>cheese</th>\n",
|
||
" <th>url</th>\n",
|
||
" <th>milk</th>\n",
|
||
" <th>country</th>\n",
|
||
" <th>region</th>\n",
|
||
" <th>family</th>\n",
|
||
" <th>type</th>\n",
|
||
" <th>fat_content</th>\n",
|
||
" <th>calcium_content</th>\n",
|
||
" <th>texture</th>\n",
|
||
" <th>rind</th>\n",
|
||
" <th>color</th>\n",
|
||
" <th>flavor</th>\n",
|
||
" <th>aroma</th>\n",
|
||
" <th>vegetarian</th>\n",
|
||
" <th>vegan</th>\n",
|
||
" <th>synonyms</th>\n",
|
||
" <th>alt_spellings</th>\n",
|
||
" <th>producers</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>10</th>\n",
|
||
" <td>Acapella</td>\n",
|
||
" <td>https://www.cheese.com/acapella/</td>\n",
|
||
" <td>goat</td>\n",
|
||
" <td>United States</td>\n",
|
||
" <td>California</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>soft, soft-ripened</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>buttery</td>\n",
|
||
" <td>fresh, herbal</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>13</th>\n",
|
||
" <td>Acorn</td>\n",
|
||
" <td>https://www.cheese.com/acorn/</td>\n",
|
||
" <td>sheep</td>\n",
|
||
" <td>United Kingdom</td>\n",
|
||
" <td>Bethania</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>hard, artisan</td>\n",
|
||
" <td>52%</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>crumbly, firm</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>burnt caramel, citrusy, herbaceous</td>\n",
|
||
" <td>fruity</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>19</th>\n",
|
||
" <td>Afuega'l Pitu</td>\n",
|
||
" <td>https://www.cheese.com/afuegal-pitu/</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>Spain</td>\n",
|
||
" <td>Asturias</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>soft, artisan</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>smooth</td>\n",
|
||
" <td>cloth wrapped</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>spicy, strong</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>48</th>\n",
|
||
" <td>Alpe di Frabosa</td>\n",
|
||
" <td>https://www.cheese.com/alpe-di-frabosa/</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>Italy</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-soft</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>bitter</td>\n",
|
||
" <td>milky, mushroom</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>50</th>\n",
|
||
" <td>Alpicrème</td>\n",
|
||
" <td>https://www.cheese.com/alpicreme/</td>\n",
|
||
" <td>goat</td>\n",
|
||
" <td>France</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>soft</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1172</th>\n",
|
||
" <td>Strathdon Blue</td>\n",
|
||
" <td>https://www.cheese.com/strathdon-blue/</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>Scotland</td>\n",
|
||
" <td>Tain</td>\n",
|
||
" <td>Blue</td>\n",
|
||
" <td>semi-soft</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>creamy</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>creamy, spicy</td>\n",
|
||
" <td>aromatic, rich</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Highland Fine Cheeses Limited</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1175</th>\n",
|
||
" <td>String Cheese</td>\n",
|
||
" <td>https://www.cheese.com/string/</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>chewy, firm, stringy</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1177</th>\n",
|
||
" <td>Sulguni</td>\n",
|
||
" <td>https://www.cheese.com/sulguni/</td>\n",
|
||
" <td>buffalo, cow</td>\n",
|
||
" <td>Georgia</td>\n",
|
||
" <td>Svaneti, Samegrelo</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-firm</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>dense, elastic</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>salty, smokey , sour</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Georgian Pickle Cheese</td>\n",
|
||
" <td>Megruli Sulguni, Shebolili Megruli Sulguni</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1181</th>\n",
|
||
" <td>Sussex Slipcote</td>\n",
|
||
" <td>https://www.cheese.com/sussex-slipcote/</td>\n",
|
||
" <td>sheep</td>\n",
|
||
" <td>England</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>soft</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>sharp</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>High Weald Dairy</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1185</th>\n",
|
||
" <td>Sweet Style Swiss</td>\n",
|
||
" <td>https://www.cheese.com/sweet-style-swiss/</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Switzerland</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard, artisan</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>firm, supple</td>\n",
|
||
" <td>waxed</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>nutty</td>\n",
|
||
" <td>nutty, sweet</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>142 rows × 19 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" cheese url \\\n",
|
||
"10 Acapella https://www.cheese.com/acapella/ \n",
|
||
"13 Acorn https://www.cheese.com/acorn/ \n",
|
||
"19 Afuega'l Pitu https://www.cheese.com/afuegal-pitu/ \n",
|
||
"48 Alpe di Frabosa https://www.cheese.com/alpe-di-frabosa/ \n",
|
||
"50 Alpicrème https://www.cheese.com/alpicreme/ \n",
|
||
"... ... ... \n",
|
||
"1172 Strathdon Blue https://www.cheese.com/strathdon-blue/ \n",
|
||
"1175 String Cheese https://www.cheese.com/string/ \n",
|
||
"1177 Sulguni https://www.cheese.com/sulguni/ \n",
|
||
"1181 Sussex Slipcote https://www.cheese.com/sussex-slipcote/ \n",
|
||
"1185 Sweet Style Swiss https://www.cheese.com/sweet-style-swiss/ \n",
|
||
"\n",
|
||
" milk country region family \\\n",
|
||
"10 goat United States California NaN \n",
|
||
"13 sheep United Kingdom Bethania NaN \n",
|
||
"19 cow Spain Asturias NaN \n",
|
||
"48 cow Italy NaN NaN \n",
|
||
"50 goat France NaN NaN \n",
|
||
"... ... ... ... ... \n",
|
||
"1172 cow Scotland Tain Blue \n",
|
||
"1175 NaN NaN NaN NaN \n",
|
||
"1177 buffalo, cow Georgia Svaneti, Samegrelo NaN \n",
|
||
"1181 sheep England NaN NaN \n",
|
||
"1185 NaN Switzerland NaN NaN \n",
|
||
"\n",
|
||
" type fat_content calcium_content texture \\\n",
|
||
"10 soft, soft-ripened NaN NaN NaN \n",
|
||
"13 hard, artisan 52% NaN crumbly, firm \n",
|
||
"19 soft, artisan NaN NaN smooth \n",
|
||
"48 semi-soft NaN NaN NaN \n",
|
||
"50 soft NaN NaN NaN \n",
|
||
"... ... ... ... ... \n",
|
||
"1172 semi-soft NaN NaN creamy \n",
|
||
"1175 semi-hard NaN NaN chewy, firm, stringy \n",
|
||
"1177 semi-firm NaN NaN dense, elastic \n",
|
||
"1181 soft NaN NaN NaN \n",
|
||
"1185 semi-hard, artisan NaN NaN firm, supple \n",
|
||
"\n",
|
||
" rind color flavor \\\n",
|
||
"10 NaN NaN buttery \n",
|
||
"13 NaN NaN burnt caramel, citrusy, herbaceous \n",
|
||
"19 cloth wrapped NaN spicy, strong \n",
|
||
"48 NaN NaN bitter \n",
|
||
"50 NaN NaN NaN \n",
|
||
"... ... ... ... \n",
|
||
"1172 NaN NaN creamy, spicy \n",
|
||
"1175 NaN NaN NaN \n",
|
||
"1177 NaN NaN salty, smokey , sour \n",
|
||
"1181 NaN NaN sharp \n",
|
||
"1185 waxed NaN nutty \n",
|
||
"\n",
|
||
" aroma vegetarian vegan synonyms \\\n",
|
||
"10 fresh, herbal False False NaN \n",
|
||
"13 fruity True False NaN \n",
|
||
"19 NaN False False NaN \n",
|
||
"48 milky, mushroom False False NaN \n",
|
||
"50 NaN False False NaN \n",
|
||
"... ... ... ... ... \n",
|
||
"1172 aromatic, rich True False NaN \n",
|
||
"1175 NaN NaN NaN NaN \n",
|
||
"1177 NaN NaN NaN Georgian Pickle Cheese \n",
|
||
"1181 NaN True False NaN \n",
|
||
"1185 nutty, sweet False False NaN \n",
|
||
"\n",
|
||
" alt_spellings \\\n",
|
||
"10 NaN \n",
|
||
"13 NaN \n",
|
||
"19 NaN \n",
|
||
"48 NaN \n",
|
||
"50 NaN \n",
|
||
"... ... \n",
|
||
"1172 NaN \n",
|
||
"1175 NaN \n",
|
||
"1177 Megruli Sulguni, Shebolili Megruli Sulguni \n",
|
||
"1181 NaN \n",
|
||
"1185 NaN \n",
|
||
"\n",
|
||
" producers \n",
|
||
"10 NaN \n",
|
||
"13 NaN \n",
|
||
"19 NaN \n",
|
||
"48 NaN \n",
|
||
"50 NaN \n",
|
||
"... ... \n",
|
||
"1172 Highland Fine Cheeses Limited \n",
|
||
"1175 NaN \n",
|
||
"1177 NaN \n",
|
||
"1181 High Weald Dairy \n",
|
||
"1185 NaN \n",
|
||
"\n",
|
||
"[142 rows x 19 columns]"
|
||
]
|
||
},
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Rows whose color value is missing\n",
"data.loc[data[\"color\"].isna()]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "bf3b548c-5ac4-4126-9ae9-5578ad158015",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Cleaning"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"id": "fb5ddb0a-8551-4e7c-971e-00a819ebb4b3",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>cheese</th>\n",
|
||
" <th>url</th>\n",
|
||
" <th>milk</th>\n",
|
||
" <th>country</th>\n",
|
||
" <th>region</th>\n",
|
||
" <th>family</th>\n",
|
||
" <th>type</th>\n",
|
||
" <th>fat_content</th>\n",
|
||
" <th>calcium_content</th>\n",
|
||
" <th>texture</th>\n",
|
||
" <th>rind</th>\n",
|
||
" <th>color</th>\n",
|
||
" <th>flavor</th>\n",
|
||
" <th>aroma</th>\n",
|
||
" <th>vegetarian</th>\n",
|
||
" <th>vegan</th>\n",
|
||
" <th>synonyms</th>\n",
|
||
" <th>alt_spellings</th>\n",
|
||
" <th>producers</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>10</th>\n",
|
||
" <td>Acapella</td>\n",
|
||
" <td>https://www.cheese.com/acapella/</td>\n",
|
||
" <td>goat</td>\n",
|
||
" <td>United States</td>\n",
|
||
" <td>California</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>soft, soft-ripened</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>buttery</td>\n",
|
||
" <td>fresh, herbal</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>13</th>\n",
|
||
" <td>Acorn</td>\n",
|
||
" <td>https://www.cheese.com/acorn/</td>\n",
|
||
" <td>sheep</td>\n",
|
||
" <td>United Kingdom</td>\n",
|
||
" <td>Bethania</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>hard, artisan</td>\n",
|
||
" <td>52%</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>crumbly, firm</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>burnt caramel, citrusy, herbaceous</td>\n",
|
||
" <td>fruity</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>19</th>\n",
|
||
" <td>Afuega'l Pitu</td>\n",
|
||
" <td>https://www.cheese.com/afuegal-pitu/</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>Spain</td>\n",
|
||
" <td>Asturias</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>soft, artisan</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>smooth</td>\n",
|
||
" <td>cloth wrapped</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>spicy, strong</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>48</th>\n",
|
||
" <td>Alpe di Frabosa</td>\n",
|
||
" <td>https://www.cheese.com/alpe-di-frabosa/</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>Italy</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-soft</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>bitter</td>\n",
|
||
" <td>milky, mushroom</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>50</th>\n",
|
||
" <td>Alpicrème</td>\n",
|
||
" <td>https://www.cheese.com/alpicreme/</td>\n",
|
||
" <td>goat</td>\n",
|
||
" <td>France</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>soft</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1172</th>\n",
|
||
" <td>Strathdon Blue</td>\n",
|
||
" <td>https://www.cheese.com/strathdon-blue/</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>Scotland</td>\n",
|
||
" <td>Tain</td>\n",
|
||
" <td>Blue</td>\n",
|
||
" <td>semi-soft</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>creamy</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>creamy, spicy</td>\n",
|
||
" <td>aromatic, rich</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Highland Fine Cheeses Limited</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1175</th>\n",
|
||
" <td>String Cheese</td>\n",
|
||
" <td>https://www.cheese.com/string/</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>chewy, firm, stringy</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1177</th>\n",
|
||
" <td>Sulguni</td>\n",
|
||
" <td>https://www.cheese.com/sulguni/</td>\n",
|
||
" <td>buffalo, cow</td>\n",
|
||
" <td>Georgia</td>\n",
|
||
" <td>Svaneti, Samegrelo</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-firm</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>dense, elastic</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>salty, smokey , sour</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Georgian Pickle Cheese</td>\n",
|
||
" <td>Megruli Sulguni, Shebolili Megruli Sulguni</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1181</th>\n",
|
||
" <td>Sussex Slipcote</td>\n",
|
||
" <td>https://www.cheese.com/sussex-slipcote/</td>\n",
|
||
" <td>sheep</td>\n",
|
||
" <td>England</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>soft</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>sharp</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>High Weald Dairy</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1185</th>\n",
|
||
" <td>Sweet Style Swiss</td>\n",
|
||
" <td>https://www.cheese.com/sweet-style-swiss/</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Switzerland</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard, artisan</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>firm, supple</td>\n",
|
||
" <td>waxed</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>nutty</td>\n",
|
||
" <td>nutty, sweet</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>142 rows × 19 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" cheese url \\\n",
|
||
"10 Acapella https://www.cheese.com/acapella/ \n",
|
||
"13 Acorn https://www.cheese.com/acorn/ \n",
|
||
"19 Afuega'l Pitu https://www.cheese.com/afuegal-pitu/ \n",
|
||
"48 Alpe di Frabosa https://www.cheese.com/alpe-di-frabosa/ \n",
|
||
"50 Alpicrème https://www.cheese.com/alpicreme/ \n",
|
||
"... ... ... \n",
|
||
"1172 Strathdon Blue https://www.cheese.com/strathdon-blue/ \n",
|
||
"1175 String Cheese https://www.cheese.com/string/ \n",
|
||
"1177 Sulguni https://www.cheese.com/sulguni/ \n",
|
||
"1181 Sussex Slipcote https://www.cheese.com/sussex-slipcote/ \n",
|
||
"1185 Sweet Style Swiss https://www.cheese.com/sweet-style-swiss/ \n",
|
||
"\n",
|
||
" milk country region family \\\n",
|
||
"10 goat United States California NaN \n",
|
||
"13 sheep United Kingdom Bethania NaN \n",
|
||
"19 cow Spain Asturias NaN \n",
|
||
"48 cow Italy NaN NaN \n",
|
||
"50 goat France NaN NaN \n",
|
||
"... ... ... ... ... \n",
|
||
"1172 cow Scotland Tain Blue \n",
|
||
"1175 NaN NaN NaN NaN \n",
|
||
"1177 buffalo, cow Georgia Svaneti, Samegrelo NaN \n",
|
||
"1181 sheep England NaN NaN \n",
|
||
"1185 NaN Switzerland NaN NaN \n",
|
||
"\n",
|
||
" type fat_content calcium_content texture \\\n",
|
||
"10 soft, soft-ripened NaN NaN NaN \n",
|
||
"13 hard, artisan 52% NaN crumbly, firm \n",
|
||
"19 soft, artisan NaN NaN smooth \n",
|
||
"48 semi-soft NaN NaN NaN \n",
|
||
"50 soft NaN NaN NaN \n",
|
||
"... ... ... ... ... \n",
|
||
"1172 semi-soft NaN NaN creamy \n",
|
||
"1175 semi-hard NaN NaN chewy, firm, stringy \n",
|
||
"1177 semi-firm NaN NaN dense, elastic \n",
|
||
"1181 soft NaN NaN NaN \n",
|
||
"1185 semi-hard, artisan NaN NaN firm, supple \n",
|
||
"\n",
|
||
" rind color flavor \\\n",
|
||
"10 NaN NaN buttery \n",
|
||
"13 NaN NaN burnt caramel, citrusy, herbaceous \n",
|
||
"19 cloth wrapped NaN spicy, strong \n",
|
||
"48 NaN NaN bitter \n",
|
||
"50 NaN NaN NaN \n",
|
||
"... ... ... ... \n",
|
||
"1172 NaN NaN creamy, spicy \n",
|
||
"1175 NaN NaN NaN \n",
|
||
"1177 NaN NaN salty, smokey , sour \n",
|
||
"1181 NaN NaN sharp \n",
|
||
"1185 waxed NaN nutty \n",
|
||
"\n",
|
||
" aroma vegetarian vegan synonyms \\\n",
|
||
"10 fresh, herbal False False NaN \n",
|
||
"13 fruity True False NaN \n",
|
||
"19 NaN False False NaN \n",
|
||
"48 milky, mushroom False False NaN \n",
|
||
"50 NaN False False NaN \n",
|
||
"... ... ... ... ... \n",
|
||
"1172 aromatic, rich True False NaN \n",
|
||
"1175 NaN NaN NaN NaN \n",
|
||
"1177 NaN NaN NaN Georgian Pickle Cheese \n",
|
||
"1181 NaN True False NaN \n",
|
||
"1185 nutty, sweet False False NaN \n",
|
||
"\n",
|
||
" alt_spellings \\\n",
|
||
"10 NaN \n",
|
||
"13 NaN \n",
|
||
"19 NaN \n",
|
||
"48 NaN \n",
|
||
"50 NaN \n",
|
||
"... ... \n",
|
||
"1172 NaN \n",
|
||
"1175 NaN \n",
|
||
"1177 Megruli Sulguni, Shebolili Megruli Sulguni \n",
|
||
"1181 NaN \n",
|
||
"1185 NaN \n",
|
||
"\n",
|
||
" producers \n",
|
||
"10 NaN \n",
|
||
"13 NaN \n",
|
||
"19 NaN \n",
|
||
"48 NaN \n",
|
||
"50 NaN \n",
|
||
"... ... \n",
|
||
"1172 Highland Fine Cheeses Limited \n",
|
||
"1175 NaN \n",
|
||
"1177 NaN \n",
|
||
"1181 High Weald Dairy \n",
|
||
"1185 NaN \n",
|
||
"\n",
|
||
"[142 rows x 19 columns]"
|
||
]
|
||
},
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data[data[\"color\"].isnull()]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"id": "a0a77563-518e-4808-b744-9fc0c76763fe",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"1162\n",
|
||
"939\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"print(len(data[pd.isnull(data[\"calcium_content\"])]))\n",
|
||
"print(len(data[pd.isnull(data[\"fat_content\"])]))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"id": "c8489ffa-1067-4eb7-b65a-2fa18fdb4b04",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"del data[\"alt_spellings\"]\n",
|
||
"del data[\"producers\"]\n",
|
||
"del data[\"calcium_content\"]\n",
|
||
"del data[\"url\"]\n",
|
||
"del data[\"fat_content\"]\n",
|
||
"del data[\"synonyms\"]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"id": "5379265a-cd49-41fa-845c-bfae33bb8f5a",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>cheese</th>\n",
|
||
" <th>milk</th>\n",
|
||
" <th>country</th>\n",
|
||
" <th>region</th>\n",
|
||
" <th>family</th>\n",
|
||
" <th>type</th>\n",
|
||
" <th>texture</th>\n",
|
||
" <th>rind</th>\n",
|
||
" <th>color</th>\n",
|
||
" <th>flavor</th>\n",
|
||
" <th>aroma</th>\n",
|
||
" <th>vegetarian</th>\n",
|
||
" <th>vegan</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>Aarewasser</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>Switzerland</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-soft</td>\n",
|
||
" <td>buttery</td>\n",
|
||
" <td>washed</td>\n",
|
||
" <td>yellow</td>\n",
|
||
" <td>sweet</td>\n",
|
||
" <td>buttery</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>Abbaye de Belloc</td>\n",
|
||
" <td>sheep</td>\n",
|
||
" <td>France</td>\n",
|
||
" <td>Pays Basque</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard, artisan</td>\n",
|
||
" <td>creamy, dense, firm</td>\n",
|
||
" <td>natural</td>\n",
|
||
" <td>yellow</td>\n",
|
||
" <td>burnt caramel</td>\n",
|
||
" <td>lanoline</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>Abbaye de Belval</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>France</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard</td>\n",
|
||
" <td>elastic</td>\n",
|
||
" <td>washed</td>\n",
|
||
" <td>ivory</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>aromatic</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>Abbaye de Citeaux</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>France</td>\n",
|
||
" <td>Burgundy</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-soft, artisan, brined</td>\n",
|
||
" <td>creamy, dense, smooth</td>\n",
|
||
" <td>washed</td>\n",
|
||
" <td>white</td>\n",
|
||
" <td>acidic, milky, smooth</td>\n",
|
||
" <td>barnyardy, earthy</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>Abbaye de Tamié</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>France</td>\n",
|
||
" <td>Savoie</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>soft, artisan</td>\n",
|
||
" <td>creamy, open, smooth</td>\n",
|
||
" <td>washed</td>\n",
|
||
" <td>white</td>\n",
|
||
" <td>fruity, nutty</td>\n",
|
||
" <td>perfumed, pungent</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1182</th>\n",
|
||
" <td>Sveciaost</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>Sweden</td>\n",
|
||
" <td>Low-laying regions</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard, brined</td>\n",
|
||
" <td>creamy, supple</td>\n",
|
||
" <td>rindless</td>\n",
|
||
" <td>pale yellow</td>\n",
|
||
" <td>acidic</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1183</th>\n",
|
||
" <td>Swag</td>\n",
|
||
" <td>goat</td>\n",
|
||
" <td>Australia</td>\n",
|
||
" <td>South Australia</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fresh firm, artisan</td>\n",
|
||
" <td>creamy, crumbly</td>\n",
|
||
" <td>ash coated</td>\n",
|
||
" <td>white</td>\n",
|
||
" <td>acidic, creamy</td>\n",
|
||
" <td>fresh</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1184</th>\n",
|
||
" <td>Swaledale</td>\n",
|
||
" <td>sheep</td>\n",
|
||
" <td>England</td>\n",
|
||
" <td>Swaledale, North Yorkshire</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>hard</td>\n",
|
||
" <td>semi firm</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>yellow</td>\n",
|
||
" <td>smooth, sweet</td>\n",
|
||
" <td>floral</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1185</th>\n",
|
||
" <td>Sweet Style Swiss</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Switzerland</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard, artisan</td>\n",
|
||
" <td>firm, supple</td>\n",
|
||
" <td>waxed</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>nutty</td>\n",
|
||
" <td>nutty, sweet</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1186</th>\n",
|
||
" <td>Swiss cheese</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>United States</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Swiss Cheese</td>\n",
|
||
" <td>hard, artisan, processed</td>\n",
|
||
" <td>firm</td>\n",
|
||
" <td>rindless</td>\n",
|
||
" <td>pale yellow</td>\n",
|
||
" <td>nutty, sweet</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>1187 rows × 13 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" cheese milk country region \\\n",
|
||
"0 Aarewasser cow Switzerland NaN \n",
|
||
"1 Abbaye de Belloc sheep France Pays Basque \n",
|
||
"2 Abbaye de Belval cow France NaN \n",
|
||
"3 Abbaye de Citeaux cow France Burgundy \n",
|
||
"4 Abbaye de Tamié cow France Savoie \n",
|
||
"... ... ... ... ... \n",
|
||
"1182 Sveciaost cow Sweden Low-laying regions \n",
|
||
"1183 Swag goat Australia South Australia \n",
|
||
"1184 Swaledale sheep England Swaledale, North Yorkshire \n",
|
||
"1185 Sweet Style Swiss NaN Switzerland NaN \n",
|
||
"1186 Swiss cheese cow United States NaN \n",
|
||
"\n",
|
||
" family type texture \\\n",
|
||
"0 NaN semi-soft buttery \n",
|
||
"1 NaN semi-hard, artisan creamy, dense, firm \n",
|
||
"2 NaN semi-hard elastic \n",
|
||
"3 NaN semi-soft, artisan, brined creamy, dense, smooth \n",
|
||
"4 NaN soft, artisan creamy, open, smooth \n",
|
||
"... ... ... ... \n",
|
||
"1182 NaN semi-hard, brined creamy, supple \n",
|
||
"1183 NaN fresh firm, artisan creamy, crumbly \n",
|
||
"1184 NaN hard semi firm \n",
|
||
"1185 NaN semi-hard, artisan firm, supple \n",
|
||
"1186 Swiss Cheese hard, artisan, processed firm \n",
|
||
"\n",
|
||
" rind color flavor aroma \\\n",
|
||
"0 washed yellow sweet buttery \n",
|
||
"1 natural yellow burnt caramel lanoline \n",
|
||
"2 washed ivory NaN aromatic \n",
|
||
"3 washed white acidic, milky, smooth barnyardy, earthy \n",
|
||
"4 washed white fruity, nutty perfumed, pungent \n",
|
||
"... ... ... ... ... \n",
|
||
"1182 rindless pale yellow acidic NaN \n",
|
||
"1183 ash coated white acidic, creamy fresh \n",
|
||
"1184 NaN yellow smooth, sweet floral \n",
|
||
"1185 waxed NaN nutty nutty, sweet \n",
|
||
"1186 rindless pale yellow nutty, sweet NaN \n",
|
||
"\n",
|
||
" vegetarian vegan \n",
|
||
"0 False False \n",
|
||
"1 True False \n",
|
||
"2 False False \n",
|
||
"3 False False \n",
|
||
"4 False False \n",
|
||
"... ... ... \n",
|
||
"1182 False False \n",
|
||
"1183 True False \n",
|
||
"1184 True False \n",
|
||
"1185 False False \n",
|
||
"1186 True False \n",
|
||
"\n",
|
||
"[1187 rows x 13 columns]"
|
||
]
|
||
},
|
||
"execution_count": 9,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"id": "633ed80e-e416-41f6-ae58-b86ce4c132af",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>cheese</th>\n",
|
||
" <th>milk</th>\n",
|
||
" <th>country</th>\n",
|
||
" <th>region</th>\n",
|
||
" <th>family</th>\n",
|
||
" <th>type</th>\n",
|
||
" <th>texture</th>\n",
|
||
" <th>rind</th>\n",
|
||
" <th>color</th>\n",
|
||
" <th>flavor</th>\n",
|
||
" <th>aroma</th>\n",
|
||
" <th>vegetarian</th>\n",
|
||
" <th>vegan</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>Aarewasser</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>Switzerland</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-soft</td>\n",
|
||
" <td>buttery</td>\n",
|
||
" <td>washed</td>\n",
|
||
" <td>yellow</td>\n",
|
||
" <td>sweet</td>\n",
|
||
" <td>buttery</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>Abbaye de Belloc</td>\n",
|
||
" <td>sheep</td>\n",
|
||
" <td>France</td>\n",
|
||
" <td>Pays Basque</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard, artisan</td>\n",
|
||
" <td>creamy, dense, firm</td>\n",
|
||
" <td>natural</td>\n",
|
||
" <td>yellow</td>\n",
|
||
" <td>burnt caramel</td>\n",
|
||
" <td>lanoline</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>Abbaye de Belval</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>France</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard</td>\n",
|
||
" <td>elastic</td>\n",
|
||
" <td>washed</td>\n",
|
||
" <td>ivory</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>aromatic</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>Abbaye de Citeaux</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>France</td>\n",
|
||
" <td>Burgundy</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-soft, artisan, brined</td>\n",
|
||
" <td>creamy, dense, smooth</td>\n",
|
||
" <td>washed</td>\n",
|
||
" <td>white</td>\n",
|
||
" <td>acidic, milky, smooth</td>\n",
|
||
" <td>barnyardy, earthy</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>Abbaye de Tamié</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>France</td>\n",
|
||
" <td>Savoie</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>soft, artisan</td>\n",
|
||
" <td>creamy, open, smooth</td>\n",
|
||
" <td>washed</td>\n",
|
||
" <td>white</td>\n",
|
||
" <td>fruity, nutty</td>\n",
|
||
" <td>perfumed, pungent</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1182</th>\n",
|
||
" <td>Sveciaost</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>Sweden</td>\n",
|
||
" <td>Low-laying regions</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard, brined</td>\n",
|
||
" <td>creamy, supple</td>\n",
|
||
" <td>rindless</td>\n",
|
||
" <td>pale yellow</td>\n",
|
||
" <td>acidic</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1183</th>\n",
|
||
" <td>Swag</td>\n",
|
||
" <td>goat</td>\n",
|
||
" <td>Australia</td>\n",
|
||
" <td>South Australia</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fresh firm, artisan</td>\n",
|
||
" <td>creamy, crumbly</td>\n",
|
||
" <td>ash coated</td>\n",
|
||
" <td>white</td>\n",
|
||
" <td>acidic, creamy</td>\n",
|
||
" <td>fresh</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1184</th>\n",
|
||
" <td>Swaledale</td>\n",
|
||
" <td>sheep</td>\n",
|
||
" <td>England</td>\n",
|
||
" <td>Swaledale, North Yorkshire</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>hard</td>\n",
|
||
" <td>semi firm</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>yellow</td>\n",
|
||
" <td>smooth, sweet</td>\n",
|
||
" <td>floral</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1185</th>\n",
|
||
" <td>Sweet Style Swiss</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Switzerland</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard, artisan</td>\n",
|
||
" <td>firm, supple</td>\n",
|
||
" <td>waxed</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>nutty</td>\n",
|
||
" <td>nutty, sweet</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1186</th>\n",
|
||
" <td>Swiss cheese</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>United States</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Swiss Cheese</td>\n",
|
||
" <td>hard, artisan, processed</td>\n",
|
||
" <td>firm</td>\n",
|
||
" <td>rindless</td>\n",
|
||
" <td>pale yellow</td>\n",
|
||
" <td>nutty, sweet</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>1181 rows × 13 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" cheese milk country region \\\n",
|
||
"0 Aarewasser cow Switzerland NaN \n",
|
||
"1 Abbaye de Belloc sheep France Pays Basque \n",
|
||
"2 Abbaye de Belval cow France NaN \n",
|
||
"3 Abbaye de Citeaux cow France Burgundy \n",
|
||
"4 Abbaye de Tamié cow France Savoie \n",
|
||
"... ... ... ... ... \n",
|
||
"1182 Sveciaost cow Sweden Low-laying regions \n",
|
||
"1183 Swag goat Australia South Australia \n",
|
||
"1184 Swaledale sheep England Swaledale, North Yorkshire \n",
|
||
"1185 Sweet Style Swiss NaN Switzerland NaN \n",
|
||
"1186 Swiss cheese cow United States NaN \n",
|
||
"\n",
|
||
" family type texture \\\n",
|
||
"0 NaN semi-soft buttery \n",
|
||
"1 NaN semi-hard, artisan creamy, dense, firm \n",
|
||
"2 NaN semi-hard elastic \n",
|
||
"3 NaN semi-soft, artisan, brined creamy, dense, smooth \n",
|
||
"4 NaN soft, artisan creamy, open, smooth \n",
|
||
"... ... ... ... \n",
|
||
"1182 NaN semi-hard, brined creamy, supple \n",
|
||
"1183 NaN fresh firm, artisan creamy, crumbly \n",
|
||
"1184 NaN hard semi firm \n",
|
||
"1185 NaN semi-hard, artisan firm, supple \n",
|
||
"1186 Swiss Cheese hard, artisan, processed firm \n",
|
||
"\n",
|
||
" rind color flavor aroma \\\n",
|
||
"0 washed yellow sweet buttery \n",
|
||
"1 natural yellow burnt caramel lanoline \n",
|
||
"2 washed ivory NaN aromatic \n",
|
||
"3 washed white acidic, milky, smooth barnyardy, earthy \n",
|
||
"4 washed white fruity, nutty perfumed, pungent \n",
|
||
"... ... ... ... ... \n",
|
||
"1182 rindless pale yellow acidic NaN \n",
|
||
"1183 ash coated white acidic, creamy fresh \n",
|
||
"1184 NaN yellow smooth, sweet floral \n",
|
||
"1185 waxed NaN nutty nutty, sweet \n",
|
||
"1186 rindless pale yellow nutty, sweet NaN \n",
|
||
"\n",
|
||
" vegetarian vegan \n",
|
||
"0 False False \n",
|
||
"1 True False \n",
|
||
"2 False False \n",
|
||
"3 False False \n",
|
||
"4 False False \n",
|
||
"... ... ... \n",
|
||
"1182 False False \n",
|
||
"1183 True False \n",
|
||
"1184 True False \n",
|
||
"1185 False False \n",
|
||
"1186 True False \n",
|
||
"\n",
|
||
"[1181 rows x 13 columns]"
|
||
]
|
||
},
|
||
"execution_count": 10,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data=data.dropna(subset=[\"country\",\"region\"], how=\"all\")\n",
|
||
"data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "e28eb2ee-1bed-4dd7-8133-c99b1f7ea26b",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"id": "5a4c0e30-8535-498b-9a9e-0d7d232d4eb7",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/tmp/ipykernel_39592/2649664722.py:2: SettingWithCopyWarning: \n",
|
||
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
||
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
||
"\n",
|
||
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
||
" data[\"country\"]=data[\"country\"].fillna(\"\")\n",
|
||
"/tmp/ipykernel_39592/2649664722.py:3: SettingWithCopyWarning: \n",
|
||
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
||
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
||
"\n",
|
||
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
||
" data[\"region\"]=data[\"region\"].fillna(\"\")\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"data[[\"country\",\"region\"]]\n",
|
||
"data[\"country\"]=data[\"country\"].fillna(\"\")\n",
|
||
"data[\"region\"]=data[\"region\"].fillna(\"\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 35,
|
||
"id": "0edcee6f-a2c3-4804-a665-507b77f2651b",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/tmp/ipykernel_39592/3582726305.py:1: SettingWithCopyWarning: \n",
|
||
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
||
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
||
"\n",
|
||
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
||
" data[\"location\"]=data[\"region\"]+\", \"+data[\"country\"]\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"data[\"location\"]=data[\"region\"]+\", \"+data[\"country\"]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 36,
|
||
"id": "017e30da-4f60-4d16-b6d4-56fb76acb740",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>cheese</th>\n",
|
||
" <th>milk</th>\n",
|
||
" <th>country</th>\n",
|
||
" <th>region</th>\n",
|
||
" <th>family</th>\n",
|
||
" <th>type</th>\n",
|
||
" <th>texture</th>\n",
|
||
" <th>rind</th>\n",
|
||
" <th>color</th>\n",
|
||
" <th>flavor</th>\n",
|
||
" <th>aroma</th>\n",
|
||
" <th>vegetarian</th>\n",
|
||
" <th>vegan</th>\n",
|
||
" <th>location</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>Aarewasser</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>Switzerland</td>\n",
|
||
" <td></td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-soft</td>\n",
|
||
" <td>buttery</td>\n",
|
||
" <td>washed</td>\n",
|
||
" <td>yellow</td>\n",
|
||
" <td>sweet</td>\n",
|
||
" <td>buttery</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>, Switzerland</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>Abbaye de Belloc</td>\n",
|
||
" <td>sheep</td>\n",
|
||
" <td>France</td>\n",
|
||
" <td>Pays Basque</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard, artisan</td>\n",
|
||
" <td>creamy, dense, firm</td>\n",
|
||
" <td>natural</td>\n",
|
||
" <td>yellow</td>\n",
|
||
" <td>burnt caramel</td>\n",
|
||
" <td>lanoline</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>Pays Basque, France</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>Abbaye de Belval</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>France</td>\n",
|
||
" <td></td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard</td>\n",
|
||
" <td>elastic</td>\n",
|
||
" <td>washed</td>\n",
|
||
" <td>ivory</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>aromatic</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>, France</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>Abbaye de Citeaux</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>France</td>\n",
|
||
" <td>Burgundy</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-soft, artisan, brined</td>\n",
|
||
" <td>creamy, dense, smooth</td>\n",
|
||
" <td>washed</td>\n",
|
||
" <td>white</td>\n",
|
||
" <td>acidic, milky, smooth</td>\n",
|
||
" <td>barnyardy, earthy</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>Burgundy, France</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>Abbaye de Tamié</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>France</td>\n",
|
||
" <td>Savoie</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>soft, artisan</td>\n",
|
||
" <td>creamy, open, smooth</td>\n",
|
||
" <td>washed</td>\n",
|
||
" <td>white</td>\n",
|
||
" <td>fruity, nutty</td>\n",
|
||
" <td>perfumed, pungent</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>Savoie, France</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1182</th>\n",
|
||
" <td>Sveciaost</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>Sweden</td>\n",
|
||
" <td>Low-laying regions</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard, brined</td>\n",
|
||
" <td>creamy, supple</td>\n",
|
||
" <td>rindless</td>\n",
|
||
" <td>pale yellow</td>\n",
|
||
" <td>acidic</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>Low-laying regions, Sweden</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1183</th>\n",
|
||
" <td>Swag</td>\n",
|
||
" <td>goat</td>\n",
|
||
" <td>Australia</td>\n",
|
||
" <td>South Australia</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fresh firm, artisan</td>\n",
|
||
" <td>creamy, crumbly</td>\n",
|
||
" <td>ash coated</td>\n",
|
||
" <td>white</td>\n",
|
||
" <td>acidic, creamy</td>\n",
|
||
" <td>fresh</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>South Australia, Australia</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1184</th>\n",
|
||
" <td>Swaledale</td>\n",
|
||
" <td>sheep</td>\n",
|
||
" <td>England</td>\n",
|
||
" <td>Swaledale, North Yorkshire</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>hard</td>\n",
|
||
" <td>semi firm</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>yellow</td>\n",
|
||
" <td>smooth, sweet</td>\n",
|
||
" <td>floral</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>Swaledale, North Yorkshire, England</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1185</th>\n",
|
||
" <td>Sweet Style Swiss</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Switzerland</td>\n",
|
||
" <td></td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard, artisan</td>\n",
|
||
" <td>firm, supple</td>\n",
|
||
" <td>waxed</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>nutty</td>\n",
|
||
" <td>nutty, sweet</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>, Switzerland</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1186</th>\n",
|
||
" <td>Swiss cheese</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>United States</td>\n",
|
||
" <td></td>\n",
|
||
" <td>Swiss Cheese</td>\n",
|
||
" <td>hard, artisan, processed</td>\n",
|
||
" <td>firm</td>\n",
|
||
" <td>rindless</td>\n",
|
||
" <td>pale yellow</td>\n",
|
||
" <td>nutty, sweet</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>, United States</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>1181 rows × 14 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" cheese milk country region \\\n",
|
||
"0 Aarewasser cow Switzerland \n",
|
||
"1 Abbaye de Belloc sheep France Pays Basque \n",
|
||
"2 Abbaye de Belval cow France \n",
|
||
"3 Abbaye de Citeaux cow France Burgundy \n",
|
||
"4 Abbaye de Tamié cow France Savoie \n",
|
||
"... ... ... ... ... \n",
|
||
"1182 Sveciaost cow Sweden Low-laying regions \n",
|
||
"1183 Swag goat Australia South Australia \n",
|
||
"1184 Swaledale sheep England Swaledale, North Yorkshire \n",
|
||
"1185 Sweet Style Swiss NaN Switzerland \n",
|
||
"1186 Swiss cheese cow United States \n",
|
||
"\n",
|
||
" family type texture \\\n",
|
||
"0 NaN semi-soft buttery \n",
|
||
"1 NaN semi-hard, artisan creamy, dense, firm \n",
|
||
"2 NaN semi-hard elastic \n",
|
||
"3 NaN semi-soft, artisan, brined creamy, dense, smooth \n",
|
||
"4 NaN soft, artisan creamy, open, smooth \n",
|
||
"... ... ... ... \n",
|
||
"1182 NaN semi-hard, brined creamy, supple \n",
|
||
"1183 NaN fresh firm, artisan creamy, crumbly \n",
|
||
"1184 NaN hard semi firm \n",
|
||
"1185 NaN semi-hard, artisan firm, supple \n",
|
||
"1186 Swiss Cheese hard, artisan, processed firm \n",
|
||
"\n",
|
||
" rind color flavor aroma \\\n",
|
||
"0 washed yellow sweet buttery \n",
|
||
"1 natural yellow burnt caramel lanoline \n",
|
||
"2 washed ivory NaN aromatic \n",
|
||
"3 washed white acidic, milky, smooth barnyardy, earthy \n",
|
||
"4 washed white fruity, nutty perfumed, pungent \n",
|
||
"... ... ... ... ... \n",
|
||
"1182 rindless pale yellow acidic NaN \n",
|
||
"1183 ash coated white acidic, creamy fresh \n",
|
||
"1184 NaN yellow smooth, sweet floral \n",
|
||
"1185 waxed NaN nutty nutty, sweet \n",
|
||
"1186 rindless pale yellow nutty, sweet NaN \n",
|
||
"\n",
|
||
" vegetarian vegan location \n",
|
||
"0 False False , Switzerland \n",
|
||
"1 True False Pays Basque, France \n",
|
||
"2 False False , France \n",
|
||
"3 False False Burgundy, France \n",
|
||
"4 False False Savoie, France \n",
|
||
"... ... ... ... \n",
|
||
"1182 False False Low-laying regions, Sweden \n",
|
||
"1183 True False South Australia, Australia \n",
|
||
"1184 True False Swaledale, North Yorkshire, England \n",
|
||
"1185 False False , Switzerland \n",
|
||
"1186 True False , United States \n",
|
||
"\n",
|
||
"[1181 rows x 14 columns]"
|
||
]
|
||
},
|
||
"execution_count": 36,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 37,
|
||
"id": "bf9d5b2a-bd47-4c4c-85c4-5ad7769a3f31",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"430"
|
||
]
|
||
},
|
||
"execution_count": 37,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"len(set(data[\"location\"]))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 44,
|
||
"id": "e7ae8f76-b33b-42ce-9dea-9fab9e33069e",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"locs=list(set(loc for loc in data[\"location\"]))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 49,
|
||
"id": "fb044984-c33c-492c-91a2-4e9fff29ceb3",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"['Oristano, Italy',\n",
|
||
" 'Isere, France',\n",
|
||
" 'Romanian Carpathians, Romania',\n",
|
||
" 'French Basque Country, Midi-Pyrénées, France',\n",
|
||
" 'Kilmallock County Limerick, Ireland',\n",
|
||
" ', Australia',\n",
|
||
" 'massif des Causses, France',\n",
|
||
" 'Basque, Pyrenees Mountains, France',\n",
|
||
" 'Modena, Italy',\n",
|
||
" 'Vermont, United States',\n",
|
||
" 'Rhone-Alps, France',\n",
|
||
" 'Brooklyn NY, United States',\n",
|
||
" 'Coast of Oregon, United States',\n",
|
||
" 'Veneto, Trentino, Italy',\n",
|
||
" 'Emilia Romagna, Italy',\n",
|
||
" ', Middle East',\n",
|
||
" 'Carmarthenshire, Wales',\n",
|
||
" ', Egypt, Lebanon, Syria',\n",
|
||
" 'Jura, Switzerland',\n",
|
||
" ', Australia, France',\n",
|
||
" 'Rio Grande do Sul, Brazil',\n",
|
||
" ', Turkey',\n",
|
||
" 'Banks Peninsular in Canterbury, New Zealand',\n",
|
||
" 'Loire Valley, France',\n",
|
||
" 'Berry, France',\n",
|
||
" 'Seattle, Washington, United States',\n",
|
||
" 'Marathon, NY, United States',\n",
|
||
" 'Setubal, Palmela and Sesimbra, Portugal',\n",
|
||
" 'Central Balkan Mountains, Bulgaria',\n",
|
||
" 'North Wootton, England',\n",
|
||
" 'Allagau, Bavarian Alps, Germany',\n",
|
||
" 'Colorado, United States',\n",
|
||
" ', Armenia',\n",
|
||
" 'Lebanon, CT, United States',\n",
|
||
" 'Centre-Val de Loire, France',\n",
|
||
" 'Barcelona, Spain',\n",
|
||
" 'South West England, England',\n",
|
||
" 'Laqueuille, France',\n",
|
||
" 'Fethard, Co Tipperary, Ireland',\n",
|
||
" 'Mornington Peninsula, Melbourne, Australia',\n",
|
||
" 'Odell, Bedfordshire, England, Great Britain, United Kingdom',\n",
|
||
" 'Shelburne Farms, United States',\n",
|
||
" ', Israel',\n",
|
||
" 'South Australia, Australia',\n",
|
||
" 'Bornholm, Denmark',\n",
|
||
" 'Pyrénées, France',\n",
|
||
" 'Sardinia & Campania, Italy',\n",
|
||
" 'Hunter Valley, Australia',\n",
|
||
" 'Greenville, Indiana, United States',\n",
|
||
" 'Pokolbin, Hunter Valley, Australia',\n",
|
||
" 'Landford, England, Great Britain, United Kingdom',\n",
|
||
" 'Carrigtwohill, Ireland',\n",
|
||
" 'Minnesota, United States',\n",
|
||
" 'Lombardy, Italy',\n",
|
||
" 'County Cavan, Ireland',\n",
|
||
" ', Argentina',\n",
|
||
" ', France',\n",
|
||
" 'Avila, Spain',\n",
|
||
" 'Crotone, Italy',\n",
|
||
" 'New Hampshire, United States',\n",
|
||
" ', Serbia',\n",
|
||
" 'Lombardy, Canada, Italy',\n",
|
||
" 'Pays d’Auge, Normandy, France',\n",
|
||
" 'Wisconsin, United States',\n",
|
||
" 'Tomales, California, United States',\n",
|
||
" ', Netherlands',\n",
|
||
" 'Serra da Estrela, Portugal',\n",
|
||
" 'St. Gallen (canton), Tufertschwil, Switzerland',\n",
|
||
" 'Extremadura, Spain',\n",
|
||
" ', Spain',\n",
|
||
" 'Pesaro-Urbino, Italy',\n",
|
||
" 'New York, France, United States',\n",
|
||
" 'Northern Holland, Netherlands',\n",
|
||
" 'South West England, United Kingdom',\n",
|
||
" 'Mankato, MN, United States',\n",
|
||
" 'Charentes-Poitou, France',\n",
|
||
" 'Postel, Belgium',\n",
|
||
" 'Auvergne, Salers, France',\n",
|
||
" 'Leiden, Netherlands',\n",
|
||
" 'Apulia, Italy, United States',\n",
|
||
" ', Afghanistan',\n",
|
||
" 'Canary Islands, Spain',\n",
|
||
" ', Great Britain, United Kingdom, Wales',\n",
|
||
" 'Oregon Coast Range, United States',\n",
|
||
" 'Calabria, Italy',\n",
|
||
" 'Utah, United States',\n",
|
||
" 'Gâtinais, France',\n",
|
||
" 'North Yorkshire, England',\n",
|
||
" 'Carnia, Italy',\n",
|
||
" 'Passendale, Belgium',\n",
|
||
" \"Lucerne, Schwyz, Unterwald, and Zoug, and the following additional places: Muri district in d'Argovi, Switzerland\",\n",
|
||
" 'Carrigtwohill, ',\n",
|
||
" 'Adamstown, Co Wexford, Ireland',\n",
|
||
" 'Bermondsey, London, England, Great Britain, United Kingdom',\n",
|
||
" 'Lapland, Finland',\n",
|
||
" 'Tasmania, Australia',\n",
|
||
" 'Staffordshire, England, Great Britain, United Kingdom',\n",
|
||
" 'Ile de France, France',\n",
|
||
" 'Lodi, Italy',\n",
|
||
" 'Macedonia, Thrace, Thessalia, Peloponissos, Ionian Islands, Aegean islands, Crete Island and Epirus, Greece',\n",
|
||
" ', United Kingdom, United States',\n",
|
||
" 'Southern California, United States',\n",
|
||
" 'South West England, England, United Kingdom',\n",
|
||
" 'Haute Vienne, France',\n",
|
||
" 'Menorca, Balearic Islands, Spain',\n",
|
||
" 'Aberdeenshire, Scotland',\n",
|
||
" 'Kimball, United States',\n",
|
||
" 'Schoonrewoerd, Leerdam, Netherlands',\n",
|
||
" 'Friuli-Venezia Giulia, Italy',\n",
|
||
" 'Larzac, France',\n",
|
||
" 'Nottinghamshire, England, Great Britain, United Kingdom',\n",
|
||
" 'Charm, Ohio, United States',\n",
|
||
" 'Troyes , Aube, France',\n",
|
||
" ', Denmark, Finland, Germany, Iceland, Norway, Sweden',\n",
|
||
" 'Québec, Canada',\n",
|
||
" 'Herault, France',\n",
|
||
" 'Pullman, Washington, United States',\n",
|
||
" 'Dumfries, Scotland',\n",
|
||
" 'Tieton, Washington, United States',\n",
|
||
" 'Murazzano, Italy',\n",
|
||
" 'Pennsylvania, United States',\n",
|
||
" 'Co. Carlow, Ireland',\n",
|
||
" 'Languedoc-Roussillon, France',\n",
|
||
" 'Aquitaine, France',\n",
|
||
" 'Franche Comté, France, Switzerland',\n",
|
||
" 'Duhallow, Ireland',\n",
|
||
" 'Gippsland, Victoria, Australia',\n",
|
||
" 'British Columbia, Canada',\n",
|
||
" ', Austria, Germany',\n",
|
||
" 'Hamilton, New Zealand',\n",
|
||
" 'Roncq, France',\n",
|
||
" 'St. Louis, Missouri, United States',\n",
|
||
" 'Iowa, United States',\n",
|
||
" 'Burgund, France',\n",
|
||
" 'Pembrokeshire, Great Britain, United Kingdom, Wales',\n",
|
||
" 'Ballarat, Victoria, Australia',\n",
|
||
" 'Nord-Pas-de-Calais, France',\n",
|
||
" 'Aconcagua, Chile',\n",
|
||
" 'Massachusetts, United States',\n",
|
||
" 'North Yorkshire, England, Great Britain, United Kingdom',\n",
|
||
" 'Bregenzerwald, Kleinwalsertal, Großwalsertal, Laiblachtal (Pfänderstock) and Rheintal, Austria',\n",
|
||
" ', Denmark',\n",
|
||
" 'Ile-de-France/Champagne, France',\n",
|
||
" 'Castilla Leon, Spain',\n",
|
||
" 'Asiago, Italy',\n",
|
||
" 'Manitoba, Canada',\n",
|
||
" 'Cheshire, England, Great Britain, United Kingdom',\n",
|
||
" 'County Tipperary, Clogheen, Ireland',\n",
|
||
" 'Valpadana, Italy',\n",
|
||
" 'Severn Valley, England, Great Britain, United Kingdom',\n",
|
||
" 'Co Clare, Ireland',\n",
|
||
" 'Timsbury, Somerset, England, Great Britain, United Kingdom',\n",
|
||
" 'Somerset, England, Great Britain, United Kingdom',\n",
|
||
" 'Greensboro, VT, United States',\n",
|
||
" 'Dorset, England, Great Britain, United Kingdom',\n",
|
||
" 'Orkney Islands, Great Britain, Scotland, United Kingdom',\n",
|
||
" 'East Midlands, England',\n",
|
||
" 'Northeastern Brazil, Brazil',\n",
|
||
" 'NY, United States',\n",
|
||
" ', England, Great Britain, United Kingdom',\n",
|
||
" 'Alba, Italy',\n",
|
||
" 'Fife, Scotland',\n",
|
||
" 'Stranraer, England, Scotland, United Kingdom',\n",
|
||
" 'Castelo Branco, Fundão and Idanha-a-Nova, Portugal',\n",
|
||
" 'Moliterno, Italy',\n",
|
||
" 'Dorset, England',\n",
|
||
" 'Cornwall, England, Great Britain, United Kingdom',\n",
|
||
" 'Northwest, United States',\n",
|
||
" 'Carneros, Sonoma, California, United States',\n",
|
||
" 'Croisy-sur-Eure, France',\n",
|
||
" 'Pyrenees-Atlantiques, France',\n",
|
||
" 'Västra Götaland, Sweden',\n",
|
||
" 'Brisbane, Australia',\n",
|
||
" 'Campania, Paestum, Foggia, Italy',\n",
|
||
" 'Cevenes, France',\n",
|
||
" ', England',\n",
|
||
" 'Corsica, France',\n",
|
||
" ', Austria',\n",
|
||
" 'Ann Arbor, Michigan, United States',\n",
|
||
" 'Po valley region, Italy',\n",
|
||
" 'Banon, France',\n",
|
||
" 'Herefordshire, West Midlands, United Kingdom',\n",
|
||
" 'Zasavica, Serbia',\n",
|
||
" ', United Kingdom, Wales',\n",
|
||
" 'Lower Normandy, France',\n",
|
||
" 'Piave Valley, Italy, Italy',\n",
|
||
" 'Nicasio, United States',\n",
|
||
" 'Beara Peninsula, Co. Cork, Ireland',\n",
|
||
" 'Cotswolds, England, Great Britain, United Kingdom',\n",
|
||
" 'Missouri, United States',\n",
|
||
" 'Rhone Valley, France',\n",
|
||
" 'California, United States',\n",
|
||
" 'Oxfordshire, Great Britain',\n",
|
||
" 'Petaluma, California, United States',\n",
|
||
" 'Prince Edward Island, Canada',\n",
|
||
" 'North Cornwall, England',\n",
|
||
" 'Coquet, England',\n",
|
||
" 'Emilia-Romagna, Italy',\n",
|
||
" 'Castille-Leon, Spain',\n",
|
||
" ', Cyprus',\n",
|
||
" 'Bethania, United Kingdom',\n",
|
||
" 'Puimichel in Provence Alpes, France',\n",
|
||
" 'Gloucestershire County, England, United Kingdom',\n",
|
||
" 'Galicia, Spain',\n",
|
||
" 'Flanders, Belgium',\n",
|
||
" ', Canada, Denmark, France, Germany, Netherlands, United States',\n",
|
||
" 'Lanarkshire, Great Britain, Scotland, United Kingdom',\n",
|
||
" 'Pays Basque, France',\n",
|
||
" 'Veneto, Italy',\n",
|
||
" ', Scotland',\n",
|
||
" 'Victoria, Australia',\n",
|
||
" 'Co Limerick, Ireland',\n",
|
||
" 'Treviso, Veneto, Italy',\n",
|
||
" 'Wales, Great Britain',\n",
|
||
" 'Serra da Canastra, Minas Gerais state, Brazil',\n",
|
||
" 'Devon, England, United Kingdom',\n",
|
||
" 'Gevrey-Chambertin, Burgundy, France',\n",
|
||
" ', Brazil',\n",
|
||
" 'Gloucestershire, England, Great Britain, United Kingdom',\n",
|
||
" 'Averyon, France',\n",
|
||
" 'Midi-Pyrénées, France',\n",
|
||
" 'Tipperary, Ireland',\n",
|
||
" 'Maribo, Denmark',\n",
|
||
" 'province of Brittany, France',\n",
|
||
" 'Bavaria, Germany',\n",
|
||
" ', United States',\n",
|
||
" 'Northern Wisconsin, United States',\n",
|
||
" 'New York, United States',\n",
|
||
" 'Blarney, Ireland',\n",
|
||
" 'Stewarton, Scotland, United Kingdom',\n",
|
||
" 'Monterey, California, United States',\n",
|
||
" 'Wigtownshire, Scotland',\n",
|
||
" 'Cornwall, England',\n",
|
||
" 'Co. Offaly, Ireland',\n",
|
||
" 'Karlovy Vary, Czech Republic',\n",
|
||
" 'Quebec, Canada',\n",
|
||
" 'Laruns, France',\n",
|
||
" 'Maine, United States',\n",
|
||
" ', Mauritania',\n",
|
||
" 'Piemonte, Italy',\n",
|
||
" 'Brittany, France',\n",
|
||
" 'Poitou-Charentes, France',\n",
|
||
" 'Swaledale, North Yorkshire, England',\n",
|
||
" 'Timsbury, Somerset, England, Scotland, Wales',\n",
|
||
" ', Iceland',\n",
|
||
" 'Oviken, Sweden',\n",
|
||
" 'Pyrénées-Atlantiques, France',\n",
|
||
" 'Pinconning, Michigan, United States',\n",
|
||
" 'Belvederis, Lithuania',\n",
|
||
" 'Chelmarsh, Bridgnorth, Shropshire, England',\n",
|
||
" 'Peekskill, United States',\n",
|
||
" 'Ceredigion, United Kingdom',\n",
|
||
" 'All Holland, Netherlands',\n",
|
||
" 'Normandy, France',\n",
|
||
" 'Inagh, Co Clare, ',\n",
|
||
" 'Amou, Gascony, France',\n",
|
||
" 'Colby, Wisconsin, United States',\n",
|
||
" 'Southwestern Wisconsin, United States',\n",
|
||
" 'Lincolnshire, England, United Kingdom',\n",
|
||
" 'Bas-Languedoc, Comtat Venaissin, France',\n",
|
||
" ', United Kingdom',\n",
|
||
" ', Switzerland',\n",
|
||
" ', Portugal',\n",
|
||
" 'Comox Valley, Vancouver Island, Canada',\n",
|
||
" 'Derbyshire, Leicestershire, Nottinghamshire, England',\n",
|
||
" 'Huizen, Netherlands',\n",
|
||
" 'Asturias, Spain',\n",
|
||
" 'Piedmont, Italy',\n",
|
||
" ', Canada, India, United States',\n",
|
||
" 'County Antrim, Ireland',\n",
|
||
" ', Italy',\n",
|
||
" 'St Antoine, France',\n",
|
||
" ', Hungary',\n",
|
||
" 'County Carlow, Ireland',\n",
|
||
" 'Provencale, France',\n",
|
||
" ', Sweden',\n",
|
||
" 'Allgau, Germany',\n",
|
||
" 'North Carolina, United States',\n",
|
||
" 'Basilicata, Italy',\n",
|
||
" ', Canada',\n",
|
||
" 'Beira Baixa Province, Portugal',\n",
|
||
" 'Ann Arbor, MI, United States',\n",
|
||
" ', China, Nepal, Tibet',\n",
|
||
" 'Bursa, Turkey',\n",
|
||
" 'Websterville, VT, United States',\n",
|
||
" 'Kent, United Kingdom',\n",
|
||
" 'County Wexford, Ireland',\n",
|
||
" 'island wide, Cyprus',\n",
|
||
" 'Pyrenees, France',\n",
|
||
" ', Mexico',\n",
|
||
" 'Duchy of Limburg, Belgium, Germany, Netherlands',\n",
|
||
" 'Cotherstone, England, United Kingdom',\n",
|
||
" 'Richfield, Wisconsin, United States',\n",
|
||
" 'Pienza, Italy',\n",
|
||
" 'Dalmatia, Croatia',\n",
|
||
" 'Taxco, Mexico',\n",
|
||
" 'Plessisville, Quebec, Canada, France',\n",
|
||
" ', Lebanon, Middle East',\n",
|
||
" 'Mols, Denmark',\n",
|
||
" '+, Cyprus, Egypt, Israel, Jordan, Lebanon, Middle East, Syria',\n",
|
||
" 'Prince Edward County, Ontario, Canada',\n",
|
||
" ', Belgium',\n",
|
||
" 'Auvergne, France',\n",
|
||
" ', Canada, United States',\n",
|
||
" 'Upper Corsica, France',\n",
|
||
" 'New South Wales, Australia',\n",
|
||
" 'Tibet, China, Nepal, Tibet',\n",
|
||
" 'West Bengal, India',\n",
|
||
" ', Iraq',\n",
|
||
" 'Béarnaise in Pyrénées-Atlantique, France',\n",
|
||
" 'Co. Cork, Ireland',\n",
|
||
" 'Vorarlberg, Austria',\n",
|
||
" 'Midi-Pyrenees, France',\n",
|
||
" ', Ireland',\n",
|
||
" 'Wallonia, Belgium',\n",
|
||
" 'Devon, England',\n",
|
||
" 'South East England, United Kingdom',\n",
|
||
" 'Indiana, United States',\n",
|
||
" 'Island of Pag, Croatia',\n",
|
||
" 'Sonoma, California, United States',\n",
|
||
" 'Port Townsend, United States',\n",
|
||
" 'old Liburnia (Dalmatia), Croatia',\n",
|
||
" ', Mexico and Caribbean',\n",
|
||
" 'North Yorkshire, England, United Kingdom',\n",
|
||
" 'Charentes, France',\n",
|
||
" 'La Velle, Wisconsin, United States',\n",
|
||
" ', Germany',\n",
|
||
" 'Monterey, California, Mexico, United States',\n",
|
||
" 'Roxburghshire, Scotland, United Kingdom',\n",
|
||
" 'Minas Gerais, Brazil',\n",
|
||
" 'Sardinia (Italy), Southern Corsica (France), France, Italy',\n",
|
||
" ', Hungary, Poland, Slovakia',\n",
|
||
" 'Allgaeu Alps, Germany',\n",
|
||
" 'Orkney Isles, Scotland, United Kingdom',\n",
|
||
" 'East Sussex, United Kingdom',\n",
|
||
" 'Póvoa de Lanhoso, Portugal',\n",
|
||
" 'Central and Western Macedonia, Thessalia, Greece',\n",
|
||
" 'Friuli Venezia Giulia and Veneto, Italy',\n",
|
||
" 'Somerset, England, United Kingdom',\n",
|
||
" 'Brickhill, Co. Clare, Ireland',\n",
|
||
" 'New York, France',\n",
|
||
" 'Burgundy, France',\n",
|
||
" ', Holland',\n",
|
||
" 'Gujarat, India',\n",
|
||
" 'Pembrokeshire, United Kingdom',\n",
|
||
" 'Milford, NJ, United States',\n",
|
||
" 'Campania, Italy',\n",
|
||
" ', Mongolia',\n",
|
||
" 'Valencia, Spain',\n",
|
||
" 'Tibet, China, Tibet',\n",
|
||
" 'Bourgogne, France',\n",
|
||
" 'Bjurholm, Sweden',\n",
|
||
" 'Het Groene Hart, Netherlands',\n",
|
||
" 'Kinfauns, Perthshire, Scotland',\n",
|
||
" 'Stoneyford, Ireland',\n",
|
||
" 'West Pawlet, VT, United States',\n",
|
||
" 'Sebastopol, California, United States',\n",
|
||
" 'Airedale farming district, New Zealand',\n",
|
||
" 'Wales, London, United Kingdom, Wales',\n",
|
||
" 'Tain, Scotland',\n",
|
||
" 'Naples, Italy',\n",
|
||
" 'Chirac, France',\n",
|
||
" 'Fornells de la Selva, Gironès, Spain',\n",
|
||
" 'Canton of Glarus, Switzerland',\n",
|
||
" ', Belgium, Canada, France, Switzerland, United States',\n",
|
||
" 'Tuscany, Italy',\n",
|
||
" 'Haute-Savoie / Upper Savoy, France',\n",
|
||
" 'Cornwall, ',\n",
|
||
" 'Centre , the department of Loiret, France',\n",
|
||
" ', Poland',\n",
|
||
" 'Rhône-Alpes, France',\n",
|
||
" 'Trakia, Albania, Bulgaria, Croatia, Greece, Israel, Macedonia, Romania, Serbia',\n",
|
||
" 'Somerset, England',\n",
|
||
" 'North East Victoria, Australia',\n",
|
||
" 'Sardegna, Italy',\n",
|
||
" 'Fairview, United States',\n",
|
||
" 'Murcia, Spain',\n",
|
||
" 'Languedoc, France',\n",
|
||
" 'Veneto, ',\n",
|
||
" 'Umbria, Lazio, Italy',\n",
|
||
" 'Sulzberg, Austria',\n",
|
||
" 'Queenstown, New Zealand',\n",
|
||
" 'Bergues, France',\n",
|
||
" 'Piora Valley, Switzerland',\n",
|
||
" 'Savoie, France',\n",
|
||
" 'Galax, Virginia, United States',\n",
|
||
" ', New Zealand',\n",
|
||
" 'Devon, England, Great Britain, United Kingdom',\n",
|
||
" 'Avesnes, France',\n",
|
||
" 'Dumfriesshire, Scotland, United Kingdom',\n",
|
||
" ', Greece',\n",
|
||
" 'Franche Comté, France',\n",
|
||
" 'Ontario, Canada',\n",
|
||
" 'North East Victoria, ',\n",
|
||
" 'Oregon, United States',\n",
|
||
" 'Loire, France',\n",
|
||
" 'Friuli-Venezia Giulia and the Veneto, Italy',\n",
|
||
" 'Virginia, United States',\n",
|
||
" 'Central and Western Macedonia, Thessaly, Greece',\n",
|
||
" 'Co. Mayo, Ireland',\n",
|
||
" 'Normandy, Auvilliers, France',\n",
|
||
" 'Bloomdale, United States',\n",
|
||
" 'Azores, Portugal',\n",
|
||
" 'Georgia, United States',\n",
|
||
" 'Swabia, Germany',\n",
|
||
" 'Äänekoski, Finland',\n",
|
||
" 'Lazio, Sardinia, Italy',\n",
|
||
" 'Buxton, Derbyshire, England, United Kingdom',\n",
|
||
" 'Inagh, Co Clare, Ireland',\n",
|
||
" 'Landshut, Germany',\n",
|
||
" 'East Midlands, England, Great Britain, United Kingdom',\n",
|
||
" 'East Midlands, England, United Kingdom',\n",
|
||
" 'Stawley, near Wellington, Somerset, England, Great Britain, United Kingdom',\n",
|
||
" 'Allgäu, Germany',\n",
|
||
" ', Bangladesh, India',\n",
|
||
" 'Stonegate, East Sussex, England, Great Britain, United Kingdom',\n",
|
||
" 'Castile-Leon, Spain',\n",
|
||
" 'New Jersey, United States',\n",
|
||
" 'Svaneti, Samegrelo, Georgia',\n",
|
||
" 'Low-laying regions, Sweden',\n",
|
||
" 'California, Netherlands, United States',\n",
|
||
" 'Gravina in Puglia, Murgia, Italy',\n",
|
||
" 'Aveyron, Laguiole, France',\n",
|
||
" 'Provence, France',\n",
|
||
" 'Illoud (Haute-Marne), France',\n",
|
||
" 'Cumbrian, United Kingdom',\n",
|
||
" ', Wales',\n",
|
||
" 'Illinois, United States',\n",
|
||
" 'Aveyron, France',\n",
|
||
" 'Anjou, France']"
|
||
]
|
||
},
|
||
"execution_count": 49,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"locs"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 40,
|
||
"id": "debb780e-ec13-4502-ac44-6001335e507d",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from geopy.geocoders import Nominatim"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 41,
|
||
"id": "eed3ac7b-5283-4d8e-bc26-61e1d821ccaf",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"geolocator=Nominatim(user_agent=\"toto\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 48,
|
||
"id": "0043fe0d-e2d2-48f0-8953-ffc3dee52ba6",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"location=geolocator.geocode(locs[1])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "710341db-408f-4a4a-a849-65b963582ebc",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"application/vnd.jupyter.widget-view+json": {
|
||
"model_id": "a2bbdb40ac6c43b0bfe08d014970db40",
|
||
"version_major": 2,
|
||
"version_minor": 0
|
||
},
|
||
"text/plain": [
|
||
" 0%| | 0/430 [00:00<?, ?it/s]"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Oristano, Italy\n",
|
||
"Isere, France\n",
|
||
"Romanian Carpathians, Romania\n",
|
||
"French Basque Country, Midi-Pyrénées, France\n",
|
||
"ERREUR\n",
|
||
"Kilmallock County Limerick, Ireland\n",
|
||
", Australia\n",
|
||
"massif des Causses, France\n",
|
||
"ERREUR\n",
|
||
"Basque, Pyrenees Mountains, France\n",
|
||
"ERREUR\n",
|
||
"Modena, Italy\n",
|
||
"Vermont, United States\n",
|
||
"Rhone-Alps, France\n",
|
||
"Brooklyn NY, United States\n",
|
||
"Coast of Oregon, United States\n",
|
||
"ERREUR\n",
|
||
"Veneto, Trentino, Italy\n",
|
||
"ERREUR\n",
|
||
"Emilia Romagna, Italy\n",
|
||
", Middle East\n",
|
||
"Carmarthenshire, Wales\n",
|
||
", Egypt, Lebanon, Syria\n",
|
||
"ERREUR\n",
|
||
"Jura, Switzerland\n",
|
||
", Australia, France\n",
|
||
"Rio Grande do Sul, Brazil\n",
|
||
", Turkey\n",
|
||
"Banks Peninsular in Canterbury, New Zealand\n",
|
||
"ERREUR\n",
|
||
"Loire Valley, France\n",
|
||
"Berry, France\n",
|
||
"Seattle, Washington, United States\n",
|
||
"Marathon, NY, United States\n",
|
||
"Setubal, Palmela and Sesimbra, Portugal\n",
|
||
"ERREUR\n",
|
||
"Central Balkan Mountains, Bulgaria\n",
|
||
"ERREUR\n",
|
||
"North Wootton, England\n",
|
||
"Allagau, Bavarian Alps, Germany\n",
|
||
"ERREUR\n",
|
||
"Colorado, United States\n",
|
||
", Armenia\n",
|
||
"Lebanon, CT, United States\n",
|
||
"Centre-Val de Loire, France\n",
|
||
"Barcelona, Spain\n",
|
||
"South West England, England\n",
|
||
"Laqueuille, France\n",
|
||
"Fethard, Co Tipperary, Ireland\n",
|
||
"Mornington Peninsula, Melbourne, Australia\n",
|
||
"Odell, Bedfordshire, England, Great Britain, United Kingdom\n",
|
||
"ERREUR\n",
|
||
"Shelburne Farms, United States\n",
|
||
", Israel\n",
|
||
"South Australia, Australia\n",
|
||
"Bornholm, Denmark\n",
|
||
"Pyrénées, France\n",
|
||
"Sardinia & Campania, Italy\n",
|
||
"ERREUR\n",
|
||
"Hunter Valley, Australia\n",
|
||
"Greenville, Indiana, United States\n",
|
||
"Pokolbin, Hunter Valley, Australia\n",
|
||
"ERREUR\n",
|
||
"Landford, England, Great Britain, United Kingdom\n",
|
||
"ERREUR\n",
|
||
"Carrigtwohill, Ireland\n",
|
||
"Minnesota, United States\n",
|
||
"Lombardy, Italy\n",
|
||
"County Cavan, Ireland\n",
|
||
", Argentina\n",
|
||
", France\n",
|
||
"Avila, Spain\n",
|
||
"Crotone, Italy\n",
|
||
"New Hampshire, United States\n",
|
||
", Serbia\n",
|
||
"Lombardy, Canada, Italy\n",
|
||
"ERREUR\n",
|
||
"Pays d’Auge, Normandy, France\n",
|
||
"Wisconsin, United States\n",
|
||
"Tomales, California, United States\n",
|
||
", Netherlands\n",
|
||
"Serra da Estrela, Portugal\n",
|
||
"St. Gallen (canton), Tufertschwil, Switzerland\n",
|
||
"ERREUR\n",
|
||
"Extremadura, Spain\n",
|
||
", Spain\n",
|
||
"Pesaro-Urbino, Italy\n",
|
||
"New York, France, United States\n",
|
||
"ERREUR\n",
|
||
"Northern Holland, Netherlands\n",
|
||
"ERREUR\n",
|
||
"South West England, United Kingdom\n",
|
||
"Mankato, MN, United States\n",
|
||
"Charentes-Poitou, France\n",
|
||
"Postel, Belgium\n",
|
||
"Auvergne, Salers, France\n",
|
||
"Leiden, Netherlands\n",
|
||
"Apulia, Italy, United States\n",
|
||
"ERREUR\n",
|
||
", Afghanistan\n",
|
||
"Canary Islands, Spain\n",
|
||
", Great Britain, United Kingdom, Wales\n",
|
||
"ERREUR\n",
|
||
"Oregon Coast Range, United States\n",
|
||
"ERREUR\n",
|
||
"Calabria, Italy\n",
|
||
"Utah, United States\n",
|
||
"Gâtinais, France\n",
|
||
"North Yorkshire, England\n",
|
||
"Carnia, Italy\n",
|
||
"Passendale, Belgium\n",
|
||
"Lucerne, Schwyz, Unterwald, and Zoug, and the following additional places: Muri district in d'Argovi, Switzerland\n",
|
||
"ERREUR\n",
|
||
"Carrigtwohill, \n",
|
||
"Adamstown, Co Wexford, Ireland\n",
|
||
"Bermondsey, London, England, Great Britain, United Kingdom\n",
|
||
"ERREUR\n",
|
||
"Lapland, Finland\n",
|
||
"Tasmania, Australia\n",
|
||
"Staffordshire, England, Great Britain, United Kingdom\n",
|
||
"ERREUR\n",
|
||
"Ile de France, France\n",
|
||
"Lodi, Italy\n",
|
||
"Macedonia, Thrace, Thessalia, Peloponissos, Ionian Islands, Aegean islands, Crete Island and Epirus, Greece\n",
|
||
"ERREUR\n",
|
||
", United Kingdom, United States\n",
|
||
"Southern California, United States\n",
|
||
"South West England, England, United Kingdom\n",
|
||
"Haute Vienne, France\n",
|
||
"Menorca, Balearic Islands, Spain\n",
|
||
"Aberdeenshire, Scotland\n",
|
||
"Kimball, United States\n",
|
||
"Schoonrewoerd, Leerdam, Netherlands\n",
|
||
"ERREUR\n",
|
||
"Friuli-Venezia Giulia, Italy\n",
|
||
"Larzac, France\n",
|
||
"Nottinghamshire, England, Great Britain, United Kingdom\n",
|
||
"ERREUR\n",
|
||
"Charm, Ohio, United States\n",
|
||
"Troyes , Aube, France\n",
|
||
", Denmark, Finland, Germany, Iceland, Norway, Sweden\n",
|
||
"ERREUR\n",
|
||
"Québec, Canada\n",
|
||
"Herault, France\n",
|
||
"Pullman, Washington, United States\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
"import time\n",
"import tqdm.notebook as tqdm\n",
"import random\n",
"from geopy.exc import GeopyError\n",
"\n",
"# Geocode every entry of `locs`. `locations` stays aligned with `locs`: each item is a\n",
"# (latitude, longitude) tuple, or None when the lookup failed.\n",
"geocoder=Nominatim(user_agent=\"toto\") # one client for the whole run, not one per query\n",
"locations=[]\n",
"failed=[] # entries of `locs` that could not be geocoded\n",
"for loc in tqdm.tqdm(locs):\n",
"    time.sleep(1) # stay under the public Nominatim limit of one request per second\n",
"    try:\n",
"        hit=geocoder.geocode(loc)\n",
"    except GeopyError as err:\n",
"        # a timeout or service error on one query must not throw away the whole run\n",
"        print(\"ERREUR\",loc,err)\n",
"        hit=None\n",
"    if hit is None:\n",
"        failed.append(loc)\n",
"        locations.append(None)\n",
"    else:\n",
"        locations.append((hit.latitude,hit.longitude))\n",
"print(len(failed),\"/\",len(locs),\"locations could not be geocoded\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 75,
|
||
"id": "d41b1dc8-90df-44b8-9d83-d218f82a3637",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"[<matplotlib.lines.Line2D at 0x79ba3101fad0>]"
|
||
]
|
||
},
|
||
"execution_count": 75,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
},
|
||
{
|
||
"data": {
|
||
"image/png": "",
|
||
"text/plain": [
|
||
"<Figure size 640x480 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
"# `locations` holds (latitude, longitude) tuples, with None for lookups that failed.\n",
"found=[point for point in locations if point is not None]\n",
"fig,ax=plt.subplots()\n",
"# Longitude is the east-west coordinate, so it goes on the horizontal axis;\n",
"# with latitude there instead the scatter comes out transposed.\n",
"ax.plot([point[1] for point in found],[point[0] for point in found],\".\")\n",
"ax.set_xlabel(\"longitude\")\n",
"ax.set_ylabel(\"latitude\")\n",
"ax.set_title(f\"{len(found)} of {len(locations)} locations geocoded\");"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "66ce4e4a-7006-411f-abd0-ee94d7cf99b3",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"def filter_df(df, cols=None):\n",
"    \"\"\"One-hot encode the comma-separated attribute columns of `df`.\n",
"\n",
"    Each cell of the columns in `cols` is split on commas and every piece is stripped of\n",
"    surrounding whitespace. Each distinct piece becomes one boolean column that is True\n",
"    for the rows carrying it, whichever source column it came from (so \"creamy\" used as\n",
"    a texture and as a flavor ends up in a single column).\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    df : pandas.DataFrame\n",
"        Input table. It is left untouched.\n",
"    cols : list of str, optional\n",
"        Columns to encode; defaults to milk, country, type, texture, flavor, aroma,\n",
"        family and rind.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        The columns of `df` that are not in `cols`, followed by the attribute columns in\n",
"        alphabetical order. An attribute named like one of the remaining columns\n",
"        replaces that column.\n",
"    \"\"\"\n",
"    if cols is None:\n",
"        cols = [\"milk\", \"country\", \"type\", \"texture\", \"flavor\", \"aroma\", \"family\", \"rind\"]\n",
"\n",
"    def split_attrs(val):\n",
"        # NaN / None (and any other non-text cell) carries no attribute\n",
"        if not isinstance(val, str):\n",
"            return set()\n",
"        # stray commas leave empty pieces behind; they must not become a column named \"\"\n",
"        return {piece.strip() for piece in val.split(\",\")} - {\"\"}\n",
"\n",
"    row_attrs = [set() for _ in range(len(df))] # attributes carried by each row\n",
"    for col in cols:\n",
"        for i, val in enumerate(df[col]):\n",
"            row_attrs[i] |= split_attrs(val)\n",
"    attributes = sorted(set().union(*row_attrs)) # sorted: same column order on every run\n",
"\n",
"    # Drop the source columns BEFORE adding the flags, so that an attribute spelled like a\n",
"    # source column (e.g. \"rind\") is not deleted together with it.\n",
"    kept = df.drop(columns=cols)\n",
"    kept = kept.drop(columns=[attr for attr in attributes if attr in kept.columns])\n",
"    # Build all flag columns at once: inserting them one by one fragments the frame.\n",
"    flags = pd.DataFrame(\n",
"        {attr: [attr in attrs for attrs in row_attrs] for attr in attributes},\n",
"        index=df.index,\n",
"    )\n",
"    return pd.concat([kept, flags], axis=1)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "fda6aaad-7b1e-4daa-8d28-cd049df9cec2",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"data_features=filter_df(data)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "a1b022a3-a2f9-4e39-9e79-48ae9f6adca5",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Classification"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "94bcde38-784b-41d9-89b0-3e2e17aa2979",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from sklearn import tree"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "7b2b9d3e-d7da-4f43-9e1c-4e62e837ed0b",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"c=tree.DecisionTreeClassifier()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "0b52b082-0554-45f2-9eff-e6a3ba6a8d08",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"c.fit("
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "6d0b0d25-3476-4fbb-84c7-008437e87903",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"data[[\"country\",\"region\"]]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "100a7c2e-2d24-4814-bd68-4b9f6433ce4d",
|
||
"metadata": {},
|
||
"source": [
|
"**Prétraitement :** convertir la couleur en RGB et la localisation en coordonnées GPS.\n",
"\n",
"1. Est-ce que la couleur suffit à savoir d'où vient le fromage ?\n",
"2. Est-ce que ça marche mieux si on ajoute le type ?\n",
"3. Et si on ajoute les caractéristiques gustatives ?\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "24e7ff6e-c308-4cc8-aeac-eeb372f4c479",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"data_features"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "ca969d41-a88a-47d9-b94b-8b633d3d3348",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "e2c9b84f-b899-4c99-abb7-37a9deeafbb5",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "d41c1cfc-1564-4131-8391-c8a8971b9d13",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "42a1e4ed-9a1e-41f8-a322-b5d2de68d24a",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"data[pd.isnull(data[\"country\"])&pd.isnull(data[\"region\"])]\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "038cd38e-3890-4f73-91a7-c30294b3bc5b",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Pattern Mining"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "2e6b0dc1-030c-4239-803f-52736a41bcb5",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "23b75579-95bb-4889-928f-9c3c1309a18a",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"# apriori expects a one-hot table (boolean / 0-1 columns only), which the raw `data` is not:\n",
"# mine the boolean attribute columns built by filter_df instead.\n",
"apriori(data_features.select_dtypes(include=\"bool\"),use_colnames=True)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "61959c04-61bf-464a-89ca-72ec4782f927",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "744f8d1d-0874-4b92-921f-5a85ccf598ad",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "f99af4d2-20e9-4bff-802a-dbdb91f95a96",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "fa04bb73-ba5c-4164-a1af-f061d9627557",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "36ff5c84-93f3-4854-b2c4-e6082859c974",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.12.3"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|