3189 lines
142 KiB
Plaintext
3189 lines
142 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "5f7c9658-c285-4854-96c0-e899fc55421b",
|
||
"metadata": {},
|
||
"source": [
|
||
"# DM project: cheese"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"id": "7f4f2b89-8257-468c-9f5e-a77e11b8b8ff",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"from mlxtend.preprocessing import TransactionEncoder\n",
|
||
"from mlxtend.frequent_patterns import apriori\n",
|
||
"from geopy.geocoders import Nominatim\n",
|
||
"import matplotlib.pyplot as plt\n",
|
||
"import time\n",
|
||
"import tqdm.notebook as tqdm\n",
|
||
"import random\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"id": "1a0afba8-692b-4377-a2ce-5114983e3bbb",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>cheese</th>\n",
|
||
" <th>url</th>\n",
|
||
" <th>milk</th>\n",
|
||
" <th>country</th>\n",
|
||
" <th>region</th>\n",
|
||
" <th>family</th>\n",
|
||
" <th>type</th>\n",
|
||
" <th>fat_content</th>\n",
|
||
" <th>calcium_content</th>\n",
|
||
" <th>texture</th>\n",
|
||
" <th>rind</th>\n",
|
||
" <th>color</th>\n",
|
||
" <th>flavor</th>\n",
|
||
" <th>aroma</th>\n",
|
||
" <th>vegetarian</th>\n",
|
||
" <th>vegan</th>\n",
|
||
" <th>synonyms</th>\n",
|
||
" <th>alt_spellings</th>\n",
|
||
" <th>producers</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>Aarewasser</td>\n",
|
||
" <td>https://www.cheese.com/aarewasser/</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>Switzerland</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-soft</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>buttery</td>\n",
|
||
" <td>washed</td>\n",
|
||
" <td>yellow</td>\n",
|
||
" <td>sweet</td>\n",
|
||
" <td>buttery</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Jumi</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>Abbaye de Belloc</td>\n",
|
||
" <td>https://www.cheese.com/abbaye-de-belloc/</td>\n",
|
||
" <td>sheep</td>\n",
|
||
" <td>France</td>\n",
|
||
" <td>Pays Basque</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard, artisan</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>creamy, dense, firm</td>\n",
|
||
" <td>natural</td>\n",
|
||
" <td>yellow</td>\n",
|
||
" <td>burnt caramel</td>\n",
|
||
" <td>lanoline</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>Abbaye Notre-Dame de Belloc</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>Abbaye de Belval</td>\n",
|
||
" <td>https://www.cheese.com/abbaye-de-belval/</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>France</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard</td>\n",
|
||
" <td>40-46%</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>elastic</td>\n",
|
||
" <td>washed</td>\n",
|
||
" <td>ivory</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>aromatic</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>Abbaye de Citeaux</td>\n",
|
||
" <td>https://www.cheese.com/abbaye-de-citeaux/</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>France</td>\n",
|
||
" <td>Burgundy</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-soft, artisan, brined</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>creamy, dense, smooth</td>\n",
|
||
" <td>washed</td>\n",
|
||
" <td>white</td>\n",
|
||
" <td>acidic, milky, smooth</td>\n",
|
||
" <td>barnyardy, earthy</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>Abbaye de Tamié</td>\n",
|
||
" <td>https://www.cheese.com/tamie/</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>France</td>\n",
|
||
" <td>Savoie</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>soft, artisan</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>creamy, open, smooth</td>\n",
|
||
" <td>washed</td>\n",
|
||
" <td>white</td>\n",
|
||
" <td>fruity, nutty</td>\n",
|
||
" <td>perfumed, pungent</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Tamié, Trappiste de Tamie, Abbey of Tamie</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1182</th>\n",
|
||
" <td>Sveciaost</td>\n",
|
||
" <td>https://www.cheese.com/sveciaost/</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>Sweden</td>\n",
|
||
" <td>Low-laying regions</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard, brined</td>\n",
|
||
" <td>45%</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>creamy, supple</td>\n",
|
||
" <td>rindless</td>\n",
|
||
" <td>pale yellow</td>\n",
|
||
" <td>acidic</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1183</th>\n",
|
||
" <td>Swag</td>\n",
|
||
" <td>https://www.cheese.com/swag/</td>\n",
|
||
" <td>goat</td>\n",
|
||
" <td>Australia</td>\n",
|
||
" <td>South Australia</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fresh firm, artisan</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>creamy, crumbly</td>\n",
|
||
" <td>ash coated</td>\n",
|
||
" <td>white</td>\n",
|
||
" <td>acidic, creamy</td>\n",
|
||
" <td>fresh</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Woodside Cheese Wrights</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1184</th>\n",
|
||
" <td>Swaledale</td>\n",
|
||
" <td>https://www.cheese.com/swaledale/</td>\n",
|
||
" <td>sheep</td>\n",
|
||
" <td>England</td>\n",
|
||
" <td>Swaledale, North Yorkshire</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>hard</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi firm</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>yellow</td>\n",
|
||
" <td>smooth, sweet</td>\n",
|
||
" <td>floral</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>Swaledale Sheep Cheese</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1185</th>\n",
|
||
" <td>Sweet Style Swiss</td>\n",
|
||
" <td>https://www.cheese.com/sweet-style-swiss/</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Switzerland</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard, artisan</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>firm, supple</td>\n",
|
||
" <td>waxed</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>nutty</td>\n",
|
||
" <td>nutty, sweet</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1186</th>\n",
|
||
" <td>Swiss cheese</td>\n",
|
||
" <td>https://www.cheese.com/swiss/</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>United States</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Swiss Cheese</td>\n",
|
||
" <td>hard, artisan, processed</td>\n",
|
||
" <td>7.8 g/100g</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>firm</td>\n",
|
||
" <td>rindless</td>\n",
|
||
" <td>pale yellow</td>\n",
|
||
" <td>nutty, sweet</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>American Swiss Cheese</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Various</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>1187 rows × 19 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" cheese url milk \\\n",
|
||
"0 Aarewasser https://www.cheese.com/aarewasser/ cow \n",
|
||
"1 Abbaye de Belloc https://www.cheese.com/abbaye-de-belloc/ sheep \n",
|
||
"2 Abbaye de Belval https://www.cheese.com/abbaye-de-belval/ cow \n",
|
||
"3 Abbaye de Citeaux https://www.cheese.com/abbaye-de-citeaux/ cow \n",
|
||
"4 Abbaye de Tamié https://www.cheese.com/tamie/ cow \n",
|
||
"... ... ... ... \n",
|
||
"1182 Sveciaost https://www.cheese.com/sveciaost/ cow \n",
|
||
"1183 Swag https://www.cheese.com/swag/ goat \n",
|
||
"1184 Swaledale https://www.cheese.com/swaledale/ sheep \n",
|
||
"1185 Sweet Style Swiss https://www.cheese.com/sweet-style-swiss/ NaN \n",
|
||
"1186 Swiss cheese https://www.cheese.com/swiss/ cow \n",
|
||
"\n",
|
||
" country region family \\\n",
|
||
"0 Switzerland NaN NaN \n",
|
||
"1 France Pays Basque NaN \n",
|
||
"2 France NaN NaN \n",
|
||
"3 France Burgundy NaN \n",
|
||
"4 France Savoie NaN \n",
|
||
"... ... ... ... \n",
|
||
"1182 Sweden Low-laying regions NaN \n",
|
||
"1183 Australia South Australia NaN \n",
|
||
"1184 England Swaledale, North Yorkshire NaN \n",
|
||
"1185 Switzerland NaN NaN \n",
|
||
"1186 United States NaN Swiss Cheese \n",
|
||
"\n",
|
||
" type fat_content calcium_content \\\n",
|
||
"0 semi-soft NaN NaN \n",
|
||
"1 semi-hard, artisan NaN NaN \n",
|
||
"2 semi-hard 40-46% NaN \n",
|
||
"3 semi-soft, artisan, brined NaN NaN \n",
|
||
"4 soft, artisan NaN NaN \n",
|
||
"... ... ... ... \n",
|
||
"1182 semi-hard, brined 45% NaN \n",
|
||
"1183 fresh firm, artisan NaN NaN \n",
|
||
"1184 hard NaN NaN \n",
|
||
"1185 semi-hard, artisan NaN NaN \n",
|
||
"1186 hard, artisan, processed 7.8 g/100g NaN \n",
|
||
"\n",
|
||
" texture rind color flavor \\\n",
|
||
"0 buttery washed yellow sweet \n",
|
||
"1 creamy, dense, firm natural yellow burnt caramel \n",
|
||
"2 elastic washed ivory NaN \n",
|
||
"3 creamy, dense, smooth washed white acidic, milky, smooth \n",
|
||
"4 creamy, open, smooth washed white fruity, nutty \n",
|
||
"... ... ... ... ... \n",
|
||
"1182 creamy, supple rindless pale yellow acidic \n",
|
||
"1183 creamy, crumbly ash coated white acidic, creamy \n",
|
||
"1184 semi firm NaN yellow smooth, sweet \n",
|
||
"1185 firm, supple waxed NaN nutty \n",
|
||
"1186 firm rindless pale yellow nutty, sweet \n",
|
||
"\n",
|
||
" aroma vegetarian vegan synonyms \\\n",
|
||
"0 buttery False False NaN \n",
|
||
"1 lanoline True False Abbaye Notre-Dame de Belloc \n",
|
||
"2 aromatic False False NaN \n",
|
||
"3 barnyardy, earthy False False NaN \n",
|
||
"4 perfumed, pungent False False NaN \n",
|
||
"... ... ... ... ... \n",
|
||
"1182 NaN False False NaN \n",
|
||
"1183 fresh True False NaN \n",
|
||
"1184 floral True False Swaledale Sheep Cheese \n",
|
||
"1185 nutty, sweet False False NaN \n",
|
||
"1186 NaN True False American Swiss Cheese \n",
|
||
"\n",
|
||
" alt_spellings producers \n",
|
||
"0 NaN Jumi \n",
|
||
"1 NaN NaN \n",
|
||
"2 NaN NaN \n",
|
||
"3 NaN NaN \n",
|
||
"4 Tamié, Trappiste de Tamie, Abbey of Tamie NaN \n",
|
||
"... ... ... \n",
|
||
"1182 NaN NaN \n",
|
||
"1183 NaN Woodside Cheese Wrights \n",
|
||
"1184 NaN NaN \n",
|
||
"1185 NaN NaN \n",
|
||
"1186 NaN Various \n",
|
||
"\n",
|
||
"[1187 rows x 19 columns]"
|
||
]
|
||
},
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Load the cheese table from cheeses.csv (path is relative to the working directory).\n",
|
||
"data = pd.read_csv(\"cheeses.csv\")\n",
|
||
"data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "bf3b548c-5ac4-4126-9ae9-5578ad158015",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Cleaning and pre-processing"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"id": "2018aac2-6f3d-489a-b5d0-90b7c7793076",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"{'blue', 'yellow', 'white', 'golden yellow', 'pink and white', 'orange', 'pale yellow', nan, 'ivory', 'green', 'straw', 'red', 'brownish yellow', 'pale white', 'brown', 'golden orange', 'blue-grey', 'cream'}\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>cheese</th>\n",
|
||
" <th>url</th>\n",
|
||
" <th>milk</th>\n",
|
||
" <th>country</th>\n",
|
||
" <th>region</th>\n",
|
||
" <th>family</th>\n",
|
||
" <th>type</th>\n",
|
||
" <th>fat_content</th>\n",
|
||
" <th>calcium_content</th>\n",
|
||
" <th>texture</th>\n",
|
||
" <th>rind</th>\n",
|
||
" <th>color</th>\n",
|
||
" <th>flavor</th>\n",
|
||
" <th>aroma</th>\n",
|
||
" <th>vegetarian</th>\n",
|
||
" <th>vegan</th>\n",
|
||
" <th>synonyms</th>\n",
|
||
" <th>alt_spellings</th>\n",
|
||
" <th>producers</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>10</th>\n",
|
||
" <td>Acapella</td>\n",
|
||
" <td>https://www.cheese.com/acapella/</td>\n",
|
||
" <td>goat</td>\n",
|
||
" <td>United States</td>\n",
|
||
" <td>California</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>soft, soft-ripened</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>buttery</td>\n",
|
||
" <td>fresh, herbal</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>13</th>\n",
|
||
" <td>Acorn</td>\n",
|
||
" <td>https://www.cheese.com/acorn/</td>\n",
|
||
" <td>sheep</td>\n",
|
||
" <td>United Kingdom</td>\n",
|
||
" <td>Bethania</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>hard, artisan</td>\n",
|
||
" <td>52%</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>crumbly, firm</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>burnt caramel, citrusy, herbaceous</td>\n",
|
||
" <td>fruity</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>19</th>\n",
|
||
" <td>Afuega'l Pitu</td>\n",
|
||
" <td>https://www.cheese.com/afuegal-pitu/</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>Spain</td>\n",
|
||
" <td>Asturias</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>soft, artisan</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>smooth</td>\n",
|
||
" <td>cloth wrapped</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>spicy, strong</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>48</th>\n",
|
||
" <td>Alpe di Frabosa</td>\n",
|
||
" <td>https://www.cheese.com/alpe-di-frabosa/</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>Italy</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-soft</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>bitter</td>\n",
|
||
" <td>milky, mushroom</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>50</th>\n",
|
||
" <td>Alpicrème</td>\n",
|
||
" <td>https://www.cheese.com/alpicreme/</td>\n",
|
||
" <td>goat</td>\n",
|
||
" <td>France</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>soft</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1172</th>\n",
|
||
" <td>Strathdon Blue</td>\n",
|
||
" <td>https://www.cheese.com/strathdon-blue/</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>Scotland</td>\n",
|
||
" <td>Tain</td>\n",
|
||
" <td>Blue</td>\n",
|
||
" <td>semi-soft</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>creamy</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>creamy, spicy</td>\n",
|
||
" <td>aromatic, rich</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Highland Fine Cheeses Limited</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1175</th>\n",
|
||
" <td>String Cheese</td>\n",
|
||
" <td>https://www.cheese.com/string/</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>chewy, firm, stringy</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1177</th>\n",
|
||
" <td>Sulguni</td>\n",
|
||
" <td>https://www.cheese.com/sulguni/</td>\n",
|
||
" <td>buffalo, cow</td>\n",
|
||
" <td>Georgia</td>\n",
|
||
" <td>Svaneti, Samegrelo</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-firm</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>dense, elastic</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>salty, smokey , sour</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Georgian Pickle Cheese</td>\n",
|
||
" <td>Megruli Sulguni, Shebolili Megruli Sulguni</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1181</th>\n",
|
||
" <td>Sussex Slipcote</td>\n",
|
||
" <td>https://www.cheese.com/sussex-slipcote/</td>\n",
|
||
" <td>sheep</td>\n",
|
||
" <td>England</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>soft</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>sharp</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>High Weald Dairy</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1185</th>\n",
|
||
" <td>Sweet Style Swiss</td>\n",
|
||
" <td>https://www.cheese.com/sweet-style-swiss/</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Switzerland</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard, artisan</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>firm, supple</td>\n",
|
||
" <td>waxed</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>nutty</td>\n",
|
||
" <td>nutty, sweet</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>142 rows × 19 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" cheese url \\\n",
|
||
"10 Acapella https://www.cheese.com/acapella/ \n",
|
||
"13 Acorn https://www.cheese.com/acorn/ \n",
|
||
"19 Afuega'l Pitu https://www.cheese.com/afuegal-pitu/ \n",
|
||
"48 Alpe di Frabosa https://www.cheese.com/alpe-di-frabosa/ \n",
|
||
"50 Alpicrème https://www.cheese.com/alpicreme/ \n",
|
||
"... ... ... \n",
|
||
"1172 Strathdon Blue https://www.cheese.com/strathdon-blue/ \n",
|
||
"1175 String Cheese https://www.cheese.com/string/ \n",
|
||
"1177 Sulguni https://www.cheese.com/sulguni/ \n",
|
||
"1181 Sussex Slipcote https://www.cheese.com/sussex-slipcote/ \n",
|
||
"1185 Sweet Style Swiss https://www.cheese.com/sweet-style-swiss/ \n",
|
||
"\n",
|
||
" milk country region family \\\n",
|
||
"10 goat United States California NaN \n",
|
||
"13 sheep United Kingdom Bethania NaN \n",
|
||
"19 cow Spain Asturias NaN \n",
|
||
"48 cow Italy NaN NaN \n",
|
||
"50 goat France NaN NaN \n",
|
||
"... ... ... ... ... \n",
|
||
"1172 cow Scotland Tain Blue \n",
|
||
"1175 NaN NaN NaN NaN \n",
|
||
"1177 buffalo, cow Georgia Svaneti, Samegrelo NaN \n",
|
||
"1181 sheep England NaN NaN \n",
|
||
"1185 NaN Switzerland NaN NaN \n",
|
||
"\n",
|
||
" type fat_content calcium_content texture \\\n",
|
||
"10 soft, soft-ripened NaN NaN NaN \n",
|
||
"13 hard, artisan 52% NaN crumbly, firm \n",
|
||
"19 soft, artisan NaN NaN smooth \n",
|
||
"48 semi-soft NaN NaN NaN \n",
|
||
"50 soft NaN NaN NaN \n",
|
||
"... ... ... ... ... \n",
|
||
"1172 semi-soft NaN NaN creamy \n",
|
||
"1175 semi-hard NaN NaN chewy, firm, stringy \n",
|
||
"1177 semi-firm NaN NaN dense, elastic \n",
|
||
"1181 soft NaN NaN NaN \n",
|
||
"1185 semi-hard, artisan NaN NaN firm, supple \n",
|
||
"\n",
|
||
" rind color flavor \\\n",
|
||
"10 NaN NaN buttery \n",
|
||
"13 NaN NaN burnt caramel, citrusy, herbaceous \n",
|
||
"19 cloth wrapped NaN spicy, strong \n",
|
||
"48 NaN NaN bitter \n",
|
||
"50 NaN NaN NaN \n",
|
||
"... ... ... ... \n",
|
||
"1172 NaN NaN creamy, spicy \n",
|
||
"1175 NaN NaN NaN \n",
|
||
"1177 NaN NaN salty, smokey , sour \n",
|
||
"1181 NaN NaN sharp \n",
|
||
"1185 waxed NaN nutty \n",
|
||
"\n",
|
||
" aroma vegetarian vegan synonyms \\\n",
|
||
"10 fresh, herbal False False NaN \n",
|
||
"13 fruity True False NaN \n",
|
||
"19 NaN False False NaN \n",
|
||
"48 milky, mushroom False False NaN \n",
|
||
"50 NaN False False NaN \n",
|
||
"... ... ... ... ... \n",
|
||
"1172 aromatic, rich True False NaN \n",
|
||
"1175 NaN NaN NaN NaN \n",
|
||
"1177 NaN NaN NaN Georgian Pickle Cheese \n",
|
||
"1181 NaN True False NaN \n",
|
||
"1185 nutty, sweet False False NaN \n",
|
||
"\n",
|
||
" alt_spellings \\\n",
|
||
"10 NaN \n",
|
||
"13 NaN \n",
|
||
"19 NaN \n",
|
||
"48 NaN \n",
|
||
"50 NaN \n",
|
||
"... ... \n",
|
||
"1172 NaN \n",
|
||
"1175 NaN \n",
|
||
"1177 Megruli Sulguni, Shebolili Megruli Sulguni \n",
|
||
"1181 NaN \n",
|
||
"1185 NaN \n",
|
||
"\n",
|
||
" producers \n",
|
||
"10 NaN \n",
|
||
"13 NaN \n",
|
||
"19 NaN \n",
|
||
"48 NaN \n",
|
||
"50 NaN \n",
|
||
"... ... \n",
|
||
"1172 Highland Fine Cheeses Limited \n",
|
||
"1175 NaN \n",
|
||
"1177 NaN \n",
|
||
"1181 High Weald Dairy \n",
|
||
"1185 NaN \n",
|
||
"\n",
|
||
"[142 rows x 19 columns]"
|
||
]
|
||
},
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Show the distinct colour labels (NaN included), then the rows with no colour recorded.\n",
|
||
"print(set(data[\"color\"]))\n",
|
||
"data[data[\"color\"].isna()]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"id": "a0a77563-518e-4808-b744-9fc0c76763fe",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"1162\n",
|
||
"939\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Number of rows with a missing value in each of the two nutrient columns.\n",
|
||
"print(data[\"calcium_content\"].isna().sum())\n",
|
||
"print(data[\"fat_content\"].isna().sum())"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"id": "c8489ffa-1067-4eb7-b65a-2fa18fdb4b04",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Remove six columns from the frame (calcium_content and fat_content are mostly missing).\n",
|
||
"# drop(..., errors=\"ignore\") keeps this cell re-runnable on a live kernel;\n",
|
||
"# the previous `del data[col]` statements raised KeyError on a second run.\n",
|
||
"data = data.drop(\n",
|
||
"    columns=[\"alt_spellings\", \"producers\", \"calcium_content\", \"url\", \"fat_content\", \"synonyms\"],\n",
|
||
"    errors=\"ignore\",\n",
|
||
")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"id": "5379265a-cd49-41fa-845c-bfae33bb8f5a",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>cheese</th>\n",
|
||
" <th>milk</th>\n",
|
||
" <th>country</th>\n",
|
||
" <th>region</th>\n",
|
||
" <th>family</th>\n",
|
||
" <th>type</th>\n",
|
||
" <th>texture</th>\n",
|
||
" <th>rind</th>\n",
|
||
" <th>color</th>\n",
|
||
" <th>flavor</th>\n",
|
||
" <th>aroma</th>\n",
|
||
" <th>vegetarian</th>\n",
|
||
" <th>vegan</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>Aarewasser</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>Switzerland</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-soft</td>\n",
|
||
" <td>buttery</td>\n",
|
||
" <td>washed</td>\n",
|
||
" <td>yellow</td>\n",
|
||
" <td>sweet</td>\n",
|
||
" <td>buttery</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>Abbaye de Belloc</td>\n",
|
||
" <td>sheep</td>\n",
|
||
" <td>France</td>\n",
|
||
" <td>Pays Basque</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard, artisan</td>\n",
|
||
" <td>creamy, dense, firm</td>\n",
|
||
" <td>natural</td>\n",
|
||
" <td>yellow</td>\n",
|
||
" <td>burnt caramel</td>\n",
|
||
" <td>lanoline</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>Abbaye de Belval</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>France</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard</td>\n",
|
||
" <td>elastic</td>\n",
|
||
" <td>washed</td>\n",
|
||
" <td>ivory</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>aromatic</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>Abbaye de Citeaux</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>France</td>\n",
|
||
" <td>Burgundy</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-soft, artisan, brined</td>\n",
|
||
" <td>creamy, dense, smooth</td>\n",
|
||
" <td>washed</td>\n",
|
||
" <td>white</td>\n",
|
||
" <td>acidic, milky, smooth</td>\n",
|
||
" <td>barnyardy, earthy</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>Abbaye de Tamié</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>France</td>\n",
|
||
" <td>Savoie</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>soft, artisan</td>\n",
|
||
" <td>creamy, open, smooth</td>\n",
|
||
" <td>washed</td>\n",
|
||
" <td>white</td>\n",
|
||
" <td>fruity, nutty</td>\n",
|
||
" <td>perfumed, pungent</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1182</th>\n",
|
||
" <td>Sveciaost</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>Sweden</td>\n",
|
||
" <td>Low-laying regions</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard, brined</td>\n",
|
||
" <td>creamy, supple</td>\n",
|
||
" <td>rindless</td>\n",
|
||
" <td>pale yellow</td>\n",
|
||
" <td>acidic</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1183</th>\n",
|
||
" <td>Swag</td>\n",
|
||
" <td>goat</td>\n",
|
||
" <td>Australia</td>\n",
|
||
" <td>South Australia</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fresh firm, artisan</td>\n",
|
||
" <td>creamy, crumbly</td>\n",
|
||
" <td>ash coated</td>\n",
|
||
" <td>white</td>\n",
|
||
" <td>acidic, creamy</td>\n",
|
||
" <td>fresh</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1184</th>\n",
|
||
" <td>Swaledale</td>\n",
|
||
" <td>sheep</td>\n",
|
||
" <td>England</td>\n",
|
||
" <td>Swaledale, North Yorkshire</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>hard</td>\n",
|
||
" <td>semi firm</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>yellow</td>\n",
|
||
" <td>smooth, sweet</td>\n",
|
||
" <td>floral</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1185</th>\n",
|
||
" <td>Sweet Style Swiss</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Switzerland</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard, artisan</td>\n",
|
||
" <td>firm, supple</td>\n",
|
||
" <td>waxed</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>nutty</td>\n",
|
||
" <td>nutty, sweet</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1186</th>\n",
|
||
" <td>Swiss cheese</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>United States</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Swiss Cheese</td>\n",
|
||
" <td>hard, artisan, processed</td>\n",
|
||
" <td>firm</td>\n",
|
||
" <td>rindless</td>\n",
|
||
" <td>pale yellow</td>\n",
|
||
" <td>nutty, sweet</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>1187 rows × 13 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" cheese milk country region \\\n",
|
||
"0 Aarewasser cow Switzerland NaN \n",
|
||
"1 Abbaye de Belloc sheep France Pays Basque \n",
|
||
"2 Abbaye de Belval cow France NaN \n",
|
||
"3 Abbaye de Citeaux cow France Burgundy \n",
|
||
"4 Abbaye de Tamié cow France Savoie \n",
|
||
"... ... ... ... ... \n",
|
||
"1182 Sveciaost cow Sweden Low-laying regions \n",
|
||
"1183 Swag goat Australia South Australia \n",
|
||
"1184 Swaledale sheep England Swaledale, North Yorkshire \n",
|
||
"1185 Sweet Style Swiss NaN Switzerland NaN \n",
|
||
"1186 Swiss cheese cow United States NaN \n",
|
||
"\n",
|
||
" family type texture \\\n",
|
||
"0 NaN semi-soft buttery \n",
|
||
"1 NaN semi-hard, artisan creamy, dense, firm \n",
|
||
"2 NaN semi-hard elastic \n",
|
||
"3 NaN semi-soft, artisan, brined creamy, dense, smooth \n",
|
||
"4 NaN soft, artisan creamy, open, smooth \n",
|
||
"... ... ... ... \n",
|
||
"1182 NaN semi-hard, brined creamy, supple \n",
|
||
"1183 NaN fresh firm, artisan creamy, crumbly \n",
|
||
"1184 NaN hard semi firm \n",
|
||
"1185 NaN semi-hard, artisan firm, supple \n",
|
||
"1186 Swiss Cheese hard, artisan, processed firm \n",
|
||
"\n",
|
||
" rind color flavor aroma \\\n",
|
||
"0 washed yellow sweet buttery \n",
|
||
"1 natural yellow burnt caramel lanoline \n",
|
||
"2 washed ivory NaN aromatic \n",
|
||
"3 washed white acidic, milky, smooth barnyardy, earthy \n",
|
||
"4 washed white fruity, nutty perfumed, pungent \n",
|
||
"... ... ... ... ... \n",
|
||
"1182 rindless pale yellow acidic NaN \n",
|
||
"1183 ash coated white acidic, creamy fresh \n",
|
||
"1184 NaN yellow smooth, sweet floral \n",
|
||
"1185 waxed NaN nutty nutty, sweet \n",
|
||
"1186 rindless pale yellow nutty, sweet NaN \n",
|
||
"\n",
|
||
" vegetarian vegan \n",
|
||
"0 False False \n",
|
||
"1 True False \n",
|
||
"2 False False \n",
|
||
"3 False False \n",
|
||
"4 False False \n",
|
||
"... ... ... \n",
|
||
"1182 False False \n",
|
||
"1183 True False \n",
|
||
"1184 True False \n",
|
||
"1185 False False \n",
|
||
"1186 True False \n",
|
||
"\n",
|
||
"[1187 rows x 13 columns]"
|
||
]
|
||
},
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"id": "633ed80e-e416-41f6-ae58-b86ce4c132af",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"1181 rows remaining\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"data=data.dropna(subset=[\"country\",\"region\"], how=\"all\")\n",
|
||
"data=data.fillna(value={\"country\":\"\"})\n",
|
||
"data=data.fillna(value={\"region\":\"\"})\n",
|
||
"print(f\"{len(data)} rows remaining\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "fd66568f-78d4-4e1a-a91c-8ec483b4b03c",
|
||
"metadata": {},
|
||
"source": [
|
||
"We removed the 6 rows that have neither a country nor a region, since no location can be determined for them. "
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"id": "5a4c0e30-8535-498b-9a9e-0d7d232d4eb7",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>cheese</th>\n",
|
||
" <th>milk</th>\n",
|
||
" <th>country</th>\n",
|
||
" <th>region</th>\n",
|
||
" <th>family</th>\n",
|
||
" <th>type</th>\n",
|
||
" <th>texture</th>\n",
|
||
" <th>rind</th>\n",
|
||
" <th>color</th>\n",
|
||
" <th>flavor</th>\n",
|
||
" <th>aroma</th>\n",
|
||
" <th>vegetarian</th>\n",
|
||
" <th>vegan</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>Aarewasser</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>Switzerland</td>\n",
|
||
" <td></td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-soft</td>\n",
|
||
" <td>buttery</td>\n",
|
||
" <td>washed</td>\n",
|
||
" <td>yellow</td>\n",
|
||
" <td>sweet</td>\n",
|
||
" <td>buttery</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>Abbaye de Belloc</td>\n",
|
||
" <td>sheep</td>\n",
|
||
" <td>France</td>\n",
|
||
" <td>Pays Basque</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard, artisan</td>\n",
|
||
" <td>creamy, dense, firm</td>\n",
|
||
" <td>natural</td>\n",
|
||
" <td>yellow</td>\n",
|
||
" <td>burnt caramel</td>\n",
|
||
" <td>lanoline</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>Abbaye de Belval</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>France</td>\n",
|
||
" <td></td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard</td>\n",
|
||
" <td>elastic</td>\n",
|
||
" <td>washed</td>\n",
|
||
" <td>ivory</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>aromatic</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>Abbaye de Citeaux</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>France</td>\n",
|
||
" <td>Burgundy</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-soft, artisan, brined</td>\n",
|
||
" <td>creamy, dense, smooth</td>\n",
|
||
" <td>washed</td>\n",
|
||
" <td>white</td>\n",
|
||
" <td>acidic, milky, smooth</td>\n",
|
||
" <td>barnyardy, earthy</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>Abbaye de Tamié</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>France</td>\n",
|
||
" <td>Savoie</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>soft, artisan</td>\n",
|
||
" <td>creamy, open, smooth</td>\n",
|
||
" <td>washed</td>\n",
|
||
" <td>white</td>\n",
|
||
" <td>fruity, nutty</td>\n",
|
||
" <td>perfumed, pungent</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1182</th>\n",
|
||
" <td>Sveciaost</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>Sweden</td>\n",
|
||
" <td>Low-laying regions</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard, brined</td>\n",
|
||
" <td>creamy, supple</td>\n",
|
||
" <td>rindless</td>\n",
|
||
" <td>pale yellow</td>\n",
|
||
" <td>acidic</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1183</th>\n",
|
||
" <td>Swag</td>\n",
|
||
" <td>goat</td>\n",
|
||
" <td>Australia</td>\n",
|
||
" <td>South Australia</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fresh firm, artisan</td>\n",
|
||
" <td>creamy, crumbly</td>\n",
|
||
" <td>ash coated</td>\n",
|
||
" <td>white</td>\n",
|
||
" <td>acidic, creamy</td>\n",
|
||
" <td>fresh</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1184</th>\n",
|
||
" <td>Swaledale</td>\n",
|
||
" <td>sheep</td>\n",
|
||
" <td>England</td>\n",
|
||
" <td>Swaledale, North Yorkshire</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>hard</td>\n",
|
||
" <td>semi firm</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>yellow</td>\n",
|
||
" <td>smooth, sweet</td>\n",
|
||
" <td>floral</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1185</th>\n",
|
||
" <td>Sweet Style Swiss</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Switzerland</td>\n",
|
||
" <td></td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard, artisan</td>\n",
|
||
" <td>firm, supple</td>\n",
|
||
" <td>waxed</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>nutty</td>\n",
|
||
" <td>nutty, sweet</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1186</th>\n",
|
||
" <td>Swiss cheese</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>United States</td>\n",
|
||
" <td></td>\n",
|
||
" <td>Swiss Cheese</td>\n",
|
||
" <td>hard, artisan, processed</td>\n",
|
||
" <td>firm</td>\n",
|
||
" <td>rindless</td>\n",
|
||
" <td>pale yellow</td>\n",
|
||
" <td>nutty, sweet</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>1181 rows × 13 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" cheese milk country region \\\n",
|
||
"0 Aarewasser cow Switzerland \n",
|
||
"1 Abbaye de Belloc sheep France Pays Basque \n",
|
||
"2 Abbaye de Belval cow France \n",
|
||
"3 Abbaye de Citeaux cow France Burgundy \n",
|
||
"4 Abbaye de Tamié cow France Savoie \n",
|
||
"... ... ... ... ... \n",
|
||
"1182 Sveciaost cow Sweden Low-laying regions \n",
|
||
"1183 Swag goat Australia South Australia \n",
|
||
"1184 Swaledale sheep England Swaledale, North Yorkshire \n",
|
||
"1185 Sweet Style Swiss NaN Switzerland \n",
|
||
"1186 Swiss cheese cow United States \n",
|
||
"\n",
|
||
" family type texture \\\n",
|
||
"0 NaN semi-soft buttery \n",
|
||
"1 NaN semi-hard, artisan creamy, dense, firm \n",
|
||
"2 NaN semi-hard elastic \n",
|
||
"3 NaN semi-soft, artisan, brined creamy, dense, smooth \n",
|
||
"4 NaN soft, artisan creamy, open, smooth \n",
|
||
"... ... ... ... \n",
|
||
"1182 NaN semi-hard, brined creamy, supple \n",
|
||
"1183 NaN fresh firm, artisan creamy, crumbly \n",
|
||
"1184 NaN hard semi firm \n",
|
||
"1185 NaN semi-hard, artisan firm, supple \n",
|
||
"1186 Swiss Cheese hard, artisan, processed firm \n",
|
||
"\n",
|
||
" rind color flavor aroma \\\n",
|
||
"0 washed yellow sweet buttery \n",
|
||
"1 natural yellow burnt caramel lanoline \n",
|
||
"2 washed ivory NaN aromatic \n",
|
||
"3 washed white acidic, milky, smooth barnyardy, earthy \n",
|
||
"4 washed white fruity, nutty perfumed, pungent \n",
|
||
"... ... ... ... ... \n",
|
||
"1182 rindless pale yellow acidic NaN \n",
|
||
"1183 ash coated white acidic, creamy fresh \n",
|
||
"1184 NaN yellow smooth, sweet floral \n",
|
||
"1185 waxed NaN nutty nutty, sweet \n",
|
||
"1186 rindless pale yellow nutty, sweet NaN \n",
|
||
"\n",
|
||
" vegetarian vegan \n",
|
||
"0 False False \n",
|
||
"1 True False \n",
|
||
"2 False False \n",
|
||
"3 False False \n",
|
||
"4 False False \n",
|
||
"... ... ... \n",
|
||
"1182 False False \n",
|
||
"1183 True False \n",
|
||
"1184 True False \n",
|
||
"1185 False False \n",
|
||
"1186 True False \n",
|
||
"\n",
|
||
"[1181 rows x 13 columns]"
|
||
]
|
||
},
|
||
"execution_count": 8,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"id": "7ef7494b-ff08-40a5-890f-e0f718cf2842",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"data.loc[data.country.str.contains(\"England, Great Britain, United Kingdom\")|data.country.str.contains(\"England, United Kingdom\"),\"country\"]=\"England\"\n",
|
||
"data.loc[data.country.str.contains(\"Scotland\"),\"country\"]=\"Scotland\"\n",
|
||
"data.loc[data.country.str.contains(\"Great Britain, United Kingdom, Wales\")|data.country.str.contains(\"United Kingdom, Wales\"),\"country\"]=\"Wales\"\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"id": "fb044984-c33c-492c-91a2-4e9fff29ceb3",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"data=data.drop(index=data[data[\"country\"].str.contains(\",\")].index)\n",
|
||
"data=data.drop(index=data[data[\"country\"].str.contains(\" and \")].index)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "2f42c973-247a-4f51-947e-fbd76f8f12fc",
|
||
"metadata": {},
|
||
"source": [
|
||
"We removed 41 cheeses because they can come from several countries. "
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 13,
|
||
"id": "59c4e6e7-d624-45a5-a9ea-eb375102b771",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"data[\"location\"]=data[\"region\"]+\", \"+data[\"country\"]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 14,
|
||
"id": "0dee0f25-4699-4e46-97d0-21bb36d9c603",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>cheese</th>\n",
|
||
" <th>milk</th>\n",
|
||
" <th>country</th>\n",
|
||
" <th>region</th>\n",
|
||
" <th>family</th>\n",
|
||
" <th>type</th>\n",
|
||
" <th>texture</th>\n",
|
||
" <th>rind</th>\n",
|
||
" <th>color</th>\n",
|
||
" <th>flavor</th>\n",
|
||
" <th>aroma</th>\n",
|
||
" <th>vegetarian</th>\n",
|
||
" <th>vegan</th>\n",
|
||
" <th>location</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>Aarewasser</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>Switzerland</td>\n",
|
||
" <td></td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-soft</td>\n",
|
||
" <td>buttery</td>\n",
|
||
" <td>washed</td>\n",
|
||
" <td>yellow</td>\n",
|
||
" <td>sweet</td>\n",
|
||
" <td>buttery</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>, Switzerland</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>Abbaye de Belloc</td>\n",
|
||
" <td>sheep</td>\n",
|
||
" <td>France</td>\n",
|
||
" <td>Pays Basque</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard, artisan</td>\n",
|
||
" <td>creamy, dense, firm</td>\n",
|
||
" <td>natural</td>\n",
|
||
" <td>yellow</td>\n",
|
||
" <td>burnt caramel</td>\n",
|
||
" <td>lanoline</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>Pays Basque, France</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>Abbaye de Belval</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>France</td>\n",
|
||
" <td></td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard</td>\n",
|
||
" <td>elastic</td>\n",
|
||
" <td>washed</td>\n",
|
||
" <td>ivory</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>aromatic</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>, France</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>Abbaye de Citeaux</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>France</td>\n",
|
||
" <td>Burgundy</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-soft, artisan, brined</td>\n",
|
||
" <td>creamy, dense, smooth</td>\n",
|
||
" <td>washed</td>\n",
|
||
" <td>white</td>\n",
|
||
" <td>acidic, milky, smooth</td>\n",
|
||
" <td>barnyardy, earthy</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>Burgundy, France</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>Abbaye de Tamié</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>France</td>\n",
|
||
" <td>Savoie</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>soft, artisan</td>\n",
|
||
" <td>creamy, open, smooth</td>\n",
|
||
" <td>washed</td>\n",
|
||
" <td>white</td>\n",
|
||
" <td>fruity, nutty</td>\n",
|
||
" <td>perfumed, pungent</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>Savoie, France</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1182</th>\n",
|
||
" <td>Sveciaost</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>Sweden</td>\n",
|
||
" <td>Low-laying regions</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard, brined</td>\n",
|
||
" <td>creamy, supple</td>\n",
|
||
" <td>rindless</td>\n",
|
||
" <td>pale yellow</td>\n",
|
||
" <td>acidic</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>Low-laying regions, Sweden</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1183</th>\n",
|
||
" <td>Swag</td>\n",
|
||
" <td>goat</td>\n",
|
||
" <td>Australia</td>\n",
|
||
" <td>South Australia</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>fresh firm, artisan</td>\n",
|
||
" <td>creamy, crumbly</td>\n",
|
||
" <td>ash coated</td>\n",
|
||
" <td>white</td>\n",
|
||
" <td>acidic, creamy</td>\n",
|
||
" <td>fresh</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>South Australia, Australia</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1184</th>\n",
|
||
" <td>Swaledale</td>\n",
|
||
" <td>sheep</td>\n",
|
||
" <td>England</td>\n",
|
||
" <td>Swaledale, North Yorkshire</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>hard</td>\n",
|
||
" <td>semi firm</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>yellow</td>\n",
|
||
" <td>smooth, sweet</td>\n",
|
||
" <td>floral</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>Swaledale, North Yorkshire, England</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1185</th>\n",
|
||
" <td>Sweet Style Swiss</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Switzerland</td>\n",
|
||
" <td></td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>semi-hard, artisan</td>\n",
|
||
" <td>firm, supple</td>\n",
|
||
" <td>waxed</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>nutty</td>\n",
|
||
" <td>nutty, sweet</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>, Switzerland</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1186</th>\n",
|
||
" <td>Swiss cheese</td>\n",
|
||
" <td>cow</td>\n",
|
||
" <td>United States</td>\n",
|
||
" <td></td>\n",
|
||
" <td>Swiss Cheese</td>\n",
|
||
" <td>hard, artisan, processed</td>\n",
|
||
" <td>firm</td>\n",
|
||
" <td>rindless</td>\n",
|
||
" <td>pale yellow</td>\n",
|
||
" <td>nutty, sweet</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>, United States</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>1140 rows × 14 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" cheese milk country region \\\n",
|
||
"0 Aarewasser cow Switzerland \n",
|
||
"1 Abbaye de Belloc sheep France Pays Basque \n",
|
||
"2 Abbaye de Belval cow France \n",
|
||
"3 Abbaye de Citeaux cow France Burgundy \n",
|
||
"4 Abbaye de Tamié cow France Savoie \n",
|
||
"... ... ... ... ... \n",
|
||
"1182 Sveciaost cow Sweden Low-laying regions \n",
|
||
"1183 Swag goat Australia South Australia \n",
|
||
"1184 Swaledale sheep England Swaledale, North Yorkshire \n",
|
||
"1185 Sweet Style Swiss NaN Switzerland \n",
|
||
"1186 Swiss cheese cow United States \n",
|
||
"\n",
|
||
" family type texture \\\n",
|
||
"0 NaN semi-soft buttery \n",
|
||
"1 NaN semi-hard, artisan creamy, dense, firm \n",
|
||
"2 NaN semi-hard elastic \n",
|
||
"3 NaN semi-soft, artisan, brined creamy, dense, smooth \n",
|
||
"4 NaN soft, artisan creamy, open, smooth \n",
|
||
"... ... ... ... \n",
|
||
"1182 NaN semi-hard, brined creamy, supple \n",
|
||
"1183 NaN fresh firm, artisan creamy, crumbly \n",
|
||
"1184 NaN hard semi firm \n",
|
||
"1185 NaN semi-hard, artisan firm, supple \n",
|
||
"1186 Swiss Cheese hard, artisan, processed firm \n",
|
||
"\n",
|
||
" rind color flavor aroma \\\n",
|
||
"0 washed yellow sweet buttery \n",
|
||
"1 natural yellow burnt caramel lanoline \n",
|
||
"2 washed ivory NaN aromatic \n",
|
||
"3 washed white acidic, milky, smooth barnyardy, earthy \n",
|
||
"4 washed white fruity, nutty perfumed, pungent \n",
|
||
"... ... ... ... ... \n",
|
||
"1182 rindless pale yellow acidic NaN \n",
|
||
"1183 ash coated white acidic, creamy fresh \n",
|
||
"1184 NaN yellow smooth, sweet floral \n",
|
||
"1185 waxed NaN nutty nutty, sweet \n",
|
||
"1186 rindless pale yellow nutty, sweet NaN \n",
|
||
"\n",
|
||
" vegetarian vegan location \n",
|
||
"0 False False , Switzerland \n",
|
||
"1 True False Pays Basque, France \n",
|
||
"2 False False , France \n",
|
||
"3 False False Burgundy, France \n",
|
||
"4 False False Savoie, France \n",
|
||
"... ... ... ... \n",
|
||
"1182 False False Low-laying regions, Sweden \n",
|
||
"1183 True False South Australia, Australia \n",
|
||
"1184 True False Swaledale, North Yorkshire, England \n",
|
||
"1185 False False , Switzerland \n",
|
||
"1186 True False , United States \n",
|
||
"\n",
|
||
"[1140 rows x 14 columns]"
|
||
]
|
||
},
|
||
"execution_count": 14,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "2ef7351c-f117-403b-bc6e-f9f30a98c9d2",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "353724eb-8d64-4b64-84c6-f06be36acd8b",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "54d56bd4-c83a-4e8c-8751-b4b2f7830a9e",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "bd68f1bb-c9f6-4c57-951b-8ac1f3192f09",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "eeff487e-9b66-4c4b-b4f6-dc5352fb2144",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "cdb0d04c-e0f2-4553-8906-e9282f4942d2",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "a551e0f4-3f99-4dae-9b31-6205b772ebf5",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "7c02cc29-fe07-4ff9-8c6b-8638d37830cd",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "de579631-a29c-4620-9bbf-7085b83d16b7",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 15,
|
||
"id": "e2e868d4-33a1-4602-af97-afb1d29e612f",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"{', Afghanistan',\n",
|
||
" ', Argentina',\n",
|
||
" ', Armenia',\n",
|
||
" ', Australia',\n",
|
||
" ', Austria',\n",
|
||
" ', Belgium',\n",
|
||
" ', Brazil',\n",
|
||
" ', Canada',\n",
|
||
" ', Cyprus',\n",
|
||
" ', Denmark',\n",
|
||
" ', England',\n",
|
||
" ', France',\n",
|
||
" ', Germany',\n",
|
||
" ', Greece',\n",
|
||
" ', Holland',\n",
|
||
" ', Hungary',\n",
|
||
" ', Iceland',\n",
|
||
" ', Iraq',\n",
|
||
" ', Ireland',\n",
|
||
" ', Israel',\n",
|
||
" ', Italy',\n",
|
||
" ', Mauritania',\n",
|
||
" ', Mexico',\n",
|
||
" ', Middle East',\n",
|
||
" ', Mongolia',\n",
|
||
" ', Netherlands',\n",
|
||
" ', New Zealand',\n",
|
||
" ', Poland',\n",
|
||
" ', Portugal',\n",
|
||
" ', Scotland',\n",
|
||
" ', Serbia',\n",
|
||
" ', Spain',\n",
|
||
" ', Sweden',\n",
|
||
" ', Switzerland',\n",
|
||
" ', Turkey',\n",
|
||
" ', United Kingdom',\n",
|
||
" ', United States',\n",
|
||
" ', Wales',\n",
|
||
" 'Aberdeenshire, Scotland',\n",
|
||
" 'Aconcagua, Chile',\n",
|
||
" 'Adamstown, Co Wexford, Ireland',\n",
|
||
" 'Airedale farming district, New Zealand',\n",
|
||
" 'Alba, Italy',\n",
|
||
" 'All Holland, Netherlands',\n",
|
||
" 'Allagau, Bavarian Alps, Germany',\n",
|
||
" 'Allgaeu Alps, Germany',\n",
|
||
" 'Allgau, Germany',\n",
|
||
" 'Allgäu, Germany',\n",
|
||
" 'Amou, Gascony, France',\n",
|
||
" 'Anjou, France',\n",
|
||
" 'Ann Arbor, MI, United States',\n",
|
||
" 'Ann Arbor, Michigan, United States',\n",
|
||
" 'Aquitaine, France',\n",
|
||
" 'Asiago, Italy',\n",
|
||
" 'Asturias, Spain',\n",
|
||
" 'Auvergne, France',\n",
|
||
" 'Auvergne, Salers, France',\n",
|
||
" 'Averyon, France',\n",
|
||
" 'Avesnes, France',\n",
|
||
" 'Aveyron, France',\n",
|
||
" 'Aveyron, Laguiole, France',\n",
|
||
" 'Avila, Spain',\n",
|
||
" 'Azores, Portugal',\n",
|
||
" 'Ballarat, Victoria, Australia',\n",
|
||
" 'Banks Peninsular in Canterbury, New Zealand',\n",
|
||
" 'Banon, France',\n",
|
||
" 'Barcelona, Spain',\n",
|
||
" 'Bas-Languedoc, Comtat Venaissin, France',\n",
|
||
" 'Basilicata, Italy',\n",
|
||
" 'Basque, Pyrenees Mountains, France',\n",
|
||
" 'Bavaria, Germany',\n",
|
||
" 'Beara Peninsula, Co. Cork, Ireland',\n",
|
||
" 'Beira Baixa Province, Portugal',\n",
|
||
" 'Belvederis, Lithuania',\n",
|
||
" 'Bergues, France',\n",
|
||
" 'Bermondsey, London, England',\n",
|
||
" 'Berry, France',\n",
|
||
" 'Bethania, United Kingdom',\n",
|
||
" 'Bjurholm, Sweden',\n",
|
||
" 'Blarney, Ireland',\n",
|
||
" 'Bloomdale, United States',\n",
|
||
" 'Bornholm, Denmark',\n",
|
||
" 'Bourgogne, France',\n",
|
||
" 'Bregenzerwald, Kleinwalsertal, Großwalsertal, Laiblachtal (Pfänderstock) and Rheintal, Austria',\n",
|
||
" 'Brickhill, Co. Clare, Ireland',\n",
|
||
" 'Brisbane, Australia',\n",
|
||
" 'British Columbia, Canada',\n",
|
||
" 'Brittany, France',\n",
|
||
" 'Brooklyn NY, United States',\n",
|
||
" 'Burgund, France',\n",
|
||
" 'Burgundy, France',\n",
|
||
" 'Bursa, Turkey',\n",
|
||
" 'Buxton, Derbyshire, England',\n",
|
||
" 'Béarnaise in Pyrénées-Atlantique, France',\n",
|
||
" 'Calabria, Italy',\n",
|
||
" 'California, United States',\n",
|
||
" 'Campania, Italy',\n",
|
||
" 'Campania, Paestum, Foggia, Italy',\n",
|
||
" 'Canary Islands, Spain',\n",
|
||
" 'Canton of Glarus, Switzerland',\n",
|
||
" 'Carmarthenshire, Wales',\n",
|
||
" 'Carneros, Sonoma, California, United States',\n",
|
||
" 'Carnia, Italy',\n",
|
||
" 'Carrigtwohill, ',\n",
|
||
" 'Carrigtwohill, Ireland',\n",
|
||
" 'Castelo Branco, Fundão and Idanha-a-Nova, Portugal',\n",
|
||
" 'Castile-Leon, Spain',\n",
|
||
" 'Castilla Leon, Spain',\n",
|
||
" 'Castille-Leon, Spain',\n",
|
||
" 'Central Balkan Mountains, Bulgaria',\n",
|
||
" 'Central and Western Macedonia, Thessalia, Greece',\n",
|
||
" 'Central and Western Macedonia, Thessaly, Greece',\n",
|
||
" 'Centre , the department of Loiret, France',\n",
|
||
" 'Centre-Val de Loire, France',\n",
|
||
" 'Ceredigion, United Kingdom',\n",
|
||
" 'Cevenes, France',\n",
|
||
" 'Charentes, France',\n",
|
||
" 'Charentes-Poitou, France',\n",
|
||
" 'Charm, Ohio, United States',\n",
|
||
" 'Chelmarsh, Bridgnorth, Shropshire, England',\n",
|
||
" 'Cheshire, England',\n",
|
||
" 'Chirac, France',\n",
|
||
" 'Co Clare, Ireland',\n",
|
||
" 'Co Limerick, Ireland',\n",
|
||
" 'Co. Carlow, Ireland',\n",
|
||
" 'Co. Cork, Ireland',\n",
|
||
" 'Co. Mayo, Ireland',\n",
|
||
" 'Co. Offaly, Ireland',\n",
|
||
" 'Coast of Oregon, United States',\n",
|
||
" 'Colby, Wisconsin, United States',\n",
|
||
" 'Colorado, United States',\n",
|
||
" 'Comox Valley, Vancouver Island, Canada',\n",
|
||
" 'Coquet, England',\n",
|
||
" 'Cornwall, ',\n",
|
||
" 'Cornwall, England',\n",
|
||
" 'Corsica, France',\n",
|
||
" 'Cotherstone, England',\n",
|
||
" 'Cotswolds, England',\n",
|
||
" 'County Antrim, Ireland',\n",
|
||
" 'County Carlow, Ireland',\n",
|
||
" 'County Cavan, Ireland',\n",
|
||
" 'County Tipperary, Clogheen, Ireland',\n",
|
||
" 'County Wexford, Ireland',\n",
|
||
" 'Croisy-sur-Eure, France',\n",
|
||
" 'Crotone, Italy',\n",
|
||
" 'Cumbrian, United Kingdom',\n",
|
||
" 'Dalmatia, Croatia',\n",
|
||
" 'Derbyshire, Leicestershire, Nottinghamshire, England',\n",
|
||
" 'Devon, England',\n",
|
||
" 'Dorset, England',\n",
|
||
" 'Duhallow, Ireland',\n",
|
||
" 'Dumfries, Scotland',\n",
|
||
" 'Dumfriesshire, Scotland',\n",
|
||
" 'East Midlands, England',\n",
|
||
" 'East Sussex, United Kingdom',\n",
|
||
" 'Emilia Romagna, Italy',\n",
|
||
" 'Emilia-Romagna, Italy',\n",
|
||
" 'Extremadura, Spain',\n",
|
||
" 'Fairview, United States',\n",
|
||
" 'Fethard, Co Tipperary, Ireland',\n",
|
||
" 'Fife, Scotland',\n",
|
||
" 'Flanders, Belgium',\n",
|
||
" 'Fornells de la Selva, Gironès, Spain',\n",
|
||
" 'Franche Comté, France',\n",
|
||
" 'French Basque Country, Midi-Pyrénées, France',\n",
|
||
" 'Friuli Venezia Giulia and Veneto, Italy',\n",
|
||
" 'Friuli-Venezia Giulia and the Veneto, Italy',\n",
|
||
" 'Friuli-Venezia Giulia, Italy',\n",
|
||
" 'Galax, Virginia, United States',\n",
|
||
" 'Galicia, Spain',\n",
|
||
" 'Georgia, United States',\n",
|
||
" 'Gevrey-Chambertin, Burgundy, France',\n",
|
||
" 'Gippsland, Victoria, Australia',\n",
|
||
" 'Gloucestershire County, England',\n",
|
||
" 'Gloucestershire, England',\n",
|
||
" 'Gravina in Puglia, Murgia, Italy',\n",
|
||
" 'Greensboro, VT, United States',\n",
|
||
" 'Greenville, Indiana, United States',\n",
|
||
" 'Gujarat, India',\n",
|
||
" 'Gâtinais, France',\n",
|
||
" 'Hamilton, New Zealand',\n",
|
||
" 'Haute Vienne, France',\n",
|
||
" 'Haute-Savoie / Upper Savoy, France',\n",
|
||
" 'Herault, France',\n",
|
||
" 'Herefordshire, West Midlands, United Kingdom',\n",
|
||
" 'Het Groene Hart, Netherlands',\n",
|
||
" 'Huizen, Netherlands',\n",
|
||
" 'Hunter Valley, Australia',\n",
|
||
" 'Ile de France, France',\n",
|
||
" 'Ile-de-France/Champagne, France',\n",
|
||
" 'Illinois, United States',\n",
|
||
" 'Illoud (Haute-Marne), France',\n",
|
||
" 'Inagh, Co Clare, ',\n",
|
||
" 'Inagh, Co Clare, Ireland',\n",
|
||
" 'Indiana, United States',\n",
|
||
" 'Iowa, United States',\n",
|
||
" 'Isere, France',\n",
|
||
" 'Island of Pag, Croatia',\n",
|
||
" 'Jura, Switzerland',\n",
|
||
" 'Karlovy Vary, Czech Republic',\n",
|
||
" 'Kent, United Kingdom',\n",
|
||
" 'Kilmallock County Limerick, Ireland',\n",
|
||
" 'Kimball, United States',\n",
|
||
" 'Kinfauns, Perthshire, Scotland',\n",
|
||
" 'La Velle, Wisconsin, United States',\n",
|
||
" 'Lanarkshire, Scotland',\n",
|
||
" 'Landford, England',\n",
|
||
" 'Landshut, Germany',\n",
|
||
" 'Languedoc, France',\n",
|
||
" 'Languedoc-Roussillon, France',\n",
|
||
" 'Lapland, Finland',\n",
|
||
" 'Laqueuille, France',\n",
|
||
" 'Laruns, France',\n",
|
||
" 'Larzac, France',\n",
|
||
" 'Lazio, Sardinia, Italy',\n",
|
||
" 'Lebanon, CT, United States',\n",
|
||
" 'Leiden, Netherlands',\n",
|
||
" 'Lincolnshire, England',\n",
|
||
" 'Lodi, Italy',\n",
|
||
" 'Loire Valley, France',\n",
|
||
" 'Loire, France',\n",
|
||
" 'Lombardy, Italy',\n",
|
||
" 'Low-laying regions, Sweden',\n",
|
||
" 'Lower Normandy, France',\n",
|
||
" \"Lucerne, Schwyz, Unterwald, and Zoug, and the following additional places: Muri district in d'Argovi, Switzerland\",\n",
|
||
" 'Macedonia, Thrace, Thessalia, Peloponissos, Ionian Islands, Aegean islands, Crete Island and Epirus, Greece',\n",
|
||
" 'Maine, United States',\n",
|
||
" 'Manitoba, Canada',\n",
|
||
" 'Mankato, MN, United States',\n",
|
||
" 'Marathon, NY, United States',\n",
|
||
" 'Maribo, Denmark',\n",
|
||
" 'Massachusetts, United States',\n",
|
||
" 'Menorca, Balearic Islands, Spain',\n",
|
||
" 'Midi-Pyrenees, France',\n",
|
||
" 'Midi-Pyrénées, France',\n",
|
||
" 'Milford, NJ, United States',\n",
|
||
" 'Minas Gerais, Brazil',\n",
|
||
" 'Minnesota, United States',\n",
|
||
" 'Missouri, United States',\n",
|
||
" 'Modena, Italy',\n",
|
||
" 'Moliterno, Italy',\n",
|
||
" 'Mols, Denmark',\n",
|
||
" 'Monterey, California, United States',\n",
|
||
" 'Mornington Peninsula, Melbourne, Australia',\n",
|
||
" 'Murazzano, Italy',\n",
|
||
" 'Murcia, Spain',\n",
|
||
" 'NY, United States',\n",
|
||
" 'Naples, Italy',\n",
|
||
" 'New Hampshire, United States',\n",
|
||
" 'New Jersey, United States',\n",
|
||
" 'New South Wales, Australia',\n",
|
||
" 'New York, France',\n",
|
||
" 'New York, United States',\n",
|
||
" 'Nicasio, United States',\n",
|
||
" 'Nord-Pas-de-Calais, France',\n",
|
||
" 'Normandy, Auvilliers, France',\n",
|
||
" 'Normandy, France',\n",
|
||
" 'North Carolina, United States',\n",
|
||
" 'North Cornwall, England',\n",
|
||
" 'North East Victoria, ',\n",
|
||
" 'North East Victoria, Australia',\n",
|
||
" 'North Wootton, England',\n",
|
||
" 'North Yorkshire, England',\n",
|
||
" 'Northeastern Brazil, Brazil',\n",
|
||
" 'Northern Holland, Netherlands',\n",
|
||
" 'Northern Wisconsin, United States',\n",
|
||
" 'Northwest, United States',\n",
|
||
" 'Nottinghamshire, England',\n",
|
||
" 'Odell, Bedfordshire, England',\n",
|
||
" 'Ontario, Canada',\n",
|
||
" 'Oregon Coast Range, United States',\n",
|
||
" 'Oregon, United States',\n",
|
||
" 'Oristano, Italy',\n",
|
||
" 'Orkney Islands, Scotland',\n",
|
||
" 'Orkney Isles, Scotland',\n",
|
||
" 'Oviken, Sweden',\n",
|
||
" 'Oxfordshire, Great Britain',\n",
|
||
" 'Passendale, Belgium',\n",
|
||
" 'Pays Basque, France',\n",
|
||
" 'Pays d’Auge, Normandy, France',\n",
|
||
" 'Peekskill, United States',\n",
|
||
" 'Pembrokeshire, United Kingdom',\n",
|
||
" 'Pembrokeshire, Wales',\n",
|
||
" 'Pennsylvania, United States',\n",
|
||
" 'Pesaro-Urbino, Italy',\n",
|
||
" 'Petaluma, California, United States',\n",
|
||
" 'Piave Valley, Italy, Italy',\n",
|
||
" 'Piedmont, Italy',\n",
|
||
" 'Piemonte, Italy',\n",
|
||
" 'Pienza, Italy',\n",
|
||
" 'Pinconning, Michigan, United States',\n",
|
||
" 'Piora Valley, Switzerland',\n",
|
||
" 'Po valley region, Italy',\n",
|
||
" 'Poitou-Charentes, France',\n",
|
||
" 'Pokolbin, Hunter Valley, Australia',\n",
|
||
" 'Port Townsend, United States',\n",
|
||
" 'Postel, Belgium',\n",
|
||
" 'Prince Edward County, Ontario, Canada',\n",
|
||
" 'Prince Edward Island, Canada',\n",
|
||
" 'Provencale, France',\n",
|
||
" 'Provence, France',\n",
|
||
" 'Puimichel in Provence Alpes, France',\n",
|
||
" 'Pullman, Washington, United States',\n",
|
||
" 'Pyrenees, France',\n",
|
||
" 'Pyrenees-Atlantiques, France',\n",
|
||
" 'Pyrénées, France',\n",
|
||
" 'Pyrénées-Atlantiques, France',\n",
|
||
" 'Póvoa de Lanhoso, Portugal',\n",
|
||
" 'Quebec, Canada',\n",
|
||
" 'Queenstown, New Zealand',\n",
|
||
" 'Québec, Canada',\n",
|
||
" 'Rhone Valley, France',\n",
|
||
" 'Rhone-Alps, France',\n",
|
||
" 'Rhône-Alpes, France',\n",
|
||
" 'Richfield, Wisconsin, United States',\n",
|
||
" 'Rio Grande do Sul, Brazil',\n",
|
||
" 'Romanian Carpathians, Romania',\n",
|
||
" 'Roncq, France',\n",
|
||
" 'Roxburghshire, Scotland',\n",
|
||
" 'Sardegna, Italy',\n",
|
||
" 'Sardinia & Campania, Italy',\n",
|
||
" 'Savoie, France',\n",
|
||
" 'Schoonrewoerd, Leerdam, Netherlands',\n",
|
||
" 'Seattle, Washington, United States',\n",
|
||
" 'Sebastopol, California, United States',\n",
|
||
" 'Serra da Canastra, Minas Gerais state, Brazil',\n",
|
||
" 'Serra da Estrela, Portugal',\n",
|
||
" 'Setubal, Palmela and Sesimbra, Portugal',\n",
|
||
" 'Severn Valley, England',\n",
|
||
" 'Shelburne Farms, United States',\n",
|
||
" 'Somerset, England',\n",
|
||
" 'Sonoma, California, United States',\n",
|
||
" 'South Australia, Australia',\n",
|
||
" 'South East England, United Kingdom',\n",
|
||
" 'South West England, England',\n",
|
||
" 'South West England, United Kingdom',\n",
|
||
" 'Southern California, United States',\n",
|
||
" 'Southwestern Wisconsin, United States',\n",
|
||
" 'St Antoine, France',\n",
|
||
" 'St. Gallen (canton), Tufertschwil, Switzerland',\n",
|
||
" 'St. Louis, Missouri, United States',\n",
|
||
" 'Staffordshire, England',\n",
|
||
" 'Stawley, near Wellington, Somerset, England',\n",
|
||
" 'Stewarton, Scotland',\n",
|
||
" 'Stonegate, East Sussex, England',\n",
|
||
" 'Stoneyford, Ireland',\n",
|
||
" 'Stranraer, Scotland',\n",
|
||
" 'Sulzberg, Austria',\n",
|
||
" 'Svaneti, Samegrelo, Georgia',\n",
|
||
" 'Swabia, Germany',\n",
|
||
" 'Swaledale, North Yorkshire, England',\n",
|
||
" 'Tain, Scotland',\n",
|
||
" 'Tasmania, Australia',\n",
|
||
" 'Taxco, Mexico',\n",
|
||
" 'Tieton, Washington, United States',\n",
|
||
" 'Timsbury, Somerset, England',\n",
|
||
" 'Timsbury, Somerset, Scotland',\n",
|
||
" 'Tipperary, Ireland',\n",
|
||
" 'Tomales, California, United States',\n",
|
||
" 'Treviso, Veneto, Italy',\n",
|
||
" 'Troyes , Aube, France',\n",
|
||
" 'Tuscany, Italy',\n",
|
||
" 'Umbria, Lazio, Italy',\n",
|
||
" 'Upper Corsica, France',\n",
|
||
" 'Utah, United States',\n",
|
||
" 'Valencia, Spain',\n",
|
||
" 'Valpadana, Italy',\n",
|
||
" 'Veneto, ',\n",
|
||
" 'Veneto, Italy',\n",
|
||
" 'Veneto, Trentino, Italy',\n",
|
||
" 'Vermont, United States',\n",
|
||
" 'Victoria, Australia',\n",
|
||
" 'Virginia, United States',\n",
|
||
" 'Vorarlberg, Austria',\n",
|
||
" 'Västra Götaland, Sweden',\n",
|
||
" 'Wales, Great Britain',\n",
|
||
" 'Wales, London, Wales',\n",
|
||
" 'Wallonia, Belgium',\n",
|
||
" 'Websterville, VT, United States',\n",
|
||
" 'West Bengal, India',\n",
|
||
" 'West Pawlet, VT, United States',\n",
|
||
" 'Wigtownshire, Scotland',\n",
|
||
" 'Wisconsin, United States',\n",
|
||
" 'Zasavica, Serbia',\n",
|
||
" 'island wide, Cyprus',\n",
|
||
" 'massif des Causses, France',\n",
|
||
" 'old Liburnia (Dalmatia), Croatia',\n",
|
||
" 'province of Brittany, France',\n",
|
||
" 'Äänekoski, Finland'}"
|
||
]
|
||
},
|
||
"execution_count": 15,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"locs=set(data[\"location\"])\n",
|
||
"locs"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "f3bb9a47-56fa-49c4-8761-0db015944446",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "debb780e-ec13-4502-ac44-6001335e507d",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "eed3ac7b-5283-4d8e-bc26-61e1d821ccaf",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 16,
|
||
"id": "0043fe0d-e2d2-48f0-8953-ffc3dee52ba6",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def str_to_gps(loc):\n",
|
||
" l=loc.split(\",\")\n",
|
||
" loc=\",\".join([l[0],l[-1]])# removing details gives less errors while fetching the GPS coordinates\n",
|
||
" try:\n",
|
||
" res=Nominatim(user_agent=\"dmProject\").geocode(loc) \n",
|
||
" return (res.latitude, res.longitude)\n",
|
||
" except AttributeError:\n",
|
||
" loc=l[-1]\n",
|
||
" res=Nominatim(user_agent=\"dmProject\").geocode(loc) \n",
|
||
" return (res.latitude, res.longitude)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 17,
|
||
"id": "710341db-408f-4a4a-a849-65b963582ebc",
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"application/vnd.jupyter.widget-view+json": {
|
||
"model_id": "07fbeae8f58240ee921eae823a2a69a0",
|
||
"version_major": 2,
|
||
"version_minor": 0
|
||
},
|
||
"text/plain": [
|
||
" 0%| | 0/389 [00:00<?, ?it/s]"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Galax, Virginia, United States (36.6612387, -80.9239671)\n",
|
||
"Northeastern Brazil, Brazil (-10.3333333, -53.2)\n",
|
||
"Tieton, Washington, United States (46.7020686, -120.75535)\n",
|
||
"Central Balkan Mountains, Bulgaria (42.6073975, 25.4856617)\n",
|
||
"Postel, Belgium (51.2874865, 5.1897863)\n",
|
||
", Netherlands (52.2434979, 5.6343227)\n",
|
||
"Äänekoski, Finland (62.6032186, 25.7301361)\n",
|
||
"New Hampshire, United States (43.4849133, -71.6553992)\n",
|
||
"Friuli Venezia Giulia and Veneto, Italy (42.6384261, 12.674297)\n",
|
||
"County Cavan, Ireland (54.03497495, -7.2937022825583675)\n",
|
||
"Charentes-Poitou, France (45.4039367, 0.3756199)\n",
|
||
"Veneto, (45.6476663, 11.8665254)\n",
|
||
"Lazio, Sardinia, Italy (41.9808038, 12.7662312)\n",
|
||
"Fairview, United States (40.6333724, -90.1637318)\n",
|
||
"County Wexford, Ireland (52.46018745, -6.606515459159162)\n",
|
||
"Southwestern Wisconsin, United States (42.53116075, -90.43762550236468)\n",
|
||
"Co. Carlow, Ireland (52.69078865, -6.825145150844913)\n",
|
||
"West Pawlet, VT, United States (43.356893, -73.24966776033015)\n",
|
||
"Passendale, Belgium (50.9003015, 3.0203891)\n",
|
||
"Serra da Estrela, Portugal (40.4358994, -7.516865473467464)\n",
|
||
"Centre-Val de Loire, France (47.5490251, 1.7324062)\n",
|
||
"Lucerne, Schwyz, Unterwald, and Zoug, and the following additional places: Muri district in d'Argovi, Switzerland (47.0505452, 8.3054682)\n",
|
||
"Bjurholm, Sweden (63.966667, 19.0)\n",
|
||
"Po valley region, Italy (42.6384261, 12.674297)\n",
|
||
"Wales, Great Britain (53.28236125, -3.8286207326666837)\n",
|
||
", Denmark (55.670249, 10.3333283)\n",
|
||
"Asiago, Italy (45.8753771, 11.5106998)\n",
|
||
"Flanders, Belgium (51.096246199999996, 4.178629103169916)\n",
|
||
"Languedoc, France (43.65420305, 3.674669940206605)\n",
|
||
"Brickhill, Co. Clare, Ireland (52.70642975, -8.751786760846851)\n",
|
||
"Ontario, Canada (50.000678, -86.000977)\n",
|
||
", Armenia (4.491976149999999, -75.74135085294314)\n",
|
||
"Mornington Peninsula, Melbourne, Australia (-38.3312575, 145.08106868543985)\n",
|
||
"Bergues, France (50.9683886, 2.4325247)\n",
|
||
"Pesaro-Urbino, Italy (43.694112700000005, 12.701443660480004)\n",
|
||
"Cheshire, England (53.2141028, -2.471770086071205)\n",
|
||
"Indiana, United States (40.3270127, -86.1746933)\n",
|
||
"Rhone-Alps, France (45.2968119, 4.6604809)\n",
|
||
"Stranraer, Scotland (54.9044332, -5.026204)\n",
|
||
"Seattle, Washington, United States (47.6038321, -122.330062)\n",
|
||
"St. Gallen (canton), Tufertschwil, Switzerland (47.1561047, 9.338323)\n",
|
||
"South East England, United Kingdom (51.45115235, -0.9935673736185868)\n",
|
||
"Lombardy, Italy (45.5703694, 9.7732524)\n",
|
||
"Utah, United States (39.4225192, -111.714358)\n",
|
||
"Illinois, United States (40.0796606, -89.4337288)\n",
|
||
", Wales (52.2928116, -3.73893)\n",
|
||
"Low-laying regions, Sweden (59.6749712, 14.5208584)\n",
|
||
"Avesnes, France (50.5495906, 1.9721295)\n",
|
||
", Middle East (39.3014159, -76.5888477)\n",
|
||
"Midi-Pyrenees, France (43.8099068, 1.4344926056549236)\n",
|
||
"Southern California, United States (34.169262950000004, -116.787448181486)\n",
|
||
"Piedmont, Italy (45.060735, 7.923549)\n",
|
||
"California, United States (36.7014631, -118.755997)\n",
|
||
"Gloucestershire, England (51.7643786, -2.1880661838719386)\n",
|
||
"North Wootton, England (52.7923213, 0.4278194)\n",
|
||
"Severn Valley, England (52.454530899999995, -2.3755702502240226)\n",
|
||
"Extremadura, Spain (39.1748426, -6.1529891)\n",
|
||
"Cumbrian, United Kingdom (54.478530449999994, -3.06553306667584)\n",
|
||
"Lebanon, CT, United States (40.375713, -76.4626118)\n",
|
||
"Lodi, Italy (45.2613104, 9.491678060021837)\n",
|
||
"Websterville, VT, United States (44.162985, -72.47576806895253)\n",
|
||
"Swabia, Germany (48.15313125, 10.47129100603846)\n",
|
||
"Stewarton, Scotland (55.680265, -4.5153023)\n",
|
||
"Oxfordshire, Great Britain (51.7412674, -1.2245951)\n",
|
||
"Ballarat, Victoria, Australia (-37.5623013, 143.8605645)\n",
|
||
"old Liburnia (Dalmatia), Croatia (45.3658443, 15.6575209)\n",
|
||
"Orkney Isles, Scotland (56.7861112, -4.1140518)\n",
|
||
"Puimichel in Provence Alpes, France (46.603354, 1.8883335)\n",
|
||
", Israel (30.8124247, 34.8594762)\n",
|
||
"Port Townsend, United States (48.1179702, -122.769544)\n",
|
||
", Iraq (33.0955793, 44.1749775)\n",
|
||
", Greece (38.9953683, 21.9877132)\n",
|
||
"Greensboro, VT, United States (36.0726355, -79.7919754)\n",
|
||
"Cornwall, England (50.443348900000004, -4.62465658489158)\n",
|
||
"South West England, England (51.50076785, -2.5503106339491204)\n",
|
||
"Pembrokeshire, United Kingdom (51.8339209, -4.916667)\n",
|
||
"Northern Holland, Netherlands (52.2434979, 5.6343227)\n",
|
||
"Prince Edward Island, Canada (46.3355508, -63.1466676)\n",
|
||
"Central and Western Macedonia, Thessalia, Greece (38.9953683, 21.9877132)\n",
|
||
"Richfield, Wisconsin, United States (44.8766431, -93.2877877)\n",
|
||
"Burgundy, France (47.27808725, 4.222486304306048)\n",
|
||
"Pokolbin, Hunter Valley, Australia (-32.7792106, 151.2978747)\n",
|
||
"Cotswolds, England (51.7489422, -0.2303579)\n",
|
||
"Beara Peninsula, Co. Cork, Ireland (51.7280451, -9.767906428362338)\n",
|
||
"County Tipperary, Clogheen, Ireland (52.68482145, -7.898147186112796)\n",
|
||
"Ceredigion, United Kingdom (52.2945764, -3.9527809990291427)\n",
|
||
"Dorset, England (50.79683685, -2.34473226124306)\n",
|
||
"Maine, United States (45.709097, -68.8590201)\n",
|
||
", Switzerland (46.7985624, 8.2319736)\n",
|
||
"Bethania, United Kingdom (52.2509586, -4.0888576)\n",
|
||
"Laruns, France (42.9882368, -0.4266324)\n",
|
||
", Sweden (59.6749712, 14.5208584)\n",
|
||
"St. Louis, Missouri, United States (38.6280278, -90.1910154)\n",
|
||
"Moliterno, Italy (40.2423024, 15.868903)\n",
|
||
"Minas Gerais, Brazil (-18.5264844, -44.1588654)\n",
|
||
"Troyes , Aube, France (48.2971626, 4.0746257)\n",
|
||
"Veneto, Trentino, Italy (45.6476663, 11.8665254)\n",
|
||
"Lincolnshire, England (53.1823034, -0.2031208542548153)\n",
|
||
"Co. Offaly, Ireland (53.13617215, -7.810340751784169)\n",
|
||
"Quebec, Canada (52.4760892, -71.8258668)\n",
|
||
"Allgau, Germany (47.8241593, 11.6822424)\n",
|
||
", Hungary (47.1817585, 19.5060937)\n",
|
||
"Marathon, NY, United States (44.8914036, -89.7748098)\n",
|
||
"Timsbury, Somerset, Scotland (56.7861112, -4.1140518)\n",
|
||
"Petaluma, California, United States (38.2325829, -122.636465)\n",
|
||
"Landford, England (50.9700788, -1.6363792)\n",
|
||
"Piora Valley, Switzerland (46.7985624, 8.2319736)\n",
|
||
"Burgund, France (47.27808725, 4.222486304306048)\n",
|
||
"Wigtownshire, Scotland (54.9042579, -5.0248893)\n",
|
||
"Macedonia, Thrace, Thessalia, Peloponissos, Ionian Islands, Aegean islands, Crete Island and Epirus, Greece (40.6186482, 22.91795361795959)\n",
|
||
", Australia (-24.7761086, 134.755)\n",
|
||
"Staffordshire, England (52.824694199999996, -2.0074546723348115)\n",
|
||
"Haute Vienne, France (45.91901925, 1.203176771876291)\n",
|
||
"Alba, Italy (44.7007236, 8.0357786)\n",
|
||
"Campania, Italy (40.860672, 14.843984)\n",
|
||
"Taxco, Mexico (18.5565446, -99.6052838)\n",
|
||
"New York, United States (40.7127281, -74.0060152)\n",
|
||
"Svaneti, Samegrelo, Georgia (42.63545205, 42.24871384619953)\n",
|
||
"East Midlands, England (53.1188177, -1.2597068974971675)\n",
|
||
"Fornells de la Selva, Gironès, Spain (41.9319675, 2.8090343)\n",
|
||
"North Cornwall, England (51.7561518, 0.4645198)\n",
|
||
"Colorado, United States (38.7251776, -105.607716)\n",
|
||
"Nicasio, United States (38.0615885, -122.6985975)\n",
|
||
", Brazil (-10.3333333, -53.2)\n",
|
||
"Pyrenees, France (48.8738565, 2.3852638)\n",
|
||
"Derbyshire, Leicestershire, Nottinghamshire, England (53.1185033, -1.5566179305872214)\n",
|
||
"Québec, Canada (52.4760892, -71.8258668)\n",
|
||
"Manitoba, Canada (55.001251, -97.001038)\n",
|
||
"Auvergne, Salers, France (45.2968119, 4.6604809)\n",
|
||
"Upper Corsica, France (46.603354, 1.8883335)\n",
|
||
"Setubal, Palmela and Sesimbra, Portugal (38.5241783, -8.8932341)\n",
|
||
"Roncq, France (50.7531232, 3.1209016)\n",
|
||
"Oregon Coast Range, United States (39.7837304, -100.445882)\n",
|
||
", Mexico (23.6585116, -102.0077097)\n",
|
||
"Roxburghshire, Scotland (56.7861112, -4.1140518)\n",
|
||
"Tipperary, Ireland (52.4734839, -8.1614446)\n",
|
||
"Wisconsin, United States (44.4308975, -89.6884637)\n",
|
||
"Mankato, MN, United States (44.1634663, -93.9993505)\n",
|
||
"Devon, England (50.724140500000004, -3.6607788161410735)\n",
|
||
"Kimball, United States (41.1881305, -103.6997645)\n",
|
||
"Brooklyn NY, United States (40.6526006, -73.9497211)\n",
|
||
"Northern Wisconsin, United States (44.94725995, -91.39373410986752)\n",
|
||
"All Holland, Netherlands (52.4601118, 4.6056526)\n",
|
||
"Tuscany, Italy (43.4586541, 11.1389204)\n",
|
||
"Avila, Spain (40.656478, -4.7002172)\n",
|
||
"Belvederis, Lithuania (55.0820118, 23.3897023)\n",
|
||
"Naples, Italy (40.8358846, 14.2487679)\n",
|
||
"Galicia, Spain (42.61946, -7.863112)\n",
|
||
"Castelo Branco, Fundão and Idanha-a-Nova, Portugal (39.97675825, -7.446059929966704)\n",
|
||
"Pullman, Washington, United States (46.7304268, -117.173895)\n",
|
||
"Aveyron, France (44.315857449999996, 2.5065697302419823)\n",
|
||
"Ann Arbor, Michigan, United States (42.2813722, -83.7484616)\n",
|
||
"Oregon, United States (43.9792797, -120.737257)\n",
|
||
", Austria (47.59397, 14.12456)\n",
|
||
"Bregenzerwald, Kleinwalsertal, Großwalsertal, Laiblachtal (Pfänderstock) and Rheintal, Austria (47.387028799999996, 9.95606355528393)\n",
|
||
"Airedale farming district, New Zealand (-41.5000831, 172.8344077)\n",
|
||
"Cevenes, France (44.3509309, 3.8584812791602983)\n",
|
||
"massif des Causses, France (46.603354, 1.8883335)\n",
|
||
"Stawley, near Wellington, Somerset, England (50.9961449, -3.3382702)\n",
|
||
"Canary Islands, Spain (28.286398900000002, -16.796011808910734)\n",
|
||
"Timsbury, Somerset, England (51.3271927, -2.4755923)\n",
|
||
"Anjou, France (45.3462, 4.88149)\n",
|
||
"Co Limerick, Ireland (52.518831649999996, -8.795834650292788)\n",
|
||
"Coquet, England (54.88091196980861, -1.5495607774216265)\n",
|
||
"Modena, Italy (44.5384728, 10.935960870530739)\n",
|
||
"Herefordshire, West Midlands, United Kingdom (52.083333, -2.75)\n",
|
||
"Dalmatia, Croatia (43.538796500000004, 16.175235566874342)\n",
|
||
"Gloucestershire County, England (51.9425914, -2.100154122305389)\n",
|
||
"Prince Edward County, Ontario, Canada (44.031827, -77.246933)\n",
|
||
"Karlovy Vary, Czech Republic (50.2306216, 12.8701437)\n",
|
||
", Canada (61.0666922, -107.991707)\n",
|
||
", Afghanistan (33.7680065, 66.2385139)\n",
|
||
", Portugal (39.6621648, -8.1353519)\n",
|
||
"Chirac, France (45.9140138, 0.6548543)\n",
|
||
"Allgäu, Germany (47.5926009, 10.209156)\n",
|
||
"Ile de France, France (48.6443057, 2.7537863)\n",
|
||
"French Basque Country, Midi-Pyrénées, France (46.603354, 1.8883335)\n",
|
||
"St Antoine, France (44.0361186, 0.8397552)\n",
|
||
", Spain (39.3260685, -4.8379791)\n",
|
||
"Charentes, France (45.4039367, 0.3756199)\n",
|
||
"Béarnaise in Pyrénées-Atlantique, France (46.603354, 1.8883335)\n",
|
||
"Emilia-Romagna, Italy (44.525696, 11.039437)\n",
|
||
"Kinfauns, Perthshire, Scotland (56.3822607, -3.36565)\n",
|
||
"Bas-Languedoc, Comtat Venaissin, France (43.687621, 4.2691037)\n",
|
||
"Piave Valley, Italy, Italy (45.735099578237005, 7.320173335891282)\n",
|
||
"North East Victoria, Australia (-37.8633951, 145.0100994)\n",
|
||
"Brittany, France (48.2640845, -2.9202408)\n",
|
||
"Fethard, Co Tipperary, Ireland (52.467222, -7.691111)\n",
|
||
"Castilla Leon, Spain (40.4598868, -3.4720773)\n",
|
||
"Carneros, Sonoma, California, United States (33.223027, -111.70568456233669)\n",
|
||
"Loire Valley, France (47.44927715, -0.34954901296919916)\n",
|
||
"Bermondsey, London, England (51.4970125, -0.063268)\n",
|
||
"Bursa, Turkey (39.9895878, 28.8944669)\n",
|
||
"Centre , the department of Loiret, France (47.5490251, 1.7324062)\n",
|
||
", Argentina (-34.9964963, -64.9672817)\n",
|
||
"Pays Basque, France (47.6867895, 7.3927627)\n",
|
||
"Vermont, United States (44.5990718, -72.5002608)\n",
|
||
"Carmarthenshire, Wales (51.893669849999995, -4.217282721429928)\n",
|
||
"Island of Pag, Croatia (45.3658443, 15.6575209)\n",
|
||
"Sebastopol, California, United States (38.4021038, -122.824222)\n",
|
||
"Friuli-Venezia Giulia, Italy (46.151042, 13.055904)\n",
|
||
", Turkey (39.294076, 35.2316631)\n",
|
||
"Schoonrewoerd, Leerdam, Netherlands (51.920458, 5.1156505)\n",
|
||
"Asturias, Spain (43.3133868, -5.94192)\n",
|
||
"Ile-de-France/Champagne, France (48.5499833, 7.7510775)\n",
|
||
"island wide, Cyprus (34.9174159, 32.889902651331866)\n",
|
||
"Rhône-Alpes, France (45.3175313, 5.721294352585611)\n",
|
||
"Aberdeenshire, Scotland (57.166667, -2.666667)\n",
|
||
"Co. Cork, Ireland (51.917535900000004, -8.58597726870895)\n",
|
||
"Oristano, Italy (40.02656765, 8.679641647435716)\n",
|
||
"Zasavica, Serbia (44.9454307, 19.4987239)\n",
|
||
"North Yorkshire, England (54.13453275, -1.498628491239545)\n",
|
||
", Italy (42.6384261, 12.674297)\n",
|
||
"Orkney Islands, Scotland (58.94182309999999, -3.129694439563327)\n",
|
||
"Croisy-sur-Eure, France (49.0291802, 1.346505)\n",
|
||
"Swaledale, North Yorkshire, England (54.38299324007608, -1.9841360652878535)\n",
|
||
"Canton of Glarus, Switzerland (46.7985624, 8.2319736)\n",
|
||
"Pyrenees-Atlantiques, France (43.18718655, -0.728247400084667)\n",
|
||
"Pyrénées, France (48.8738565, 2.3852638)\n",
|
||
"Valencia, Spain (39.4697065, -0.3763353)\n",
|
||
"East Sussex, United Kingdom (50.9404266, 0.3699977891068864)\n",
|
||
", Serbia (44.024322850000004, 21.07657433209902)\n",
|
||
"Oviken, Sweden (62.9974877, 14.3928671)\n",
|
||
"Aquitaine, France (44.24620635, -0.18399787392367906)\n",
|
||
"Dumfriesshire, Scotland (55.166667, -3.5)\n",
|
||
"Huizen, Netherlands (52.2958122, 5.2567209)\n",
|
||
"Leiden, Netherlands (52.1594747, 4.4908843)\n",
|
||
"Gevrey-Chambertin, Burgundy, France (47.2261857, 4.9705666)\n",
|
||
"Azores, Portugal (37.80855645, -25.473137391245295)\n",
|
||
"Dumfries, Scotland (55.0691397, -3.6107936)\n",
|
||
"Rhone Valley, France (46.603354, 1.8883335)\n",
|
||
"Blarney, Ireland (51.9325233, -8.5678665)\n",
|
||
"British Columbia, Canada (55.001251, -125.002441)\n",
|
||
"Castile-Leon, Spain (41.55005, -5.1387401)\n",
|
||
"Illoud (Haute-Marne), France (48.2089823, 5.5615126)\n",
|
||
"Pienza, Italy (43.0765485, 11.6789076)\n",
|
||
"Averyon, France (46.603354, 1.8883335)\n",
|
||
"Banks Peninsular in Canterbury, New Zealand (-41.5000831, 172.8344077)\n",
|
||
"Pyrénées-Atlantiques, France (43.18718655, -0.728247400084667)\n",
|
||
"Hamilton, New Zealand (-37.7878809, 175.281788)\n",
|
||
"New South Wales, Australia (-31.8759835, 147.2869493)\n",
|
||
"Savoie, France (45.494895150000005, 6.384660381375652)\n",
|
||
"Tomales, California, United States (38.2468075, -122.90630461024097)\n",
|
||
"Wallonia, Belgium (50.154540049999994, 5.399359762971196)\n",
|
||
"Aveyron, Laguiole, France (44.315857449999996, 2.5065697302419823)\n",
|
||
", United States (39.7837304, -100.445882)\n",
|
||
"Banon, France (44.0381752, 5.6296701)\n",
|
||
"Emilia Romagna, Italy (44.525696, 11.039437)\n",
|
||
"Wales, London, Wales (52.41446365, -4.0685073132923)\n",
|
||
"New York, France (44.8705067, -0.5504343)\n",
|
||
"Veneto, Italy (45.6476663, 11.8665254)\n",
|
||
"Pays d’Auge, Normandy, France (49.0356178, 0.1573083324015645)\n",
|
||
"Gâtinais, France (48.15497515, 2.811419156673292)\n",
|
||
"Co Clare, Ireland (52.857257450000006, -8.937435925994537)\n",
|
||
"Gravina in Puglia, Murgia, Italy (40.819725, 16.422779)\n",
|
||
"Gippsland, Victoria, Australia (-37.87448275, 146.9111788341666)\n",
|
||
"Monterey, California, United States (36.2231079, -121.387742)\n",
|
||
", Germany (51.1638175, 10.4478313)\n",
|
||
", Mongolia (46.8250388, 103.8499736)\n",
|
||
"Serra da Canastra, Minas Gerais state, Brazil (-20.1615, -46.76365461658604)\n",
|
||
"Pinconning, Michigan, United States (43.853633, -83.964987)\n",
|
||
"Co. Mayo, Ireland (53.9087056, -9.298304863654256)\n",
|
||
"Västra Götaland, Sweden (58.215850200000006, 12.651820794914073)\n",
|
||
"Somerset, England (51.161750850000004, -3.0753867062227487)\n",
|
||
"Carrigtwohill, (51.9094617, -8.2611809)\n",
|
||
"Calabria, Italy (39.0565974, 16.5249864)\n",
|
||
"Bavaria, Germany (48.9467562, 11.4038717)\n",
|
||
", United Kingdom (54.7023545, -3.2765753)\n",
|
||
", Poland (52.215933, 19.134422)\n",
|
||
"Sulzberg, Austria (47.52121792021591, 9.91273065974192)\n",
|
||
", Mauritania (20.2540382, -9.2399263)\n",
|
||
"Piemonte, Italy (45.060735, 7.923549)\n",
|
||
"Franche Comté, France (48.68347355, 2.1776625116070027)\n",
|
||
"Beira Baixa Province, Portugal (39.6621648, -8.1353519)\n",
|
||
"Valpadana, Italy (39.60288591417616, 16.626802703803317)\n",
|
||
"Carrigtwohill, Ireland (51.9094617, -8.2611809)\n",
|
||
"Maribo, Denmark (54.7758018, 11.5054333)\n",
|
||
"South West England, United Kingdom (51.0339991, -2.949011916021195)\n",
|
||
"Basque, Pyrenees Mountains, France (45.359277750000004, -1.1480053453043046)\n",
|
||
"Crotone, Italy (39.1873894, 16.87828188895307)\n",
|
||
"Provence, France (44.0580563, 6.0638506)\n",
|
||
"province of Brittany, France (46.603354, 1.8883335)\n",
|
||
"Campania, Paestum, Foggia, Italy (40.860672, 14.843984)\n",
|
||
"Shelburne Farms, United States (34.89046225, -82.24340593189123)\n",
|
||
"Treviso, Veneto, Italy (45.806691349999994, 12.206315763116372)\n",
|
||
"Missouri, United States (38.7604815, -92.5617875)\n",
|
||
"Georgia, United States (32.3293809, -83.1137366)\n",
|
||
"North East Victoria, (54.699881, -1.6031519)\n",
|
||
"Tasmania, Australia (-42.035067, 146.6366887)\n",
|
||
"Amou, Gascony, France (43.59266, -0.7475291)\n",
|
||
"Normandy, France (49.0677708, 0.3138532)\n",
|
||
"County Carlow, Ireland (52.69078865, -6.825145150844913)\n",
|
||
"Berry, France (44.5760186, 0.3095833)\n",
|
||
"Inagh, Co Clare, Ireland (52.9071704, -9.2345138)\n",
|
||
"Mols, Denmark (56.187042, 10.460782482125193)\n",
|
||
"Odell, Bedfordshire, England (52.21843025, -0.5921260787763352)\n",
|
||
"Pembrokeshire, Wales (51.8339209, -4.916667)\n",
|
||
"Comox Valley, Vancouver Island, Canada (49.670981, -125.0353253)\n",
|
||
"Provencale, France (43.7899311033635, 7.524017533510311)\n",
|
||
"Milford, NJ, United States (41.2222218, -73.0570603)\n",
|
||
"Carnia, Italy (46.3738716, 13.1339829)\n",
|
||
"Northwest, United States (34.3129464, -78.1611077)\n",
|
||
"Auvergne, France (45.2968119, 4.6604809)\n",
|
||
"Friuli-Venezia Giulia and the Veneto, Italy (42.6384261, 12.674297)\n",
|
||
", New Zealand (-41.5000831, 172.8344077)\n",
|
||
"Adamstown, Co Wexford, Ireland (53.3360017, -6.4692321)\n",
|
||
"Castille-Leon, Spain (41.55005, -5.1387401)\n",
|
||
"Murazzano, Italy (44.4746494, 8.0213361)\n",
|
||
"Massachusetts, United States (42.3788774, -72.032366)\n",
|
||
"Haute-Savoie / Upper Savoy, France (46.06904065, 6.344532137164)\n",
|
||
", Belgium (50.6402809, 4.6667145)\n",
|
||
"Kent, United Kingdom (51.20707485, 0.7210361813401444)\n",
|
||
"Landshut, Germany (48.536217, 12.1516551)\n",
|
||
"Brisbane, Australia (-27.4689682, 153.0234991)\n",
|
||
"Allgaeu Alps, Germany (51.1638175, 10.4478313)\n",
|
||
"Aconcagua, Chile (-33.030515957094444, -71.5177984276482)\n",
|
||
"Rio Grande do Sul, Brazil (-29.8425284, -53.7680577)\n",
|
||
"Menorca, Balearic Islands, Spain (39.949257200000005, 4.0499641751186415)\n",
|
||
"Isere, France (45.28979315, 5.634382477386232)\n",
|
||
"Stoneyford, Ireland (52.5362671, -7.2278963)\n",
|
||
"Loire, France (45.75385355, 4.045473682551104)\n",
|
||
"Cotherstone, England (54.570244, -1.9804176)\n",
|
||
", England (52.5310214, -1.2649062)\n",
|
||
"Jura, Switzerland (47.3566699, 7.1598893)\n",
|
||
"Charm, Ohio, United States (40.5067308, -81.7848553)\n",
|
||
"Chelmarsh, Bridgnorth, Shropshire, England (52.4875359, -2.4124407)\n",
|
||
"West Bengal, India (22.9964948, 87.6855882)\n",
|
||
"County Antrim, Ireland (54.864725500000006, -6.143637910742768)\n",
|
||
", France (46.603354, 1.8883335)\n",
|
||
"Pennsylvania, United States (40.9699889, -77.7278831)\n",
|
||
"Stonegate, East Sussex, England (53.960910892985034, -1.0833748506182423)\n",
|
||
"Gujarat, India (22.3850051, 71.745261)\n",
|
||
", Cyprus (34.9174159, 32.889902651331866)\n",
|
||
"Vorarlberg, Austria (47.25, 9.9166667)\n",
|
||
"Poitou-Charentes, France (46.13244785, -0.15455354898441043)\n",
|
||
"Larzac, France (44.74693, 1.0075)\n",
|
||
"Sonoma, California, United States (38.5110803, -122.8473388)\n",
|
||
"Midi-Pyrénées, France (43.8099068, 1.4344926056549236)\n",
|
||
"Corsica, France (42.188089649999995, 9.068413771427695)\n",
|
||
"Coast of Oregon, United States (39.7837304, -100.445882)\n",
|
||
"Kilmallock County Limerick, Ireland (52.400645600000004, -8.571161660434711)\n",
|
||
"Greenville, Indiana, United States (34.851354, -82.3984882)\n",
|
||
"Romanian Carpathians, Romania (46.4046483, 22.9894841933513)\n",
|
||
"Basilicata, Italy (40.500571, 16.081953)\n",
|
||
"Lower Normandy, France (48.953684499999994, -0.573365358805311)\n",
|
||
"Lapland, Finland (67.69291045, 26.728214183162986)\n",
|
||
"Normandy, Auvilliers, France (49.0677708, 0.3138532)\n",
|
||
"Languedoc-Roussillon, France (43.65420305, 3.674669940206605)\n",
|
||
"Allagau, Bavarian Alps, Germany (51.1638175, 10.4478313)\n",
|
||
"Buxton, Derbyshire, England (53.2593422, -1.9100768)\n",
|
||
"Bornholm, Denmark (55.143122399999996, 14.922629658767558)\n",
|
||
"Bloomdale, United States (33.2473378, -96.6794382)\n",
|
||
"Cornwall, (50.416667, -4.75)\n",
|
||
"Barcelona, Spain (41.3828939, 2.1774322)\n",
|
||
"Hunter Valley, Australia (-32.5421803, 151.2185641)\n",
|
||
"North Carolina, United States (35.6729639, -79.0392919)\n",
|
||
"Laqueuille, France (45.6507627, 2.7320917)\n",
|
||
"Minnesota, United States (45.9896587, -94.6113288)\n",
|
||
"Sardinia & Campania, Italy (42.6384261, 12.674297)\n",
|
||
"South Australia, Australia (-30.5343665, 135.6301212)\n",
|
||
"Central and Western Macedonia, Thessaly, Greece (38.9953683, 21.9877132)\n",
|
||
", Ireland (52.865196, -7.9794599)\n",
|
||
"NY, United States (43.1561681, -75.8449946)\n",
|
||
"Bourgogne, France (47.27808725, 4.222486304306048)\n",
|
||
"Virginia, United States (37.1232245, -78.4927721)\n",
|
||
"Póvoa de Lanhoso, Portugal (41.5759516, -8.2699521)\n",
|
||
"Colby, Wisconsin, United States (44.5684775, -69.66002565150262)\n",
|
||
"Lanarkshire, Scotland (55.5752966, -3.833333)\n",
|
||
"Herault, France (43.591422, 3.3553309364095925)\n",
|
||
", Scotland (56.7861112, -4.1140518)\n",
|
||
"Murcia, Spain (37.9923795, -1.1305431)\n",
|
||
"La Velle, Wisconsin, United States (45.1699553, -109.88428658948227)\n",
|
||
"Ann Arbor, MI, United States (42.2813722, -83.7484616)\n",
|
||
"New Jersey, United States (40.0757384, -74.4041622)\n",
|
||
"Duhallow, Ireland (52.1837912, -9.00609563330653)\n",
|
||
", Iceland (64.9841821, -18.1059013)\n",
|
||
"Iowa, United States (41.9216734, -93.3122705)\n",
|
||
"Queenstown, New Zealand (-45.0321923, 168.661)\n",
|
||
"Tain, Scotland (57.8119372, -4.0550663)\n",
|
||
"Sardegna, Italy (40.0912813, 9.0305773)\n",
|
||
"Nottinghamshire, England (53.1459288, -1.0214971168122484)\n",
|
||
"Peekskill, United States (41.289811, -73.9204922)\n",
|
||
"Fife, Scotland (56.3333331, -3.0000001)\n",
|
||
"Umbria, Lazio, Italy (42.965916, 12.490236)\n",
|
||
"Nord-Pas-de-Calais, France (50.5289634, 2.454515765955904)\n",
|
||
"Victoria, Australia (-36.5986096, 144.6780052)\n",
|
||
"Het Groene Hart, Netherlands (51.5246, 4.280818038775446)\n",
|
||
"Inagh, Co Clare, (52.9071704, -9.2345138)\n",
|
||
", Holland (52.2434979, 5.6343227)\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"locations_to_gps={}\n",
|
||
"errors=set()\n",
|
||
"for loc in tqdm.tqdm(locs):\n",
|
||
" time.sleep(1)\n",
|
||
" try:\n",
|
||
" locations_to_gps[loc]=str_to_gps(loc)\n",
|
||
" print(loc, locations_to_gps[loc])\n",
|
||
" except AttributeError:\n",
|
||
" print(loc, \"ERROR\")\n",
|
||
" errors.add(loc)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 71,
|
||
"id": "a6966ac7-7a69-4829-8588-d10f85ea98a4",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"{'Valencia, Spain': (39.4697065, -0.3763353), 'Crotone, Italy': (39.1873894, 16.87828188895307), 'New Hampshire, United States': (43.4849133, -71.6553992), 'Setubal, Palmela and Sesimbra, Portugal': (38.5241783, -8.8932341), ', Turkey': (39.294076, 35.2316631), 'Wisconsin, United States': (44.4308975, -89.6884637), 'Maine, United States': (45.709097, -68.8590201), 'Beara Peninsula, Co. Cork, Ireland': (51.7280451, -9.767906428362338), 'Castilla Leon, Spain': (40.4598868, -3.4720773), ', Wales': (52.2928116, -3.73893), 'Romanian Carpathians, Romania': (46.4046483, 22.9894841933513), 'Pembrokeshire, Wales': (51.8339209, -4.916667), 'Lodi, Italy': (45.2613104, 9.491678060021837), 'Stoneyford, Ireland': (52.5362671, -7.2278963), 'Central and Western Macedonia, Thessaly, Greece': (38.9953683, 21.9877132), ', Armenia': (4.536307, -75.6723751), 'Indiana, United States': (40.3270127, -86.1746933), 'Alba, Italy': (44.7007236, 8.0357786), 'East Sussex, United Kingdom': (50.9404266, 0.3699977891068864), 'Fornells de la Selva, Gironès, Spain': (41.9319675, 2.8090343), ', Iraq': (33.0955793, 44.1749775), 'Treviso, Veneto, Italy': (45.806691349999994, 12.206315763116372), ', Brazil': (-10.3333333, -53.2), 'North Wootton, England': (52.7923213, 0.4278194), 'Modena, Italy': (44.5384728, 10.935960870530739), 'Minas Gerais, Brazil': (-18.5264844, -44.1588654), 'Emilia-Romagna, Italy': (44.525696, 11.039437), 'Centre-Val de Loire, France': (47.5490251, 1.7324062), 'Dumfriesshire, Scotland': (55.166667, -3.5), 'Brisbane, Australia': (-27.4689682, 153.0234991), 'Carrigtwohill, ': (51.9094617, -8.2611809), 'Azores, Portugal': (37.80855645, -25.473137391245295), ', Austria': (47.59397, 14.12456), 'Basque, Pyrenees Mountains, France': (45.359277750000004, -1.1480053453043046), 'North East Victoria, ': (54.699881, -1.6031519), 'Greensboro, VT, United States': (36.0726355, -79.7919754), 'Stawley, near Wellington, Somerset, England': (50.9961449, -3.3382702), 'Central and Western Macedonia, 
Thessalia, Greece': (38.9953683, 21.9877132), 'Po valley region, Italy': (42.6384261, 12.674297), 'Southwestern Wisconsin, United States': (42.53116075, -90.43762550236468), 'County Wexford, Ireland': (52.46018745, -6.606515459159162), 'Valpadana, Italy': (39.60288591417616, 16.626802703803317), 'Lapland, Finland': (67.69291045, 26.728214183162986), ', United States': (39.7837304, -100.445882), 'Passendale, Belgium': (50.9003015, 3.0203891), 'Dorset, England': (50.79683685, -2.34473226124306), 'Chirac, France': (45.9140138, 0.6548543), 'North Carolina, United States': (35.6729639, -79.0392919), 'Umbria, Lazio, Italy': (42.965916, 12.490236), 'Iowa, United States': (41.9216734, -93.3122705), 'Inagh, Co Clare, ': (52.9071704, -9.2345138), 'Roxburghshire, Scotland': (56.7861112, -4.1140518), 'Sardegna, Italy': (40.0912813, 9.0305773), 'province of Brittany, France': (46.603354, 1.8883335), 'Troyes , Aube, France': (48.2971626, 4.0746257), 'Carneros, Sonoma, California, United States': (33.223027, -111.70568456233669), 'Svaneti, Samegrelo, Georgia': (42.63545205, 42.24871384619953), 'La Velle, Wisconsin, United States': (45.1699553, -109.88428658948227), 'Georgia, United States': (32.3293809, -83.1137366), 'Brickhill, Co. 
Clare, Ireland': (52.70642975, -8.751786760846851), 'Sulzberg, Austria': (47.52121792021591, 9.91273065974192), 'Nicasio, United States': (38.0615885, -122.6985975), 'Inagh, Co Clare, Ireland': (52.9071704, -9.2345138), 'Burgundy, France': (47.27808725, 4.222486304306048), 'Aveyron, Laguiole, France': (44.315857449999996, 2.5065697302419823), 'Centre , the department of Loiret, France': (47.5490251, 1.7324062), 'Rhone Valley, France': (46.603354, 1.8883335), 'Tasmania, Australia': (-42.035067, 146.6366887), 'South West England, United Kingdom': (51.0339991, -2.949011916021195), ', Canada': (61.0666922, -107.991707), 'Loire Valley, France': (47.44927715, -0.34954901296919916), 'Extremadura, Spain': (39.1748426, -6.1529891), 'New Jersey, United States': (40.0757384, -74.4041622), 'Nord-Pas-de-Calais, France': (50.5289634, 2.454515765955904), 'Piave Valley, Italy, Italy': (45.735099578237005, 7.320173335891282), 'Friuli-Venezia Giulia and the Veneto, Italy': (42.6384261, 12.674297), 'Co. Cork, Ireland': (51.917535900000004, -8.58597726870895), 'North Yorkshire, England': (54.13453275, -1.498628491239545), 'Illinois, United States': (40.0796606, -89.4337288), 'Gâtinais, France': (48.15497515, 2.811419156673292), 'Campania, Paestum, Foggia, Italy': (40.860672, 14.843984), 'Port Townsend, United States': (48.1179702, -122.769544), 'Piemonte, Italy': (45.060735, 7.923549), 'Bregenzerwald, Kleinwalsertal, Großwalsertal, Laiblachtal (Pfänderstock) and Rheintal, Austria': (47.387028799999996, 9.95606355528393), 'Brooklyn NY, United States': (40.6526006, -73.9497211), 'Avesnes, France': (50.5495906, 1.9721295), 'Petaluma, California, United States': (38.2325829, -122.636465), 'Lanarkshire, Scotland': (55.5752966, -3.833333), 'Co Clare, Ireland': (52.857257450000006, -8.937435925994537), 'Mankato, MN, United States': (44.1634663, -93.9993505), 'Friuli Venezia Giulia and Veneto, Italy': (42.6384261, 12.674297), 'Québec, Canada': (52.4760892, -71.8258668), 'Airedale farming 
district, New Zealand': (-41.5000831, 172.8344077), 'Galicia, Spain': (42.61946, -7.863112), 'Campania, Italy': (40.860672, 14.843984), 'Asiago, Italy': (45.8753771, 11.5106998), 'Monterey, California, United States': (36.2231079, -121.387742), 'Pullman, Washington, United States': (46.7304268, -117.173895), 'Devon, England': (50.724140500000004, -3.6607788161410735), 'Pesaro-Urbino, Italy': (43.694112700000005, 12.701443660480004), 'Ballarat, Victoria, Australia': (-37.5623013, 143.8605645), 'Savoie, France': (45.494895150000005, 6.384660381375652), 'Hunter Valley, Australia': (-32.5421803, 151.2185641), 'Auvergne, Salers, France': (45.2968119, 4.6604809), 'Chelmarsh, Bridgnorth, Shropshire, England': (52.4875359, -2.4124407), 'Duhallow, Ireland': (52.1837912, -9.00609563330653), ', Serbia': (44.024322850000004, 21.07657433209902), 'Stranraer, Scotland': (54.9044332, -5.026204), 'Cotswolds, England': (51.74894260792542, -0.23033349985221635), 'Manitoba, Canada': (55.001251, -97.001038), 'South West England, England': (51.50076785, -2.5503106339491204), 'Berry, France': (44.5760186, 0.3095833), 'Co Limerick, Ireland': (52.518831649999996, -8.795834650292788), 'Averyon, France': (46.603354, 1.8883335), 'Co. 
Mayo, Ireland': (53.9087056, -9.298304863654256), 'Corsica, France': (42.188089649999995, 9.068413771427695), 'Moliterno, Italy': (40.2423024, 15.868903), 'Castelo Branco, Fundão and Idanha-a-Nova, Portugal': (39.97675825, -7.446059929966704), 'Oristano, Italy': (40.02656765, 8.679641647435716), 'Postel, Belgium': (51.2874865, 5.1897863), ', Iceland': (64.9841821, -18.1059013), 'Brittany, France': (48.2640845, -2.9202408), 'Rio Grande do Sul, Brazil': (-29.8425284, -53.7680577), 'Greenville, Indiana, United States': (34.851354, -82.3984882), 'Macedonia, Thrace, Thessalia, Peloponissos, Ionian Islands, Aegean islands, Crete Island and Epirus, Greece': (40.6186482, 22.91795361795959), 'Shelburne Farms, United States': (34.89046225, -82.24340593189123), 'Provencale, France': (43.7899311033635, 7.524017533510311), 'Oxfordshire, Great Britain': (51.7412674, -1.2245951), 'Swabia, Germany': (48.15313125, 10.47129100603846), 'Comox Valley, Vancouver Island, Canada': (49.670981, -125.0353253), 'Dalmatia, Croatia': (43.538796500000004, 16.175235566874342), 'Murcia, Spain': (37.9923795, -1.1305431), 'Puimichel in Provence Alpes, France': (46.603354, 1.8883335), 'Peekskill, United States': (41.289811, -73.9204922), 'Menorca, Balearic Islands, Spain': (39.949257200000005, 4.0499641751186415), 'New South Wales, Australia': (-31.8759835, 147.2869493), 'Colorado, United States': (38.7251776, -105.607716), 'Piora Valley, Switzerland': (46.7985624, 8.2319736), 'Loire, France': (45.75385355, 4.045473682551104), 'Tieton, Washington, United States': (46.7020686, -120.75535), 'Oviken, Sweden': (62.9974877, 14.3928671), 'Taxco, Mexico': (18.5565446, -99.6052838), 'Rhône-Alpes, France': (45.3175313, 5.721294352585611), 'Bursa, Turkey': (39.9895878, 28.8944669), 'Nottinghamshire, England': (53.1459288, -1.0214971168122484), 'Karlovy Vary, Czech Republic': (50.2306216, 12.8701437), 'County Cavan, Ireland': (54.03497495, -7.2937022825583675), 'Northwest, United States': (34.3129464, 
-78.1611077), ', Germany': (51.1638175, 10.4478313), ', Sweden': (59.6749712, 14.5208584), ', Argentina': (-34.9964963, -64.9672817), 'Queenstown, New Zealand': (-45.0321923, 168.661), 'All Holland, Netherlands': (52.4601118, 4.6056526), 'Flanders, Belgium': (51.096246199999996, 4.178629103169916), 'West Pawlet, VT, United States': (43.356893, -73.24966776033015), 'Bjurholm, Sweden': (63.966667, 19.0), 'Allgaeu Alps, Germany': (51.1638175, 10.4478313), 'Somerset, England': (51.161750850000004, -3.0753867062227487), 'Herefordshire, West Midlands, United Kingdom': (52.083333, -2.75), 'Västra Götaland, Sweden': (58.215850200000006, 12.651820794914073), ', Scotland': (56.7861112, -4.1140518), 'Provence, France': (44.0580563, 6.0638506), 'Mornington Peninsula, Melbourne, Australia': (-38.3312575, 145.08106868543985), 'Haute-Savoie / Upper Savoy, France': (46.06904065, 6.344532137164), ', Middle East': (39.3014159, -76.5888477), 'Sonoma, California, United States': (38.5110803, -122.8473388), 'Veneto, Italy': (45.6476663, 11.8665254), 'Haute Vienne, France': (45.91901925, 1.203176771876291), 'Gloucestershire County, England': (51.9425914, -2.100154122305389), 'Massachusetts, United States': (42.3788774, -72.032366), 'Carmarthenshire, Wales': (51.893669849999995, -4.217282721429928), 'Orkney Isles, Scotland': (56.7861112, -4.1140518), ', Belgium': (50.6402809, 4.6667145), 'Lombardy, Italy': (45.5703694, 9.7732524), 'Oregon, United States': (43.9792797, -120.737257), 'Cheshire, England': (53.2141028, -2.471770086071205), 'Blarney, Ireland': (51.9325233, -8.5678665), 'Timsbury, Somerset, England': (51.3271927, -2.4755923), 'Veneto, Trentino, Italy': (45.6476663, 11.8665254), 'Co. 
Offaly, Ireland': (53.13617215, -7.810340751784169), 'Barcelona, Spain': (41.3828939, 2.1774322), 'Sardinia & Campania, Italy': (42.6384261, 12.674297), 'West Bengal, India': (22.9964948, 87.6855882), 'Banon, France': (44.0381752, 5.6296701), 'Laruns, France': (42.9882368, -0.4266324), 'Gevrey-Chambertin, Burgundy, France': (47.2261857, 4.9705666), 'Pyrenees, France': (48.8738565, 2.3852638), 'Murazzano, Italy': (44.4746494, 8.0213361), 'Southern California, United States': (34.169262950000004, -116.787448181486), 'Normandy, France': (49.0677708, 0.3138532), 'Charentes, France': (45.4039367, 0.3756199), 'island wide, Cyprus': (34.9174159, 32.889902651331866), 'Larzac, France': (44.74693, 1.0075), 'Pays d’Auge, Normandy, France': (49.0356178, 0.1573083324015645), 'Pyrenees-Atlantiques, France': (43.18718655, -0.728247400084667), 'Auvergne, France': (45.2968119, 4.6604809), 'Burgund, France': (47.27808725, 4.222486304306048), ', Portugal': (39.6621648, -8.1353519), 'Jura, Switzerland': (47.3566699, 7.1598893), 'Emilia Romagna, Italy': (44.525696, 11.039437), 'East Midlands, England': (53.1188177, -1.2597068974971675), 'Missouri, United States': (38.7604815, -92.5617875), 'Marathon, NY, United States': (44.8914036, -89.7748098), ', Holland': (52.2434979, 5.6343227), 'Ann Arbor, MI, United States': (42.2813722, -83.7484616), 'Cevenes, France': (44.3509309, 3.8584812791602983), 'Bergues, France': (50.9683886, 2.4325247), 'Pokolbin, Hunter Valley, Australia': (-32.7792106, 151.2978747), 'Pyrénées, France': (48.8738565, 2.3852638), ', Netherlands': (52.2434979, 5.6343227), 'Gippsland, Victoria, Australia': (-37.87448275, 146.9111788341666), 'Cumbrian, United Kingdom': (54.478530449999994, -3.06553306667584), 'Friuli-Venezia Giulia, Italy': (46.151042, 13.055904), 'Kilmallock County Limerick, Ireland': (52.400645600000004, -8.571161660434711), 'Gloucestershire, England': (51.7643786, -2.1880661838719386), 'Vermont, United States': (44.5990718, -72.5002608), 'Stewarton, 
Scotland': (55.680265, -4.5153023), 'Leiden, Netherlands': (52.1594747, 4.4908843), 'Lebanon, CT, United States': (40.375713, -76.4626118), 'Tomales, California, United States': (38.2468075, -122.90630461024097), 'Bermondsey, London, England': (51.4970125, -0.063268), 'Tipperary, Ireland': (52.4734839, -8.1614446), 'Fife, Scotland': (56.3333331, -3.0000001), 'Allgäu, Germany': (47.5926009, 10.209156), ', France': (46.603354, 1.8883335), 'French Basque Country, Midi-Pyrénées, France': (46.603354, 1.8883335), 'Severn Valley, England': (52.454530899999995, -2.3755702502240226), ', Denmark': (55.670249, 10.3333283), 'Tain, Scotland': (57.8119372, -4.0550663), 'New York, France': (44.8712241, -0.5502475), 'Pyrénées-Atlantiques, France': (43.18718655, -0.728247400084667), 'Buxton, Derbyshire, England': (53.2593422, -1.9100768), 'Galax, Virginia, United States': (36.6612387, -80.9239671), 'Wallonia, Belgium': (50.154540049999994, 5.399359762971196), 'Ile-de-France/Champagne, France': (48.5499711, 7.7508704), 'Lazio, Sardinia, Italy': (41.9808038, 12.7662312), 'Staffordshire, England': (52.824694199999996, -2.0074546723348115), ', Switzerland': (46.7985624, 8.2319736), 'Serra da Estrela, Portugal': (40.4358994, -7.516865473467464), 'County Carlow, Ireland': (52.69078865, -6.825145150844913), 'St. 
Louis, Missouri, United States': (38.6280278, -90.1910154), 'Coquet, England': (54.88091196980861, -1.5495607774216265), 'Béarnaise in Pyrénées-Atlantique, France': (46.603354, 1.8883335), 'Websterville, VT, United States': (44.162985, -72.47576806895253), ', Poland': (52.215933, 19.134422), 'Utah, United States': (39.4225192, -111.714358), 'Allagau, Bavarian Alps, Germany': (51.1638175, 10.4478313), 'Oregon Coast Range, United States': (39.7837304, -100.445882), 'Cotherstone, England': (54.570244, -1.9804176), 'Swaledale, North Yorkshire, England': (54.38299324007608, -1.9841360652878535), 'Bornholm, Denmark': (55.143122399999996, 14.922629658767558), 'Aquitaine, France': (44.24620635, -0.18399787392367906), 'old Liburnia (Dalmatia), Croatia': (45.3658443, 15.6575209), 'Fethard, Co Tipperary, Ireland': (52.467222, -7.691111), 'Northern Holland, Netherlands': (52.2434979, 5.6343227), 'Castile-Leon, Spain': (41.55005, -5.1387401), 'Naples, Italy': (40.8358846, 14.2487679), 'Gravina in Puglia, Murgia, Italy': (40.819725, 16.422779), 'Castille-Leon, Spain': (41.55005, -5.1387401), ', Israel': (30.8124247, 34.8594762), 'Languedoc-Roussillon, France': (43.65420305, 3.674669940206605), 'Orkney Islands, Scotland': (58.94182309999999, -3.129694439563327), ', Italy': (42.6384261, 12.674297), ', United Kingdom': (54.7023545, -3.2765753), 'Wales, Great Britain': (53.28236125, -3.8286207326666837), 'Wales, London, Wales': (52.41446365, -4.0685073132923), 'California, United States': (36.7014631, -118.755997), 'Belvederis, Lithuania': (55.0820118, 23.3897023), 'Franche Comté, France': (48.68347355, 2.1776625116070027), 'Kent, United Kingdom': (51.20707485, 0.7210361813401444), 'Bloomdale, United States': (33.2473378, -96.6794382), 'Wigtownshire, Scotland': (54.9042579, -5.0248893), ', Ireland': (52.865196, -7.9794599), 'County Tipperary, Clogheen, Ireland': (52.68482145, -7.898147186112796), 'Colby, Wisconsin, United States': (44.5684775, -69.66002565150262), 'Isere, France': 
(45.28979315, 5.634382477386232), 'Kinfauns, Perthshire, Scotland': (56.3822607, -3.36565), ', Mexico': (23.6585116, -102.0077097), 'Dumfries, Scotland': (55.0691397, -3.6107936), 'Odell, Bedfordshire, England': (52.21843025, -0.5921260787763352), 'Avila, Spain': (40.656478, -4.7002172), 'Bethania, United Kingdom': (52.2509586, -4.0888576), 'Lincolnshire, England': (53.1823034, -0.2031208542548153), 'Hamilton, New Zealand': (-37.7878809, 175.281788), ', Greece': (38.9953683, 21.9877132), 'New York, United States': (40.7127281, -74.0060152), ', Cyprus': (34.9174159, 32.889902651331866), 'South Australia, Australia': (-30.5343665, 135.6301212), 'Low-laying regions, Sweden': (59.6749712, 14.5208584), 'Serra da Canastra, Minas Gerais state, Brazil': (-20.1615, -46.76365461658604), 'Cornwall, ': (50.443348900000004, -4.62465658489158), 'St Antoine, France': (44.0361186, 0.8397552), 'Amou, Gascony, France': (43.59266, -0.7475291), 'Charentes-Poitou, France': (45.4039367, 0.3756199), 'Co. Carlow, Ireland': (52.69078865, -6.825145150844913), 'Äänekoski, Finland': (62.6032186, 25.7301361), ', Spain': (39.3260685, -4.8379791), 'Ile de France, France': (48.6443057, 2.7537863), 'Seattle, Washington, United States': (47.6038321, -122.330062), 'Minnesota, United States': (45.9896587, -94.6113288), 'Milford, NJ, United States': (41.2222218, -73.0570603), 'Charm, Ohio, United States': (40.5067308, -81.7848553), 'Anjou, France': (45.3462, 4.88149), 'Pinconning, Michigan, United States': (43.853633, -83.964987), 'Derbyshire, Leicestershire, Nottinghamshire, England': (53.1666927, -1.5833223), 'Gujarat, India': (22.3850051, 71.745261), ', Mongolia': (46.8250388, 103.8499736), 'Central Balkan Mountains, Bulgaria': (42.6073975, 25.4856617), 'Schoonrewoerd, Leerdam, Netherlands': (51.920458, 5.1156505), 'North East Victoria, Australia': (-37.8633951, 145.0100994), 'Bavaria, Germany': (48.9467562, 11.4038717), 'Kimball, United States': (41.1881305, -103.6997645), 'Illoud (Haute-Marne), 
France': (48.2089823, 5.5615126), 'Ann Arbor, Michigan, United States': (42.2813722, -83.7484616), 'Rhone-Alps, France': (45.2968119, 4.6604809), 'Mols, Denmark': (56.187042, 10.460782482125193), 'Island of Pag, Croatia': (45.3658443, 15.6575209), ', Hungary': (47.1817585, 19.5060937), 'Lower Normandy, France': (48.953684499999994, -0.573365358805311), 'Midi-Pyrenees, France': (43.8099068, 1.4344926056549236), 'Huizen, Netherlands': (52.2958122, 5.2567209), 'Upper Corsica, France': (46.603354, 1.8883335), 'Basilicata, Italy': (40.500571, 16.081953), 'Landford, England': (50.9700788, -1.6363792), 'Maribo, Denmark': (54.7758018, 11.5054333), 'Virginia, United States': (37.1232245, -78.4927721), 'Adamstown, Co Wexford, Ireland': (53.3360017, -6.4692321), 'Het Groene Hart, Netherlands': (51.5246, 4.280818038775446), 'Sebastopol, California, United States': (38.4021038, -122.824222), 'Carrigtwohill, Ireland': (51.9094617, -8.2611809), 'County Antrim, Ireland': (54.864725500000006, -6.143637910742768), 'Aberdeenshire, Scotland': (57.166667, -2.666667), 'massif des Causses, France': (46.603354, 1.8883335), 'Asturias, Spain': (43.3133868, -5.94192), 'Allgau, Germany': (47.8241593, 11.6822424), 'Croisy-sur-Eure, France': (49.0291802, 1.346505), 'Northern Wisconsin, United States': (44.94725995, -91.39373410986752), 'Carnia, Italy': (46.3738716, 13.1339829), 'Vorarlberg, Austria': (47.25, 9.9166667), 'Quebec, Canada': (52.4760892, -71.8258668), 'Canary Islands, Spain': (28.286398900000002, -16.796011808910734), 'Banks Peninsular in Canterbury, New Zealand': (-41.5000831, 172.8344077), 'Beira Baixa Province, Portugal': (39.6621648, -8.1353519), 'Fairview, United States': (40.6333724, -90.1637318), 'Aconcagua, Chile': (-33.04383715040669, -71.56622455102398), 'Midi-Pyrénées, France': (43.8099068, 1.4344926056549236), 'Landshut, Germany': (48.536217, 12.1516551), ', New Zealand': (-41.5000831, 172.8344077), ', England': (52.5310214, -1.2649062), 'Languedoc, France': 
(43.65420305, 3.674669940206605), ', Afghanistan': (33.7680065, 66.2385139), 'NY, United States': (43.1561681, -75.8449946), 'Northeastern Brazil, Brazil': (-10.3333333, -53.2), 'Piedmont, Italy': (45.060735, 7.923549), 'St. Gallen (canton), Tufertschwil, Switzerland': (47.1561047, 9.338323), 'Pembrokeshire, United Kingdom': (51.8339209, -4.916667), 'Canton of Glarus, Switzerland': (46.7985624, 8.2319736), 'South East England, United Kingdom': (51.45115235, -0.9935673736185868), 'Ceredigion, United Kingdom': (52.2945764, -3.9527809990291427), 'Stonegate, East Sussex, England': (53.960910892985034, -1.0833748506182423), 'Tuscany, Italy': (43.4586541, 11.1389204), 'Timsbury, Somerset, Scotland': (56.7861112, -4.1140518), \"Lucerne, Schwyz, Unterwald, and Zoug, and the following additional places: Muri district in d'Argovi, Switzerland\": (47.0505452, 8.3054682), 'Prince Edward Island, Canada': (46.3355508, -63.1466676), 'Prince Edward County, Ontario, Canada': (44.031827, -77.246933), 'Aveyron, France': (44.315857449999996, 2.5065697302419823), 'Calabria, Italy': (39.0565974, 16.5249864), 'British Columbia, Canada': (55.001251, -125.002441), 'Ontario, Canada': (50.000678, -86.000977), 'North Cornwall, England': (51.7561518, 0.4645198), 'Pennsylvania, United States': (40.9699889, -77.7278831), 'Coast of Oregon, United States': (39.7837304, -100.445882), 'Veneto, ': (45.6476663, 11.8665254), ', Australia': (-24.7761086, 134.755), 'Victoria, Australia': (-36.5986096, 144.6780052), 'Pays Basque, France': (47.6867895, 7.3927627), ', Mauritania': (20.2540382, -9.2399263), 'Richfield, Wisconsin, United States': (44.8766431, -93.2877877), 'Poitou-Charentes, France': (46.13244785, -0.15455354898441043), 'Pienza, Italy': (43.0765485, 11.6789076), 'Herault, France': (43.591422, 3.3553309364095925), 'Normandy, Auvilliers, France': (49.0677708, 0.3138532), 'Bourgogne, France': (47.27808725, 4.222486304306048), 'Póvoa de Lanhoso, Portugal': (41.5759516, -8.2699521), 'Cornwall, 
England': (50.416667, -4.75), 'Roncq, France': (50.7531232, 3.1209016), 'Laqueuille, France': (45.6507627, 2.7320917), 'Bas-Languedoc, Comtat Venaissin, France': (43.687621, 4.2691037), 'Zasavica, Serbia': (44.9454307, 19.4987239)}\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"print(locations_to_gps)"
|
||
]
|
||
},
|
||
{
"cell_type": "code",
"execution_count": 75,
"id": "c82851db-2726-4f3a-977c-fef529d9fd8c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{', Mexico and Caribbean'}\n"
]
}
],
"source": [
"# Report the locations that could not be geocoded.\n",
"# Single quotes inside the f-string keep the cell valid on Python < 3.12, and\n",
"# sorting gives a stable order when `errors` is a set.\n",
"print(f\"{len(errors)} errors : {';'.join(sorted(errors))}\")"
]
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 76,
|
||
"id": "f86ec162-6391-49f5-964a-f70f17163a4a",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"1"
|
||
]
|
||
},
|
||
"execution_count": 76,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 18,
|
||
"id": "2fc155c5-04d6-4e4e-a3be-e4b85a96cf40",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"'c;b;a'"
|
||
]
|
||
},
|
||
"execution_count": 18,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": []
|
||
},
|
||
{
"cell_type": "code",
"execution_count": 5,
"id": "466d8424-dcf5-430b-b38d-5789886b0a0a",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"\n",
"# Cache the geocoding results on disk.\n",
"# Serialize before opening the file: if an entry is not JSON-serializable the\n",
"# error is raised here and an existing cache file is not truncated.\n",
"# JSON has no tuple type, so the (lat, lon) tuples are stored as lists.\n",
"locations_json = json.dumps(locations_to_gps)\n",
"with open(\"locations_to_gps.json\", \"w\", encoding=\"utf-8\") as f:\n",
"    f.write(locations_json)"
]
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "0ac0c719-1318-4110-94e1-d040b76a7614",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "6a409cbe-e43e-45a1-a288-ad86cec8d049",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "a11adbb8-0ed1-4184-99e9-a0d17af246b5",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "f3572930-ea91-4d5c-ae95-b0fa9a82f00b",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "279a0428-b8cd-4877-8fd7-bed5a7a6d654",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "5a24e704-89c0-41d0-ac53-59a17037a6b9",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "0120c67b-9558-40f5-a237-79f758e6854e",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "f1ea4400-abc8-4c46-8f6e-240e7fde3c93",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "204d1446-e58f-4585-8ac0-7466930e4291",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
"cell_type": "code",
"execution_count": null,
"id": "d41b1dc8-90df-44b8-9d83-d218f82a3637",
"metadata": {},
"outputs": [],
"source": [
"# Scatter plot of the resolved locations; entries that are None are skipped.\n",
"resolved = [loc for loc in locations if loc is not None]\n",
"x = [loc[0] for loc in resolved]\n",
"y = [loc[1] for loc in resolved]\n",
"# NOTE(review): if these are the (lat, lon) pairs stored in locations_to_gps,\n",
"# element 0 is the latitude and ends up on the horizontal axis - confirm.\n",
"plt.plot(x, y, \".\");"
]
},
|
||
{
"cell_type": "code",
"execution_count": null,
"id": "66ce4e4a-7006-411f-abd0-ee94d7cf99b3",
"metadata": {},
"outputs": [],
"source": [
"def filter_df(df, cols=None):\n",
"    \"\"\"One-hot encode comma-separated attribute columns.\n",
"\n",
"    Every cell of the columns in `cols` is read as a comma-separated list of\n",
"    attributes. The result has one boolean column per distinct attribute, True\n",
"    on the rows that carry it in any of those columns, and no longer has the\n",
"    `cols` columns themselves.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    df : pandas.DataFrame\n",
"        Input table; it is not modified.\n",
"    cols : list of str, optional\n",
"        Columns to expand. Defaults to milk, country, type, texture, flavor,\n",
"        aroma, family and rind.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        The remaining columns of `df` followed by the indicator columns in\n",
"        alphabetical order.\n",
"    \"\"\"\n",
"    if cols is None:\n",
"        cols = [\"milk\", \"country\", \"type\", \"texture\", \"flavor\", \"aroma\", \"family\", \"rind\"]\n",
"    cols = list(cols)\n",
"\n",
"    def split_attributes(cell):\n",
"        # Missing cells (NaN, None, pd.NA) are not strings and carry no attribute.\n",
"        if not isinstance(cell, str):\n",
"            return set()\n",
"        # Drop the empty names left by stray commas such as 'a,,b' or 'a, '.\n",
"        return {part.strip() for part in cell.split(\",\")} - {\"\"}\n",
"\n",
"    # Attributes carried by each row, merged over all the requested columns.\n",
"    row_attrs = [set() for _ in range(len(df))]\n",
"    for col in cols:\n",
"        for i, cell in enumerate(df[col]):\n",
"            row_attrs[i] |= split_attributes(cell)\n",
"\n",
"    # Sorted because set iteration order changes from one kernel to the next,\n",
"    # which would otherwise shuffle the feature columns between runs.\n",
"    attributes = sorted(set().union(*row_attrs))\n",
"\n",
"    kept = df.drop(columns=cols)\n",
"    if not attributes:\n",
"        return kept\n",
"\n",
"    # Build all indicator columns in one go; adding them one by one fragments\n",
"    # the frame when there are many attributes.\n",
"    indicators = pd.DataFrame(\n",
"        {attr: [attr in attrs for attrs in row_attrs] for attr in attributes},\n",
"        index=df.index,\n",
"    )\n",
"\n",
"    # An indicator replaces a remaining column that has the same name.\n",
"    kept = kept.drop(columns=[name for name in kept.columns if name in attributes])\n",
"    return pd.concat([kept, indicators], axis=1)"
]
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "fda6aaad-7b1e-4daa-8d28-cd049df9cec2",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"data_features=filter_df(data)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "a1b022a3-a2f9-4e39-9e79-48ae9f6adca5",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Classification"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "94bcde38-784b-41d9-89b0-3e2e17aa2979",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from sklearn import tree"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "7b2b9d3e-d7da-4f43-9e1c-4e62e837ed0b",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"c=tree.DecisionTreeClassifier()"
|
||
]
|
||
},
|
||
{
"cell_type": "code",
"execution_count": null,
"id": "0b52b082-0554-45f2-9eff-e6a3ba6a8d08",
"metadata": {},
"outputs": [],
"source": [
"# TODO: fit the classifier once the feature matrix X and the target y are chosen,\n",
"# e.g. c.fit(X, y). The call was left unterminated (`c.fit(`), which is a\n",
"# SyntaxError and stops a full Restart & Run All at this cell."
]
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "6d0b0d25-3476-4fbb-84c7-008437e87903",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"data[[\"country\",\"region\"]]"
|
||
]
|
||
},
|
||
{
"cell_type": "markdown",
"id": "100a7c2e-2d24-4814-bd68-4b9f6433ce4d",
"metadata": {},
"source": [
"Transformer: la couleur en RGB; la localisation en GPS.\n",
"\n",
"1ère question: est-ce que la couleur suffit à savoir d'où ça vient ?  \n",
"2ème question: est-ce que si on ajoute le type ça marche ?  \n",
"3ème question: et les caractéristiques gustatives ?\n"
]
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "24e7ff6e-c308-4cc8-aeac-eeb372f4c479",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"data_features"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "ca969d41-a88a-47d9-b94b-8b633d3d3348",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "e2c9b84f-b899-4c99-abb7-37a9deeafbb5",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "d41c1cfc-1564-4131-8391-c8a8971b9d13",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "42a1e4ed-9a1e-41f8-a322-b5d2de68d24a",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"data[pd.isnull(data[\"country\"])&pd.isnull(data[\"region\"])]\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "038cd38e-3890-4f73-91a7-c30294b3bc5b",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Pattern Mining"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "2e6b0dc1-030c-4239-803f-52736a41bcb5",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"data"
|
||
]
|
||
},
|
||
{
"cell_type": "code",
"execution_count": null,
"id": "23b75579-95bb-4889-928f-9c3c1309a18a",
"metadata": {},
"outputs": [],
"source": [
"# apriori() only accepts a one-hot encoded frame (True/False or 0/1 values), so\n",
"# it cannot be run on the raw `data` table. Use the boolean indicator columns\n",
"# of `data_features` (built by filter_df) instead.\n",
"frequent_itemsets = apriori(data_features.select_dtypes(include=\"bool\"), use_colnames=True)\n",
"frequent_itemsets"
]
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "61959c04-61bf-464a-89ca-72ec4782f927",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "744f8d1d-0874-4b92-921f-5a85ccf598ad",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "f99af4d2-20e9-4bff-802a-dbdb91f95a96",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "fa04bb73-ba5c-4164-a1af-f061d9627557",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "36ff5c84-93f3-4854-b2c4-e6082859c974",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.12.3"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|