diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7a1ca15 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.ipynb_checkpoints +.virtual_documents \ No newline at end of file diff --git a/.ipynb_checkpoints/cheese-checkpoint.ipynb b/.ipynb_checkpoints/cheese-checkpoint.ipynb deleted file mode 100644 index d9e8d25..0000000 --- a/.ipynb_checkpoints/cheese-checkpoint.ipynb +++ /dev/null @@ -1,3188 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "5f7c9658-c285-4854-96c0-e899fc55421b", - "metadata": {}, - "source": [ - "# DM project: cheese" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "7f4f2b89-8257-468c-9f5e-a77e11b8b8ff", - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "from mlxtend.preprocessing import TransactionEncoder\n", - "from mlxtend.frequent_patterns import apriori\n", - "from geopy.geocoders import Nominatim\n", - "import matplotlib.pyplot as plt\n", - "import time\n", - "import tqdm.notebook as tqdm\n", - "import random\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "1a0afba8-692b-4377-a2ce-5114983e3bbb", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
cheeseurlmilkcountryregionfamilytypefat_contentcalcium_contenttexturerindcolorflavoraromavegetarianvegansynonymsalt_spellingsproducers
0Aarewasserhttps://www.cheese.com/aarewasser/cowSwitzerlandNaNNaNsemi-softNaNNaNbutterywashedyellowsweetbutteryFalseFalseNaNNaNJumi
1Abbaye de Bellochttps://www.cheese.com/abbaye-de-belloc/sheepFrancePays BasqueNaNsemi-hard, artisanNaNNaNcreamy, dense, firmnaturalyellowburnt caramellanolineTrueFalseAbbaye Notre-Dame de BellocNaNNaN
2Abbaye de Belvalhttps://www.cheese.com/abbaye-de-belval/cowFranceNaNNaNsemi-hard40-46%NaNelasticwashedivoryNaNaromaticFalseFalseNaNNaNNaN
3Abbaye de Citeauxhttps://www.cheese.com/abbaye-de-citeaux/cowFranceBurgundyNaNsemi-soft, artisan, brinedNaNNaNcreamy, dense, smoothwashedwhiteacidic, milky, smoothbarnyardy, earthyFalseFalseNaNNaNNaN
4Abbaye de Tamiéhttps://www.cheese.com/tamie/cowFranceSavoieNaNsoft, artisanNaNNaNcreamy, open, smoothwashedwhitefruity, nuttyperfumed, pungentFalseFalseNaNTamié, Trappiste de Tamie, Abbey of TamieNaN
............................................................
1182Sveciaosthttps://www.cheese.com/sveciaost/cowSwedenLow-laying regionsNaNsemi-hard, brined45%NaNcreamy, supplerindlesspale yellowacidicNaNFalseFalseNaNNaNNaN
1183Swaghttps://www.cheese.com/swag/goatAustraliaSouth AustraliaNaNfresh firm, artisanNaNNaNcreamy, crumblyash coatedwhiteacidic, creamyfreshTrueFalseNaNNaNWoodside Cheese Wrights
1184Swaledalehttps://www.cheese.com/swaledale/sheepEnglandSwaledale, North YorkshireNaNhardNaNNaNsemi firmNaNyellowsmooth, sweetfloralTrueFalseSwaledale Sheep CheeseNaNNaN
1185Sweet Style Swisshttps://www.cheese.com/sweet-style-swiss/NaNSwitzerlandNaNNaNsemi-hard, artisanNaNNaNfirm, supplewaxedNaNnuttynutty, sweetFalseFalseNaNNaNNaN
1186Swiss cheesehttps://www.cheese.com/swiss/cowUnited StatesNaNSwiss Cheesehard, artisan, processed7.8 g/100gNaNfirmrindlesspale yellownutty, sweetNaNTrueFalseAmerican Swiss CheeseNaNVarious
\n", - "

1187 rows × 19 columns

\n", - "
" - ], - "text/plain": [ - " cheese url milk \\\n", - "0 Aarewasser https://www.cheese.com/aarewasser/ cow \n", - "1 Abbaye de Belloc https://www.cheese.com/abbaye-de-belloc/ sheep \n", - "2 Abbaye de Belval https://www.cheese.com/abbaye-de-belval/ cow \n", - "3 Abbaye de Citeaux https://www.cheese.com/abbaye-de-citeaux/ cow \n", - "4 Abbaye de Tamié https://www.cheese.com/tamie/ cow \n", - "... ... ... ... \n", - "1182 Sveciaost https://www.cheese.com/sveciaost/ cow \n", - "1183 Swag https://www.cheese.com/swag/ goat \n", - "1184 Swaledale https://www.cheese.com/swaledale/ sheep \n", - "1185 Sweet Style Swiss https://www.cheese.com/sweet-style-swiss/ NaN \n", - "1186 Swiss cheese https://www.cheese.com/swiss/ cow \n", - "\n", - " country region family \\\n", - "0 Switzerland NaN NaN \n", - "1 France Pays Basque NaN \n", - "2 France NaN NaN \n", - "3 France Burgundy NaN \n", - "4 France Savoie NaN \n", - "... ... ... ... \n", - "1182 Sweden Low-laying regions NaN \n", - "1183 Australia South Australia NaN \n", - "1184 England Swaledale, North Yorkshire NaN \n", - "1185 Switzerland NaN NaN \n", - "1186 United States NaN Swiss Cheese \n", - "\n", - " type fat_content calcium_content \\\n", - "0 semi-soft NaN NaN \n", - "1 semi-hard, artisan NaN NaN \n", - "2 semi-hard 40-46% NaN \n", - "3 semi-soft, artisan, brined NaN NaN \n", - "4 soft, artisan NaN NaN \n", - "... ... ... ... \n", - "1182 semi-hard, brined 45% NaN \n", - "1183 fresh firm, artisan NaN NaN \n", - "1184 hard NaN NaN \n", - "1185 semi-hard, artisan NaN NaN \n", - "1186 hard, artisan, processed 7.8 g/100g NaN \n", - "\n", - " texture rind color flavor \\\n", - "0 buttery washed yellow sweet \n", - "1 creamy, dense, firm natural yellow burnt caramel \n", - "2 elastic washed ivory NaN \n", - "3 creamy, dense, smooth washed white acidic, milky, smooth \n", - "4 creamy, open, smooth washed white fruity, nutty \n", - "... ... ... ... ... \n", - "1182 creamy, supple rindless pale yellow acidic \n", - "1183 creamy, crumbly ash coated white acidic, creamy \n", - "1184 semi firm NaN yellow smooth, sweet \n", - "1185 firm, supple waxed NaN nutty \n", - "1186 firm rindless pale yellow nutty, sweet \n", - "\n", - " aroma vegetarian vegan synonyms \\\n", - "0 buttery False False NaN \n", - "1 lanoline True False Abbaye Notre-Dame de Belloc \n", - "2 aromatic False False NaN \n", - "3 barnyardy, earthy False False NaN \n", - "4 perfumed, pungent False False NaN \n", - "... ... ... ... ... \n", - "1182 NaN False False NaN \n", - "1183 fresh True False NaN \n", - "1184 floral True False Swaledale Sheep Cheese \n", - "1185 nutty, sweet False False NaN \n", - "1186 NaN True False American Swiss Cheese \n", - "\n", - " alt_spellings producers \n", - "0 NaN Jumi \n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 Tamié, Trappiste de Tamie, Abbey of Tamie NaN \n", - "... ... ... \n", - "1182 NaN NaN \n", - "1183 NaN Woodside Cheese Wrights \n", - "1184 NaN NaN \n", - "1185 NaN NaN \n", - "1186 NaN Various \n", - "\n", - "[1187 rows x 19 columns]" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data=pd.read_csv(\"cheeses.csv\")\n", - "data" - ] - }, - { - "cell_type": "markdown", - "id": "bf3b548c-5ac4-4126-9ae9-5578ad158015", - "metadata": {}, - "source": [ - "## Cleaning and pre-processing" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "2018aac2-6f3d-489a-b5d0-90b7c7793076", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'blue', 'yellow', 'white', 'golden yellow', 'pink and white', 'orange', 'pale yellow', nan, 'ivory', 'green', 'straw', 'red', 'brownish yellow', 'pale white', 'brown', 'golden orange', 'blue-grey', 'cream'}\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
cheeseurlmilkcountryregionfamilytypefat_contentcalcium_contenttexturerindcolorflavoraromavegetarianvegansynonymsalt_spellingsproducers
10Acapellahttps://www.cheese.com/acapella/goatUnited StatesCaliforniaNaNsoft, soft-ripenedNaNNaNNaNNaNNaNbutteryfresh, herbalFalseFalseNaNNaNNaN
13Acornhttps://www.cheese.com/acorn/sheepUnited KingdomBethaniaNaNhard, artisan52%NaNcrumbly, firmNaNNaNburnt caramel, citrusy, herbaceousfruityTrueFalseNaNNaNNaN
19Afuega'l Pituhttps://www.cheese.com/afuegal-pitu/cowSpainAsturiasNaNsoft, artisanNaNNaNsmoothcloth wrappedNaNspicy, strongNaNFalseFalseNaNNaNNaN
48Alpe di Frabosahttps://www.cheese.com/alpe-di-frabosa/cowItalyNaNNaNsemi-softNaNNaNNaNNaNNaNbittermilky, mushroomFalseFalseNaNNaNNaN
50Alpicrèmehttps://www.cheese.com/alpicreme/goatFranceNaNNaNsoftNaNNaNNaNNaNNaNNaNNaNFalseFalseNaNNaNNaN
............................................................
1172Strathdon Bluehttps://www.cheese.com/strathdon-blue/cowScotlandTainBluesemi-softNaNNaNcreamyNaNNaNcreamy, spicyaromatic, richTrueFalseNaNNaNHighland Fine Cheeses Limited
1175String Cheesehttps://www.cheese.com/string/NaNNaNNaNNaNsemi-hardNaNNaNchewy, firm, stringyNaNNaNNaNNaNNaNNaNNaNNaNNaN
1177Sulgunihttps://www.cheese.com/sulguni/buffalo, cowGeorgiaSvaneti, SamegreloNaNsemi-firmNaNNaNdense, elasticNaNNaNsalty, smokey , sourNaNNaNNaNGeorgian Pickle CheeseMegruli Sulguni, Shebolili Megruli SulguniNaN
1181Sussex Slipcotehttps://www.cheese.com/sussex-slipcote/sheepEnglandNaNNaNsoftNaNNaNNaNNaNNaNsharpNaNTrueFalseNaNNaNHigh Weald Dairy
1185Sweet Style Swisshttps://www.cheese.com/sweet-style-swiss/NaNSwitzerlandNaNNaNsemi-hard, artisanNaNNaNfirm, supplewaxedNaNnuttynutty, sweetFalseFalseNaNNaNNaN
\n", - "

142 rows × 19 columns

\n", - "
" - ], - "text/plain": [ - " cheese url \\\n", - "10 Acapella https://www.cheese.com/acapella/ \n", - "13 Acorn https://www.cheese.com/acorn/ \n", - "19 Afuega'l Pitu https://www.cheese.com/afuegal-pitu/ \n", - "48 Alpe di Frabosa https://www.cheese.com/alpe-di-frabosa/ \n", - "50 Alpicrème https://www.cheese.com/alpicreme/ \n", - "... ... ... \n", - "1172 Strathdon Blue https://www.cheese.com/strathdon-blue/ \n", - "1175 String Cheese https://www.cheese.com/string/ \n", - "1177 Sulguni https://www.cheese.com/sulguni/ \n", - "1181 Sussex Slipcote https://www.cheese.com/sussex-slipcote/ \n", - "1185 Sweet Style Swiss https://www.cheese.com/sweet-style-swiss/ \n", - "\n", - " milk country region family \\\n", - "10 goat United States California NaN \n", - "13 sheep United Kingdom Bethania NaN \n", - "19 cow Spain Asturias NaN \n", - "48 cow Italy NaN NaN \n", - "50 goat France NaN NaN \n", - "... ... ... ... ... \n", - "1172 cow Scotland Tain Blue \n", - "1175 NaN NaN NaN NaN \n", - "1177 buffalo, cow Georgia Svaneti, Samegrelo NaN \n", - "1181 sheep England NaN NaN \n", - "1185 NaN Switzerland NaN NaN \n", - "\n", - " type fat_content calcium_content texture \\\n", - "10 soft, soft-ripened NaN NaN NaN \n", - "13 hard, artisan 52% NaN crumbly, firm \n", - "19 soft, artisan NaN NaN smooth \n", - "48 semi-soft NaN NaN NaN \n", - "50 soft NaN NaN NaN \n", - "... ... ... ... ... \n", - "1172 semi-soft NaN NaN creamy \n", - "1175 semi-hard NaN NaN chewy, firm, stringy \n", - "1177 semi-firm NaN NaN dense, elastic \n", - "1181 soft NaN NaN NaN \n", - "1185 semi-hard, artisan NaN NaN firm, supple \n", - "\n", - " rind color flavor \\\n", - "10 NaN NaN buttery \n", - "13 NaN NaN burnt caramel, citrusy, herbaceous \n", - "19 cloth wrapped NaN spicy, strong \n", - "48 NaN NaN bitter \n", - "50 NaN NaN NaN \n", - "... ... ... ... \n", - "1172 NaN NaN creamy, spicy \n", - "1175 NaN NaN NaN \n", - "1177 NaN NaN salty, smokey , sour \n", - "1181 NaN NaN sharp \n", - "1185 waxed NaN nutty \n", - "\n", - " aroma vegetarian vegan synonyms \\\n", - "10 fresh, herbal False False NaN \n", - "13 fruity True False NaN \n", - "19 NaN False False NaN \n", - "48 milky, mushroom False False NaN \n", - "50 NaN False False NaN \n", - "... ... ... ... ... \n", - "1172 aromatic, rich True False NaN \n", - "1175 NaN NaN NaN NaN \n", - "1177 NaN NaN NaN Georgian Pickle Cheese \n", - "1181 NaN True False NaN \n", - "1185 nutty, sweet False False NaN \n", - "\n", - " alt_spellings \\\n", - "10 NaN \n", - "13 NaN \n", - "19 NaN \n", - "48 NaN \n", - "50 NaN \n", - "... ... \n", - "1172 NaN \n", - "1175 NaN \n", - "1177 Megruli Sulguni, Shebolili Megruli Sulguni \n", - "1181 NaN \n", - "1185 NaN \n", - "\n", - " producers \n", - "10 NaN \n", - "13 NaN \n", - "19 NaN \n", - "48 NaN \n", - "50 NaN \n", - "... ... \n", - "1172 Highland Fine Cheeses Limited \n", - "1175 NaN \n", - "1177 NaN \n", - "1181 High Weald Dairy \n", - "1185 NaN \n", - "\n", - "[142 rows x 19 columns]" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "print(set(data[\"color\"]))\n", - "data[pd.isnull(data[\"color\"])]" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "a0a77563-518e-4808-b744-9fc0c76763fe", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1162\n", - "939\n" - ] - } - ], - "source": [ - "print(len(data[pd.isnull(data[\"calcium_content\"])]))\n", - "print(len(data[pd.isnull(data[\"fat_content\"])]))" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "c8489ffa-1067-4eb7-b65a-2fa18fdb4b04", - "metadata": {}, - "outputs": [], - "source": [ - "del data[\"alt_spellings\"]\n", - "del data[\"producers\"]\n", - "del data[\"calcium_content\"]\n", - "del data[\"url\"]\n", - "del data[\"fat_content\"]\n", - "del data[\"synonyms\"]" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "5379265a-cd49-41fa-845c-bfae33bb8f5a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
cheesemilkcountryregionfamilytypetexturerindcolorflavoraromavegetarianvegan
0AarewassercowSwitzerlandNaNNaNsemi-softbutterywashedyellowsweetbutteryFalseFalse
1Abbaye de BellocsheepFrancePays BasqueNaNsemi-hard, artisancreamy, dense, firmnaturalyellowburnt caramellanolineTrueFalse
2Abbaye de BelvalcowFranceNaNNaNsemi-hardelasticwashedivoryNaNaromaticFalseFalse
3Abbaye de CiteauxcowFranceBurgundyNaNsemi-soft, artisan, brinedcreamy, dense, smoothwashedwhiteacidic, milky, smoothbarnyardy, earthyFalseFalse
4Abbaye de TamiécowFranceSavoieNaNsoft, artisancreamy, open, smoothwashedwhitefruity, nuttyperfumed, pungentFalseFalse
..........................................
1182SveciaostcowSwedenLow-laying regionsNaNsemi-hard, brinedcreamy, supplerindlesspale yellowacidicNaNFalseFalse
1183SwaggoatAustraliaSouth AustraliaNaNfresh firm, artisancreamy, crumblyash coatedwhiteacidic, creamyfreshTrueFalse
1184SwaledalesheepEnglandSwaledale, North YorkshireNaNhardsemi firmNaNyellowsmooth, sweetfloralTrueFalse
1185Sweet Style SwissNaNSwitzerlandNaNNaNsemi-hard, artisanfirm, supplewaxedNaNnuttynutty, sweetFalseFalse
1186Swiss cheesecowUnited StatesNaNSwiss Cheesehard, artisan, processedfirmrindlesspale yellownutty, sweetNaNTrueFalse
\n", - "

1187 rows × 13 columns

\n", - "
" - ], - "text/plain": [ - " cheese milk country region \\\n", - "0 Aarewasser cow Switzerland NaN \n", - "1 Abbaye de Belloc sheep France Pays Basque \n", - "2 Abbaye de Belval cow France NaN \n", - "3 Abbaye de Citeaux cow France Burgundy \n", - "4 Abbaye de Tamié cow France Savoie \n", - "... ... ... ... ... \n", - "1182 Sveciaost cow Sweden Low-laying regions \n", - "1183 Swag goat Australia South Australia \n", - "1184 Swaledale sheep England Swaledale, North Yorkshire \n", - "1185 Sweet Style Swiss NaN Switzerland NaN \n", - "1186 Swiss cheese cow United States NaN \n", - "\n", - " family type texture \\\n", - "0 NaN semi-soft buttery \n", - "1 NaN semi-hard, artisan creamy, dense, firm \n", - "2 NaN semi-hard elastic \n", - "3 NaN semi-soft, artisan, brined creamy, dense, smooth \n", - "4 NaN soft, artisan creamy, open, smooth \n", - "... ... ... ... \n", - "1182 NaN semi-hard, brined creamy, supple \n", - "1183 NaN fresh firm, artisan creamy, crumbly \n", - "1184 NaN hard semi firm \n", - "1185 NaN semi-hard, artisan firm, supple \n", - "1186 Swiss Cheese hard, artisan, processed firm \n", - "\n", - " rind color flavor aroma \\\n", - "0 washed yellow sweet buttery \n", - "1 natural yellow burnt caramel lanoline \n", - "2 washed ivory NaN aromatic \n", - "3 washed white acidic, milky, smooth barnyardy, earthy \n", - "4 washed white fruity, nutty perfumed, pungent \n", - "... ... ... ... ... \n", - "1182 rindless pale yellow acidic NaN \n", - "1183 ash coated white acidic, creamy fresh \n", - "1184 NaN yellow smooth, sweet floral \n", - "1185 waxed NaN nutty nutty, sweet \n", - "1186 rindless pale yellow nutty, sweet NaN \n", - "\n", - " vegetarian vegan \n", - "0 False False \n", - "1 True False \n", - "2 False False \n", - "3 False False \n", - "4 False False \n", - "... ... ... \n", - "1182 False False \n", - "1183 True False \n", - "1184 True False \n", - "1185 False False \n", - "1186 True False \n", - "\n", - "[1187 rows x 13 columns]" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "633ed80e-e416-41f6-ae58-b86ce4c132af", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1181 rows remaining\n" - ] - } - ], - "source": [ - "data=data.dropna(subset=[\"country\",\"region\"], how=\"all\")\n", - "data=data.fillna(value={\"country\":\"\"})\n", - "data=data.fillna(value={\"region\":\"\"})\n", - "print(f\"{len(data)} rows remaining\")" - ] - }, - { - "cell_type": "markdown", - "id": "fd66568f-78d4-4e1a-a91c-8ec483b4b03c", - "metadata": {}, - "source": [ - "We removed 6 rows for which we could not find a suitable location. " - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "5a4c0e30-8535-498b-9a9e-0d7d232d4eb7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
cheesemilkcountryregionfamilytypetexturerindcolorflavoraromavegetarianvegan
0AarewassercowSwitzerlandNaNsemi-softbutterywashedyellowsweetbutteryFalseFalse
1Abbaye de BellocsheepFrancePays BasqueNaNsemi-hard, artisancreamy, dense, firmnaturalyellowburnt caramellanolineTrueFalse
2Abbaye de BelvalcowFranceNaNsemi-hardelasticwashedivoryNaNaromaticFalseFalse
3Abbaye de CiteauxcowFranceBurgundyNaNsemi-soft, artisan, brinedcreamy, dense, smoothwashedwhiteacidic, milky, smoothbarnyardy, earthyFalseFalse
4Abbaye de TamiécowFranceSavoieNaNsoft, artisancreamy, open, smoothwashedwhitefruity, nuttyperfumed, pungentFalseFalse
..........................................
1182SveciaostcowSwedenLow-laying regionsNaNsemi-hard, brinedcreamy, supplerindlesspale yellowacidicNaNFalseFalse
1183SwaggoatAustraliaSouth AustraliaNaNfresh firm, artisancreamy, crumblyash coatedwhiteacidic, creamyfreshTrueFalse
1184SwaledalesheepEnglandSwaledale, North YorkshireNaNhardsemi firmNaNyellowsmooth, sweetfloralTrueFalse
1185Sweet Style SwissNaNSwitzerlandNaNsemi-hard, artisanfirm, supplewaxedNaNnuttynutty, sweetFalseFalse
1186Swiss cheesecowUnited StatesSwiss Cheesehard, artisan, processedfirmrindlesspale yellownutty, sweetNaNTrueFalse
\n", - "

1181 rows × 13 columns

\n", - "
" - ], - "text/plain": [ - " cheese milk country region \\\n", - "0 Aarewasser cow Switzerland \n", - "1 Abbaye de Belloc sheep France Pays Basque \n", - "2 Abbaye de Belval cow France \n", - "3 Abbaye de Citeaux cow France Burgundy \n", - "4 Abbaye de Tamié cow France Savoie \n", - "... ... ... ... ... \n", - "1182 Sveciaost cow Sweden Low-laying regions \n", - "1183 Swag goat Australia South Australia \n", - "1184 Swaledale sheep England Swaledale, North Yorkshire \n", - "1185 Sweet Style Swiss NaN Switzerland \n", - "1186 Swiss cheese cow United States \n", - "\n", - " family type texture \\\n", - "0 NaN semi-soft buttery \n", - "1 NaN semi-hard, artisan creamy, dense, firm \n", - "2 NaN semi-hard elastic \n", - "3 NaN semi-soft, artisan, brined creamy, dense, smooth \n", - "4 NaN soft, artisan creamy, open, smooth \n", - "... ... ... ... \n", - "1182 NaN semi-hard, brined creamy, supple \n", - "1183 NaN fresh firm, artisan creamy, crumbly \n", - "1184 NaN hard semi firm \n", - "1185 NaN semi-hard, artisan firm, supple \n", - "1186 Swiss Cheese hard, artisan, processed firm \n", - "\n", - " rind color flavor aroma \\\n", - "0 washed yellow sweet buttery \n", - "1 natural yellow burnt caramel lanoline \n", - "2 washed ivory NaN aromatic \n", - "3 washed white acidic, milky, smooth barnyardy, earthy \n", - "4 washed white fruity, nutty perfumed, pungent \n", - "... ... ... ... ... \n", - "1182 rindless pale yellow acidic NaN \n", - "1183 ash coated white acidic, creamy fresh \n", - "1184 NaN yellow smooth, sweet floral \n", - "1185 waxed NaN nutty nutty, sweet \n", - "1186 rindless pale yellow nutty, sweet NaN \n", - "\n", - " vegetarian vegan \n", - "0 False False \n", - "1 True False \n", - "2 False False \n", - "3 False False \n", - "4 False False \n", - "... ... ... \n", - "1182 False False \n", - "1183 True False \n", - "1184 True False \n", - "1185 False False \n", - "1186 True False \n", - "\n", - "[1181 rows x 13 columns]" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "7ef7494b-ff08-40a5-890f-e0f718cf2842", - "metadata": {}, - "outputs": [], - "source": [ - "data.loc[data.country.str.contains(\"England, Great Britain, United Kingdom\")|data.country.str.contains(\"England, United Kingdom\"),\"country\"]=\"England\"\n", - "data.loc[data.country.str.contains(\"Scotland\"),\"country\"]=\"Scotland\"\n", - "data.loc[data.country.str.contains(\"Great Britain, United Kingdom, Wales\")|data.country.str.contains(\"United Kingdom, Wales\"),\"country\"]=\"Wales\"\n" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "fb044984-c33c-492c-91a2-4e9fff29ceb3", - "metadata": {}, - "outputs": [], - "source": [ - "data=data.drop(index=data[data[\"country\"].str.contains(\",\")].index)\n", - "data=data.drop(index=data[data[\"country\"].str.contains(\" and \")].index)" - ] - }, - { - "cell_type": "markdown", - "id": "2f42c973-247a-4f51-947e-fbd76f8f12fc", - "metadata": {}, - "source": [ - "We removed 41 cheeses because they can come froms several countries. " - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "59c4e6e7-d624-45a5-a9ea-eb375102b771", - "metadata": {}, - "outputs": [], - "source": [ - "data[\"location\"]=data[\"region\"]+\", \"+data[\"country\"]" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "0dee0f25-4699-4e46-97d0-21bb36d9c603", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
cheesemilkcountryregionfamilytypetexturerindcolorflavoraromavegetarianveganlocation
0AarewassercowSwitzerlandNaNsemi-softbutterywashedyellowsweetbutteryFalseFalse, Switzerland
1Abbaye de BellocsheepFrancePays BasqueNaNsemi-hard, artisancreamy, dense, firmnaturalyellowburnt caramellanolineTrueFalsePays Basque, France
2Abbaye de BelvalcowFranceNaNsemi-hardelasticwashedivoryNaNaromaticFalseFalse, France
3Abbaye de CiteauxcowFranceBurgundyNaNsemi-soft, artisan, brinedcreamy, dense, smoothwashedwhiteacidic, milky, smoothbarnyardy, earthyFalseFalseBurgundy, France
4Abbaye de TamiécowFranceSavoieNaNsoft, artisancreamy, open, smoothwashedwhitefruity, nuttyperfumed, pungentFalseFalseSavoie, France
.............................................
1182SveciaostcowSwedenLow-laying regionsNaNsemi-hard, brinedcreamy, supplerindlesspale yellowacidicNaNFalseFalseLow-laying regions, Sweden
1183SwaggoatAustraliaSouth AustraliaNaNfresh firm, artisancreamy, crumblyash coatedwhiteacidic, creamyfreshTrueFalseSouth Australia, Australia
1184SwaledalesheepEnglandSwaledale, North YorkshireNaNhardsemi firmNaNyellowsmooth, sweetfloralTrueFalseSwaledale, North Yorkshire, England
1185Sweet Style SwissNaNSwitzerlandNaNsemi-hard, artisanfirm, supplewaxedNaNnuttynutty, sweetFalseFalse, Switzerland
1186Swiss cheesecowUnited StatesSwiss Cheesehard, artisan, processedfirmrindlesspale yellownutty, sweetNaNTrueFalse, United States
\n", - "

1140 rows × 14 columns

\n", - "
" - ], - "text/plain": [ - " cheese milk country region \\\n", - "0 Aarewasser cow Switzerland \n", - "1 Abbaye de Belloc sheep France Pays Basque \n", - "2 Abbaye de Belval cow France \n", - "3 Abbaye de Citeaux cow France Burgundy \n", - "4 Abbaye de Tamié cow France Savoie \n", - "... ... ... ... ... \n", - "1182 Sveciaost cow Sweden Low-laying regions \n", - "1183 Swag goat Australia South Australia \n", - "1184 Swaledale sheep England Swaledale, North Yorkshire \n", - "1185 Sweet Style Swiss NaN Switzerland \n", - "1186 Swiss cheese cow United States \n", - "\n", - " family type texture \\\n", - "0 NaN semi-soft buttery \n", - "1 NaN semi-hard, artisan creamy, dense, firm \n", - "2 NaN semi-hard elastic \n", - "3 NaN semi-soft, artisan, brined creamy, dense, smooth \n", - "4 NaN soft, artisan creamy, open, smooth \n", - "... ... ... ... \n", - "1182 NaN semi-hard, brined creamy, supple \n", - "1183 NaN fresh firm, artisan creamy, crumbly \n", - "1184 NaN hard semi firm \n", - "1185 NaN semi-hard, artisan firm, supple \n", - "1186 Swiss Cheese hard, artisan, processed firm \n", - "\n", - " rind color flavor aroma \\\n", - "0 washed yellow sweet buttery \n", - "1 natural yellow burnt caramel lanoline \n", - "2 washed ivory NaN aromatic \n", - "3 washed white acidic, milky, smooth barnyardy, earthy \n", - "4 washed white fruity, nutty perfumed, pungent \n", - "... ... ... ... ... \n", - "1182 rindless pale yellow acidic NaN \n", - "1183 ash coated white acidic, creamy fresh \n", - "1184 NaN yellow smooth, sweet floral \n", - "1185 waxed NaN nutty nutty, sweet \n", - "1186 rindless pale yellow nutty, sweet NaN \n", - "\n", - " vegetarian vegan location \n", - "0 False False , Switzerland \n", - "1 True False Pays Basque, France \n", - "2 False False , France \n", - "3 False False Burgundy, France \n", - "4 False False Savoie, France \n", - "... ... ... ... \n", - "1182 False False Low-laying regions, Sweden \n", - "1183 True False South Australia, Australia \n", - "1184 True False Swaledale, North Yorkshire, England \n", - "1185 False False , Switzerland \n", - "1186 True False , United States \n", - "\n", - "[1140 rows x 14 columns]" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2ef7351c-f117-403b-bc6e-f9f30a98c9d2", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "353724eb-8d64-4b64-84c6-f06be36acd8b", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "54d56bd4-c83a-4e8c-8751-b4b2f7830a9e", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bd68f1bb-c9f6-4c57-951b-8ac1f3192f09", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "eeff487e-9b66-4c4b-b4f6-dc5352fb2144", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cdb0d04c-e0f2-4553-8906-e9282f4942d2", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a551e0f4-3f99-4dae-9b31-6205b772ebf5", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7c02cc29-fe07-4ff9-8c6b-8638d37830cd", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "de579631-a29c-4620-9bbf-7085b83d16b7", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "e2e868d4-33a1-4602-af97-afb1d29e612f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{', Afghanistan',\n", - " ', Argentina',\n", - " ', Armenia',\n", - " ', Australia',\n", - " ', Austria',\n", - " ', Belgium',\n", - " ', Brazil',\n", - " ', Canada',\n", - " ', Cyprus',\n", - " ', Denmark',\n", - " ', England',\n", - " ', France',\n", - " ', Germany',\n", - " ', Greece',\n", - " ', Holland',\n", - " ', Hungary',\n", - " ', Iceland',\n", - " ', Iraq',\n", - " ', Ireland',\n", - " ', Israel',\n", - " ', Italy',\n", - " ', Mauritania',\n", - " ', Mexico',\n", - " ', Middle East',\n", - " ', Mongolia',\n", - " ', Netherlands',\n", - " ', New Zealand',\n", - " ', Poland',\n", - " ', Portugal',\n", - " ', Scotland',\n", - " ', Serbia',\n", - " ', Spain',\n", - " ', Sweden',\n", - " ', Switzerland',\n", - " ', Turkey',\n", - " ', United Kingdom',\n", - " ', United States',\n", - " ', Wales',\n", - " 'Aberdeenshire, Scotland',\n", - " 'Aconcagua, Chile',\n", - " 'Adamstown, Co Wexford, Ireland',\n", - " 'Airedale farming district, New Zealand',\n", - " 'Alba, Italy',\n", - " 'All Holland, Netherlands',\n", - " 'Allagau, Bavarian Alps, Germany',\n", - " 'Allgaeu Alps, Germany',\n", - " 'Allgau, Germany',\n", - " 'Allgäu, Germany',\n", - " 'Amou, Gascony, France',\n", - " 'Anjou, France',\n", - " 'Ann Arbor, MI, United States',\n", - " 'Ann Arbor, Michigan, United States',\n", - " 'Aquitaine, France',\n", - " 'Asiago, Italy',\n", - " 'Asturias, Spain',\n", - " 'Auvergne, France',\n", - " 'Auvergne, Salers, France',\n", - " 'Averyon, France',\n", - " 'Avesnes, France',\n", - " 'Aveyron, France',\n", - " 'Aveyron, Laguiole, France',\n", - " 'Avila, Spain',\n", - " 'Azores, Portugal',\n", - " 'Ballarat, Victoria, Australia',\n", - " 'Banks Peninsular in Canterbury, New Zealand',\n", - " 'Banon, France',\n", - " 'Barcelona, Spain',\n", - " 'Bas-Languedoc, Comtat Venaissin, France',\n", - " 'Basilicata, Italy',\n", - " 'Basque, Pyrenees Mountains, France',\n", - " 'Bavaria, Germany',\n", - " 'Beara Peninsula, Co. Cork, Ireland',\n", - " 'Beira Baixa Province, Portugal',\n", - " 'Belvederis, Lithuania',\n", - " 'Bergues, France',\n", - " 'Bermondsey, London, England',\n", - " 'Berry, France',\n", - " 'Bethania, United Kingdom',\n", - " 'Bjurholm, Sweden',\n", - " 'Blarney, Ireland',\n", - " 'Bloomdale, United States',\n", - " 'Bornholm, Denmark',\n", - " 'Bourgogne, France',\n", - " 'Bregenzerwald, Kleinwalsertal, Großwalsertal, Laiblachtal (Pfänderstock) and Rheintal, Austria',\n", - " 'Brickhill, Co. Clare, Ireland',\n", - " 'Brisbane, Australia',\n", - " 'British Columbia, Canada',\n", - " 'Brittany, France',\n", - " 'Brooklyn NY, United States',\n", - " 'Burgund, France',\n", - " 'Burgundy, France',\n", - " 'Bursa, Turkey',\n", - " 'Buxton, Derbyshire, England',\n", - " 'Béarnaise in Pyrénées-Atlantique, France',\n", - " 'Calabria, Italy',\n", - " 'California, United States',\n", - " 'Campania, Italy',\n", - " 'Campania, Paestum, Foggia, Italy',\n", - " 'Canary Islands, Spain',\n", - " 'Canton of Glarus, Switzerland',\n", - " 'Carmarthenshire, Wales',\n", - " 'Carneros, Sonoma, California, United States',\n", - " 'Carnia, Italy',\n", - " 'Carrigtwohill, ',\n", - " 'Carrigtwohill, Ireland',\n", - " 'Castelo Branco, Fundão and Idanha-a-Nova, Portugal',\n", - " 'Castile-Leon, Spain',\n", - " 'Castilla Leon, Spain',\n", - " 'Castille-Leon, Spain',\n", - " 'Central Balkan Mountains, Bulgaria',\n", - " 'Central and Western Macedonia, Thessalia, Greece',\n", - " 'Central and Western Macedonia, Thessaly, Greece',\n", - " 'Centre , the department of Loiret, France',\n", - " 'Centre-Val de Loire, France',\n", - " 'Ceredigion, United Kingdom',\n", - " 'Cevenes, France',\n", - " 'Charentes, France',\n", - " 'Charentes-Poitou, France',\n", - " 'Charm, Ohio, United States',\n", - " 'Chelmarsh, Bridgnorth, Shropshire, England',\n", - " 'Cheshire, England',\n", - " 'Chirac, France',\n", - " 'Co Clare, Ireland',\n", - " 'Co Limerick, Ireland',\n", - " 'Co. Carlow, Ireland',\n", - " 'Co. Cork, Ireland',\n", - " 'Co. Mayo, Ireland',\n", - " 'Co. Offaly, Ireland',\n", - " 'Coast of Oregon, United States',\n", - " 'Colby, Wisconsin, United States',\n", - " 'Colorado, United States',\n", - " 'Comox Valley, Vancouver Island, Canada',\n", - " 'Coquet, England',\n", - " 'Cornwall, ',\n", - " 'Cornwall, England',\n", - " 'Corsica, France',\n", - " 'Cotherstone, England',\n", - " 'Cotswolds, England',\n", - " 'County Antrim, Ireland',\n", - " 'County Carlow, Ireland',\n", - " 'County Cavan, Ireland',\n", - " 'County Tipperary, Clogheen, Ireland',\n", - " 'County Wexford, Ireland',\n", - " 'Croisy-sur-Eure, France',\n", - " 'Crotone, Italy',\n", - " 'Cumbrian, United Kingdom',\n", - " 'Dalmatia, Croatia',\n", - " 'Derbyshire, Leicestershire, Nottinghamshire, England',\n", - " 'Devon, England',\n", - " 'Dorset, England',\n", - " 'Duhallow, Ireland',\n", - " 'Dumfries, Scotland',\n", - " 'Dumfriesshire, Scotland',\n", - " 'East Midlands, England',\n", - " 'East Sussex, United Kingdom',\n", - " 'Emilia Romagna, Italy',\n", - " 'Emilia-Romagna, Italy',\n", - " 'Extremadura, Spain',\n", - " 'Fairview, United States',\n", - " 'Fethard, Co Tipperary, Ireland',\n", - " 'Fife, Scotland',\n", - " 'Flanders, Belgium',\n", - " 'Fornells de la Selva, Gironès, Spain',\n", - " 'Franche Comté, France',\n", - " 'French Basque Country, Midi-Pyrénées, France',\n", - " 'Friuli Venezia Giulia and Veneto, Italy',\n", - " 'Friuli-Venezia Giulia and the Veneto, Italy',\n", - " 'Friuli-Venezia Giulia, Italy',\n", - " 'Galax, Virginia, United States',\n", - " 'Galicia, Spain',\n", - " 'Georgia, United States',\n", - " 'Gevrey-Chambertin, Burgundy, France',\n", - " 'Gippsland, Victoria, Australia',\n", - " 'Gloucestershire County, England',\n", - " 'Gloucestershire, England',\n", - " 'Gravina in Puglia, Murgia, Italy',\n", - " 'Greensboro, VT, United States',\n", - " 'Greenville, Indiana, United States',\n", - " 'Gujarat, India',\n", - " 'Gâtinais, France',\n", - " 'Hamilton, New Zealand',\n", - " 'Haute Vienne, France',\n", - " 'Haute-Savoie / Upper Savoy, France',\n", - " 'Herault, France',\n", - " 'Herefordshire, West Midlands, United Kingdom',\n", - " 'Het Groene Hart, Netherlands',\n", - " 'Huizen, Netherlands',\n", - " 'Hunter Valley, Australia',\n", - " 'Ile de France, France',\n", - " 'Ile-de-France/Champagne, France',\n", - " 'Illinois, United States',\n", - " 'Illoud (Haute-Marne), France',\n", - " 'Inagh, Co Clare, ',\n", - " 'Inagh, Co Clare, Ireland',\n", - " 'Indiana, United States',\n", - " 'Iowa, United States',\n", - " 'Isere, France',\n", - " 'Island of Pag, Croatia',\n", - " 'Jura, Switzerland',\n", - " 'Karlovy Vary, Czech Republic',\n", - " 'Kent, United Kingdom',\n", - " 'Kilmallock County Limerick, Ireland',\n", - " 'Kimball, United States',\n", - " 'Kinfauns, Perthshire, Scotland',\n", - " 'La Velle, Wisconsin, United States',\n", - " 'Lanarkshire, Scotland',\n", - " 'Landford, England',\n", - " 'Landshut, Germany',\n", - " 'Languedoc, France',\n", - " 'Languedoc-Roussillon, France',\n", - " 'Lapland, Finland',\n", - " 'Laqueuille, France',\n", - " 'Laruns, France',\n", - " 'Larzac, France',\n", - " 'Lazio, Sardinia, Italy',\n", - " 'Lebanon, CT, United States',\n", - " 'Leiden, Netherlands',\n", - " 'Lincolnshire, England',\n", - " 'Lodi, Italy',\n", - " 'Loire Valley, France',\n", - " 'Loire, France',\n", - " 'Lombardy, Italy',\n", - " 'Low-laying regions, Sweden',\n", - " 'Lower Normandy, France',\n", - " \"Lucerne, Schwyz, Unterwald, and Zoug, and the following additional places: Muri district in d'Argovi, Switzerland\",\n", - " 'Macedonia, Thrace, Thessalia, Peloponissos, Ionian Islands, Aegean islands, Crete Island and Epirus, Greece',\n", - " 'Maine, United States',\n", - " 'Manitoba, Canada',\n", - " 'Mankato, MN, United States',\n", - " 'Marathon, NY, United States',\n", - " 'Maribo, Denmark',\n", - " 'Massachusetts, United States',\n", - " 'Menorca, Balearic Islands, Spain',\n", - " 'Midi-Pyrenees, France',\n", - " 'Midi-Pyrénées, France',\n", - " 'Milford, NJ, United States',\n", - " 'Minas Gerais, Brazil',\n", - " 'Minnesota, United States',\n", - " 'Missouri, United States',\n", - " 'Modena, Italy',\n", - " 'Moliterno, Italy',\n", - " 'Mols, Denmark',\n", - " 'Monterey, California, United States',\n", - " 'Mornington Peninsula, Melbourne, Australia',\n", - " 'Murazzano, Italy',\n", - " 'Murcia, Spain',\n", - " 'NY, United States',\n", - " 'Naples, Italy',\n", - " 'New Hampshire, United States',\n", - " 'New Jersey, United States',\n", - " 'New South Wales, Australia',\n", - " 'New York, France',\n", - " 'New York, United States',\n", - " 'Nicasio, United States',\n", - " 'Nord-Pas-de-Calais, France',\n", - " 'Normandy, Auvilliers, France',\n", - " 'Normandy, France',\n", - " 'North Carolina, United States',\n", - " 'North Cornwall, England',\n", - " 'North East Victoria, ',\n", - " 'North East Victoria, Australia',\n", - " 'North Wootton, England',\n", - " 'North Yorkshire, England',\n", - " 'Northeastern Brazil, Brazil',\n", - " 'Northern Holland, Netherlands',\n", - " 'Northern Wisconsin, United States',\n", - " 'Northwest, United States',\n", - " 'Nottinghamshire, England',\n", - " 'Odell, Bedfordshire, England',\n", - " 'Ontario, Canada',\n", - " 'Oregon Coast Range, United States',\n", - " 'Oregon, United States',\n", - " 'Oristano, Italy',\n", - " 'Orkney Islands, Scotland',\n", - " 'Orkney Isles, Scotland',\n", - " 'Oviken, Sweden',\n", - " 'Oxfordshire, Great Britain',\n", - " 'Passendale, Belgium',\n", - " 'Pays Basque, France',\n", - " 'Pays d’Auge, Normandy, France',\n", - " 'Peekskill, United States',\n", - " 'Pembrokeshire, United Kingdom',\n", - " 'Pembrokeshire, Wales',\n", - " 'Pennsylvania, United States',\n", - " 'Pesaro-Urbino, Italy',\n", - " 'Petaluma, California, United States',\n", - " 'Piave Valley, Italy, Italy',\n", - " 'Piedmont, Italy',\n", - " 'Piemonte, Italy',\n", - " 'Pienza, Italy',\n", - " 'Pinconning, Michigan, United States',\n", - " 'Piora Valley, Switzerland',\n", - " 'Po valley region, Italy',\n", - " 'Poitou-Charentes, France',\n", - " 'Pokolbin, Hunter Valley, Australia',\n", - " 'Port Townsend, United States',\n", - " 'Postel, Belgium',\n", - " 'Prince Edward County, Ontario, Canada',\n", - " 'Prince Edward Island, Canada',\n", - " 'Provencale, France',\n", - " 'Provence, France',\n", - " 'Puimichel in Provence Alpes, France',\n", - " 'Pullman, Washington, United States',\n", - " 'Pyrenees, France',\n", - " 'Pyrenees-Atlantiques, France',\n", - " 'Pyrénées, France',\n", - " 'Pyrénées-Atlantiques, France',\n", - " 'Póvoa de Lanhoso, Portugal',\n", - " 'Quebec, Canada',\n", - " 'Queenstown, New Zealand',\n", - " 'Québec, Canada',\n", - " 'Rhone Valley, France',\n", - " 'Rhone-Alps, France',\n", - " 'Rhône-Alpes, France',\n", - " 'Richfield, Wisconsin, United States',\n", - " 'Rio Grande do Sul, Brazil',\n", - " 'Romanian Carpathians, Romania',\n", - " 'Roncq, France',\n", - " 'Roxburghshire, Scotland',\n", - " 'Sardegna, Italy',\n", - " 'Sardinia & Campania, Italy',\n", - " 'Savoie, France',\n", - " 'Schoonrewoerd, Leerdam, Netherlands',\n", - " 'Seattle, Washington, United States',\n", - " 'Sebastopol, California, United States',\n", - " 'Serra da Canastra, Minas Gerais state, Brazil',\n", - " 'Serra da Estrela, Portugal',\n", - " 'Setubal, Palmela and Sesimbra, Portugal',\n", - " 'Severn Valley, England',\n", - " 'Shelburne Farms, United States',\n", - " 'Somerset, England',\n", - " 'Sonoma, California, United States',\n", - " 'South Australia, Australia',\n", - " 'South East England, United Kingdom',\n", - " 'South West England, England',\n", - " 'South West England, United Kingdom',\n", - " 'Southern California, United States',\n", - " 'Southwestern Wisconsin, United States',\n", - " 'St Antoine, France',\n", - " 'St. Gallen (canton), Tufertschwil, Switzerland',\n", - " 'St. Louis, Missouri, United States',\n", - " 'Staffordshire, England',\n", - " 'Stawley, near Wellington, Somerset, England',\n", - " 'Stewarton, Scotland',\n", - " 'Stonegate, East Sussex, England',\n", - " 'Stoneyford, Ireland',\n", - " 'Stranraer, Scotland',\n", - " 'Sulzberg, Austria',\n", - " 'Svaneti, Samegrelo, Georgia',\n", - " 'Swabia, Germany',\n", - " 'Swaledale, North Yorkshire, England',\n", - " 'Tain, Scotland',\n", - " 'Tasmania, Australia',\n", - " 'Taxco, Mexico',\n", - " 'Tieton, Washington, United States',\n", - " 'Timsbury, Somerset, England',\n", - " 'Timsbury, Somerset, Scotland',\n", - " 'Tipperary, Ireland',\n", - " 'Tomales, California, United States',\n", - " 'Treviso, Veneto, Italy',\n", - " 'Troyes , Aube, France',\n", - " 'Tuscany, Italy',\n", - " 'Umbria, Lazio, Italy',\n", - " 'Upper Corsica, France',\n", - " 'Utah, United States',\n", - " 'Valencia, Spain',\n", - " 'Valpadana, Italy',\n", - " 'Veneto, ',\n", - " 'Veneto, Italy',\n", - " 'Veneto, Trentino, Italy',\n", - " 'Vermont, United States',\n", - " 'Victoria, Australia',\n", - " 'Virginia, United States',\n", - " 'Vorarlberg, Austria',\n", - " 'Västra Götaland, Sweden',\n", - " 'Wales, Great Britain',\n", - " 'Wales, London, Wales',\n", - " 'Wallonia, Belgium',\n", - " 'Websterville, VT, United States',\n", - " 'West Bengal, India',\n", - " 'West Pawlet, VT, United States',\n", - " 'Wigtownshire, Scotland',\n", - " 'Wisconsin, United States',\n", - " 'Zasavica, Serbia',\n", - " 'island wide, Cyprus',\n", - " 'massif des Causses, France',\n", - " 'old Liburnia (Dalmatia), Croatia',\n", - " 'province of Brittany, France',\n", - " 'Äänekoski, Finland'}" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "locs=set(data[\"location\"])\n", - "locs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f3bb9a47-56fa-49c4-8761-0db015944446", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "debb780e-ec13-4502-ac44-6001335e507d", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "eed3ac7b-5283-4d8e-bc26-61e1d821ccaf", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "0043fe0d-e2d2-48f0-8953-ffc3dee52ba6", - "metadata": {}, - "outputs": [], - "source": [ - "def str_to_gps(loc):\n", - " l=loc.split(\",\")\n", - " loc=\",\".join([l[0],l[-1]])# removing details gives less errors while fetching the GPS coordinates\n", - " try:\n", - " res=Nominatim(user_agent=\"dmProject\").geocode(loc) \n", - " return (res.latitude, res.longitude)\n", - " except AttributeError:\n", - " loc=l[-1]\n", - " res=Nominatim(user_agent=\"dmProject\").geocode(loc) \n", - " return (res.latitude, res.longitude)" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "710341db-408f-4a4a-a849-65b963582ebc", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "07fbeae8f58240ee921eae823a2a69a0", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/389 [00:00