{ "cells": [ { "cell_type": "markdown", "id": "5f7c9658-c285-4854-96c0-e899fc55421b", "metadata": {}, "source": [ "# DM project: cheese" ] }, { "cell_type": "code", "execution_count": 73, "id": "7f4f2b89-8257-468c-9f5e-a77e11b8b8ff", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from mlxtend.preprocessing import TransactionEncoder\n", "from mlxtend.frequent_patterns import apriori\n", "import geopy\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": null, "id": "38096d49-fe67-4c60-a05e-8ad8c442b19c", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 2, "id": "1a0afba8-692b-4377-a2ce-5114983e3bbb", "metadata": {}, "outputs": [], "source": [ "# Load the cheese table; the path is relative to the notebook's working directory.\n", "data = pd.read_csv(filepath_or_buffer=\"cheeses.csv\")" ] }, { "cell_type": "code", "execution_count": 3, "id": "06f7d328-7b6a-4193-b155-3d47519a1e9a", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cheeseurlmilkcountryregionfamilytypefat_contentcalcium_contenttexturerindcolorflavoraromavegetarianvegansynonymsalt_spellingsproducers
0Aarewasserhttps://www.cheese.com/aarewasser/cowSwitzerlandNaNNaNsemi-softNaNNaNbutterywashedyellowsweetbutteryFalseFalseNaNNaNJumi
1Abbaye de Bellochttps://www.cheese.com/abbaye-de-belloc/sheepFrancePays BasqueNaNsemi-hard, artisanNaNNaNcreamy, dense, firmnaturalyellowburnt caramellanolineTrueFalseAbbaye Notre-Dame de BellocNaNNaN
2Abbaye de Belvalhttps://www.cheese.com/abbaye-de-belval/cowFranceNaNNaNsemi-hard40-46%NaNelasticwashedivoryNaNaromaticFalseFalseNaNNaNNaN
3Abbaye de Citeauxhttps://www.cheese.com/abbaye-de-citeaux/cowFranceBurgundyNaNsemi-soft, artisan, brinedNaNNaNcreamy, dense, smoothwashedwhiteacidic, milky, smoothbarnyardy, earthyFalseFalseNaNNaNNaN
4Abbaye de Tamiéhttps://www.cheese.com/tamie/cowFranceSavoieNaNsoft, artisanNaNNaNcreamy, open, smoothwashedwhitefruity, nuttyperfumed, pungentFalseFalseNaNTamié, Trappiste de Tamie, Abbey of TamieNaN
............................................................
1182Sveciaosthttps://www.cheese.com/sveciaost/cowSwedenLow-laying regionsNaNsemi-hard, brined45%NaNcreamy, supplerindlesspale yellowacidicNaNFalseFalseNaNNaNNaN
1183Swaghttps://www.cheese.com/swag/goatAustraliaSouth AustraliaNaNfresh firm, artisanNaNNaNcreamy, crumblyash coatedwhiteacidic, creamyfreshTrueFalseNaNNaNWoodside Cheese Wrights
1184Swaledalehttps://www.cheese.com/swaledale/sheepEnglandSwaledale, North YorkshireNaNhardNaNNaNsemi firmNaNyellowsmooth, sweetfloralTrueFalseSwaledale Sheep CheeseNaNNaN
1185Sweet Style Swisshttps://www.cheese.com/sweet-style-swiss/NaNSwitzerlandNaNNaNsemi-hard, artisanNaNNaNfirm, supplewaxedNaNnuttynutty, sweetFalseFalseNaNNaNNaN
1186Swiss cheesehttps://www.cheese.com/swiss/cowUnited StatesNaNSwiss Cheesehard, artisan, processed7.8 g/100gNaNfirmrindlesspale yellownutty, sweetNaNTrueFalseAmerican Swiss CheeseNaNVarious
\n", "

1187 rows × 19 columns

\n", "
" ], "text/plain": [ " cheese url milk \\\n", "0 Aarewasser https://www.cheese.com/aarewasser/ cow \n", "1 Abbaye de Belloc https://www.cheese.com/abbaye-de-belloc/ sheep \n", "2 Abbaye de Belval https://www.cheese.com/abbaye-de-belval/ cow \n", "3 Abbaye de Citeaux https://www.cheese.com/abbaye-de-citeaux/ cow \n", "4 Abbaye de Tamié https://www.cheese.com/tamie/ cow \n", "... ... ... ... \n", "1182 Sveciaost https://www.cheese.com/sveciaost/ cow \n", "1183 Swag https://www.cheese.com/swag/ goat \n", "1184 Swaledale https://www.cheese.com/swaledale/ sheep \n", "1185 Sweet Style Swiss https://www.cheese.com/sweet-style-swiss/ NaN \n", "1186 Swiss cheese https://www.cheese.com/swiss/ cow \n", "\n", " country region family \\\n", "0 Switzerland NaN NaN \n", "1 France Pays Basque NaN \n", "2 France NaN NaN \n", "3 France Burgundy NaN \n", "4 France Savoie NaN \n", "... ... ... ... \n", "1182 Sweden Low-laying regions NaN \n", "1183 Australia South Australia NaN \n", "1184 England Swaledale, North Yorkshire NaN \n", "1185 Switzerland NaN NaN \n", "1186 United States NaN Swiss Cheese \n", "\n", " type fat_content calcium_content \\\n", "0 semi-soft NaN NaN \n", "1 semi-hard, artisan NaN NaN \n", "2 semi-hard 40-46% NaN \n", "3 semi-soft, artisan, brined NaN NaN \n", "4 soft, artisan NaN NaN \n", "... ... ... ... \n", "1182 semi-hard, brined 45% NaN \n", "1183 fresh firm, artisan NaN NaN \n", "1184 hard NaN NaN \n", "1185 semi-hard, artisan NaN NaN \n", "1186 hard, artisan, processed 7.8 g/100g NaN \n", "\n", " texture rind color flavor \\\n", "0 buttery washed yellow sweet \n", "1 creamy, dense, firm natural yellow burnt caramel \n", "2 elastic washed ivory NaN \n", "3 creamy, dense, smooth washed white acidic, milky, smooth \n", "4 creamy, open, smooth washed white fruity, nutty \n", "... ... ... ... ... 
\n", "1182 creamy, supple rindless pale yellow acidic \n", "1183 creamy, crumbly ash coated white acidic, creamy \n", "1184 semi firm NaN yellow smooth, sweet \n", "1185 firm, supple waxed NaN nutty \n", "1186 firm rindless pale yellow nutty, sweet \n", "\n", " aroma vegetarian vegan synonyms \\\n", "0 buttery False False NaN \n", "1 lanoline True False Abbaye Notre-Dame de Belloc \n", "2 aromatic False False NaN \n", "3 barnyardy, earthy False False NaN \n", "4 perfumed, pungent False False NaN \n", "... ... ... ... ... \n", "1182 NaN False False NaN \n", "1183 fresh True False NaN \n", "1184 floral True False Swaledale Sheep Cheese \n", "1185 nutty, sweet False False NaN \n", "1186 NaN True False American Swiss Cheese \n", "\n", " alt_spellings producers \n", "0 NaN Jumi \n", "1 NaN NaN \n", "2 NaN NaN \n", "3 NaN NaN \n", "4 Tamié, Trappiste de Tamie, Abbey of Tamie NaN \n", "... ... ... \n", "1182 NaN NaN \n", "1183 NaN Woodside Cheese Wrights \n", "1184 NaN NaN \n", "1185 NaN NaN \n", "1186 NaN Various \n", "\n", "[1187 rows x 19 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] }, { "cell_type": "code", "execution_count": null, "id": "7d354e9e-3abc-4d6a-9aea-a00a92ce3c3c", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 4, "id": "2018aac2-6f3d-489a-b5d0-90b7c7793076", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'blue',\n", " 'blue-grey',\n", " 'brown',\n", " 'brownish yellow',\n", " 'cream',\n", " 'golden orange',\n", " 'golden yellow',\n", " 'green',\n", " 'ivory',\n", " nan,\n", " 'orange',\n", " 'pale white',\n", " 'pale yellow',\n", " 'pink and white',\n", " 'red',\n", " 'straw',\n", " 'white',\n", " 'yellow'}" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "set(data[\"color\"])" ] }, { "cell_type": "code", "execution_count": 5, "id": "71b1853e-d2a4-4afe-9ad0-4f15689f2e23", "metadata": {}, 
"outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cheeseurlmilkcountryregionfamilytypefat_contentcalcium_contenttexturerindcolorflavoraromavegetarianvegansynonymsalt_spellingsproducers
10Acapellahttps://www.cheese.com/acapella/goatUnited StatesCaliforniaNaNsoft, soft-ripenedNaNNaNNaNNaNNaNbutteryfresh, herbalFalseFalseNaNNaNNaN
13Acornhttps://www.cheese.com/acorn/sheepUnited KingdomBethaniaNaNhard, artisan52%NaNcrumbly, firmNaNNaNburnt caramel, citrusy, herbaceousfruityTrueFalseNaNNaNNaN
19Afuega'l Pituhttps://www.cheese.com/afuegal-pitu/cowSpainAsturiasNaNsoft, artisanNaNNaNsmoothcloth wrappedNaNspicy, strongNaNFalseFalseNaNNaNNaN
48Alpe di Frabosahttps://www.cheese.com/alpe-di-frabosa/cowItalyNaNNaNsemi-softNaNNaNNaNNaNNaNbittermilky, mushroomFalseFalseNaNNaNNaN
50Alpicrèmehttps://www.cheese.com/alpicreme/goatFranceNaNNaNsoftNaNNaNNaNNaNNaNNaNNaNFalseFalseNaNNaNNaN
............................................................
1172Strathdon Bluehttps://www.cheese.com/strathdon-blue/cowScotlandTainBluesemi-softNaNNaNcreamyNaNNaNcreamy, spicyaromatic, richTrueFalseNaNNaNHighland Fine Cheeses Limited
1175String Cheesehttps://www.cheese.com/string/NaNNaNNaNNaNsemi-hardNaNNaNchewy, firm, stringyNaNNaNNaNNaNNaNNaNNaNNaNNaN
1177Sulgunihttps://www.cheese.com/sulguni/buffalo, cowGeorgiaSvaneti, SamegreloNaNsemi-firmNaNNaNdense, elasticNaNNaNsalty, smokey , sourNaNNaNNaNGeorgian Pickle CheeseMegruli Sulguni, Shebolili Megruli SulguniNaN
1181Sussex Slipcotehttps://www.cheese.com/sussex-slipcote/sheepEnglandNaNNaNsoftNaNNaNNaNNaNNaNsharpNaNTrueFalseNaNNaNHigh Weald Dairy
1185Sweet Style Swisshttps://www.cheese.com/sweet-style-swiss/NaNSwitzerlandNaNNaNsemi-hard, artisanNaNNaNfirm, supplewaxedNaNnuttynutty, sweetFalseFalseNaNNaNNaN
\n", "

142 rows × 19 columns

\n", "
" ], "text/plain": [ " cheese url \\\n", "10 Acapella https://www.cheese.com/acapella/ \n", "13 Acorn https://www.cheese.com/acorn/ \n", "19 Afuega'l Pitu https://www.cheese.com/afuegal-pitu/ \n", "48 Alpe di Frabosa https://www.cheese.com/alpe-di-frabosa/ \n", "50 Alpicrème https://www.cheese.com/alpicreme/ \n", "... ... ... \n", "1172 Strathdon Blue https://www.cheese.com/strathdon-blue/ \n", "1175 String Cheese https://www.cheese.com/string/ \n", "1177 Sulguni https://www.cheese.com/sulguni/ \n", "1181 Sussex Slipcote https://www.cheese.com/sussex-slipcote/ \n", "1185 Sweet Style Swiss https://www.cheese.com/sweet-style-swiss/ \n", "\n", " milk country region family \\\n", "10 goat United States California NaN \n", "13 sheep United Kingdom Bethania NaN \n", "19 cow Spain Asturias NaN \n", "48 cow Italy NaN NaN \n", "50 goat France NaN NaN \n", "... ... ... ... ... \n", "1172 cow Scotland Tain Blue \n", "1175 NaN NaN NaN NaN \n", "1177 buffalo, cow Georgia Svaneti, Samegrelo NaN \n", "1181 sheep England NaN NaN \n", "1185 NaN Switzerland NaN NaN \n", "\n", " type fat_content calcium_content texture \\\n", "10 soft, soft-ripened NaN NaN NaN \n", "13 hard, artisan 52% NaN crumbly, firm \n", "19 soft, artisan NaN NaN smooth \n", "48 semi-soft NaN NaN NaN \n", "50 soft NaN NaN NaN \n", "... ... ... ... ... \n", "1172 semi-soft NaN NaN creamy \n", "1175 semi-hard NaN NaN chewy, firm, stringy \n", "1177 semi-firm NaN NaN dense, elastic \n", "1181 soft NaN NaN NaN \n", "1185 semi-hard, artisan NaN NaN firm, supple \n", "\n", " rind color flavor \\\n", "10 NaN NaN buttery \n", "13 NaN NaN burnt caramel, citrusy, herbaceous \n", "19 cloth wrapped NaN spicy, strong \n", "48 NaN NaN bitter \n", "50 NaN NaN NaN \n", "... ... ... ... 
\n", "1172 NaN NaN creamy, spicy \n", "1175 NaN NaN NaN \n", "1177 NaN NaN salty, smokey , sour \n", "1181 NaN NaN sharp \n", "1185 waxed NaN nutty \n", "\n", " aroma vegetarian vegan synonyms \\\n", "10 fresh, herbal False False NaN \n", "13 fruity True False NaN \n", "19 NaN False False NaN \n", "48 milky, mushroom False False NaN \n", "50 NaN False False NaN \n", "... ... ... ... ... \n", "1172 aromatic, rich True False NaN \n", "1175 NaN NaN NaN NaN \n", "1177 NaN NaN NaN Georgian Pickle Cheese \n", "1181 NaN True False NaN \n", "1185 nutty, sweet False False NaN \n", "\n", " alt_spellings \\\n", "10 NaN \n", "13 NaN \n", "19 NaN \n", "48 NaN \n", "50 NaN \n", "... ... \n", "1172 NaN \n", "1175 NaN \n", "1177 Megruli Sulguni, Shebolili Megruli Sulguni \n", "1181 NaN \n", "1185 NaN \n", "\n", " producers \n", "10 NaN \n", "13 NaN \n", "19 NaN \n", "48 NaN \n", "50 NaN \n", "... ... \n", "1172 Highland Fine Cheeses Limited \n", "1175 NaN \n", "1177 NaN \n", "1181 High Weald Dairy \n", "1185 NaN \n", "\n", "[142 rows x 19 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data[pd.isnull(data[\"color\"])]" ] }, { "cell_type": "markdown", "id": "bf3b548c-5ac4-4126-9ae9-5578ad158015", "metadata": {}, "source": [ "## Cleaning" ] }, { "cell_type": "code", "execution_count": 6, "id": "fb5ddb0a-8551-4e7c-971e-00a819ebb4b3", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cheeseurlmilkcountryregionfamilytypefat_contentcalcium_contenttexturerindcolorflavoraromavegetarianvegansynonymsalt_spellingsproducers
10Acapellahttps://www.cheese.com/acapella/goatUnited StatesCaliforniaNaNsoft, soft-ripenedNaNNaNNaNNaNNaNbutteryfresh, herbalFalseFalseNaNNaNNaN
13Acornhttps://www.cheese.com/acorn/sheepUnited KingdomBethaniaNaNhard, artisan52%NaNcrumbly, firmNaNNaNburnt caramel, citrusy, herbaceousfruityTrueFalseNaNNaNNaN
19Afuega'l Pituhttps://www.cheese.com/afuegal-pitu/cowSpainAsturiasNaNsoft, artisanNaNNaNsmoothcloth wrappedNaNspicy, strongNaNFalseFalseNaNNaNNaN
48Alpe di Frabosahttps://www.cheese.com/alpe-di-frabosa/cowItalyNaNNaNsemi-softNaNNaNNaNNaNNaNbittermilky, mushroomFalseFalseNaNNaNNaN
50Alpicrèmehttps://www.cheese.com/alpicreme/goatFranceNaNNaNsoftNaNNaNNaNNaNNaNNaNNaNFalseFalseNaNNaNNaN
............................................................
1172Strathdon Bluehttps://www.cheese.com/strathdon-blue/cowScotlandTainBluesemi-softNaNNaNcreamyNaNNaNcreamy, spicyaromatic, richTrueFalseNaNNaNHighland Fine Cheeses Limited
1175String Cheesehttps://www.cheese.com/string/NaNNaNNaNNaNsemi-hardNaNNaNchewy, firm, stringyNaNNaNNaNNaNNaNNaNNaNNaNNaN
1177Sulgunihttps://www.cheese.com/sulguni/buffalo, cowGeorgiaSvaneti, SamegreloNaNsemi-firmNaNNaNdense, elasticNaNNaNsalty, smokey , sourNaNNaNNaNGeorgian Pickle CheeseMegruli Sulguni, Shebolili Megruli SulguniNaN
1181Sussex Slipcotehttps://www.cheese.com/sussex-slipcote/sheepEnglandNaNNaNsoftNaNNaNNaNNaNNaNsharpNaNTrueFalseNaNNaNHigh Weald Dairy
1185Sweet Style Swisshttps://www.cheese.com/sweet-style-swiss/NaNSwitzerlandNaNNaNsemi-hard, artisanNaNNaNfirm, supplewaxedNaNnuttynutty, sweetFalseFalseNaNNaNNaN
\n", "

142 rows × 19 columns

\n", "
" ], "text/plain": [ " cheese url \\\n", "10 Acapella https://www.cheese.com/acapella/ \n", "13 Acorn https://www.cheese.com/acorn/ \n", "19 Afuega'l Pitu https://www.cheese.com/afuegal-pitu/ \n", "48 Alpe di Frabosa https://www.cheese.com/alpe-di-frabosa/ \n", "50 Alpicrème https://www.cheese.com/alpicreme/ \n", "... ... ... \n", "1172 Strathdon Blue https://www.cheese.com/strathdon-blue/ \n", "1175 String Cheese https://www.cheese.com/string/ \n", "1177 Sulguni https://www.cheese.com/sulguni/ \n", "1181 Sussex Slipcote https://www.cheese.com/sussex-slipcote/ \n", "1185 Sweet Style Swiss https://www.cheese.com/sweet-style-swiss/ \n", "\n", " milk country region family \\\n", "10 goat United States California NaN \n", "13 sheep United Kingdom Bethania NaN \n", "19 cow Spain Asturias NaN \n", "48 cow Italy NaN NaN \n", "50 goat France NaN NaN \n", "... ... ... ... ... \n", "1172 cow Scotland Tain Blue \n", "1175 NaN NaN NaN NaN \n", "1177 buffalo, cow Georgia Svaneti, Samegrelo NaN \n", "1181 sheep England NaN NaN \n", "1185 NaN Switzerland NaN NaN \n", "\n", " type fat_content calcium_content texture \\\n", "10 soft, soft-ripened NaN NaN NaN \n", "13 hard, artisan 52% NaN crumbly, firm \n", "19 soft, artisan NaN NaN smooth \n", "48 semi-soft NaN NaN NaN \n", "50 soft NaN NaN NaN \n", "... ... ... ... ... \n", "1172 semi-soft NaN NaN creamy \n", "1175 semi-hard NaN NaN chewy, firm, stringy \n", "1177 semi-firm NaN NaN dense, elastic \n", "1181 soft NaN NaN NaN \n", "1185 semi-hard, artisan NaN NaN firm, supple \n", "\n", " rind color flavor \\\n", "10 NaN NaN buttery \n", "13 NaN NaN burnt caramel, citrusy, herbaceous \n", "19 cloth wrapped NaN spicy, strong \n", "48 NaN NaN bitter \n", "50 NaN NaN NaN \n", "... ... ... ... 
\n", "1172 NaN NaN creamy, spicy \n", "1175 NaN NaN NaN \n", "1177 NaN NaN salty, smokey , sour \n", "1181 NaN NaN sharp \n", "1185 waxed NaN nutty \n", "\n", " aroma vegetarian vegan synonyms \\\n", "10 fresh, herbal False False NaN \n", "13 fruity True False NaN \n", "19 NaN False False NaN \n", "48 milky, mushroom False False NaN \n", "50 NaN False False NaN \n", "... ... ... ... ... \n", "1172 aromatic, rich True False NaN \n", "1175 NaN NaN NaN NaN \n", "1177 NaN NaN NaN Georgian Pickle Cheese \n", "1181 NaN True False NaN \n", "1185 nutty, sweet False False NaN \n", "\n", " alt_spellings \\\n", "10 NaN \n", "13 NaN \n", "19 NaN \n", "48 NaN \n", "50 NaN \n", "... ... \n", "1172 NaN \n", "1175 NaN \n", "1177 Megruli Sulguni, Shebolili Megruli Sulguni \n", "1181 NaN \n", "1185 NaN \n", "\n", " producers \n", "10 NaN \n", "13 NaN \n", "19 NaN \n", "48 NaN \n", "50 NaN \n", "... ... \n", "1172 Highland Fine Cheeses Limited \n", "1175 NaN \n", "1177 NaN \n", "1181 High Weald Dairy \n", "1185 NaN \n", "\n", "[142 rows x 19 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data[data[\"color\"].isnull()]" ] }, { "cell_type": "code", "execution_count": 7, "id": "a0a77563-518e-4808-b744-9fc0c76763fe", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1162\n", "939\n" ] } ], "source": [ "print(len(data[pd.isnull(data[\"calcium_content\"])]))\n", "print(len(data[pd.isnull(data[\"fat_content\"])]))" ] }, { "cell_type": "code", "execution_count": 8, "id": "c8489ffa-1067-4eb7-b65a-2fa18fdb4b04", "metadata": {}, "outputs": [], "source": [ "# Remove six columns from the working frame in a single call.\n", "# Unlike `del data[col]`, drop(..., errors=\"ignore\") does not raise KeyError when this\n", "# cell is re-run after the columns are already gone, so the cell stays idempotent.\n", "data = data.drop(\n", "    columns=[\"alt_spellings\", \"producers\", \"calcium_content\", \"url\", \"fat_content\", \"synonyms\"],\n", "    errors=\"ignore\",\n", ")" ] }, { "cell_type": "code", "execution_count": 9, "id": "5379265a-cd49-41fa-845c-bfae33bb8f5a", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cheesemilkcountryregionfamilytypetexturerindcolorflavoraromavegetarianvegan
0AarewassercowSwitzerlandNaNNaNsemi-softbutterywashedyellowsweetbutteryFalseFalse
1Abbaye de BellocsheepFrancePays BasqueNaNsemi-hard, artisancreamy, dense, firmnaturalyellowburnt caramellanolineTrueFalse
2Abbaye de BelvalcowFranceNaNNaNsemi-hardelasticwashedivoryNaNaromaticFalseFalse
3Abbaye de CiteauxcowFranceBurgundyNaNsemi-soft, artisan, brinedcreamy, dense, smoothwashedwhiteacidic, milky, smoothbarnyardy, earthyFalseFalse
4Abbaye de TamiécowFranceSavoieNaNsoft, artisancreamy, open, smoothwashedwhitefruity, nuttyperfumed, pungentFalseFalse
..........................................
1182SveciaostcowSwedenLow-laying regionsNaNsemi-hard, brinedcreamy, supplerindlesspale yellowacidicNaNFalseFalse
1183SwaggoatAustraliaSouth AustraliaNaNfresh firm, artisancreamy, crumblyash coatedwhiteacidic, creamyfreshTrueFalse
1184SwaledalesheepEnglandSwaledale, North YorkshireNaNhardsemi firmNaNyellowsmooth, sweetfloralTrueFalse
1185Sweet Style SwissNaNSwitzerlandNaNNaNsemi-hard, artisanfirm, supplewaxedNaNnuttynutty, sweetFalseFalse
1186Swiss cheesecowUnited StatesNaNSwiss Cheesehard, artisan, processedfirmrindlesspale yellownutty, sweetNaNTrueFalse
\n", "

1187 rows × 13 columns

\n", "
" ], "text/plain": [ " cheese milk country region \\\n", "0 Aarewasser cow Switzerland NaN \n", "1 Abbaye de Belloc sheep France Pays Basque \n", "2 Abbaye de Belval cow France NaN \n", "3 Abbaye de Citeaux cow France Burgundy \n", "4 Abbaye de Tamié cow France Savoie \n", "... ... ... ... ... \n", "1182 Sveciaost cow Sweden Low-laying regions \n", "1183 Swag goat Australia South Australia \n", "1184 Swaledale sheep England Swaledale, North Yorkshire \n", "1185 Sweet Style Swiss NaN Switzerland NaN \n", "1186 Swiss cheese cow United States NaN \n", "\n", " family type texture \\\n", "0 NaN semi-soft buttery \n", "1 NaN semi-hard, artisan creamy, dense, firm \n", "2 NaN semi-hard elastic \n", "3 NaN semi-soft, artisan, brined creamy, dense, smooth \n", "4 NaN soft, artisan creamy, open, smooth \n", "... ... ... ... \n", "1182 NaN semi-hard, brined creamy, supple \n", "1183 NaN fresh firm, artisan creamy, crumbly \n", "1184 NaN hard semi firm \n", "1185 NaN semi-hard, artisan firm, supple \n", "1186 Swiss Cheese hard, artisan, processed firm \n", "\n", " rind color flavor aroma \\\n", "0 washed yellow sweet buttery \n", "1 natural yellow burnt caramel lanoline \n", "2 washed ivory NaN aromatic \n", "3 washed white acidic, milky, smooth barnyardy, earthy \n", "4 washed white fruity, nutty perfumed, pungent \n", "... ... ... ... ... \n", "1182 rindless pale yellow acidic NaN \n", "1183 ash coated white acidic, creamy fresh \n", "1184 NaN yellow smooth, sweet floral \n", "1185 waxed NaN nutty nutty, sweet \n", "1186 rindless pale yellow nutty, sweet NaN \n", "\n", " vegetarian vegan \n", "0 False False \n", "1 True False \n", "2 False False \n", "3 False False \n", "4 False False \n", "... ... ... 
\n", "1182 False False \n", "1183 True False \n", "1184 True False \n", "1185 False False \n", "1186 True False \n", "\n", "[1187 rows x 13 columns]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] }, { "cell_type": "code", "execution_count": 10, "id": "633ed80e-e416-41f6-ae58-b86ce4c132af", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cheesemilkcountryregionfamilytypetexturerindcolorflavoraromavegetarianvegan
0AarewassercowSwitzerlandNaNNaNsemi-softbutterywashedyellowsweetbutteryFalseFalse
1Abbaye de BellocsheepFrancePays BasqueNaNsemi-hard, artisancreamy, dense, firmnaturalyellowburnt caramellanolineTrueFalse
2Abbaye de BelvalcowFranceNaNNaNsemi-hardelasticwashedivoryNaNaromaticFalseFalse
3Abbaye de CiteauxcowFranceBurgundyNaNsemi-soft, artisan, brinedcreamy, dense, smoothwashedwhiteacidic, milky, smoothbarnyardy, earthyFalseFalse
4Abbaye de TamiécowFranceSavoieNaNsoft, artisancreamy, open, smoothwashedwhitefruity, nuttyperfumed, pungentFalseFalse
..........................................
1182SveciaostcowSwedenLow-laying regionsNaNsemi-hard, brinedcreamy, supplerindlesspale yellowacidicNaNFalseFalse
1183SwaggoatAustraliaSouth AustraliaNaNfresh firm, artisancreamy, crumblyash coatedwhiteacidic, creamyfreshTrueFalse
1184SwaledalesheepEnglandSwaledale, North YorkshireNaNhardsemi firmNaNyellowsmooth, sweetfloralTrueFalse
1185Sweet Style SwissNaNSwitzerlandNaNNaNsemi-hard, artisanfirm, supplewaxedNaNnuttynutty, sweetFalseFalse
1186Swiss cheesecowUnited StatesNaNSwiss Cheesehard, artisan, processedfirmrindlesspale yellownutty, sweetNaNTrueFalse
\n", "

1181 rows × 13 columns

\n", "
" ], "text/plain": [ " cheese milk country region \\\n", "0 Aarewasser cow Switzerland NaN \n", "1 Abbaye de Belloc sheep France Pays Basque \n", "2 Abbaye de Belval cow France NaN \n", "3 Abbaye de Citeaux cow France Burgundy \n", "4 Abbaye de Tamié cow France Savoie \n", "... ... ... ... ... \n", "1182 Sveciaost cow Sweden Low-laying regions \n", "1183 Swag goat Australia South Australia \n", "1184 Swaledale sheep England Swaledale, North Yorkshire \n", "1185 Sweet Style Swiss NaN Switzerland NaN \n", "1186 Swiss cheese cow United States NaN \n", "\n", " family type texture \\\n", "0 NaN semi-soft buttery \n", "1 NaN semi-hard, artisan creamy, dense, firm \n", "2 NaN semi-hard elastic \n", "3 NaN semi-soft, artisan, brined creamy, dense, smooth \n", "4 NaN soft, artisan creamy, open, smooth \n", "... ... ... ... \n", "1182 NaN semi-hard, brined creamy, supple \n", "1183 NaN fresh firm, artisan creamy, crumbly \n", "1184 NaN hard semi firm \n", "1185 NaN semi-hard, artisan firm, supple \n", "1186 Swiss Cheese hard, artisan, processed firm \n", "\n", " rind color flavor aroma \\\n", "0 washed yellow sweet buttery \n", "1 natural yellow burnt caramel lanoline \n", "2 washed ivory NaN aromatic \n", "3 washed white acidic, milky, smooth barnyardy, earthy \n", "4 washed white fruity, nutty perfumed, pungent \n", "... ... ... ... ... \n", "1182 rindless pale yellow acidic NaN \n", "1183 ash coated white acidic, creamy fresh \n", "1184 NaN yellow smooth, sweet floral \n", "1185 waxed NaN nutty nutty, sweet \n", "1186 rindless pale yellow nutty, sweet NaN \n", "\n", " vegetarian vegan \n", "0 False False \n", "1 True False \n", "2 False False \n", "3 False False \n", "4 False False \n", "... ... ... 
\n", "1182 False False \n", "1183 True False \n", "1184 True False \n", "1185 False False \n", "1186 True False \n", "\n", "[1181 rows x 13 columns]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data=data.dropna(subset=[\"country\",\"region\"], how=\"all\")\n", "data" ] }, { "cell_type": "code", "execution_count": null, "id": "e28eb2ee-1bed-4dd7-8133-c99b1f7ea26b", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 11, "id": "5a4c0e30-8535-498b-9a9e-0d7d232d4eb7", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_39592/2649664722.py:2: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " data[\"country\"]=data[\"country\"].fillna(\"\")\n", "/tmp/ipykernel_39592/2649664722.py:3: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " data[\"region\"]=data[\"region\"].fillna(\"\")\n" ] } ], "source": [ "data[[\"country\",\"region\"]]\n", "data[\"country\"]=data[\"country\"].fillna(\"\")\n", "data[\"region\"]=data[\"region\"].fillna(\"\")" ] }, { "cell_type": "code", "execution_count": 35, "id": "0edcee6f-a2c3-4804-a665-507b77f2651b", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_39592/3582726305.py:1: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See 
the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " data[\"location\"]=data[\"region\"]+\", \"+data[\"country\"]\n" ] } ], "source": [ "data[\"location\"]=data[\"region\"]+\", \"+data[\"country\"]" ] }, { "cell_type": "code", "execution_count": 36, "id": "017e30da-4f60-4d16-b6d4-56fb76acb740", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cheesemilkcountryregionfamilytypetexturerindcolorflavoraromavegetarianveganlocation
0AarewassercowSwitzerlandNaNsemi-softbutterywashedyellowsweetbutteryFalseFalse, Switzerland
1Abbaye de BellocsheepFrancePays BasqueNaNsemi-hard, artisancreamy, dense, firmnaturalyellowburnt caramellanolineTrueFalsePays Basque, France
2Abbaye de BelvalcowFranceNaNsemi-hardelasticwashedivoryNaNaromaticFalseFalse, France
3Abbaye de CiteauxcowFranceBurgundyNaNsemi-soft, artisan, brinedcreamy, dense, smoothwashedwhiteacidic, milky, smoothbarnyardy, earthyFalseFalseBurgundy, France
4Abbaye de TamiécowFranceSavoieNaNsoft, artisancreamy, open, smoothwashedwhitefruity, nuttyperfumed, pungentFalseFalseSavoie, France
.............................................
1182SveciaostcowSwedenLow-laying regionsNaNsemi-hard, brinedcreamy, supplerindlesspale yellowacidicNaNFalseFalseLow-laying regions, Sweden
1183SwaggoatAustraliaSouth AustraliaNaNfresh firm, artisancreamy, crumblyash coatedwhiteacidic, creamyfreshTrueFalseSouth Australia, Australia
1184SwaledalesheepEnglandSwaledale, North YorkshireNaNhardsemi firmNaNyellowsmooth, sweetfloralTrueFalseSwaledale, North Yorkshire, England
1185Sweet Style SwissNaNSwitzerlandNaNsemi-hard, artisanfirm, supplewaxedNaNnuttynutty, sweetFalseFalse, Switzerland
1186Swiss cheesecowUnited StatesSwiss Cheesehard, artisan, processedfirmrindlesspale yellownutty, sweetNaNTrueFalse, United States
\n", "

1181 rows × 14 columns

\n", "
" ], "text/plain": [ " cheese milk country region \\\n", "0 Aarewasser cow Switzerland \n", "1 Abbaye de Belloc sheep France Pays Basque \n", "2 Abbaye de Belval cow France \n", "3 Abbaye de Citeaux cow France Burgundy \n", "4 Abbaye de Tamié cow France Savoie \n", "... ... ... ... ... \n", "1182 Sveciaost cow Sweden Low-laying regions \n", "1183 Swag goat Australia South Australia \n", "1184 Swaledale sheep England Swaledale, North Yorkshire \n", "1185 Sweet Style Swiss NaN Switzerland \n", "1186 Swiss cheese cow United States \n", "\n", " family type texture \\\n", "0 NaN semi-soft buttery \n", "1 NaN semi-hard, artisan creamy, dense, firm \n", "2 NaN semi-hard elastic \n", "3 NaN semi-soft, artisan, brined creamy, dense, smooth \n", "4 NaN soft, artisan creamy, open, smooth \n", "... ... ... ... \n", "1182 NaN semi-hard, brined creamy, supple \n", "1183 NaN fresh firm, artisan creamy, crumbly \n", "1184 NaN hard semi firm \n", "1185 NaN semi-hard, artisan firm, supple \n", "1186 Swiss Cheese hard, artisan, processed firm \n", "\n", " rind color flavor aroma \\\n", "0 washed yellow sweet buttery \n", "1 natural yellow burnt caramel lanoline \n", "2 washed ivory NaN aromatic \n", "3 washed white acidic, milky, smooth barnyardy, earthy \n", "4 washed white fruity, nutty perfumed, pungent \n", "... ... ... ... ... \n", "1182 rindless pale yellow acidic NaN \n", "1183 ash coated white acidic, creamy fresh \n", "1184 NaN yellow smooth, sweet floral \n", "1185 waxed NaN nutty nutty, sweet \n", "1186 rindless pale yellow nutty, sweet NaN \n", "\n", " vegetarian vegan location \n", "0 False False , Switzerland \n", "1 True False Pays Basque, France \n", "2 False False , France \n", "3 False False Burgundy, France \n", "4 False False Savoie, France \n", "... ... ... ... 
\n", "1182 False False Low-laying regions, Sweden \n", "1183 True False South Australia, Australia \n", "1184 True False Swaledale, North Yorkshire, England \n", "1185 False False , Switzerland \n", "1186 True False , United States \n", "\n", "[1181 rows x 14 columns]" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] }, { "cell_type": "code", "execution_count": 37, "id": "bf9d5b2a-bd47-4c4c-85c4-5ad7769a3f31", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "430" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(set(data[\"location\"]))" ] }, { "cell_type": "code", "execution_count": 44, "id": "e7ae8f76-b33b-42ce-9dea-9fab9e33069e", "metadata": {}, "outputs": [], "source": [ "locs=list(set(loc for loc in data[\"location\"]))" ] }, { "cell_type": "code", "execution_count": 49, "id": "fb044984-c33c-492c-91a2-4e9fff29ceb3", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Oristano, Italy',\n", " 'Isere, France',\n", " 'Romanian Carpathians, Romania',\n", " 'French Basque Country, Midi-Pyrénées, France',\n", " 'Kilmallock County Limerick, Ireland',\n", " ', Australia',\n", " 'massif des Causses, France',\n", " 'Basque, Pyrenees Mountains, France',\n", " 'Modena, Italy',\n", " 'Vermont, United States',\n", " 'Rhone-Alps, France',\n", " 'Brooklyn NY, United States',\n", " 'Coast of Oregon, United States',\n", " 'Veneto, Trentino, Italy',\n", " 'Emilia Romagna, Italy',\n", " ', Middle East',\n", " 'Carmarthenshire, Wales',\n", " ', Egypt, Lebanon, Syria',\n", " 'Jura, Switzerland',\n", " ', Australia, France',\n", " 'Rio Grande do Sul, Brazil',\n", " ', Turkey',\n", " 'Banks Peninsular in Canterbury, New Zealand',\n", " 'Loire Valley, France',\n", " 'Berry, France',\n", " 'Seattle, Washington, United States',\n", " 'Marathon, NY, United States',\n", " 'Setubal, Palmela and Sesimbra, Portugal',\n", " 'Central Balkan Mountains, Bulgaria',\n", " 'North 
Wootton, England',\n", " 'Allagau, Bavarian Alps, Germany',\n", " 'Colorado, United States',\n", " ', Armenia',\n", " 'Lebanon, CT, United States',\n", " 'Centre-Val de Loire, France',\n", " 'Barcelona, Spain',\n", " 'South West England, England',\n", " 'Laqueuille, France',\n", " 'Fethard, Co Tipperary, Ireland',\n", " 'Mornington Peninsula, Melbourne, Australia',\n", " 'Odell, Bedfordshire, England, Great Britain, United Kingdom',\n", " 'Shelburne Farms, United States',\n", " ', Israel',\n", " 'South Australia, Australia',\n", " 'Bornholm, Denmark',\n", " 'Pyrénées, France',\n", " 'Sardinia & Campania, Italy',\n", " 'Hunter Valley, Australia',\n", " 'Greenville, Indiana, United States',\n", " 'Pokolbin, Hunter Valley, Australia',\n", " 'Landford, England, Great Britain, United Kingdom',\n", " 'Carrigtwohill, Ireland',\n", " 'Minnesota, United States',\n", " 'Lombardy, Italy',\n", " 'County Cavan, Ireland',\n", " ', Argentina',\n", " ', France',\n", " 'Avila, Spain',\n", " 'Crotone, Italy',\n", " 'New Hampshire, United States',\n", " ', Serbia',\n", " 'Lombardy, Canada, Italy',\n", " 'Pays d’Auge, Normandy, France',\n", " 'Wisconsin, United States',\n", " 'Tomales, California, United States',\n", " ', Netherlands',\n", " 'Serra da Estrela, Portugal',\n", " 'St. 
Gallen (canton), Tufertschwil, Switzerland',\n", " 'Extremadura, Spain',\n", " ', Spain',\n", " 'Pesaro-Urbino, Italy',\n", " 'New York, France, United States',\n", " 'Northern Holland, Netherlands',\n", " 'South West England, United Kingdom',\n", " 'Mankato, MN, United States',\n", " 'Charentes-Poitou, France',\n", " 'Postel, Belgium',\n", " 'Auvergne, Salers, France',\n", " 'Leiden, Netherlands',\n", " 'Apulia, Italy, United States',\n", " ', Afghanistan',\n", " 'Canary Islands, Spain',\n", " ', Great Britain, United Kingdom, Wales',\n", " 'Oregon Coast Range, United States',\n", " 'Calabria, Italy',\n", " 'Utah, United States',\n", " 'Gâtinais, France',\n", " 'North Yorkshire, England',\n", " 'Carnia, Italy',\n", " 'Passendale, Belgium',\n", " \"Lucerne, Schwyz, Unterwald, and Zoug, and the following additional places: Muri district in d'Argovi, Switzerland\",\n", " 'Carrigtwohill, ',\n", " 'Adamstown, Co Wexford, Ireland',\n", " 'Bermondsey, London, England, Great Britain, United Kingdom',\n", " 'Lapland, Finland',\n", " 'Tasmania, Australia',\n", " 'Staffordshire, England, Great Britain, United Kingdom',\n", " 'Ile de France, France',\n", " 'Lodi, Italy',\n", " 'Macedonia, Thrace, Thessalia, Peloponissos, Ionian Islands, Aegean islands, Crete Island and Epirus, Greece',\n", " ', United Kingdom, United States',\n", " 'Southern California, United States',\n", " 'South West England, England, United Kingdom',\n", " 'Haute Vienne, France',\n", " 'Menorca, Balearic Islands, Spain',\n", " 'Aberdeenshire, Scotland',\n", " 'Kimball, United States',\n", " 'Schoonrewoerd, Leerdam, Netherlands',\n", " 'Friuli-Venezia Giulia, Italy',\n", " 'Larzac, France',\n", " 'Nottinghamshire, England, Great Britain, United Kingdom',\n", " 'Charm, Ohio, United States',\n", " 'Troyes , Aube, France',\n", " ', Denmark, Finland, Germany, Iceland, Norway, Sweden',\n", " 'Québec, Canada',\n", " 'Herault, France',\n", " 'Pullman, Washington, United States',\n", " 'Dumfries, Scotland',\n", " 
'Tieton, Washington, United States',\n", " 'Murazzano, Italy',\n", " 'Pennsylvania, United States',\n", " 'Co. Carlow, Ireland',\n", " 'Languedoc-Roussillon, France',\n", " 'Aquitaine, France',\n", " 'Franche Comté, France, Switzerland',\n", " 'Duhallow, Ireland',\n", " 'Gippsland, Victoria, Australia',\n", " 'British Columbia, Canada',\n", " ', Austria, Germany',\n", " 'Hamilton, New Zealand',\n", " 'Roncq, France',\n", " 'St. Louis, Missouri, United States',\n", " 'Iowa, United States',\n", " 'Burgund, France',\n", " 'Pembrokeshire, Great Britain, United Kingdom, Wales',\n", " 'Ballarat, Victoria, Australia',\n", " 'Nord-Pas-de-Calais, France',\n", " 'Aconcagua, Chile',\n", " 'Massachusetts, United States',\n", " 'North Yorkshire, England, Great Britain, United Kingdom',\n", " 'Bregenzerwald, Kleinwalsertal, Großwalsertal, Laiblachtal (Pfänderstock) and Rheintal, Austria',\n", " ', Denmark',\n", " 'Ile-de-France/Champagne, France',\n", " 'Castilla Leon, Spain',\n", " 'Asiago, Italy',\n", " 'Manitoba, Canada',\n", " 'Cheshire, England, Great Britain, United Kingdom',\n", " 'County Tipperary, Clogheen, Ireland',\n", " 'Valpadana, Italy',\n", " 'Severn Valley, England, Great Britain, United Kingdom',\n", " 'Co Clare, Ireland',\n", " 'Timsbury, Somerset, England, Great Britain, United Kingdom',\n", " 'Somerset, England, Great Britain, United Kingdom',\n", " 'Greensboro, VT, United States',\n", " 'Dorset, England, Great Britain, United Kingdom',\n", " 'Orkney Islands, Great Britain, Scotland, United Kingdom',\n", " 'East Midlands, England',\n", " 'Northeastern Brazil, Brazil',\n", " 'NY, United States',\n", " ', England, Great Britain, United Kingdom',\n", " 'Alba, Italy',\n", " 'Fife, Scotland',\n", " 'Stranraer, England, Scotland, United Kingdom',\n", " 'Castelo Branco, Fundão and Idanha-a-Nova, Portugal',\n", " 'Moliterno, Italy',\n", " 'Dorset, England',\n", " 'Cornwall, England, Great Britain, United Kingdom',\n", " 'Northwest, United States',\n", " 'Carneros, 
Sonoma, California, United States',\n", " 'Croisy-sur-Eure, France',\n", " 'Pyrenees-Atlantiques, France',\n", " 'Västra Götaland, Sweden',\n", " 'Brisbane, Australia',\n", " 'Campania, Paestum, Foggia, Italy',\n", " 'Cevenes, France',\n", " ', England',\n", " 'Corsica, France',\n", " ', Austria',\n", " 'Ann Arbor, Michigan, United States',\n", " 'Po valley region, Italy',\n", " 'Banon, France',\n", " 'Herefordshire, West Midlands, United Kingdom',\n", " 'Zasavica, Serbia',\n", " ', United Kingdom, Wales',\n", " 'Lower Normandy, France',\n", " 'Piave Valley, Italy, Italy',\n", " 'Nicasio, United States',\n", " 'Beara Peninsula, Co. Cork, Ireland',\n", " 'Cotswolds, England, Great Britain, United Kingdom',\n", " 'Missouri, United States',\n", " 'Rhone Valley, France',\n", " 'California, United States',\n", " 'Oxfordshire, Great Britain',\n", " 'Petaluma, California, United States',\n", " 'Prince Edward Island, Canada',\n", " 'North Cornwall, England',\n", " 'Coquet, England',\n", " 'Emilia-Romagna, Italy',\n", " 'Castille-Leon, Spain',\n", " ', Cyprus',\n", " 'Bethania, United Kingdom',\n", " 'Puimichel in Provence Alpes, France',\n", " 'Gloucestershire County, England, United Kingdom',\n", " 'Galicia, Spain',\n", " 'Flanders, Belgium',\n", " ', Canada, Denmark, France, Germany, Netherlands, United States',\n", " 'Lanarkshire, Great Britain, Scotland, United Kingdom',\n", " 'Pays Basque, France',\n", " 'Veneto, Italy',\n", " ', Scotland',\n", " 'Victoria, Australia',\n", " 'Co Limerick, Ireland',\n", " 'Treviso, Veneto, Italy',\n", " 'Wales, Great Britain',\n", " 'Serra da Canastra, Minas Gerais state, Brazil',\n", " 'Devon, England, United Kingdom',\n", " 'Gevrey-Chambertin, Burgundy, France',\n", " ', Brazil',\n", " 'Gloucestershire, England, Great Britain, United Kingdom',\n", " 'Averyon, France',\n", " 'Midi-Pyrénées, France',\n", " 'Tipperary, Ireland',\n", " 'Maribo, Denmark',\n", " 'province of Brittany, France',\n", " 'Bavaria, Germany',\n", " ', United 
States',\n", " 'Northern Wisconsin, United States',\n", " 'New York, United States',\n", " 'Blarney, Ireland',\n", " 'Stewarton, Scotland, United Kingdom',\n", " 'Monterey, California, United States',\n", " 'Wigtownshire, Scotland',\n", " 'Cornwall, England',\n", " 'Co. Offaly, Ireland',\n", " 'Karlovy Vary, Czech Republic',\n", " 'Quebec, Canada',\n", " 'Laruns, France',\n", " 'Maine, United States',\n", " ', Mauritania',\n", " 'Piemonte, Italy',\n", " 'Brittany, France',\n", " 'Poitou-Charentes, France',\n", " 'Swaledale, North Yorkshire, England',\n", " 'Timsbury, Somerset, England, Scotland, Wales',\n", " ', Iceland',\n", " 'Oviken, Sweden',\n", " 'Pyrénées-Atlantiques, France',\n", " 'Pinconning, Michigan, United States',\n", " 'Belvederis, Lithuania',\n", " 'Chelmarsh, Bridgnorth, Shropshire, England',\n", " 'Peekskill, United States',\n", " 'Ceredigion, United Kingdom',\n", " 'All Holland, Netherlands',\n", " 'Normandy, France',\n", " 'Inagh, Co Clare, ',\n", " 'Amou, Gascony, France',\n", " 'Colby, Wisconsin, United States',\n", " 'Southwestern Wisconsin, United States',\n", " 'Lincolnshire, England, United Kingdom',\n", " 'Bas-Languedoc, Comtat Venaissin, France',\n", " ', United Kingdom',\n", " ', Switzerland',\n", " ', Portugal',\n", " 'Comox Valley, Vancouver Island, Canada',\n", " 'Derbyshire, Leicestershire, Nottinghamshire, England',\n", " 'Huizen, Netherlands',\n", " 'Asturias, Spain',\n", " 'Piedmont, Italy',\n", " ', Canada, India, United States',\n", " 'County Antrim, Ireland',\n", " ', Italy',\n", " 'St Antoine, France',\n", " ', Hungary',\n", " 'County Carlow, Ireland',\n", " 'Provencale, France',\n", " ', Sweden',\n", " 'Allgau, Germany',\n", " 'North Carolina, United States',\n", " 'Basilicata, Italy',\n", " ', Canada',\n", " 'Beira Baixa Province, Portugal',\n", " 'Ann Arbor, MI, United States',\n", " ', China, Nepal, Tibet',\n", " 'Bursa, Turkey',\n", " 'Websterville, VT, United States',\n", " 'Kent, United Kingdom',\n", " 'County Wexford, 
Ireland',\n", " 'island wide, Cyprus',\n", " 'Pyrenees, France',\n", " ', Mexico',\n", " 'Duchy of Limburg, Belgium, Germany, Netherlands',\n", " 'Cotherstone, England, United Kingdom',\n", " 'Richfield, Wisconsin, United States',\n", " 'Pienza, Italy',\n", " 'Dalmatia, Croatia',\n", " 'Taxco, Mexico',\n", " 'Plessisville, Quebec, Canada, France',\n", " ', Lebanon, Middle East',\n", " 'Mols, Denmark',\n", " '+, Cyprus, Egypt, Israel, Jordan, Lebanon, Middle East, Syria',\n", " 'Prince Edward County, Ontario, Canada',\n", " ', Belgium',\n", " 'Auvergne, France',\n", " ', Canada, United States',\n", " 'Upper Corsica, France',\n", " 'New South Wales, Australia',\n", " 'Tibet, China, Nepal, Tibet',\n", " 'West Bengal, India',\n", " ', Iraq',\n", " 'Béarnaise in Pyrénées-Atlantique, France',\n", " 'Co. Cork, Ireland',\n", " 'Vorarlberg, Austria',\n", " 'Midi-Pyrenees, France',\n", " ', Ireland',\n", " 'Wallonia, Belgium',\n", " 'Devon, England',\n", " 'South East England, United Kingdom',\n", " 'Indiana, United States',\n", " 'Island of Pag, Croatia',\n", " 'Sonoma, California, United States',\n", " 'Port Townsend, United States',\n", " 'old Liburnia (Dalmatia), Croatia',\n", " ', Mexico and Caribbean',\n", " 'North Yorkshire, England, United Kingdom',\n", " 'Charentes, France',\n", " 'La Velle, Wisconsin, United States',\n", " ', Germany',\n", " 'Monterey, California, Mexico, United States',\n", " 'Roxburghshire, Scotland, United Kingdom',\n", " 'Minas Gerais, Brazil',\n", " 'Sardinia (Italy), Southern Corsica (France), France, Italy',\n", " ', Hungary, Poland, Slovakia',\n", " 'Allgaeu Alps, Germany',\n", " 'Orkney Isles, Scotland, United Kingdom',\n", " 'East Sussex, United Kingdom',\n", " 'Póvoa de Lanhoso, Portugal',\n", " 'Central and Western Macedonia, Thessalia, Greece',\n", " 'Friuli Venezia Giulia and Veneto, Italy',\n", " 'Somerset, England, United Kingdom',\n", " 'Brickhill, Co. 
Clare, Ireland',\n", " 'New York, France',\n", " 'Burgundy, France',\n", " ', Holland',\n", " 'Gujarat, India',\n", " 'Pembrokeshire, United Kingdom',\n", " 'Milford, NJ, United States',\n", " 'Campania, Italy',\n", " ', Mongolia',\n", " 'Valencia, Spain',\n", " 'Tibet, China, Tibet',\n", " 'Bourgogne, France',\n", " 'Bjurholm, Sweden',\n", " 'Het Groene Hart, Netherlands',\n", " 'Kinfauns, Perthshire, Scotland',\n", " 'Stoneyford, Ireland',\n", " 'West Pawlet, VT, United States',\n", " 'Sebastopol, California, United States',\n", " 'Airedale farming district, New Zealand',\n", " 'Wales, London, United Kingdom, Wales',\n", " 'Tain, Scotland',\n", " 'Naples, Italy',\n", " 'Chirac, France',\n", " 'Fornells de la Selva, Gironès, Spain',\n", " 'Canton of Glarus, Switzerland',\n", " ', Belgium, Canada, France, Switzerland, United States',\n", " 'Tuscany, Italy',\n", " 'Haute-Savoie / Upper Savoy, France',\n", " 'Cornwall, ',\n", " 'Centre , the department of Loiret, France',\n", " ', Poland',\n", " 'Rhône-Alpes, France',\n", " 'Trakia, Albania, Bulgaria, Croatia, Greece, Israel, Macedonia, Romania, Serbia',\n", " 'Somerset, England',\n", " 'North East Victoria, Australia',\n", " 'Sardegna, Italy',\n", " 'Fairview, United States',\n", " 'Murcia, Spain',\n", " 'Languedoc, France',\n", " 'Veneto, ',\n", " 'Umbria, Lazio, Italy',\n", " 'Sulzberg, Austria',\n", " 'Queenstown, New Zealand',\n", " 'Bergues, France',\n", " 'Piora Valley, Switzerland',\n", " 'Savoie, France',\n", " 'Galax, Virginia, United States',\n", " ', New Zealand',\n", " 'Devon, England, Great Britain, United Kingdom',\n", " 'Avesnes, France',\n", " 'Dumfriesshire, Scotland, United Kingdom',\n", " ', Greece',\n", " 'Franche Comté, France',\n", " 'Ontario, Canada',\n", " 'North East Victoria, ',\n", " 'Oregon, United States',\n", " 'Loire, France',\n", " 'Friuli-Venezia Giulia and the Veneto, Italy',\n", " 'Virginia, United States',\n", " 'Central and Western Macedonia, Thessaly, Greece',\n", " 'Co. 
Mayo, Ireland',\n", " 'Normandy, Auvilliers, France',\n", " 'Bloomdale, United States',\n", " 'Azores, Portugal',\n", " 'Georgia, United States',\n", " 'Swabia, Germany',\n", " 'Äänekoski, Finland',\n", " 'Lazio, Sardinia, Italy',\n", " 'Buxton, Derbyshire, England, United Kingdom',\n", " 'Inagh, Co Clare, Ireland',\n", " 'Landshut, Germany',\n", " 'East Midlands, England, Great Britain, United Kingdom',\n", " 'East Midlands, England, United Kingdom',\n", " 'Stawley, near Wellington, Somerset, England, Great Britain, United Kingdom',\n", " 'Allgäu, Germany',\n", " ', Bangladesh, India',\n", " 'Stonegate, East Sussex, England, Great Britain, United Kingdom',\n", " 'Castile-Leon, Spain',\n", " 'New Jersey, United States',\n", " 'Svaneti, Samegrelo, Georgia',\n", " 'Low-laying regions, Sweden',\n", " 'California, Netherlands, United States',\n", " 'Gravina in Puglia, Murgia, Italy',\n", " 'Aveyron, Laguiole, France',\n", " 'Provence, France',\n", " 'Illoud (Haute-Marne), France',\n", " 'Cumbrian, United Kingdom',\n", " ', Wales',\n", " 'Illinois, United States',\n", " 'Aveyron, France',\n", " 'Anjou, France']" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "locs" ] }, { "cell_type": "code", "execution_count": 40, "id": "debb780e-ec13-4502-ac44-6001335e507d", "metadata": {}, "outputs": [], "source": [ "from geopy.geocoders import Nominatim" ] }, { "cell_type": "code", "execution_count": 41, "id": "eed3ac7b-5283-4d8e-bc26-61e1d821ccaf", "metadata": {}, "outputs": [], "source": [ "geolocator=Nominatim(user_agent=\"toto\")" ] }, { "cell_type": "code", "execution_count": 48, "id": "0043fe0d-e2d2-48f0-8953-ffc3dee52ba6", "metadata": {}, "outputs": [], "source": [ "location=geolocator.geocode(locs[1])" ] }, { "cell_type": "code", "execution_count": null, "id": "710341db-408f-4a4a-a849-65b963582ebc", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": 
"a2bbdb40ac6c43b0bfe08d014970db40", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/430 [00:00]" ] }, "execution_count": 75, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "len(locations)\n", "locations\n", "x=[]\n", "y=[]\n", "for l in locations:\n", " if l is not None:\n", " x.append(l[0])\n", " y.append(l[1])\n", "plt.plot(x,y,\".\")" ] }, { "cell_type": "code", "execution_count": null, "id": "66ce4e4a-7006-411f-abd0-ee94d7cf99b3", "metadata": {}, "outputs": [], "source": [ "def filter_df(df, cols=None):\n", " if cols is None:\n", " cols = [\"milk\", \"country\", \"type\", \"texture\", \"flavor\", \"aroma\", \"family\", \"rind\"]\n", "\n", " df = df.copy()\n", " attributes = set() # Get all the possible attributes (some are mixed in different columns)\n", " for col in cols:\n", " values = set()\n", " for val in set(df[col]):\n", " if type(val) == float: # skip NaN values\n", " continue\n", " values = values.union([x.strip() for x in set(val.split(\",\"))])\n", " attributes = attributes.union(values)\n", " \n", " \n", " row_attrs = [set() for _ in range(len(df))] # get the attributes specific to each row\n", " for col in cols:\n", " for i, row in enumerate(df[col]):\n", " if type(row) != float:\n", " row_attrs[i] = row_attrs[i].union([x.strip() for x in row.split(\",\")])\n", "\n", " for attr in attributes: # Add attributes rows\n", " df[attr] = list(attr in row_attrs[i] for i in range(len(df[col])))\n", " for col in cols:\n", " del df[col]\n", "\n", " return df.copy()" ] }, { "cell_type": "code", "execution_count": null, "id": "fda6aaad-7b1e-4daa-8d28-cd049df9cec2", "metadata": {}, "outputs": [], "source": [ "data_features=filter_df(data)" ] }, { "cell_type": "markdown", "id": "a1b022a3-a2f9-4e39-9e79-48ae9f6adca5", "metadata": {}, "source": [ "## Classification" ] }, { "cell_type": "code", "execution_count": null, "id": "94bcde38-784b-41d9-89b0-3e2e17aa2979", "metadata": {}, "outputs": [], "source": [ "from sklearn import tree" ] }, { "cell_type": "code", "execution_count": null, "id": "7b2b9d3e-d7da-4f43-9e1c-4e62e837ed0b", "metadata": {}, "outputs": [], 
"source": [ "c=tree.DecisionTreeClassifier()" ] }, { "cell_type": "code", "execution_count": null, "id": "0b52b082-0554-45f2-9eff-e6a3ba6a8d08", "metadata": {}, "outputs": [], "source": [ "c.fit(" ] }, { "cell_type": "code", "execution_count": null, "id": "6d0b0d25-3476-4fbb-84c7-008437e87903", "metadata": {}, "outputs": [], "source": [ "data[[\"country\",\"region\"]]" ] }, { "cell_type": "markdown", "id": "100a7c2e-2d24-4814-bd68-4b9f6433ce4d", "metadata": {}, "source": [ "Transformer: la couleur en RGB; la localisation en GPS\n", "1ère question: est-ce que la couleur suffit à savoir d'où ça vient ? \n", "2ème question: est-ce que si on ajoute le type ça marche ? \n", "3ème question: et les caractéristiques gustatives ?\n" ] }, { "cell_type": "code", "execution_count": null, "id": "24e7ff6e-c308-4cc8-aeac-eeb372f4c479", "metadata": {}, "outputs": [], "source": [ "data_features" ] }, { "cell_type": "code", "execution_count": null, "id": "ca969d41-a88a-47d9-b94b-8b633d3d3348", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "e2c9b84f-b899-4c99-abb7-37a9deeafbb5", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "d41c1cfc-1564-4131-8391-c8a8971b9d13", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "42a1e4ed-9a1e-41f8-a322-b5d2de68d24a", "metadata": {}, "outputs": [], "source": [ "data[pd.isnull(data[\"country\"])&pd.isnull(data[\"region\"])]\n" ] }, { "cell_type": "markdown", "id": "038cd38e-3890-4f73-91a7-c30294b3bc5b", "metadata": {}, "source": [ "## Pattern Mining" ] }, { "cell_type": "code", "execution_count": null, "id": "2e6b0dc1-030c-4239-803f-52736a41bcb5", "metadata": {}, "outputs": [], "source": [ "data" ] }, { "cell_type": "code", "execution_count": null, "id": "23b75579-95bb-4889-928f-9c3c1309a18a", "metadata": {}, "outputs": [], "source": [ "apriori(data)" ] }, { "cell_type": "code", 
"execution_count": null, "id": "61959c04-61bf-464a-89ca-72ec4782f927", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "744f8d1d-0874-4b92-921f-5a85ccf598ad", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "f99af4d2-20e9-4bff-802a-dbdb91f95a96", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "fa04bb73-ba5c-4164-a1af-f061d9627557", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "36ff5c84-93f3-4854-b2c4-e6082859c974", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.3" } }, "nbformat": 4, "nbformat_minor": 5 }