Merge branch 'main' of ssh://gitlab.aliens-lyon.fr/fcolinde/cheesedm
This commit is contained in:
commit
4ccbcd3d65
56
cheese.ipynb
56
cheese.ipynb
@ -85,12 +85,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"id": "a0a77563-518e-4808-b744-9fc0c76763fe",
|
"id": "5d76fde3-8c65-4b50-a097-6dd81a68c1ca",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"print(len(data[pd.isnull(data[\"calcium_content\"])]))\n",
|
"data.describe().T.plot(kind='bar')"
|
||||||
"print(len(data[pd.isnull(data[\"fat_content\"])]))"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -98,7 +97,7 @@
|
|||||||
"id": "4590cffd-d4a9-4e15-8fd5-cbb22f048300",
|
"id": "4590cffd-d4a9-4e15-8fd5-cbb22f048300",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Since those two columns have too much null values, we choose to remove them. \n",
|
"Since `calcium_content` and `fat_content` columns have too much null values, we choose to remove them. \n",
|
||||||
"Similarly, we removed other columns we are not interested in: "
|
"Similarly, we removed other columns we are not interested in: "
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -403,7 +402,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"id": "48cbb634-6754-4956-a945-539d329812ef",
|
"id": "979b9eef-9ca2-4299-a4e0-e8d3813f45c6",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"In this part, we achieved to do two things for the classification: create a decision tree on the database and, given a cheese and its characteristics, find where it originates from. \n",
|
"In this part, we achieved to do two things for the classification: create a decision tree on the database and, given a cheese and its characteristics, find where it originates from. \n",
|
||||||
@ -470,6 +469,28 @@
|
|||||||
"We cannot find the precise place a cheese originates from given its characteristic. \n",
|
"We cannot find the precise place a cheese originates from given its characteristic. \n",
|
||||||
"Can we get the country, at least? We are going to try to achieve this using a `DecisionTree`. \n",
|
"Can we get the country, at least? We are going to try to achieve this using a `DecisionTree`. \n",
|
||||||
"\n"
|
"\n"
|
||||||
|
"In short, it seems that we cannot find the region a cheese originates from given its characteristic. "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "7fd507d0-1a68-4cd7-a12e-12c9ab1061e3",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"model.predict(X)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "9faf2aee-84f5-4633-b3de-039af42d31d3",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"yprime=pd.DataFrame(model.predict(X),columns=[\"latitude\",\"longitude\"])"
|
||||||
|
>>>>>>> 6d6969d60cf151787c2854b4183625033bc58fb6
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -575,7 +596,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"unused_columns = {\"vegetarian\", \"vegan\", \"cheese\", \"region\", \"color\", \"location\", \"latitude\", \"longitude\", \"country\",\"color_r\",\"color_g\",\"color_b\"}\n",
|
"unused_columns = {\"vegetarian\", \"vegan\", \"cheese\", \"region\", \"color\", \"location\", \"latitude\", \"longitude\", \"country\",\"color_r\",\"color_g\",\"color_b\"}\n",
|
||||||
"data_features_only=data_features.drop(columns=list(unused_columns.intersection(data_features.columns)))\n",
|
"data_features_only=data_features.drop(columns=list(unused_columns.intersection(data_features.columns)))\n",
|
||||||
"data_features_only.shape[1]"
|
"print(\"Number of features:\", data_features_only.shape[1])"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -609,7 +630,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"assoc_rules = association_rules(frequent_itemsets, min_threshold=.5)\n",
|
"assoc_rules = association_rules(frequent_itemsets, min_threshold=.5)\n",
|
||||||
"\n",
|
"assoc_rules=assoc_rules.sort_values(by=['confidence'], ascending=False)\n",
|
||||||
"display(HTML(assoc_rules.to_html()))"
|
"display(HTML(assoc_rules.to_html()))"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -620,38 +641,33 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"assoc_rules[assoc_rules[\"consequents\"].astype(str).str.contains(\"cow\")]"
|
"assoc_rules[assoc_rules[\"antecedents\"].astype(str).str.contains(\"rich\")]"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"id": "84bf779a-36d0-4aa2-b3a2-0da9bb25fc01",
|
"id": "84e2f426-8077-46c7-bc7e-357e631972d2",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"For Pattern Mining, we only kept relevant columns (binary attributes) thus dropping RGB colors and any location based information, keeping only information relevant to the final cheese itself.\n",
|
"For Pattern Mining, we only kept relevant columns (binary attributes) thus dropping RGB colors and any location based information, keeping only information relevant to the final cheese itself.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"We applied the apriori algorithm for frequent itemsets and searched for association rules.\n",
|
"We applied the apriori algorithm for frequent itemsets and searched for association rules.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Est-ce que les fromages artisanaux ont souvent plus de \"goûts\" que les autres ?\n"
|
"If we observe the association rules with the highest degree of confidence, we can interpolate the following statements (then verified to be true):\n",
|
||||||
|
"- cheddar is primarily a cow cheese\n",
|
||||||
|
"- "
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"id": "ce4db7c9-8049-4838-af30-b9fe2bca2925",
|
"id": "104b476d-5531-40e7-8bf6-987f00a8f5c1",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"len(data_features[data_features[\"pecorino\"] == True]), len(data_features[data_features[\"pecorino\"] == False])"
|
"data_f=text_to_boolean(data)\n",
|
||||||
|
"data_f[(data_f[\"bloomy\"] == True)]"
|
||||||
]
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "7d9f17c2-6c42-4f24-b0d0-e8640a661801",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user