diff --git a/src/02b.ipynb b/src/02b.ipynb index bc26e9b..447e675 100644 --- a/src/02b.ipynb +++ b/src/02b.ipynb @@ -44,7 +44,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "b1c664ab", "metadata": {}, "outputs": [], @@ -62,7 +62,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "b53e3b3f", "metadata": { "scrolled": true @@ -86,13 +86,64 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "d05752d1", "metadata": { "scrolled": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "[['citrus fruit', 'semi-finished bread', 'margarine', 'ready soups'],\n", + " ['tropical fruit', 'yogurt', 'coffee'],\n", + " ['whole milk'],\n", + " ['pip fruit', 'yogurt', 'cream cheese', 'meat spreads'],\n", + " ['other vegetables',\n", + " 'whole milk',\n", + " 'condensed milk',\n", + " 'long life bakery product']]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "groceries[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "e3a8e11a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['citrus fruit,semi-finished bread,margarine,ready soups',\n", + " 'tropical fruit,yogurt,coffee',\n", + " 'whole milk',\n", + " 'pip fruit,yogurt,cream cheese,meat spreads',\n", + " 'other vegetables,whole milk,condensed milk,long life bakery product']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "groceries2 = []\n", + "with open('groceries.csv', 'r') as csvfile:\n", + " #csv_reader = reader(csvfile)\n", + " for row in csvfile:\n", + " groceries2.append(row.strip())\n", + "\n", + "groceries2[:5]" + ] }, { "cell_type": "markdown", @@ -112,7 +163,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "e53e450e", "metadata": {}, "outputs": [], @@ -130,11 +181,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "99669672", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "encoder = TransactionEncoder()" + ] }, { "cell_type": "markdown", @@ -146,11 +199,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "9019f8e9", "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "array([[False, False, False, ..., False, False, False],\n", + " [False, False, False, ..., False, True, False],\n", + " [False, False, False, ..., True, False, False],\n", + " ...,\n", + " [False, False, False, ..., False, True, False],\n", + " [False, False, False, ..., False, False, False],\n", + " [False, False, False, ..., False, False, False]])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "transactions = encoder.fit_transform(groceries)\n", + "transactions" + ] }, { "cell_type": "markdown", @@ -162,7 +235,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "c5262ae7", "metadata": {}, "outputs": [], @@ -180,11 +253,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "42ccf29b", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "itemsets = pd.DataFrame(transactions, columns=encoder.columns_)" + ] }, { "cell_type": "markdown", @@ -196,13 +271,222 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "e83bab97", "metadata": { "scrolled": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abrasive cleanerartif. sweetenerbaby cosmeticsbaby foodbagsbaking powderbathroom cleanerbeefberriesbeverages...uht-milkvinegarwaffleswhipped/sour creamwhiskywhite breadwhite winewhole milkyogurtzwieback
0FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
1FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse...FalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
2FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse...FalseFalseFalseFalseFalseFalseFalseTrueFalseFalse
3FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse...FalseFalseFalseFalseFalseFalseFalseFalseTrueFalse
4FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse...FalseFalseFalseFalseFalseFalseFalseTrueFalseFalse
\n", + "

5 rows × 169 columns

\n", + "
" + ], + "text/plain": [ + " abrasive cleaner artif. sweetener baby cosmetics baby food bags \\\n", + "0 False False False False False \n", + "1 False False False False False \n", + "2 False False False False False \n", + "3 False False False False False \n", + "4 False False False False False \n", + "\n", + " baking powder bathroom cleaner beef berries beverages ... uht-milk \\\n", + "0 False False False False False ... False \n", + "1 False False False False False ... False \n", + "2 False False False False False ... False \n", + "3 False False False False False ... False \n", + "4 False False False False False ... False \n", + "\n", + " vinegar waffles whipped/sour cream whisky white bread white wine \\\n", + "0 False False False False False False \n", + "1 False False False False False False \n", + "2 False False False False False False \n", + "3 False False False False False False \n", + "4 False False False False False False \n", + "\n", + " whole milk yogurt zwieback \n", + "0 False False False \n", + "1 False True False \n", + "2 True False False \n", + "3 False True False \n", + "4 True False False \n", + "\n", + "[5 rows x 169 columns]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "itemsets.head()" + ] }, { "cell_type": "markdown", @@ -214,13 +498,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "92c5e631", "metadata": { "scrolled": true }, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 9835 entries, 0 to 9834\n", + "Columns: 169 entries, abrasive cleaner to zwieback\n", + "dtypes: bool(169)\n", + "memory usage: 1.6 MB\n" + ] + } + ], + "source": [ + "itemsets.info()" + ] }, { "cell_type": "markdown", @@ -248,11 +546,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "947456f2", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "from mlxtend.frequent_patterns import apriori" + ] }, { "cell_type": "markdown", @@ -266,11 +566,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "3db7d690", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "frequent_itemsets = apriori(itemsets, min_support=0.015, use_colnames=True)" + ] }, { "cell_type": "markdown", @@ -282,13 +584,123 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "046f5901", "metadata": { "scrolled": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
supportitemsets
00.017692(baking powder)
10.052466(beef)
20.033249(berries)
30.026029(beverages)
40.080529(bottled beer)
.........
1750.023183(root vegetables, other vegetables, whole milk)
1760.017082(other vegetables, tropical fruit, whole milk)
1770.022267(other vegetables, yogurt, whole milk)
1780.015557(rolls/buns, whole milk, yogurt)
1790.015150(yogurt, tropical fruit, whole milk)
\n", + "

180 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " support itemsets\n", + "0 0.017692 (baking powder)\n", + "1 0.052466 (beef)\n", + "2 0.033249 (berries)\n", + "3 0.026029 (beverages)\n", + "4 0.080529 (bottled beer)\n", + ".. ... ...\n", + "175 0.023183 (root vegetables, other vegetables, whole milk)\n", + "176 0.017082 (other vegetables, tropical fruit, whole milk)\n", + "177 0.022267 (other vegetables, yogurt, whole milk)\n", + "178 0.015557 (rolls/buns, whole milk, yogurt)\n", + "179 0.015150 (yogurt, tropical fruit, whole milk)\n", + "\n", + "[180 rows x 2 columns]" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "frequent_itemsets" + ] }, { "cell_type": "markdown", @@ -300,11 +712,121 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "id": "bfba8e98", "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
supportitemsets
710.255516(whole milk)
440.193493(other vegetables)
530.183935(rolls/buns)
600.174377(soda)
720.139502(yogurt)
.........
1630.015252(shopping bags, yogurt)
1790.015150(yogurt, tropical fruit, whole milk)
450.015048(pasta)
110.015048(canned fish)
1670.015048(whole milk, sugar)
\n", + "

180 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " support itemsets\n", + "71 0.255516 (whole milk)\n", + "44 0.193493 (other vegetables)\n", + "53 0.183935 (rolls/buns)\n", + "60 0.174377 (soda)\n", + "72 0.139502 (yogurt)\n", + ".. ... ...\n", + "163 0.015252 (shopping bags, yogurt)\n", + "179 0.015150 (yogurt, tropical fruit, whole milk)\n", + "45 0.015048 (pasta)\n", + "11 0.015048 (canned fish)\n", + "167 0.015048 (whole milk, sugar)\n", + "\n", + "[180 rows x 2 columns]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "frequent_itemsets.sort_values('support', ascending=False)" + ] }, { "cell_type": "markdown", @@ -318,11 +840,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "id": "48ad4460", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "length = frequent_itemsets['itemsets'].str.len()" + ] }, { "cell_type": "markdown", @@ -334,13 +858,15 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "id": "8302997b", "metadata": { "scrolled": true }, "outputs": [], - "source": [] + "source": [ + "rows = length > 2" + ] }, { "cell_type": "markdown", @@ -352,11 +878,88 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "id": "be48d27a", "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
supportitemsets
1740.017895(other vegetables, rolls/buns, whole milk)
1750.023183(root vegetables, other vegetables, whole milk)
1760.017082(other vegetables, tropical fruit, whole milk)
1770.022267(other vegetables, yogurt, whole milk)
1780.015557(rolls/buns, whole milk, yogurt)
1790.015150(yogurt, tropical fruit, whole milk)
\n", + "
" + ], + "text/plain": [ + " support itemsets\n", + "174 0.017895 (other vegetables, rolls/buns, whole milk)\n", + "175 0.023183 (root vegetables, other vegetables, whole milk)\n", + "176 0.017082 (other vegetables, tropical fruit, whole milk)\n", + "177 0.022267 (other vegetables, yogurt, whole milk)\n", + "178 0.015557 (rolls/buns, whole milk, yogurt)\n", + "179 0.015150 (yogurt, tropical fruit, whole milk)" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "frequent_itemsets[rows]" + ] }, { "cell_type": "markdown", @@ -370,11 +973,112 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "id": "93699e5d", "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countmeanstdmin25%50%75%max
itemsets
173.00.0534410.0459560.0150480.0245040.0371120.0648700.255516
2101.00.0247990.0100580.0150480.0184040.0210470.0275550.074835
36.00.0185220.0034170.0151500.0159380.0174890.0211740.023183
\n", + "
" + ], + "text/plain": [ + " count mean std min 25% 50% 75% \\\n", + "itemsets \n", + "1 73.0 0.053441 0.045956 0.015048 0.024504 0.037112 0.064870 \n", + "2 101.0 0.024799 0.010058 0.015048 0.018404 0.021047 0.027555 \n", + "3 6.0 0.018522 0.003417 0.015150 0.015938 0.017489 0.021174 \n", + "\n", + " max \n", + "itemsets \n", + "1 0.255516 \n", + "2 0.074835 \n", + "3 0.023183 " + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "frequent_itemsets.groupby(length)['support'].describe()" + ] }, { "cell_type": "markdown", @@ -755,7 +1459,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -769,11 +1473,11 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.10.4" }, "vscode": { "interpreter": { - "hash": "4f946df053fbf2b937619d3c5458e7af74262f9a954d8797ba0b27400bcafe06" + "hash": "3ad933181bd8a04b432d3370b9dc3b0662ad032c4dfaa4e4f1596c548f763858" } } },