前言:
众所周知,关联规则挖掘是数据挖掘中重要的一部分,如著名的啤酒和尿布的问题。今天要学习的是经典的关联规则挖掘算法——Apriori算法
一、算法的基本原理
由k项频繁集去导出k+1项频繁集。
二、算法流程
1.扫描事务数据库,找出1项集,并根据最小支持度计数,剪枝得出频繁1项集。k=1.
2.由频繁k项集进行连接步操作,形成候选的k+1项集,并扫描数据库,得出每一项的支持度计数,并根据最小支持度计数,剪枝得到频繁k+1项集。
迭代的进行第2步直到频繁k项集是空的。
3.由频繁项集构造关联规则。先列出所有可能的关联规则,然后计算相应的置信度,最终筛选出满足最小置信度的强关联规则。
三、算法实现
测试数据集:data.txt
1, 7, 15, 44, 49 2, 1, 19 3, 1, 19 4, 3, 4, 15, 18, 35, 44 5, 2, 4, 7, 9, 23 6, 14, 21, 44 7, 4, 12, 31, 36, 44, 48 8, 15, 27, 28 9, 2, 28 10, 3, 18, 35 11, 23, 24, 40, 41, 43 12, 20, 43, 48 13, 49 14, 1, 19, 26 15, 5, 22, 39 16, 16, 32, 45 17, 4, 6, 9, 10, 16, 22 18, 1, 19, 23 19, 7, 11, 37, 45 20, 3, 18, 32, 35 21, 1, 8, 19, 47 22, 34, 39, 44 23, 13, 19 24, 4, 9, 38 25, 7, 22, 48 26, 7, 11, 14 27, 23, 24, 40, 41, 43 28, 9, 14 29, 0, 2, 42 30, 13, 35 31, 23 32, 8, 21, 25, 38 33, 4, 46 34, 23, 24, 40, 41, 43 35, 4, 17, 29, 47 36, 12, 31, 36 37, 14, 22, 26, 37, 44 38, 0, 16, 30, 32, 45, 47 39, 1, 11, 19, 25, 27, 29, 46 40, 15, 16, 18, 21, 26 41, 4, 10, 14 42, 3, 36 43, 23, 27, 28 44, 15, 21, 40 45, 10, 19, 25, 32 46, 11, 22, 44 47, 8 48, 0, 2, 46 49, 33, 42 50, 28, 39 51, 7, 17, 28 52, 1, 19 53, 32, 34 54, 0, 2, 46 55, 15, 30, 45 56, 39, 49 57, 46 58, 4, 9, 19 59, 0, 2, 16, 19, 46 60, 17, 21, 40 61, 2, 4, 6, 9, 39 62, 23, 24, 40, 41, 43 63, 13, 27, 28 64, 40 65, 12, 14, 44 66, 27, 28 67, 1, 36 68, 12, 31, 36, 48 69, 3, 18, 35 70, 5, 12, 16 71, 30, 49 72, 7, 29, 30 73, 17, 19, 30 74, 1, 13, 36 75, 19, 33, 49 76, 1, 5, 14, 38 77, 31 78, 1, 5, 8, 14, 22, 26 79, 7, 27, 30, 36, 37, 43 80, 13, 35, 42 81, 5, 22, 32 82, 7, 11, 20, 28, 37, 45 83, 23, 24, 40, 41 84, 3, 5, 14, 22, 36 85, 12, 31, 36, 48 86, 45 87, 12, 31, 36, 48 88, 14, 44 89, 5, 19, 22, 34, 43 90, 0, 2, 45, 46 91, 16, 32, 45, 47, 48 92, 1, 48 93, 1, 3, 18, 35 94, 7, 11, 37, 45 95, 16, 26, 32, 41, 45 96, 1, 29, 30 97, 1, 19 98, 1 99, 14, 22, 37 100, 22, 34, 40 101, 31 102, 16, 17, 43 103, 8, 19, 20, 48 104, 1, 12, 15, 27, 31, 34, 36, 48 105, 3, 40, 45 106, 13, 18, 21, 41, 47, 48 107, 3, 18, 22, 27 108, 6, 16, 17, 31, 33, 42 109, 5, 22, 27, 35 110, 7, 15, 49 111, 6, 12, 26, 29, 31, 32, 36, 48 112, 11, 13, 14, 44 113, 23, 33, 35 114, 5, 8, 18, 29, 46 115, 41 116, 1, 3, 8, 27, 28 117, 1, 11, 19, 33 118, 23, 24, 40, 41 119, 16, 18, 23, 27, 29, 30, 40, 49 120, 14, 18, 44 121, 0, 2, 46 122, 3, 18, 35 123, 12, 31, 36, 48 124, 7, 11, 20, 29, 37, 45 125, 2, 24, 27 126, 5, 13, 22 127, 22, 42, 46, 47 128, 39, 41 129, 13, 23 130, 1, 24, 40, 42 131, 14, 20, 26, 44 132, 12, 31, 36, 48 133, 7, 21, 27, 28, 33, 36, 47 134, 17, 29, 47 135, 12, 31, 36 136, 4, 9, 11 137, 3, 11, 14, 18, 29, 35 138, 20, 33, 42 139, 22, 44, 46 140, 5, 6, 16, 22, 33, 38, 49 141, 0, 2, 12, 24, 26, 31, 46 142, 0, 6, 25 143, 5, 19, 22 144, 11, 13, 14, 35, 48 145, 17, 29, 35, 44, 47 146, 7, 15 147, 5, 22 148, 0, 1, 2, 46 149, 4, 9, 18, 41 150, 1, 12, 18, 19, 39 151, 0, 2, 19, 22, 46 152, 33, 37, 42 153, 0, 2, 37, 46 154, 11, 18, 35, 45 155, 4, 9, 15 156, 19, 24, 28, 35, 49 157, 5, 22, 37 158, 0, 2, 5, 19, 33, 35, 39, 46 159, 3, 4, 5, 48 160, 4, 6, 18, 28, 35 161, 3, 18, 31, 35 162, 6, 17, 25, 49 163, 17 164, 3, 8, 9, 20, 22, 23, 42 165, 4, 15, 17, 21, 26, 36, 48 166, 14, 41, 44 167, 19, 28, 42 168, 4, 9 169, 13, 31, 33, 41, 42 170, 5, 22, 28 171, 0, 16, 32 172, 5, 28, 43 173, 24, 36, 37, 42 174, 31 175, 40, 42 176, 11, 34, 48 177, 14, 28, 40, 43 178, 0, 13, 26 179, 16, 32, 45 180, 10, 14, 44, 46 181, 4, 7, 9, 19, 36 182, 23, 24, 40, 41, 43 183, 4, 9, 37 184, 5, 22, 31 185, 21, 24, 29 186, 0, 6, 22, 46 187, 3, 18, 21, 35, 39, 46 188, 12, 31, 36, 38, 48 189, 28 190, 23, 24, 40, 41, 43 191, 12, 24 192, 6, 27, 28 193, 23, 24, 40, 41, 43 194, 27, 28 195, 5, 14, 16, 22 196, 5, 22 197, 27, 28, 44, 47 198, 5, 22 199, 0, 2, 46 200, 0, 10, 30 201, 15, 33, 42 202, 4, 9, 40 203, 1, 36 204, 23, 24, 40, 41 205, 25, 27, 28, 32 206, 7, 11, 37, 45 207, 20, 48 208, 7, 11, 37, 45 209, 9, 27, 28 210, 7, 11, 37 211, 1, 2, 9, 22, 48 212, 7, 27, 39, 45 213, 1, 6, 8, 15, 18, 19, 21, 42 214, 23, 24, 40, 41 215, 4, 14, 20, 32, 36, 44 216, 16, 32, 45 217, 18, 26, 42, 45 218, 2, 11, 23 219, 11, 15, 35 220, 31, 39, 43, 45, 46 221, 0, 2, 28, 40, 46 222, 13, 24, 26, 35, 39, 47 223, 7, 21, 23, 30, 36 224, 23, 40, 47 225, 5, 22 226, 1, 5, 19, 30 227, 7, 15, 49 228, 7, 11, 37, 45 229, 23, 31, 34, 45 230, 7, 29, 48 231, 27, 28 232, 24, 44 233, 4, 5, 9, 39 234, 5, 22, 25, 28, 34, 46 235, 23, 32, 48 236, 16, 42 237, 5, 22 238, 4, 9, 15, 25 239, 1, 12, 16, 17, 19, 26, 48 240, 1, 3, 16, 19, 35 241, 11, 12 242, 4, 9, 10, 12, 45 243, 12, 31, 36, 46, 48 244, 17, 28, 47 245, 3, 18, 35 246, 4, 9 247, 7, 11, 37, 45 248, 5, 12, 22 249, 6, 14, 38, 48 250, 9, 10, 34, 36, 42 251, 3, 18, 35 252, 3 253, 14, 27 254, 0, 1, 19 255, 3, 18, 24, 35 256, 12, 31, 36, 48 257, 19, 31, 41 258, 1, 11, 14, 32, 34, 35 259, 5, 14, 28, 39, 49 260, 9, 14, 21, 46 261, 6 262, 5, 22, 31 263, 11, 40 264, 0, 2, 46 265, 23, 24, 40, 41, 43 266, 3, 14, 23, 44 267, 18, 22, 42, 49 268, 3, 24 269, 12, 31, 36, 48 270, 20, 25, 27 271, 15, 30, 39 272, 7, 15, 49 273, 12, 31, 36, 48 274, 16, 18, 32, 42 275, 27, 31, 33, 42 276, 5, 9, 22, 43 277, 2, 4, 36, 40, 41, 48 278, 19, 28, 30, 32 279, 2, 31, 39, 44 280, 1, 2, 19, 21, 23, 28, 45 281, 7, 10, 14, 16, 23, 32, 45 282, 4, 9, 14, 36, 44 283, 21, 29 284, 9, 13, 17, 46 285, 38, 39 286, 1, 8, 19 287, 0, 33, 42 288, 2, 33, 36, 47 289, 12, 31, 36 290, 28 291, 16, 27, 28 292, 7, 15, 49 293, 8, 21, 24, 33, 34, 40, 42, 44 294, 1, 14, 26, 32, 41 295, 23, 33, 42 296, 14, 21, 23, 24 297, 0, 2, 46 298, 11, 29 299, 1, 23, 33, 42, 47 300, 16, 32, 45 301, 3, 4, 9, 33, 37, 43 302, 19, 25, 30, 43, 46 303, 0, 2, 46 304, 16, 17, 34 305, 5, 21, 22, 25, 27, 31, 42 306, 5, 12, 16, 31, 36 307, 8, 22, 42 308, 3, 27, 36 309, 16, 32, 45 310, 12, 31, 36, 48 311, 5, 22 312, 7, 11, 37, 45 313, 9, 13, 20, 21, 24, 39, 41, 45 314, 1, 9, 15, 24, 37 315, 4, 9 316, 18, 35, 45 317, 0, 2, 46 318, 5, 10, 14, 47 319, 1, 4, 10, 14, 25, 36, 49 320, 12, 34 321, 7, 15, 49 322, 23, 28 323, 1, 19 324, 0, 2, 46 325, 1, 2, 19 326, 7, 11, 37, 45 327, 29, 33, 38, 45, 48 328, 4, 9, 24 329, 1, 19, 23, 30, 44 330, 5, 13 331, 12, 31, 36, 48 332, 0, 2, 46 333, 5, 14, 20, 24, 28, 31, 39, 46 334, 4, 9, 13, 33, 43, 46 335, 5, 14, 22, 29 336, 2, 8, 10, 19, 35, 45 337, 14, 24, 34, 44 338, 5, 22, 28, 30, 47 339, 0, 28, 47 340, 7, 15, 49 341, 5, 9, 28, 41 342, 1, 7, 11, 37 343, 26 344, 44 345, 0, 2, 46 346, 5, 10, 26, 30 347, 8, 12, 14, 33, 44, 47 348, 3, 4, 27, 42 349, 9, 11, 40, 45 350, 3, 4, 9, 11, 12, 47 351, 5, 22, 35 352, 26, 29, 45 353, 4, 9, 31 354, 0, 13, 27, 28, 31, 36 355, 17, 32, 47 356, 1, 31, 41 357, 14, 43, 44 358, 18, 35 359, 5, 10, 16 360, 33, 37 361, 4, 30, 31 362, 9, 14, 25 363, 14, 44 364, 23, 27, 28 365, 9, 18, 22 366, 5, 8, 22 367, 10, 29 368, 3, 15, 16, 20, 33, 45 369, 4, 8, 12, 25, 34 370, 9, 26, 28 371, 7, 9, 15, 49 372, 3, 48, 49 373, 5, 21, 30, 31, 43 374, 4, 9 375, 1, 19, 28 376, 4, 9, 26, 33, 47 377, 1, 13, 19, 41 378, 15, 18, 41 379, 14, 28, 48 380, 5, 22, 47 381, 49 382, 7, 15, 49 383, 8, 28, 47 384, 8, 25, 29 385, 0, 4, 10, 21 386, 41 387, 5, 23, 26, 44 388, 25 389, 4, 9 390, 12, 17, 26, 29, 31, 47 391, 3, 18, 35 392, 7, 11, 34, 37, 45 393, 9, 13, 18, 23, 25, 33, 40 394, 15 395, 5, 17, 22, 40, 48 396, 23, 33, 46 397, 7, 15, 49 398, 16, 32, 45 399, 7, 15, 49 400, 1, 48 401, 2 402, 36, 39, 49 403, 3, 4, 10 404, 34, 36 405, 7, 17, 34, 35, 46 406, 5, 22 407, 32, 34, 36, 42 408, 14, 46 409, 3, 18, 29, 35, 37, 48 410, 27, 33, 42 411, 27, 28 412, 27, 28 413, 4, 9, 13, 21 414, 1, 5, 7, 10, 21, 22, 30, 31 415, 7, 15, 49 416, 4, 5, 14, 23, 42 417, 17, 29, 47 418, 30 419, 5, 33, 42 420, 27, 28, 31 421, 7, 15, 49 422, 16, 32, 45 423, 1, 2, 8, 25, 32 424, 12, 13 425, 16, 44 426, 0, 17, 24 427, 26 428, 4, 10, 27, 28, 43 429, 14 430, 4, 19, 47 431, 33, 42 432, 47 433, 8, 17, 23, 29, 43, 47 434, 0, 5, 33, 46 435, 9, 18, 35, 38, 40, 47 436, 2, 4, 11, 39 437, 4, 5, 9, 23, 24 438, 4, 24, 32 439, 8, 47 440, 2, 19 441, 2, 4, 9, 40 442, 0, 2, 46 443, 11, 19, 33, 42 444, 16, 32, 45 445, 5, 7, 22, 32, 42 446, 4, 33, 47 447, 19, 27, 36 448, 1, 28, 40 449, 23 450, 4, 9, 31, 33 451, 4, 28, 47 452, 25, 27, 34, 49 453, 3, 18, 35 454, 9, 27, 28 455, 14, 15, 35, 36 456, 14, 27, 28 457, 12, 16, 34 458, 0, 2, 13, 18, 24, 36, 46, 47 459, 14, 32, 44 460, 16, 32, 45 461, 2, 8, 16 462, 7, 15, 49 463, 14, 26, 30, 39, 44 464, 1, 23, 32 465, 0, 12, 20, 22, 49 466, 16, 20, 32, 49 467, 23, 24, 40, 41 468, 1, 3, 29, 41, 42, 43, 46 469, 5, 16, 25, 48 470, 17, 29, 47 471, 11, 16, 32, 45 472, 4, 9, 13 473, 17, 47 474, 11, 32 475, 12, 33, 42, 46 476, 23, 24, 40, 41 477, 9, 13, 14, 33, 38, 49 478, 14, 15, 26, 47 479, 3, 18, 35 480, 3, 21, 44 481, 3, 46 482, 9 483, 16, 24, 30, 31, 48 484, 19 485, 33, 42 486, 4, 34 487, 23, 24, 40, 41, 43 488, 5, 22 489, 10, 31 490, 4, 17, 40, 47 491, 39, 47 492, 14, 15, 31 493, 5, 26, 33, 42, 44 494, 13, 30, 38 495, 2, 3, 18, 35, 47 496, 12, 31, 36, 48 497, 17, 27, 28 498, 17, 29, 47 499, 14, 44 500, 0, 2, 46 501, 0, 21, 33, 39 502, 14, 27, 44 503, 12, 31, 36 504, 5, 18 505, 7, 11, 37, 45 506, 0, 20 507, 23, 30 508, 0, 14, 16, 32 509, 8 510, 13, 26, 27, 28, 46 511, 4, 9, 12, 17, 37 512, 10, 20, 37 513, 3, 7, 11, 21, 23 514, 21, 31, 32 515, 0, 1, 5, 23, 32, 42, 44 516, 0, 2, 46 517, 34, 37, 47 518, 16, 28, 32, 45 519, 3, 18, 35 520, 0, 5, 23, 33, 35, 46, 48 521, 14, 28, 29, 44 522, 13, 15, 24 523, 8, 22, 23 524, 11, 14, 26, 28, 41, 43, 45 525, 22, 39 526, 7, 11, 37, 45 527, 0, 2, 15, 16, 22, 35, 46 528, 41, 42 529, 6, 9, 30 530, 4, 13, 37, 45, 49 531, 0, 16 532, 14, 27, 28, 37, 48 533, 13, 28 534, 13, 18 535, 14, 29, 31, 35, 41, 49 536, 15, 34, 46, 48 537, 15, 28 538, 3, 18, 35 539, 12, 31, 36, 48 540, 33, 35, 42 541, 16, 32, 45 542, 7, 9, 15, 43 543, 7, 11, 37, 45 544, 12, 31, 36, 48 545, 14 546, 5, 8, 14, 21, 33, 38 547, 0, 3, 10, 38, 40 548, 10, 14, 18, 47 549, 11, 37, 38 550, 1, 3, 18, 19, 35 551, 3, 5, 22 552, 9, 24, 27, 34 553, 4, 7, 9 554, 0, 12, 35 555, 28, 29, 37 556, 9, 11, 28, 33, 41 557, 14, 44 558, 4, 11, 41, 43 559, 17, 47 560, 16, 20, 27, 32, 45 561, 2, 14, 42 562, 4, 9, 21, 43 563, 17, 29, 47 564, 23, 28, 33, 42 565, 6, 46 566, 3, 18, 35 567, 5, 9, 14, 18, 40 568, 4, 9, 34 569, 13, 14, 43, 44 570, 27, 28 571, 16, 32 572, 6, 14, 22 573, 0, 10, 41 574, 0, 18, 27, 31, 34, 37, 39, 47 575, 5, 7, 22 576, 4, 7 577, 7, 15, 49 578, 4, 15 579, 1, 16, 18, 20, 25, 29, 36, 46 580, 13, 30, 42 581, 24, 25, 31 582, 4, 7, 15, 18 583, 5, 22 584, 28 585, 23, 33, 42 586, 44 587, 15, 19, 27, 28 588, 0, 11, 18, 23, 47 589, 5, 22 590, 7, 11, 37 591, 7, 11, 37, 45 592, 6, 16, 33, 39, 42 593, 23, 24, 40, 41 594, 0, 9, 30, 31 595, 1, 32, 48 596, 2, 27, 28, 43 597, 1, 19 598, 35 599, 3, 10, 18, 35 600, 4, 9, 12, 48 601, 1, 6, 19 602, 7, 15, 49 603, 20, 22, 27 604, 13 605, 15, 16, 26 606, 3, 18, 35 607, 19 608, 5, 22 609, 1, 19 610, 12, 36, 37, 39 611, 2, 3, 10, 34 612, 30, 44 613, 13, 20, 22, 47 614, 4, 12, 26, 45 615, 13, 27, 35, 36 616, 0, 2, 46 617, 7, 10 618, 1, 2, 34, 39 619, 7, 11, 37, 45 620, 39, 43, 44 621, 3, 14, 30 622, 2, 12, 30, 32, 49 623, 1, 19, 32 624, 10, 21, 28, 31 625, 9, 17, 27, 28, 45 626, 10, 11, 16, 32 627, 12, 31, 36, 48 628, 3, 18, 29, 35, 40 629, 23, 24, 40, 41, 43 630, 14, 17, 29, 44, 47 631, 38 632, 0, 2, 46 633, 3, 18, 20, 35 634, 17, 29, 43, 47 635, 0, 6, 19, 26, 43 636, 3, 18, 35 637, 5, 21, 26, 27, 28, 39, 42, 49 638, 12, 31, 36, 48 639, 0, 2, 30, 46, 47 640, 5, 22, 33 641, 14, 33, 42 642, 0, 30, 43, 46 643, 11, 17, 35, 47 644, 0, 2, 46 645, 16, 32, 45 646, 20, 37, 39, 42 647, 14, 30, 44 648, 36, 39 649, 1, 17, 25, 37, 41, 49 650, 30, 36, 44 651, 12, 31, 36, 48 652, 24, 27, 28, 34, 39, 45, 48 653, 0, 37, 46 654, 18, 25, 44 655, 4, 9 656, 5, 22 657, 33, 42 658, 0, 26, 27, 28 659, 5 660, 2, 22, 33, 42 661, 7, 15, 49 662, 4, 9, 13, 17, 37 663, 5, 22, 35, 38, 47 664, 0, 2, 46 665, 27, 28, 39, 42 666, 12, 31, 36, 48 667, 27, 28 668, 16, 32, 45, 49 669, 21, 40 670, 5, 22, 48 671, 23, 33, 39, 42, 45 672, 1, 7, 25 673, 33, 42 674, 16, 19, 32, 38, 45 675, 7, 23, 30, 46 676, 42 677, 2, 10, 23, 42 678, 26, 30, 48 679, 7, 15, 49 680, 12, 25, 39, 48 681, 16, 17, 33, 47, 49 682, 49 683, 27, 45 684, 1, 11, 21, 42, 43 685, 8, 9, 22 686, 27, 28 687, 0, 2, 26, 43, 46 688, 16, 32, 45 689, 10, 22, 39 690, 7, 11, 14, 18, 23 691, 5, 22, 43 692, 17, 23, 29, 47 693, 1, 2, 15, 19 694, 7, 11, 37, 45 695, 5, 12, 22, 44 696, 8, 11, 22, 37 697, 20, 29 698, 7, 9, 11, 13, 37, 39, 45 699, 7, 11, 37, 45 700, 3, 36, 42, 49 701, 16, 32, 45 702, 21, 35 703, 0, 2, 46 704, 10, 18, 44 705, 6, 15, 21, 27, 28, 34, 41, 46 706, 7, 11, 37, 45 707, 16, 40 708, 37, 43 709, 7, 15, 29, 49 710, 12, 31, 36, 48 711, 5, 22, 24 712, 12, 13, 14, 32 713, 4, 5, 9, 32 714, 10, 40, 44 715, 3, 18, 35 716, 1, 13, 20, 31, 45 717, 37, 47, 48 718, 4, 9 719, 1, 19 720, 4, 12, 31 721, 10, 33 722, 14, 44 723, 5, 15, 34 724, 23, 36 725, 5, 29, 32, 35, 42 726, 3, 11, 29 727, 4, 6, 8, 10, 18, 22, 35, 37 728, 21, 24, 27 729, 33, 39 730, 27, 28 731, 6, 10, 44 732, 22 733, 0, 46 734, 16, 32, 45 735, 3, 9, 37, 42, 44 736, 19, 21, 23, 27, 30 737, 5, 19, 30 738, 5, 15, 19, 22, 23 739, 5, 22 740, 3, 18, 35 741, 17, 29, 47 742, 12, 26, 31, 36, 41, 48 743, 3, 18, 35 744, 10 745, 7, 15 746, 28, 43, 49 747, 12, 27, 28 748, 1, 19 749, 17, 21 750, 0, 18, 24, 43, 48 751, 16, 32, 45 752, 0, 11, 33 753, 3 754, 9, 15, 40 755, 12, 30, 49 756, 45 757, 23, 24, 40, 41, 43 758, 14, 40, 44 759, 15, 41, 47 760, 27, 28, 29 761, 0, 2, 23, 46 762, 7, 8, 11, 20 763, 6, 15, 22, 32 764, 5, 13, 26 765, 2, 20 766, 1, 7, 15, 48 767, 34, 45 768, 12, 24, 31, 46 769, 26, 27, 28 770, 23, 24, 40, 41 771, 20, 27, 29 772, 26 773, 27, 28 774, 1, 13, 29 775, 11, 23, 25, 36, 38, 45 776, 17, 29, 47 777, 22, 30, 39 778, 33, 42, 45, 47 779, 0, 2, 12, 39, 41, 46, 49 780, 12, 31, 36, 48 781, 12, 17, 27, 43, 45 782, 17, 47 783, 12, 31, 36, 48 784, 8, 20, 29, 32, 46 785, 10, 22, 23, 26, 36 786, 7, 20, 26, 44, 47 787, 2, 18, 27, 28, 33 788, 14, 21, 23, 24, 30, 42, 46 789, 18, 35 790, 1 791, 0, 14, 20, 44, 46 792, 5, 7, 11, 24, 27 793, 0, 18, 25, 39 794, 1, 19, 27 795, 23, 24, 40, 41, 43 796, 16 797, 0, 6, 28, 33, 35, 46, 48 798, 1, 19, 44 799, 17, 29, 47 800, 6, 31 801, 23, 24, 40, 41, 43 802, 33, 42 803, 1, 3, 19, 29, 31, 44 804, 17, 29, 47 805, 27, 31, 32, 36, 46 806, 16, 32, 45 807, 1, 19, 34, 44 808, 0, 2, 7, 18, 48 809, 14, 19 810, 7, 15, 49 811, 5, 13, 22, 30 812, 17, 29, 47 813, 13, 27, 28, 42, 48 814, 1, 5, 22 815, 4, 9 816, 7, 11, 37, 45 817, 0, 3, 7, 22, 37, 39, 40 818, 1, 19 819, 22, 26 820, 33, 37, 42 821, 4, 8, 13, 27, 28, 46 822, 27, 28, 40 823, 27, 28 824, 33, 42 825, 5, 22 826, 14, 22, 44 827, 16, 32, 45 828, 3, 16, 28, 48 829, 22, 23, 24, 39 830, 15, 26, 28, 33, 36 831, 7, 15, 49 832, 15, 22, 27, 31, 33, 40 833, 18, 35, 41, 43, 49 834, 4, 9 835, 12, 31, 36 836, 1, 19, 28, 31, 38, 44, 48 837, 4, 9, 18 838, 6, 32, 34 839, 15, 42 840, 13, 27, 28, 36 841, 7, 15, 22, 37, 43, 49 842, 7, 30, 43 843, 15, 25, 49 844, 22, 33, 41 845, 34 846, 4, 14, 21, 25, 41 847, 20, 23, 33, 42 848, 9, 13, 22, 42, 45 849, 0, 14, 22, 29, 46 850, 1, 19 851, 3, 16, 32, 45 852, 4, 9 853, 4, 9, 21, 39 854, 3, 18, 39 855, 4, 13, 32 856, 17, 29, 47 857, 12, 31, 36, 48 858, 14, 15 859, 23, 46 860, 5, 22 861, 3, 18, 35 862, 12, 22, 47, 48 863, 3, 5, 18, 35 864, 23, 24, 40, 41 865, 22 866, 8, 12, 16, 18, 29, 34, 49 867, 33, 34, 42, 44 868, 13, 23, 24, 41 869, 23, 24, 40, 41, 43 870, 13, 25 871, 5, 36, 41, 42 872, 5, 22, 31, 37, 47 873, 5, 7, 20 874, 4, 14, 16, 37, 44 875, 3, 18, 35 876, 8, 21, 29, 33, 42 877, 5, 14, 18 878, 27 879, 14, 20, 22, 24 880, 1, 3, 14, 31, 44, 45, 47 881, 29, 36, 41, 46 882, 16, 32, 45 883, 23, 24, 40, 41, 43 884, 7, 11, 19, 38, 42, 43, 47 885, 0, 27, 28, 32 886, 0, 2, 28, 41, 46 887, 23, 24, 40, 41, 43 888, 3, 18, 35 889, 14, 31 890, 8, 14, 44 891, 3, 14, 18, 22, 35 892, 1, 3, 30 893, 6, 12, 41, 44 894, 11, 30, 43 895, 7, 15, 49 896, 0, 2, 46 897, 1, 17, 38, 43 898, 25, 36 899, 7, 9, 27 900, 5, 13, 22 901, 12, 31, 36, 48 902, 26, 39, 43 903, 12, 26, 38 904, 1, 16, 20, 40 905, 23, 24, 40, 41, 43 906, 6, 28, 37, 38, 49 907, 14, 16, 17, 18, 26, 31 908, 37, 44 909, 11, 30 910, 22, 28, 41, 48 911, 12, 31, 36, 48 912, 4, 5, 7, 21 913, 3, 9, 18, 35, 41 914, 5, 27, 28, 42 915, 5, 22, 31 916, 33, 42 917, 4, 9, 15 918, 7, 11, 37, 45 919, 23, 24, 40, 41, 43 920, 40 921, 1, 8, 14, 19 922, 14, 31, 44 923, 6, 28, 33, 39 924, 0, 6, 16, 30, 39, 47 925, 12, 31, 36 926, 1, 36, 46 927, 27, 42, 44 928, 4, 9 929, 20 930, 5, 22 931, 10, 16, 26 932, 13, 36, 42 933, 12, 33, 42, 47 934, 4, 27, 28 935, 1, 19, 39 936, 1, 16, 32 937, 12, 31, 36 938, 16, 32, 41, 45 939, 16, 33, 42 940, 12, 13, 31, 35, 36, 41, 44 941, 6, 9, 19, 24, 44 942, 23, 24, 40, 41, 43 943, 5, 12, 31, 36, 48 944, 0, 2, 20, 44, 46 945, 3, 18, 35 946, 4, 12, 14, 19, 32, 48 947, 41 948, 0, 14, 44 949, 1, 10, 13, 19, 25, 26, 33, 39 950, 7, 15, 16 951, 4, 29, 46 952, 14, 20, 40, 43 953, 7, 38 954, 3, 18, 45, 47 955, 19, 29, 43 956, 9, 16 957, 0, 20, 42 958, 5, 10, 32, 33, 39, 44 959, 27, 28 960, 5, 22, 25, 29, 37, 43 961, 19 962, 4, 16, 34 963, 9, 17, 44, 45 964, 7, 15, 29 965, 7, 11, 27, 37, 39, 45, 49 966, 12, 31, 36, 48 967, 34, 49 968, 31 969, 25, 33, 42, 45 970, 16, 27, 32, 45 971, 25, 41 972, 6, 9, 15, 24, 25, 40, 45 973, 7, 8, 9, 11, 29, 37, 45 974, 21, 36 975, 5, 7, 14, 17, 22 976, 38, 40 977, 2, 7, 15, 22, 25, 33, 41 978, 27, 28 979, 16, 32, 45 980, 8, 18, 28 981, 13 982, 41 983, 27, 28, 38, 48 984, 3, 18, 35 985, 16, 20, 32, 36 986, 16, 32, 45 987, 4, 9 988, 22, 31, 39 989, 0, 2, 46 990, 34 991, 0, 2, 46 992, 0 993, 3, 6, 13 994, 3, 29, 44 995, 9, 27, 28, 45 996, 2, 10, 22, 31, 33, 46 997, 2, 6, 22, 32 998, 0, 4, 9, 30, 33 999, 3, 18, 35 1000, 15, 34, 47 package second; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.Map; import java.util.Set; /* * date:2015-06-10 * by wenbaoli * */ public class Apriori { private float min_sup;//minimum support private float min_conf;//minimum confidence private Map<Integer,Set<String>> TransDataBase;//transaction database private Integer DBnum; private Map<Integer,Map<Set<String>,Float>> freqItermSet;//frequent iterm set,from 1 to k... private Map<Set<String>,Set<Map<Set<String>,Float>>> associationRules;//the final associate rules public Apriori(Map<Integer,Set<String>> DB , float minSup, float minConf){ this.TransDataBase = DB; this.min_conf = minConf; this.min_sup = minSup; this.DBnum = DB.size(); freqItermSet =new HashMap<Integer,Map<Set<String>,Float>>(); associationRules = new HashMap<Set<String>,Set<Map<Set<String>,Float>>>(); } /** * @param args */ public static void main(String[] args) { // TODO Auto-generated method stub //initial String file = "data/data.txt"; String delimeter = ","; //load data Map<Integer,Set<String>> data = new HashMap<Integer,Set<String>>(); int num = 0; try { File mFile = new File(file); FileReader fr = new FileReader(mFile); BufferedReader br = new BufferedReader(fr); String line; while((line=br.readLine())!= null) { line = line.trim(); String[] str = line.split(delimeter); // int i = Integer.parseInt(str[0]); Set<String> set = new HashSet<String>(); for (int i = 1; i < str.length; i++) { set.add(str[i].trim()); } num++; data.put(num, set); } br.close(); } catch(IOException ex) { ex.printStackTrace(); } Apriori ap = new Apriori(data,0.005f,0.6f); ap.findAllFreqItermSet(); ap.findAssociationRules(); } public void findAllFreqItermSet(){ //找出频繁一项集 Map<Set<String>,Float> f1 = new HashMap<Set<String>,Float>(); Map<Set<String>,Integer> OneItermSet = new HashMap<Set<String>,Integer>(); Iterator<Map.Entry<Integer, Set<String>>> it = this.TransDataBase.entrySet().iterator(); while(it.hasNext()){ Map.Entry<Integer, Set<String>> entry = it.next(); Set<String> itermSet = entry.getValue(); for(String iterm:itermSet){ Set<String> key = new HashSet<String>(); key.add(iterm); if(!OneItermSet.containsKey(key)){ OneItermSet.put(key, 1); }else{ int value = 1 + OneItermSet.get(key); OneItermSet.put(key, value); } } } //找出支持度大于minSup的频繁一项集 Iterator<Map.Entry<Set<String>,Integer>> iter = OneItermSet.entrySet().iterator(); while(iter.hasNext()){ Map.Entry<Set<String>,Integer> entry = iter.next(); //计算支持度 Float support = new Float(entry.getValue().toString())/new Float(this.DBnum); if(support >= this.min_sup){ f1.put(entry.getKey(), support); } } System.out.println("频繁1" + "项集:" + f1);//打印频繁1-项集 System.out.println("-------------------------------------------"); this.freqItermSet.put(1, f1); //由频繁k项集得到频繁k+1项集 int k = 2; while(true){ Set<Set<String>> candFreItermSets = this.apriori_gen(k,this.freqItermSet.get(k-1).keySet()); Map<Set<String>,Float> freqKItermSetMap = this.getFreqKItermSet(k,candFreItermSets); if(!freqKItermSetMap.isEmpty()){ this.freqItermSet.put(k, freqKItermSetMap); } else { break; } System.out.println("频繁" + k + "项集:" + freqKItermSetMap); System.out.println("-------------------------------------------"); k++; } } public Map<Set<String>, Float> getFreqKItermSet(int k, Set<Set<String>> candFreItermSets) { Map<Set<String>,Integer> candFreqKItermSetMap = new HashMap<Set<String>,Integer>(); //扫描事物数据库 Iterator<Map.Entry<Integer, Set<String>>> it = this.TransDataBase.entrySet().iterator(); //统计支持度计数 while (it.hasNext()){ Map.Entry<Integer, Set<String>> entry = it.next(); Iterator<Set<String>> iter = candFreItermSets.iterator(); while(iter.hasNext()){ Set<String> set = iter.next(); if(entry.getValue().containsAll(set)){ if(!candFreqKItermSetMap.containsKey(set)){ candFreqKItermSetMap.put(set, 1); }else { int value = 1+ candFreqKItermSetMap.get(set); candFreqKItermSetMap.put(set, value); } } } } Iterator<Map.Entry<Set<String>, Integer>> iter = candFreqKItermSetMap.entrySet().iterator(); Map<Set<String>,Float> freqKIntermSet = new HashMap<Set<String>,Float>(); while(iter.hasNext()){ Map.Entry<Set<String>, Integer> entry = iter.next(); float support = new Float(entry.getValue().toString())/this.DBnum; if(support < this.min_sup){ iter.remove(); } else { freqKIntermSet.put(entry.getKey(), support); } } return freqKIntermSet; } public Set<Set<String>> apriori_gen(int k, Set<Set<String>> fk){ Set<Set<String>> ck = new HashSet<Set<String>>(); Iterator<Set<String>> it1 = fk.iterator(); while (it1.hasNext()) { Set<String> itermSet1 = it1.next(); Iterator<Set<String>> it2 = fk.iterator(); while (it2.hasNext()) { Set<String> itermSet2 = it2.next(); if(!itermSet1.equals(itermSet2)) { //连接步 Set<String> commIterms = new HashSet<String>(); commIterms.addAll(itermSet1); commIterms.retainAll(itermSet2); if(commIterms.size() == (k - 2)){ Set<String> candIterms = new HashSet<String>(); candIterms.addAll(itermSet1); candIterms.removeAll(itermSet2); candIterms.addAll(itermSet2); //剪枝步骤 if(!this.has_infrequent_subset(candIterms, fk)){ ck.add(candIterms); } } } } } System.out.println(ck.size()); return ck; } public boolean has_infrequent_subset(Set<String> set,Set<Set<String>> fk){ Set<Set<String>> subItermSet = new HashSet<Set<String>>(); Iterator<String> itr = set.iterator(); while(itr.hasNext()){ //深拷贝 Set<String> subItem = new HashSet<String>(); Iterator<String> it = set.iterator(); while(it.hasNext()){ subItem.add(it.next()); } //去掉一个项后为k-1子集 subItem.remove(itr.next()); subItermSet.add(subItem); } Iterator<Set<String>> it = subItermSet.iterator(); while(it.hasNext()){ if(!fk.contains(it.next())){ return true; } } return false; } public void findAssociationRulesTemp(){ } public void findAssociationRules(){ Iterator<Map.Entry<Integer, Map<Set<String>,Float>>> it = this.freqItermSet.entrySet().iterator(); while (it.hasNext()) { Map.Entry<Integer, Map<Set<String>,Float>> entry = it.next(); for (Set<String> itemSet : entry.getValue().keySet()) { int n = itemSet.size() / 2; for (int i = 1; i <= n; i++) { Set<Set<String>> subset = this.getProperSubset(i,itemSet); for(Set<String> conditionSet:subset){ Set<String> conclusionSet = new HashSet<String>(); conclusionSet.addAll(itemSet); conclusionSet.removeAll(conditionSet); int s1 = conditionSet.size(); int s2 = conclusionSet.size(); float supF = this.freqItermSet.get(s1).get(conditionSet); float supS = this.freqItermSet.get(s2).get(conclusionSet); float sup = this.freqItermSet.get(s1+s2).get(itemSet); float conf1 = sup/supF; float conf2 = sup/supS; if(conf1 >= this.min_conf){ if(this.associationRules.get(conditionSet) == null){ Set<Map<Set<String>,Float>> conclusionSetSet = new HashSet<Map<Set<String>,Float>>(); Map<Set<String>,Float> sets = new HashMap<Set<String>,Float>(); sets.put(conclusionSet, conf1); conclusionSetSet.add(sets); this.associationRules.put(conditionSet, conclusionSetSet); } else { Map<Set<String>,Float> sets = new HashMap<Set<String>,Float>(); sets.put(conclusionSet, conf1); associationRules.get(conditionSet).add(sets); } } if(conf2 >= this.min_conf){ if(this.associationRules.get(conditionSet) == null){ Set<Map<Set<String>,Float>> conclusionSetSet = new HashSet<Map<Set<String>,Float>>(); Map<Set<String>,Float> sets = new HashMap<Set<String>,Float>(); sets.put(conclusionSet, conf2); conclusionSetSet.add(sets); this.associationRules.put(conditionSet, conclusionSetSet); } else { Map<Set<String>,Float> sets = new HashMap<Set<String>,Float>(); sets.put(conclusionSet, conf2); associationRules.get(conditionSet).add(sets); } } } } } } System.out.println("关联规则(强规则):" + associationRules); } private Set<Set<String>> getProperSubset(int i, Set<String> itemSet) { Set<Set<String>> subset = new HashSet<Set<String>>(); if(itemSet.size() <= 1){ return null; }else if(itemSet.size() == 2){ for(String s: itemSet){ Set<String> set = new HashSet<String>(); set.add(s); if(!subset.contains(s)){ subset.add(set); } } return subset; }else { Iterator<String> it = itemSet.iterator(); String s = it.next(); Set<String> temp = new HashSet<String>(itemSet); temp.remove(s); //包含s的子集 Set<Set<String>> subset0 = new HashSet<Set<String>>(); subset0 = this.getProperSubset(i-1, temp); subset.addAll(subset0); //不包含s的子集 Set<Set<String>> subset1 = new HashSet<Set<String>>(); subset1 = this.getProperSubset(i, temp); subset.addAll(subset1); return subset; } } }转载于:https://www.cnblogs.com/wenbaoli/p/5655746.html