Commit be5415d5 authored by Linus Jahn

finish apriori

parent ed4c2eb0
......@@ -24,49 +24,56 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""
# Sample data in dict form: each key labels one transaction and each value
# lists the numeric product ids bought in it (consumed via `.values()`).
# NOTE(review): this name is assigned again further down with a list of lists.
transactions = {
"A": [1, 2, 3, 4],
"B": [1, 3, 4],
"C": [1, 2, 3, 4],
"D": [1, 5],
"E": [5],
"F": [5, 3, 6],
"G": [1, 5, 6],
"H": [1, 3, 5],
"I": [5],
"J": [3, 6],
"K": [1, 5, 6],
}
import itertools
def apriori(transactions, threshold = 0.5):
# find all different products
products = []
for pList in transactions.values():
for p in pList:
if not p in products:
products.append(p)
# Sample input for apriori(): each inner list is one transaction, i.e. the
# items that occur together (German product names: milk, wine, bread,
# juice, water).
transactions = [
["Milch", "Wein", "Brot"],
["Saft", "Wein", "Wasser"],
["Milch", "Saft", "Wein", "Wasser"],
["Saft", "Wasser"]
]
def containsItems(items, transaction):
    """Return True if ``transaction`` contains everything in ``items``.

    Args:
        items: Either a single item or a tuple of items (an item
            combination). Any tuple, including tuple subclasses such as
            namedtuples, is treated as a combination.
        transaction: A container supporting the ``in`` operator.

    Returns:
        True when every item of the combination (or the single item) is in
        ``transaction``, otherwise False. An empty tuple is always contained.
    """
    # isinstance() rather than an exact type comparison, so that tuple
    # subclasses are handled as combinations as well.
    if isinstance(items, tuple):
        return all(item in transaction for item in items)
    return items in transaction
# check how frequent products are
frequencies = {}
for p in products:
count = 0
for pList in transactions.values():
if p in pList:
count += 1
frequencies[p] = count / len(transactions)
def combinationFrequency(combination, transactions):
    """Return the fraction of transactions that contain ``combination``.

    Args:
        combination: A single item or a tuple of items, as accepted by
            ``containsItems``.
        transactions: A sized iterable of transactions.

    Returns:
        A float between 0 and 1: matching transactions divided by the total
        number of transactions. An empty ``transactions`` yields 0.0.
    """
    total = len(transactions)
    if total == 0:
        # Nothing can be frequent in an empty data set; also avoids a
        # division by zero.
        return 0.0
    matches = sum(
        1 for transaction in transactions
        if containsItems(combination, transaction)
    )
    return matches / total
# selection of frequent products
def apriori(transactions, threshold=0.5):
    """Find all frequent itemsets in ``transactions`` with the Apriori scheme.

    Candidates are built level by level: frequent single items first, then
    pairs, then triples, and so on. A candidate of size k+1 is only formed
    by joining two frequent itemsets of size k, and only kept if all of its
    size-k subsets are frequent.

    Args:
        transactions: An iterable of transactions; each transaction is an
            iterable of hashable items.
        threshold: Minimum support, as a fraction of all transactions, that
            an itemset must reach (``>=``) to count as frequent.

    Returns:
        A list without duplicates. Frequent single items appear as the bare
        item; larger itemsets appear as tuples. Results are grouped by
        itemset size, and the items inside a tuple are ordered by their
        first appearance in ``transactions``, so the output is deterministic.
        An empty ``transactions`` yields an empty list.
    """
    baskets = []
    # Maps each item to the order in which it was first seen; used to give
    # every reported tuple one canonical item order.
    rank = {}
    for transaction in transactions:
        for item in transaction:
            rank.setdefault(item, len(rank))
        baskets.append(frozenset(transaction))
    if not baskets:
        return []

    def support(itemset):
        hits = sum(1 for basket in baskets if itemset <= basket)
        return hits / len(baskets)

    frequent_itemsets = []
    size = 1
    candidates = [frozenset([item]) for item in rank]
    while candidates:
        frequent = [c for c in candidates if support(c) >= threshold]
        for itemset in frequent:
            ordered = sorted(itemset, key=rank.__getitem__)
            frequent_itemsets.append(
                ordered[0] if size == 1 else tuple(ordered)
            )

        # Join step: only unions that grow by exactly one item belong to
        # the next level. Larger unions would be rediscovered later and
        # reported twice.
        size += 1
        known = set(frequent)
        seen = set()
        candidates = []
        for left, right in itertools.combinations(frequent, 2):
            union = left | right
            if len(union) != size or union in seen:
                continue
            seen.add(union)
            # Prune step: every subset of a frequent itemset is frequent.
            if all(union - {item} in known for item in union):
                candidates.append(union)
    return frequent_itemsets
# Demo run: print the frequent itemsets of the sample transactions.
# apriori() is called once only; a separate call whose result is discarded
# would just repeat the whole computation.
print(apriori(transactions))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment