中国IT动力,最新最全的IT技术教程
最新100篇 | 推荐100篇 | 专题100篇 | 排行榜 | 搜索 | 在线API文档
首 页 | 程序开发 | 操作系统 | 软件应用 | 图形图象 | 网络应用 | 精文荟萃 | 教育认证 | 硬件维护 | 未整理篇 | 站长教程
ASP JS PHP工程 ASP.NET 网站建设 UML J2EESUN .NET VC VB VFP 网络维护 数据库 DB2 SQL2000 Oracle Mysql
服务器 Win2000 Office C DreamWeaver FireWorks Flash PhotoShop 上网宝典 CorelDraw 协议大全 网络安全 微软认证
硬件维护  CPU  主板  硬盘  内存  显卡  显示器  键盘鼠标  声卡音箱  打印机  机箱电源  BIOS  网卡  C#  Java  Delphi  vs.net2005
  当前位置:> 程序开发 > 编程语言 > 综合其它
我做的AprioriTid的实现
作者:未知 时间:2005-09-13 23:35 出处:Blog.ChinaUnix.net 责编:chinaitpower
              摘要:我做的AprioriTid的实现
因为毕业设计的原因,简单研究了一下数据挖掘。
AprioriTid是数据挖掘关联规则方向的一个算法,用于找出满足最小支持度的频繁项目集。同时代码中也实现了根据频繁项目集找出满足最小置信度的关联规则的基本算法。
使用Ruby语言实现。

# Counts in how many transactions each candidate itemset occurs and keeps only
# the itemsets that reach the minimum support count.
#
# candidate_transaction - Hash whose values are lists of itemsets (its keys
#                         are not used here).
# minsupp_count         - Numeric threshold; itemsets counted fewer times than
#                         this are dropped.
#
# Returns a Hash of itemset => occurrence count. The Hash has a default value
# of 0, so looking up an itemset that is not stored yields 0.
def simple_count(candidate_transaction, minsupp_count)
  occurrences = Hash.new(0)
  candidate_transaction.each_value do |itemsets_in_transaction|
    itemsets_in_transaction.each { |itemset| occurrences[itemset] += 1 }
  end
  # delete_if removes the infrequent entries in place and returns the hash.
  occurrences.delete_if { |_itemset, times_seen| times_seen < minsupp_count }
end

# Finds the frequent itemsets of a transaction database, level by level.
#
# database - Hash of transaction id => list of items.
# minsupp  - Minimum support as a fraction of the number of transactions
#            (it is multiplied by database.length to get a count).
#
# Returns an Array of Hashes (itemset => support count). Index 0 is an empty
# placeholder so that index k holds the k-item itemsets. When the loop stops
# because a level produced no frequent itemsets, that empty Hash is the last
# element.
#
# Side effect: every itemset => support pair found is merged into @flatten,
# which find_out_rules and rules_gen read when computing confidence.
def aprioritid(database, minsupp)
  minsupp_count = minsupp * database.length
  itemsets_with_support = [{}]

  # Level 1: each transaction is represented by its single-item itemsets.
  candidate_transaction_set_past = {}
  database.each do |tid, transaction|
    candidate_transaction_set_past[tid] = transaction.map { |item| [item] }
  end
  itemsets_with_support << simple_count(candidate_transaction_set_past, minsupp_count)

  k = 1
  while itemsets_with_support[k].length != 0
    candidate_itemsets = apriori_gen(itemsets_with_support[k].keys)
    break if candidate_itemsets.length == 0

    candidate_transaction_set = {}
    candidate_transaction_set_past.each do |tid, itemsets_past|
      # The set of items covered by the previous level does not depend on the
      # candidate, so it is computed once per transaction.
      items_present = itemsets_past.flatten
      candidate_transaction_set[tid] = candidate_itemsets.select do |candidate_itemset|
        candidate_itemset.all? { |candidate_item| items_present.include?(candidate_item) }
      end
    end

    itemsets_with_support << simple_count(candidate_transaction_set, minsupp_count)
    k += 1
    candidate_transaction_set_past = candidate_transaction_set
  end

  # Tolerate a caller that has not initialised the shared lookup table yet.
  @flatten ||= {}
  itemsets_with_support.each do |itemsets_with_support_for_each_pass|
    @flatten.merge!(itemsets_with_support_for_each_pass)
  end
  itemsets_with_support
end

# Generates the candidate itemsets one item longer than the given itemsets.
#
# itemsets - Array of itemsets, all of the same length. The last items of two
#            itemsets are compared with <, so items must be mutually
#            comparable.
#
# Each candidate is an itemset extended by the last item of another itemset
# whose last item is greater. Three cases are distinguished:
# * 1-item itemsets: every pair [a, b] with a < b is a candidate.
# * 2-item itemsets: any pair of itemsets may be combined; the candidate is
#   kept when its 2-item subsets are all in +itemsets+, and duplicates are
#   skipped.
# * longer itemsets: only itemsets that agree on everything but the last item
#   are combined; the candidate is kept when its subsets are all in
#   +itemsets+.
#
# Returns an Array of candidate itemsets.
def apriori_gen(itemsets)
  candidate_itemsets = []
  itemsets.each do |itemset1|
    itemsets.each do |itemset2|
      if itemset1.length == 1 && itemset1[-1] < itemset2[-1]
        candidate_itemsets << [itemset1[-1], itemset2[-1]]
      elsif itemset2.length == 2 && itemset1[-1] < itemset2[-1]
        temp = itemset1 + [itemset2[-1]]
        next if candidate_itemsets.include?(temp)
        candidate_itemsets << temp if apriori_subsets_present?(temp, itemset1.length, itemsets)
      elsif itemset1[0..-2] == itemset2[0..-2] && itemset1[-1] < itemset2[-1]
        temp = itemset1 + [itemset2[-1]]
        # The subset left after removing an item must be looked up, not the
        # removed item itself (Array#delete_at returns the removed element).
        candidate_itemsets << temp if apriori_subsets_present?(temp, itemset1.length, itemsets)
      end
    end
  end
  candidate_itemsets
end

# Tells whether each subset of +candidate+ obtained by removing one of its
# first +prefix_length+ items is contained in +itemsets+.
def apriori_subsets_present?(candidate, prefix_length, itemsets)
  (0...prefix_length).all? do |index|
    subset = candidate.dup
    subset.delete_at(index)
    itemsets.include?(subset)
  end
end

# Derives association rules from the frequent itemsets of two or more items.
#
# itemsets_with_support - Array of Hashes (itemset => support count) where
#                         index k holds the k-item itemsets, as returned by
#                         aprioritid. Indexes 0 and 1 are not used.
# minconf               - Minimum confidence a rule must reach.
#
# The confidence of "antecedent => consequent" is the support of the whole
# itemset divided by the support of the antecedent, looked up in @flatten.
# @flatten must therefore already contain the supports (aprioritid fills it).
#
# Returns an Array of unique rules, each of the form [antecedent, consequent].
def find_out_rules(itemsets_with_support, minconf)
  rules = []
  (2..(itemsets_with_support.length - 1)).each do |itemset_with_support_index|
    itemsets_with_support[itemset_with_support_index].each do |frequent_itemset, support|
      # Consequents belong to one itemset only, so start a fresh list each time.
      one_item_consequent = []
      frequent_itemset.each_index do |frequent_item_index|
        antecedent = frequent_itemset.dup
        antecedent.delete_at(frequent_item_index)
        # Only rules that reach the minimum confidence are reported.
        next unless support.to_f / @flatten[antecedent].to_f >= minconf
        one_item_consequent << [frequent_itemset[frequent_item_index]]
        rules << [antecedent, [frequent_itemset[frequent_item_index]]]
      end
      # Without any confident one-item consequent there is nothing to extend.
      next if one_item_consequent.empty?
      rules.concat(rules_gen(frequent_itemset, one_item_consequent, minconf))
    end
  end
  rules.uniq
end

# Recursively generates the rules of one frequent itemset whose consequents
# are one item longer than the given consequents.
#
# frequent_itemset  - The itemset the rules are derived from.
# m_item_consequent - Array of consequents (all of the same length) that
#                     already produced confident rules for this itemset.
# minconf           - Minimum confidence a rule must reach.
#
# Confidence is the support of frequent_itemset divided by the support of the
# antecedent, both looked up in @flatten.
#
# Returns an Array of rules, each of the form [antecedent, consequent]. It is
# empty when there are no consequents to extend or when a longer consequent
# would leave no antecedent.
def rules_gen(frequent_itemset, m_item_consequent, minconf)
  rules = []
  # The recursion regularly ends with no surviving consequents.
  return rules if m_item_consequent.empty?
  return rules unless frequent_itemset.length > m_item_consequent[0].length + 1

  # Consequents that passed are collected in a new list; removing failures
  # from the list being walked would skip the element after each removal.
  m1_item_consequent = []
  apriori_gen(m_item_consequent).each do |consequent|
    # A consequent has to be drawn from the itemset itself.
    next unless consequent.all? { |item| frequent_itemset.include?(item) }
    antecedent = frequent_itemset.reject { |item| consequent.include?(item) }
    next unless @flatten[frequent_itemset].to_f / @flatten[antecedent].to_f >= minconf
    rules << [antecedent, consequent]
    m1_item_consequent << consequent
  end
  rules.concat(rules_gen(frequent_itemset, m1_item_consequent, minconf))
  rules
end

# Shared lookup of itemset => support count. aprioritid fills it; find_out_rules
# and rules_gen read it to compute rule confidence.
@flatten = {}

require('postgres')

# Load the stock table.
# NOTE(review): relies on the legacy `postgres` library (PGconn with
# positional arguments, result rows exposed through #result) - confirm that
# this library is still available in the target environment.
puts('reading database at ' + Time.now.inspect)
db = PGconn.new('localhost', 5432, '', '', 'pgsql', 'pgsql', '')
begin
  result = db.exec("select id, name, gen, base, basegen from stock;")
ensure
  # Release the connection even when the query fails.
  db.close
end

# Turn every row into one transaction keyed by its row index; the five column
# values are the items of that transaction.
puts('converting at ' + Time.now.inspect)
database = {}
result.result.each_with_index do |row, index|
  database[index] = [row[0].to_i, row[1].to_s, row[2].to_f, row[3].to_f, row[4].to_f]
end

support = 0.5
confidence = 0.2

puts('aprioritid at ' + Time.now.inspect)
frequent_itemsets = aprioritid(database, support)

puts('find rules at ' + Time.now.inspect)
rules = find_out_rules(frequent_itemsets, confidence)

# Print each rule as "{a, b} => {c}". Items are joined with a separator
# instead of erasing a trailing ", " with backspace characters, which only
# looks right on a terminal.
rules.each do |rule|
  puts("{#{rule[0].join(', ')}} => {#{rule[1].join(', ')}}")
end
关闭本页
 
首页 | 投资与合作 | 服务条款 | 隐私政策 | 收藏本站 | 设为首页 | 新用户注册 | 免责声明 | 使用帮助
Copyright ©2005-2008 chinaitpower.com All rights reserved. www.chinaitpower.com 版权所有