Apriori算法的java版,Apriori算法java版,Apriori算法的ja
分享于 点击 45847 次 点评:247
Apriori算法的java版,Apriori算法java版,Apriori算法的ja
Apriori算法的java版,挖掘频繁模式!
[Java]代码
import java.util.ArrayList;import java.util.Collections;import java.util.HashMap;import java.util.Iterator;import java.util.List;import java.util.Map;public class Apriori { private final String TAB_SEPARATOR = "\t"; private String separator = "\u001e"; private final String MAOHAO_SEPARATOR = ":"; private final String SHU_SEPARATOR = MAOHAO_SEPARATOR; private Map> infoMap = new HashMap>(); private List contentList = null; private Map oneItemMap = new HashMap(); private List oneItemList = new ArrayList(); private List TwoItemList = new ArrayList(); private Map cacheItemCount = new HashMap(); private int support = 2; private double confident = 0.2; public Apriori(List contentList) { this.contentList = contentList; this.init(); } public Apriori(List contentList, int support) { this.contentList = contentList; this.support = support; this.init(); } private void init() { for (String content : contentList) { List list = this.getSingleTerm(content); infoMap.put(content, list); } } public void run() { //获得一项集 this.getOneItemSet(this.support); //获得二项集 this.getTwoItemSet(this.support); } /** * 找到频繁一项集 * * @param support */ public void getOneItemSet(int support) { Iterator iter = infoMap.keySet().iterator(); while (iter.hasNext()) { List contentStrVector = infoMap.get(iter.next()); for (String tempStr : contentStrVector) { if (oneItemMap.containsKey(tempStr)) { Integer tempInt = oneItemMap.get(tempStr); tempInt++; oneItemMap.put(tempStr, tempInt); } else { oneItemMap.put(tempStr, 1); } } } iter = oneItemMap.keySet().iterator(); while (iter.hasNext()) { String contentTemp = (String) iter.next(); Integer tempInteger = oneItemMap.get(contentTemp); if (tempInteger >= support) { this.oneItemList.add(contentTemp); } } } /** * 找到频繁二项集 * * @param support */ public void getTwoItemSet(int support) { List candidatesTwoItemSet = this.getCandidates(); Iterator iter = infoMap.keySet().iterator(); while (iter.hasNext()) { List transaction = (List) infoMap.get(iter.next()); for (ItemBean bean : candidatesTwoItemSet) { boolean tempBoolean = this.isContains(transaction, bean); if (tempBoolean) { bean.addCount(); } } } //检验候选二项集,得到频繁二项集 for (ItemBean bean : candidatesTwoItemSet) { if (bean.getCount() >= support) { this.TwoItemList.add(bean); } } } public void cacheItemCountFromTwoItemSet(){ for(ItemBean bean : this.TwoItemList){ List itemSet = bean.getItemList(); for(String temp : itemSet){ int count = 0; if(this.cacheItemCount.containsKey(temp)){ count = bean.getCount() + this.cacheItemCount.get(temp); }else{ count = bean.getCount(); } this.cacheItemCount.put(temp, count); } } } /** * 找到一个transaction记录中 是否包含当前的itemBean 即是否当前记录包含这个频繁集 * * @param transaction * @param bean * @return */ private boolean isContains(List transaction, ItemBean bean) { List tempItemSet = bean.getItemList(); for (String item : tempItemSet) { if (!transaction.contains(item)) { return false; } } return true; } /** * 这里默认是找候选二项集 * 从通过检验的频繁一项集中,构造出频繁二项集 * @return */ private List getCandidates() { List list = new ArrayList(); for (int i = 0; i < oneItemList.size() - 1; i++) { for (int j = i + 1; j < oneItemList.size(); j++) { ItemBean bean = new ItemBean(); bean.setSize(2); bean.add(oneItemList.get(i)); bean.add(oneItemList.get(j)); list.add(bean); } } return list; } /** * 获得字符串中的独立term */ private List getSingleTerm(String str) { String[] tempStr = str.split(this.separator, -1); ArrayList list = new ArrayList(); for (int i = 0; i < tempStr.length; i++) { tempStr[i] = tempStr[i].trim(); if ("".equals(tempStr[i])) { continue; } if (!list.contains(tempStr[i])) { if(tempStr[i].length()> 1){ list.add(tempStr[i]); } } } return list; } public double getConfident(String term, String item) { int termSum = 0; int termItemCount = 0; for (ItemBean bean : this.TwoItemList) { if (bean.isContainsTerm(term)) { termSum = termSum + bean.getCount(); if (bean.isContainsTerm(item)) { termItemCount = termItemCount + bean.getCount(); } } } if (termSum == 0) { return 0; } return termItemCount * 1.0 / termSum; } public String getMultiConfidentStr(){ int termSumOne = 0; int termSumTwo = 0; int termItemCount = 0; double confidentOne = 0.0; double confidentTwo = 0.0; StringBuilder strBuild = new StringBuilder(); for (ItemBean bean : this.TwoItemList) { List tempStrList = bean.getItemList(); String itemOne = tempStrList.get(0); String itemTwo = tempStrList.get(1); termItemCount = bean.getCount(); //left to right termSumOne = this.cacheItemCount.get(itemOne); confidentOne = termItemCount *1.0/ termSumOne; termSumTwo = this.cacheItemCount.get(itemTwo); confidentTwo = termItemCount *1.0/ termSumTwo; if((confidentOne >= this.confident) && (confidentTwo >= this.confident)){ if(Rule.valiate(itemOne, itemTwo)){ strBuild.append(TAB_SEPARATOR); strBuild.append(itemOne); strBuild.append(SHU_SEPARATOR); strBuild.append(itemTwo); strBuild.append(MAOHAO_SEPARATOR); strBuild.append(confidentOne); strBuild.append(TAB_SEPARATOR); // right to left strBuild.append(itemTwo); strBuild.append(SHU_SEPARATOR); strBuild.append(itemOne); strBuild.append(MAOHAO_SEPARATOR); strBuild.append(confidentTwo); }// }// // termSum = this.cacheItemCount.get(itemTwo);// confident = termItemCount *1.0/ termSum;// if(confident >= this.confident){ } } return strBuild.toString(); } public String getAllConfidentStr(String term) { int termSum = 0; Map map = new HashMap(); StringBuilder strBuilder = new StringBuilder(); strBuilder.append(term ); for (ItemBean bean : this.TwoItemList) { if (bean.isContainsTerm(term)) { termSum = termSum + bean.getCount(); List itemStr = bean.getItemList(); for(String tempItem : itemStr){ if(!term.equals(tempItem)){ if(map.containsKey(tempItem)){ Integer tempInt = map.get(tempItem); map.put(tempItem, tempInt + bean.getCount()); }else{ map.put(tempItem, bean.getCount()); } } } } } if (termSum == 0) { termSum = 1; } Iterator iter = map.keySet().iterator(); List resultList = new ArrayList(); while(iter.hasNext()){ ResultBean bean = new ResultBean(); String tempStr = (String)(iter.next()); bean.setContent(tempStr); Integer tempInt = map.get(tempStr); bean.setConfident(tempInt*1.0/termSum); resultList.add(bean); } //对结果进行排序 Collections.sort(resultList); boolean flag = false; for(ResultBean tempBean : resultList){ strBuilder.append("\t"); strBuilder.append(tempBean.toString()); flag = true; } if(flag){ return strBuilder.toString(); }else{ return null; } } public void setSupport(int support){ this.support = support; } public String getSeparator() { return separator; } public void setSeparator(String separator) { this.separator = separator; } public double getConfident() { return confident; } public void setConfident(double confident) { this.confident = confident; } public static void main(String[] args) { // TreeSet list = new TreeList(); // List tempList = new ArrayList(); tempList.add("诺基亚 E66 全新 正品 行货 正规 发票 全国 联保"); tempList.add(" NOKIA 诺基亚 E66 标准 配置 现正 热卖中"); tempList.add("淘宝商城 诺基亚 E66 手机 大陆 行货 全国 联保 "); Test test = new Test(tempList); test.setSeparator("\\s+"); test.run(); String term = "诺基亚"; String content = "E66"; System.out.println(test.getConfident(term, content)); content = "全国"; System.out.println(test.getConfident(term, content)); content = "行货"; System.out.println(test.getConfident(term, content)); content = "联保"; System.out.println(test.getConfident(term, content)); System.out.println(test.getAllConfidentStr(term)); System.out.println("元".length()); }}
用户点评