欢迎访问悦橙教程(wld5.com),关注java教程。悦橙教程  java问答|  每日更新
页面导航 : > > 文章正文

Apriori算法的java版,Apriori算法java版,Apriori算法的java版

来源: javaer 分享于  点击 45847 次 点评:247

Apriori算法的java版,Apriori算法java版,Apriori算法的java版


Apriori算法的java版,挖掘频繁模式!

[Java]代码

import java.util.ArrayList;import java.util.Collections;import java.util.HashMap;import java.util.Iterator;import java.util.List;import java.util.Map;public class Apriori {    private final String TAB_SEPARATOR = "\t";    private String separator = "\u001e";        private final String MAOHAO_SEPARATOR = ":";        private final String SHU_SEPARATOR = MAOHAO_SEPARATOR;          private Map> infoMap = new HashMap>();    private List contentList = null;    private Map oneItemMap = new HashMap();    private List oneItemList = new ArrayList();    private List TwoItemList = new ArrayList();    private Map cacheItemCount = new HashMap();    private int support = 2;        private double confident = 0.2;    public Apriori(List contentList) {        this.contentList = contentList;        this.init();    }    public Apriori(List contentList, int support) {        this.contentList = contentList;        this.support = support;        this.init();    }    private void init() {        for (String content : contentList) {            List list = this.getSingleTerm(content);            infoMap.put(content, list);        }    }    public void run() {        //获得一项集        this.getOneItemSet(this.support);        //获得二项集        this.getTwoItemSet(this.support);    }    /**     * 找到频繁一项集     *      * @param support     */    public void getOneItemSet(int support) {        Iterator iter = infoMap.keySet().iterator();        while (iter.hasNext()) {            List contentStrVector = infoMap.get(iter.next());            for (String tempStr : contentStrVector) {                if (oneItemMap.containsKey(tempStr)) {                    Integer tempInt = oneItemMap.get(tempStr);                    tempInt++;                    oneItemMap.put(tempStr, tempInt);                } else {                    oneItemMap.put(tempStr, 1);                }            }        }        iter = oneItemMap.keySet().iterator();        while (iter.hasNext()) {            String contentTemp = (String) 
iter.next();            Integer tempInteger = oneItemMap.get(contentTemp);            if (tempInteger >= support) {                this.oneItemList.add(contentTemp);            }        }    }    /**     * 找到频繁二项集     *      * @param support     */    public void getTwoItemSet(int support) {        List candidatesTwoItemSet = this.getCandidates();        Iterator iter = infoMap.keySet().iterator();        while (iter.hasNext()) {            List transaction = (List) infoMap.get(iter.next());            for (ItemBean bean : candidatesTwoItemSet) {                boolean tempBoolean = this.isContains(transaction, bean);                if (tempBoolean) {                    bean.addCount();                }            }        }        //检验候选二项集,得到频繁二项集        for (ItemBean bean : candidatesTwoItemSet) {            if (bean.getCount() >= support) {                this.TwoItemList.add(bean);            }        }    }    public void cacheItemCountFromTwoItemSet(){        for(ItemBean bean : this.TwoItemList){            List itemSet = bean.getItemList();            for(String temp : itemSet){                int count = 0;                if(this.cacheItemCount.containsKey(temp)){                    count = bean.getCount() + this.cacheItemCount.get(temp);                }else{                    count = bean.getCount();                }                this.cacheItemCount.put(temp, count);            }        }    }    /**     * 找到一个transaction记录中 是否包含当前的itemBean 即是否当前记录包含这个频繁集     *      * @param transaction     * @param bean     * @return     */    private boolean isContains(List transaction, ItemBean bean) {        List tempItemSet = bean.getItemList();        for (String item : tempItemSet) {            if (!transaction.contains(item)) {                return false;            }        }        return true;    }    /**     * 这里默认是找候选二项集     * 从通过检验的频繁一项集中,构造出频繁二项集      * @return     */    private List getCandidates() {        List list = new ArrayList();        for 
(int i = 0; i < oneItemList.size() - 1; i++) {            for (int j = i + 1; j < oneItemList.size(); j++) {                ItemBean bean = new ItemBean();                bean.setSize(2);                bean.add(oneItemList.get(i));                bean.add(oneItemList.get(j));                list.add(bean);            }        }        return list;    }    /**     * 获得字符串中的独立term     */    private List getSingleTerm(String str) {        String[] tempStr = str.split(this.separator, -1);        ArrayList list = new ArrayList();        for (int i = 0; i < tempStr.length; i++) {            tempStr[i] = tempStr[i].trim();            if ("".equals(tempStr[i])) {                continue;            }            if (!list.contains(tempStr[i])) {                if(tempStr[i].length()> 1){                    list.add(tempStr[i]);                }            }        }        return list;    }    public double getConfident(String term, String item) {        int termSum = 0;        int termItemCount = 0;        for (ItemBean bean : this.TwoItemList) {            if (bean.isContainsTerm(term)) {                termSum = termSum + bean.getCount();                if (bean.isContainsTerm(item)) {                    termItemCount = termItemCount + bean.getCount();                }            }        }        if (termSum == 0) {            return 0;        }        return termItemCount * 1.0 / termSum;    }    public String getMultiConfidentStr(){        int termSumOne = 0;        int termSumTwo = 0;        int termItemCount = 0;        double confidentOne = 0.0;        double confidentTwo = 0.0;        StringBuilder strBuild = new StringBuilder();         for (ItemBean bean : this.TwoItemList) {            List tempStrList = bean.getItemList();            String itemOne = tempStrList.get(0);            String itemTwo = tempStrList.get(1);            termItemCount = bean.getCount();            //left to right            termSumOne = this.cacheItemCount.get(itemOne);            
confidentOne = termItemCount *1.0/ termSumOne;            termSumTwo = this.cacheItemCount.get(itemTwo);            confidentTwo = termItemCount *1.0/ termSumTwo;            if((confidentOne >= this.confident) && (confidentTwo >= this.confident)){                if(Rule.valiate(itemOne, itemTwo)){                    strBuild.append(TAB_SEPARATOR);                    strBuild.append(itemOne);                    strBuild.append(SHU_SEPARATOR);                    strBuild.append(itemTwo);                    strBuild.append(MAOHAO_SEPARATOR);                    strBuild.append(confidentOne);                    strBuild.append(TAB_SEPARATOR);                    //  right to left                    strBuild.append(itemTwo);                    strBuild.append(SHU_SEPARATOR);                    strBuild.append(itemOne);                    strBuild.append(MAOHAO_SEPARATOR);                    strBuild.append(confidentTwo);                }//          }//          //          termSum = this.cacheItemCount.get(itemTwo);//          confident = termItemCount *1.0/ termSum;//          if(confident >= this.confident){            }        }        return strBuild.toString();    }    public String getAllConfidentStr(String term) {        int termSum = 0;        Map map = new HashMap();        StringBuilder strBuilder = new StringBuilder();        strBuilder.append(term );        for (ItemBean bean : this.TwoItemList) {            if (bean.isContainsTerm(term)) {                termSum = termSum + bean.getCount();                List itemStr = bean.getItemList();                for(String tempItem : itemStr){                    if(!term.equals(tempItem)){                        if(map.containsKey(tempItem)){                            Integer tempInt = map.get(tempItem);                            map.put(tempItem, tempInt + bean.getCount());                        }else{                            map.put(tempItem, bean.getCount());                        }                    }     
           }            }        }        if (termSum == 0) {            termSum = 1;        }        Iterator iter = map.keySet().iterator();        List resultList = new ArrayList();        while(iter.hasNext()){            ResultBean bean = new ResultBean();            String tempStr = (String)(iter.next());            bean.setContent(tempStr);            Integer tempInt = map.get(tempStr);            bean.setConfident(tempInt*1.0/termSum);            resultList.add(bean);        }        //对结果进行排序        Collections.sort(resultList);        boolean flag = false;        for(ResultBean tempBean : resultList){            strBuilder.append("\t");            strBuilder.append(tempBean.toString());            flag = true;        }        if(flag){            return strBuilder.toString();        }else{            return null;        }    }    public void setSupport(int support){        this.support = support;    }    public String getSeparator() {        return separator;    }    public void setSeparator(String separator) {        this.separator = separator;    }    public double getConfident() {        return confident;    }    public void setConfident(double confident) {        this.confident = confident;    }    public static void main(String[] args) {        // TreeSet list = new TreeList();        // List tempList = new ArrayList();        tempList.add("诺基亚 E66 全新 正品 行货 正规 发票 全国  联保");        tempList.add(" NOKIA 诺基亚  E66  标准 配置 现正 热卖中");        tempList.add("淘宝商城  诺基亚 E66 手机  大陆 行货  全国 联保 ");        Test test = new Test(tempList);        test.setSeparator("\\s+");        test.run();        String term = "诺基亚";        String content = "E66";        System.out.println(test.getConfident(term, content));        content = "全国";        System.out.println(test.getConfident(term, content));        content = "行货";        System.out.println(test.getConfident(term, content));        content = "联保";        System.out.println(test.getConfident(term, content));        
System.out.println(test.getAllConfidentStr(term));        System.out.println("元".length());    }}
相关栏目:

用户点评