Skip to content

Commit

Permalink
add FuzzyCDF
Browse files Browse the repository at this point in the history
  • Loading branch information
Ljyustc committed Apr 4, 2021
1 parent 3efb320 commit 951ab1a
Show file tree
Hide file tree
Showing 13 changed files with 645 additions and 0 deletions.
101 changes: 101 additions & 0 deletions EduCDM/FuzzyCDF/FuzzyCDF.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# coding: utf-8
# 2021/3/28 @ liujiayu

import logging
import numpy as np
import pickle
from scipy import stats
from tqdm import tqdm
from collections import namedtuple
from EduCDM import CDM
from .modules import get_LogLikelihood, cal_alpha_mastery, update_A_B, update_theta, update_slip_guess, update_variance

# Container for the FuzzyCDF hyper-parameters. Field order matters because
# instances may be built positionally:
#   sig_a, mu_a         -> log-normal prior of `a`
#   sig_b, mu_b         -> normal prior of `b`
#   max_s, min_s        -> range of the slip parameters
#   max_g, min_g        -> range of the guess parameters
#   mu_theta, sig_theta -> normal prior of `theta`
hyper_para = namedtuple(
    "hyperparameters",
    "sig_a mu_a sig_b mu_b max_s min_s max_g min_g mu_theta sig_theta",
)
# Default hyper-parameter values used when the caller does not supply any.
default_hyper = hyper_para(
    sig_a=1, mu_a=0,
    sig_b=1, mu_b=0,
    max_s=0.6, min_s=0,
    max_g=0.6, min_g=0,
    mu_theta=0, sig_theta=1,
)


def init_parameters(stu_num, prob_num, know_num, args):  # initialize FuzzyCDF parameters
    """
    Draw random initial values for every FuzzyCDF parameter.

    :param stu_num (int): number of students
    :param prob_num (int): number of problems
    :param know_num (int): number of knowledge concepts
    :param args: hyper-parameters (fields sig_a, mu_a, sig_b, mu_b, max_s, min_s, max_g, min_g,
        mu_theta, sig_theta are read)
    :return: tuple (a, b, slip, guess, theta, variance) with shapes (stu_num, know_num),
        (stu_num, know_num), (prob_num,), (prob_num,), (stu_num,) and (1,)
    """
    stu_know_shape = (stu_num, know_num)

    def scaled_beta(low, high):
        # Beta(1, 2) sample per problem, stretched from [0, 1] to [low, high]
        return stats.beta.rvs(a=1, b=2, size=prob_num) * (high - low) + low

    # NOTE: the draws below are kept in this exact order so that a seeded run
    # consumes the random stream the same way.
    discrimination = stats.lognorm.rvs(s=args.sig_a, loc=0, scale=np.exp(args.mu_a), size=stu_know_shape)
    difficulty = stats.norm.rvs(loc=args.mu_b, scale=args.sig_b, size=stu_know_shape)
    slip_init = scaled_beta(args.min_s, args.max_s)
    guess_init = scaled_beta(args.min_g, args.max_g)
    ability = stats.norm.rvs(loc=args.mu_theta, scale=args.sig_theta, size=stu_num)
    # reciprocal of a Gamma(4, scale=1/6) sample
    var_init = 1 / stats.gamma.rvs(a=4, scale=1 / 6, size=1)
    return discrimination, difficulty, slip_init, guess_init, ability, var_init


class FuzzyCDF(CDM):
    """
    FuzzyCDF model, training (MCMC) and testing methods

    :param R (array): response matrix, shape = (stu_num, prob_num)
    :param q_m (array): Q matrix, shape = (prob_num, know_num)
    :param stu_num (int): number of students
    :param prob_num (int): number of problems
    :param know_num (int): number of knowledge concepts
    :param obj_prob_index (array): index of all objective problems, shape = (number, )
    :param sub_prob_index (array): index of all subjective problems, shape = (number, )
    :param skip_value (int): skip value in response matrix
    :param args: all hyper-parameters
    """

    def __init__(self, R, q_m, stu_num, prob_num, know_num, obj_prob_index, sub_prob_index, skip_value=-1,
                 args=default_hyper):
        self.args = args
        self.R, self.q_m, self.stu_num, self.prob_num, self.know_num = R, q_m, stu_num, prob_num, know_num
        # random starting point of the sampler
        self.a, self.b, self.slip, self.guess, self.theta, self.variance = init_parameters(stu_num, prob_num, know_num,
                                                                                           self.args)
        self.obj_prob_index, self.sub_prob_index, self.skip_value = obj_prob_index, sub_prob_index, skip_value

    def train(self, epoch, burnin) -> ...:
        """
        Run `epoch` sampling iterations and store the mean of the samples drawn at iterations >= burnin.

        :param epoch (int): total number of sampling iterations
        :param burnin (int): number of leading iterations whose samples are discarded
        :raises ValueError: if not 0 <= burnin < epoch (no sample would be kept, or the mean would be wrong)
        """
        if burnin < 0 or epoch <= burnin:
            raise ValueError("train requires 0 <= burnin < epoch, got epoch=%s, burnin=%s" % (epoch, burnin))
        kept = epoch - burnin  # number of samples that enter the average

        # work on copies: the update_* helpers modify their array arguments in place
        A, B, slip, guess = np.copy(self.a), np.copy(self.b), np.copy(self.slip), np.copy(self.guess)
        theta, variance = np.copy(self.theta), np.copy(self.variance)
        estimate_A, estimate_B, estimate_slip, estimate_guess, estimate_theta, estimate_variance = 0, 0, 0, 0, 0, 0
        for iteration in range(epoch):
            update_A_B(A, B, theta, slip, guess, variance, self.R, self.q_m, self.obj_prob_index, self.sub_prob_index,
                       self.skip_value, self.args)
            update_theta(A, B, theta, slip, guess, variance, self.R, self.q_m, self.obj_prob_index,
                         self.sub_prob_index, self.skip_value, self.args)
            update_slip_guess(A, B, theta, slip, guess, variance, self.R, self.q_m, self.obj_prob_index,
                              self.sub_prob_index, self.skip_value, self.args)
            # update_variance returns the (possibly new) value instead of mutating in place
            variance = update_variance(A, B, theta, slip, guess, variance, self.R, self.q_m, self.obj_prob_index,
                                       self.sub_prob_index, self.skip_value)
            if iteration >= burnin:
                # the first `0 + array` allocates a fresh accumulator, so the samples are never aliased
                estimate_A = estimate_A + A
                estimate_B = estimate_B + B
                estimate_slip = estimate_slip + slip
                estimate_guess = estimate_guess + guess
                estimate_theta = estimate_theta + theta
                estimate_variance = estimate_variance + variance
        self.a, self.b = estimate_A / kept, estimate_B / kept
        self.slip, self.guess = estimate_slip / kept, estimate_guess / kept
        self.theta, self.variance = estimate_theta / kept, estimate_variance / kept

    def eval(self, test_data) -> tuple:
        """
        Score the current parameters on held-out response logs.

        :param test_data: iterable of records with keys 'user_id', 'item_id' and 'score'
        :return: tuple (rmse, mae)
        """
        _, pred_mastery = cal_alpha_mastery(self.a, self.b, self.theta, self.q_m, self.obj_prob_index,
                                            self.sub_prob_index)
        pred_score = (1 - self.slip) * pred_mastery + self.guess * (1 - pred_mastery)
        squared_err, abs_err = [], []
        for i in tqdm(test_data, "evaluating"):
            stu, test_id, true_score = i['user_id'], i['item_id'], i['score']
            diff = pred_score[stu, test_id] - true_score
            squared_err.append(diff ** 2)
            abs_err.append(abs(diff))
        return np.sqrt(np.average(squared_err)), np.average(abs_err)

    def save(self, filepath):
        """
        Pickle the model parameters (including variance) to `filepath`.

        :param filepath: destination path of the pickle file
        """
        params = {"a": self.a, "b": self.b, "theta": self.theta, "slip": self.slip, "guess": self.guess,
                  "variance": self.variance}
        with open(filepath, 'wb') as file:
            pickle.dump(params, file)
        logging.info("save parameters to %s", filepath)

    def load(self, filepath):
        """
        Restore model parameters from a pickle file written by `save`.

        :param filepath: path of the pickle file
        """
        # SECURITY: pickle.load can execute arbitrary code; only load files from a trusted source.
        with open(filepath, 'rb') as file:
            params = pickle.load(file)
        # read by key rather than relying on the order of dict values
        self.a, self.b, self.theta = params["a"], params["b"], params["theta"]
        self.slip, self.guess = params["slip"], params["guess"]
        # files written before variance was saved do not contain it; keep the current value then
        if "variance" in params:
            self.variance = params["variance"]
        logging.info("load parameters from %s", filepath)

    def inc_train(self, inc_train_data, epoch, burnin):  # incremental training
        """
        Write new response logs into the response matrix and train again.

        :param inc_train_data: iterable of records with keys 'user_id', 'item_id' and 'score'
        :param epoch (int): total number of sampling iterations
        :param burnin (int): number of leading iterations whose samples are discarded
        """
        for i in inc_train_data:
            stu, test_id, true_score = i['user_id'], i['item_id'], i['score']
            self.R[stu, test_id] = true_score
        self.train(epoch, burnin)
5 changes: 5 additions & 0 deletions EduCDM/FuzzyCDF/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# coding: utf-8
# 2021/3/28 @ liujiayu


from .FuzzyCDF import FuzzyCDF
107 changes: 107 additions & 0 deletions EduCDM/FuzzyCDF/modules.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# coding: utf-8
# 2021/3/28 @ liujiayu
# Modules in FuzzyCDF

import numpy as np
from scipy import stats


def cal_alpha_mastery(A, B, theta, q_m, obj_prob_index, sub_prob_index):  # calculate proficiency on knows and probs
    """
    Compute each student's proficiency on every knowledge concept and mastery of every problem.

    :param A (array): shape = (stu_num, know_num)
    :param B (array): shape = (stu_num, know_num)
    :param theta (array): shape = (stu_num, )
    :param q_m (array): Q matrix, shape = (prob_num, know_num)
    :param obj_prob_index (array): index of all objective problems
    :param sub_prob_index (array): index of all subjective problems
    :return: tuple (alpha, mastery); alpha has shape (stu_num, know_num), mastery has shape (stu_num, prob_num)
    """
    n_students = len(theta)
    n_problems = q_m.shape[0]
    ability_gap = theta.reshape([-1, 1]) - B
    alpha = 1 / (1 + np.exp(-1.7 * A * ability_gap))

    has_objective = len(obj_prob_index) > 0
    has_subjective = len(sub_prob_index) > 0
    # adding 2 wherever q_m is 0 keeps unrelated concepts out of the minimum (alpha never exceeds 1)
    unrelated_offset = 2 * (1 - q_m)

    mastery = np.zeros((n_students, n_problems))
    for mastery_row, stu_alpha in zip(mastery, alpha):
        weighted = stu_alpha * q_m  # shape = (prob_num, know_num)
        if has_objective:
            # objective problems: minimum over the related concepts
            mastery_row[obj_prob_index] = np.min((weighted + unrelated_offset)[obj_prob_index], axis=1)
        if has_subjective:
            # subjective problems: maximum over the related concepts
            mastery_row[sub_prob_index] = np.max(weighted[sub_prob_index], axis=1)
    return alpha, mastery


def get_LogLikelihood(A, B, theta, R, q_m, slip, guess, variance, obj_prob_index, sub_prob_index, skip_value=-1):
    """
    Calculate the log-likelihood of each response log.

    :param A (array): shape = (stu_num, know_num)
    :param B (array): shape = (stu_num, know_num)
    :param theta (array): shape = (stu_num, )
    :param R (array): response matrix, shape = (stu_num, prob_num)
    :param q_m (array): Q matrix, shape = (prob_num, know_num)
    :param slip (array): shape = (prob_num, )
    :param guess (array): shape = (prob_num, )
    :param variance (array): passed as the `scale` of the normal density on subjective problems
    :param obj_prob_index (array): index of all objective problems
    :param sub_prob_index (array): index of all subjective problems
    :param skip_value (int): entries of R equal to this value contribute 0
    :return: array of log-likelihoods, shape = (stu_num, prob_num)
    """
    _, mastery = cal_alpha_mastery(A, B, theta, q_m, obj_prob_index, sub_prob_index)
    # expected score of every student on every problem
    x = (1 - slip) * mastery + guess * (1 - mastery)
    result = np.zeros((R.shape[0], R.shape[1]))
    if len(obj_prob_index) > 0:
        # Bernoulli log-likelihood; 1e-9 guards against log(0)
        result[:, obj_prob_index] = (np.log(x + 1e-9) * R + np.log(1 - x + 1e-9) * (1 - R))[:, obj_prob_index]
    if len(sub_prob_index) > 0:
        # logpdf instead of log(pdf): the density underflows to 0 for large residuals or a small scale,
        # which would yield -inf here and nan acceptance ratios in the samplers.
        # NOTE(review): scipy's `scale` is a standard deviation, yet `variance` is passed unchanged -
        # confirm whether sqrt(variance) was intended.
        result[:, sub_prob_index] = stats.norm.logpdf(R, loc=x, scale=variance)[:, sub_prob_index]

    result[R == skip_value] = 0  # skip logs
    return result  # shape = (stu_num, prob_num)


# ---below are updating processes in MCMC for FuzzyCDF---
def update_A_B(A, B, theta, slip, guess, variance, R, q_m, obj_prob_index, sub_prob_index, skip_value, args):
    """
    One Metropolis-Hastings sweep over A and B, one knowledge concept at a time.

    A and B are modified in place; nothing is returned.

    :param A (array): shape = (stu_num, know_num), updated in place
    :param B (array): shape = (stu_num, know_num), updated in place
    :param args: hyper-parameters (mu_a, sig_a, mu_b, sig_b are read)
    The remaining parameters are forwarded unchanged to get_LogLikelihood.
    """
    know_num = A.shape[1]
    # random-walk proposals for every entry, drawn once up front
    new_A = A + 0.3 * stats.norm.rvs(size=A.shape)
    new_B = B + 0.3 * stats.norm.rvs(size=B.shape)
    for know in range(know_num):
        # candidate state: current values with only column `know` replaced by the proposal
        tempA = np.copy(A)
        tempB = np.copy(B)
        tempA[:, know] = new_A[:, know]
        tempB[:, know] = new_B[:, know]

        # l_0 is recomputed every iteration because A and B may have changed in the previous one
        l_0 = get_LogLikelihood(A, B, theta, R, q_m, slip, guess, variance, obj_prob_index, sub_prob_index, skip_value)
        l_1 = get_LogLikelihood(tempA, tempB, theta, R, q_m, slip, guess, variance, obj_prob_index, sub_prob_index,
                                skip_value)

        # per-student log posterior = summed log-likelihood + log prior of the entries being updated
        log_p0 = np.sum(l_0, axis=1) + np.log(stats.norm.pdf(x=B[:, know], loc=args.mu_b, scale=args.sig_b) + 1e-9) + \
            np.log(stats.lognorm.pdf(x=A[:, know], loc=0, scale=np.exp(args.mu_a), s=args.sig_a) + 1e-9)
        log_p1 = np.sum(l_1, axis=1) + np.log(stats.norm.pdf(x=tempB[:, know], loc=args.mu_b, scale=args.sig_b) + 1e-9)\
            + np.log(stats.lognorm.pdf(x=tempA[:, know], loc=0, scale=np.exp(args.mu_a), s=args.sig_a) + 1e-9)
        accept_prob = np.exp(np.minimum(log_p1 - log_p0, 0))  # avoid overflow in exp
        # one independent uniform draw per student; a single shared draw would couple all accept decisions
        mask = accept_prob >= np.random.random(accept_prob.shape)
        A[mask, know] = new_A[mask, know]
        B[mask, know] = new_B[mask, know]


def update_theta(A, B, theta, slip, guess, variance, R, q_m, obj_prob_index, sub_prob_index, skip_value, args):
    """
    One Metropolis-Hastings step for theta of every student.

    theta is modified in place; nothing is returned.

    :param theta (array): shape = (stu_num, ), updated in place
    :param args: hyper-parameters (mu_theta, sig_theta are read)
    The remaining parameters are forwarded unchanged to get_LogLikelihood.
    """
    # random-walk proposal
    new_theta = theta + 0.1 * stats.norm.rvs(size=theta.shape)

    l_0 = get_LogLikelihood(A, B, theta, R, q_m, slip, guess, variance, obj_prob_index, sub_prob_index, skip_value)
    l_1 = get_LogLikelihood(A, B, new_theta, R, q_m, slip, guess, variance, obj_prob_index, sub_prob_index, skip_value)

    # per-student log posterior = summed log-likelihood + log prior
    log_p0 = np.sum(l_0, axis=1) + np.log(stats.norm.pdf(x=theta, loc=args.mu_theta, scale=args.sig_theta) + 1e-9)
    log_p1 = np.sum(l_1, axis=1) + np.log(stats.norm.pdf(x=new_theta, loc=args.mu_theta, scale=args.sig_theta) + 1e-9)
    accept_prob = np.exp(np.minimum(log_p1 - log_p0, 0))  # avoid overflow in exp
    # one independent uniform draw per student; a single shared draw would couple all accept decisions
    mask = accept_prob >= np.random.random(accept_prob.shape)
    theta[mask] = new_theta[mask]


def update_slip_guess(A, B, theta, slip, guess, variance, R, q_m, obj_prob_index, sub_prob_index, skip_value, args):
    """
    One Metropolis-Hastings step for the slip and guess parameters of every problem.

    slip and guess are modified in place; nothing is returned.

    :param slip (array): shape = (prob_num, ), updated in place
    :param guess (array): shape = (prob_num, ), updated in place
    :param args: hyper-parameters (max_s, min_s, max_g, min_g are read)
    The remaining parameters are forwarded unchanged to get_LogLikelihood.
    """
    # NOTE(review): the proposal is shifted by -0.1, so it is not centred on the current value -
    # confirm this is intended. abs() reflects negative proposals back to non-negative values.
    new_slip = np.abs(slip + 0.2 * stats.norm.rvs(size=slip.shape) - 0.1)
    new_guess = np.abs(guess + 0.2 * stats.norm.rvs(size=guess.shape) - 0.1)

    l_0 = get_LogLikelihood(A, B, theta, R, q_m, slip, guess, variance, obj_prob_index, sub_prob_index, skip_value)
    l_1 = get_LogLikelihood(A, B, theta, R, q_m, new_slip, new_guess, variance, obj_prob_index, sub_prob_index,
                            skip_value)

    slip_range = args.max_s - args.min_s
    guess_range = args.max_g - args.min_g

    def log_prior(s, g):
        # Map values back to [0, 1] before applying the Beta(1, 2) prior. The minimum must be
        # subtracted because the parameters are initialised as beta * (max - min) + min.
        return np.log(stats.beta.pdf(x=(s - args.min_s) / slip_range, a=1, b=2) + 1e-9) + \
            np.log(stats.beta.pdf(x=(g - args.min_g) / guess_range, a=1, b=2) + 1e-9)

    # per-problem log posterior = summed log-likelihood + log prior
    log_p0 = np.sum(l_0, axis=0) + log_prior(slip, guess)
    log_p1 = np.sum(l_1, axis=0) + log_prior(new_slip, new_guess)
    accept_prob = np.exp(np.minimum(log_p1 - log_p0, 0))  # avoid overflow in exp
    # one independent uniform draw per problem; a single shared draw would couple all accept decisions
    mask = accept_prob >= np.random.random(accept_prob.shape)
    slip[mask] = new_slip[mask]
    guess[mask] = new_guess[mask]


def update_variance(A, B, theta, slip, guess, variance, R, q_m, obj_prob_index, sub_prob_index, skip_value):
    """
    One Metropolis-Hastings step for the variance parameter.

    Unlike the other update_* helpers, the input is not modified; the accepted value is returned.

    :param variance (array): current value
    :return: the proposed value if it is accepted, otherwise `variance` itself
    The remaining parameters are forwarded unchanged to get_LogLikelihood.
    """
    # proposal, clipped at 0 from below
    proposal = np.maximum(variance - 0.01 + 0.02 * stats.norm.rvs(size=variance.shape), 0)

    head = (A, B, theta, R, q_m, slip, guess)
    tail = (obj_prob_index, sub_prob_index, skip_value)

    def log_posterior(var):
        log_lik = get_LogLikelihood(*head, var, *tail)
        log_lik[:, obj_prob_index] = 0  # objective problems are excluded from this update
        # prior: Gamma(4, scale=1/6) density evaluated at the reciprocal of the value
        log_prior = np.log(stats.gamma.pdf(x=1 / (var + 1e-9), a=4, scale=1 / 6) + 1e-9)
        return np.sum(log_lik) + log_prior

    current_score = log_posterior(variance)
    proposal_score = log_posterior(proposal)
    accept_prob = np.exp(np.minimum(proposal_score - current_score, 0))  # avoid overflow in exp
    return proposal if accept_prob >= np.random.random(1) else variance
1 change: 1 addition & 0 deletions EduCDM/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
from .meta import CDM
from .MCD import MCD
from .DINA import DINA
from .FuzzyCDF import FuzzyCDF
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ The Model Zoo of Cognitive Diagnosis Models

* [MCD](EduCDM/MCD) [[doc]](docs/MCD.md) [[example]](examples/MCD)
* [DINA](EduCDM/DINA) [[doc]](docs/DINA.md) [[example]](examples/DINA)
* [FuzzyCDF](EduCDM/FuzzyCDF) [[doc]](docs/FuzzyCDF.md) [[example]](examples/FuzzyCDF)

## Contribute

Expand Down
6 changes: 6 additions & 0 deletions docs/FuzzyCDF.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Fuzzy cognitive diagnosis framework

For details of FuzzyCDF, please refer to Chapter 4 of the paper
"Fuzzy Cognitive Diagnosis for Modelling Examinee Performance" (2018).

![model](_static/FuzzyCDF.png)
Binary file added docs/_static/FuzzyCDF.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit 951ab1a

Please sign in to comment.