From a282d62fca78d5793fdb3ba262d118c304b99b34 Mon Sep 17 00:00:00 2001 From: Feng <33390790+fengxie009@users.noreply.github.com> Date: Wed, 7 Dec 2022 10:24:34 +0800 Subject: [PATCH 01/14] Delete GIN.py --- causallearn/search/HiddenCausal/GIN/GIN.py | 357 --------------------- 1 file changed, 357 deletions(-) delete mode 100644 causallearn/search/HiddenCausal/GIN/GIN.py diff --git a/causallearn/search/HiddenCausal/GIN/GIN.py b/causallearn/search/HiddenCausal/GIN/GIN.py deleted file mode 100644 index 17f31b20..00000000 --- a/causallearn/search/HiddenCausal/GIN/GIN.py +++ /dev/null @@ -1,357 +0,0 @@ -from collections import deque -from itertools import combinations - -import numpy as np -from scipy.stats import chi2 - -from causallearn.graph.GeneralGraph import GeneralGraph -from causallearn.graph.GraphNode import GraphNode -from causallearn.graph.NodeType import NodeType -from causallearn.graph.Edge import Edge -from causallearn.graph.Endpoint import Endpoint -from causallearn.search.FCMBased.lingam.hsic import hsic_test_gamma -from causallearn.utils.KCI.KCI import KCI_UInd - - -def fisher_test(pvals): - pvals = [pval if pval >= 1e-5 else 1e-5 for pval in pvals] - fisher_stat = -2.0 * np.sum(np.log(pvals)) - return 1 - chi2.cdf(fisher_stat, 2 * len(pvals)) - - -def GIN(data, indep_test_method='kci', alpha=0.05): - ''' - Learning causal structure of Latent Variables for Linear Non-Gaussian Latent Variable Model - with Generalized Independent Noise Condition - Parameters - ---------- - data : numpy ndarray - data set - indep_test_method : str, default='kci' - the name of the independence test being used - alpha : float, default=0.05 - desired significance level of independence tests (p_value) in (0,1) - Returns - ------- - G : general graph - causal graph - causal_order : list - causal order - ''' - n = data.shape[1] - cov = np.cov(data.T) - - if indep_test_method == 'kci': - kci = KCI_UInd() - - if indep_test_method not in ['kci', 'hsic']: - raise NotImplementedError((f"Independent test method {indep_test_method} is not implemented.")) - - def indep_test(x, y, method): - if method == 'kci': - return kci.compute_pvalue(x, y)[0] - elif method == 'hsic': - return hsic_test_gamma(x, y)[1] - else: - raise NotImplementedError((f"Independent test method {indep_test_method} is not implemented.")) - - var_set = set(range(n)) - cluster_size = 2 - clusters_list = [] - while cluster_size < len(var_set): - tmp_clusters_list = [] - for cluster in combinations(var_set, cluster_size): - remain_var_set = var_set - set(cluster) - e = cal_e_with_gin(data, cov, list(cluster), list(remain_var_set)) - pvals = [] - for z in range(len(remain_var_set)): - pvals.append(indep_test(data[:, [z]], e[:, None], method=indep_test_method)) - fisher_pval = fisher_test(pvals) - if fisher_pval >= alpha: - tmp_clusters_list.append(cluster) - tmp_clusters_list = merge_overlaping_cluster(tmp_clusters_list) - clusters_list = clusters_list + tmp_clusters_list - for cluster in tmp_clusters_list: - var_set -= set(cluster) - cluster_size += 1 - - causal_order = [] # this variable corresponds to K in paper - updated = True - while updated: - updated = False - X = [] - Z = [] - for cluster_k in causal_order: - cluster_k1, cluster_k2 = array_split(cluster_k, 2) - X += cluster_k1 - Z += cluster_k2 - - for i, cluster_i in enumerate(clusters_list): - is_root = True - cluster_i1, cluster_i2 = array_split(cluster_i, 2) - for j, cluster_j in enumerate(clusters_list): - if i == j: - continue - cluster_j1, cluster_j2 = array_split(cluster_j, 2) - 
e = cal_e_with_gin(data, cov, X + cluster_i1 + cluster_j1, Z + cluster_i2) - pvals = [] - for z in range(len(Z + cluster_i2)): - pvals.append(indep_test(data[:, [z]], e[:, None], method=indep_test_method)) - fisher_pval = fisher_test(pvals) - if fisher_pval < alpha: - is_root = False - break - if is_root: - causal_order.append(cluster_i) - clusters_list.remove(cluster_i) - updated = True - break - - G = GeneralGraph([]) - for var in var_set: - o_node = GraphNode(f"X{var + 1}") - G.add_node(o_node) - - latent_id = 1 - l_nodes = [] - - for cluster in causal_order: - l_node = GraphNode(f"L{latent_id}") - l_node.set_node_type(NodeType.LATENT) - G.add_node(l_node) - for l in l_nodes: - G.add_directed_edge(l, l_node) - l_nodes.append(l_node) - - for o in cluster: - o_node = GraphNode(f"X{o + 1}") - G.add_node(o_node) - G.add_directed_edge(l_node, o_node) - latent_id += 1 - - undirected_l_nodes = [] - - for cluster in clusters_list: - l_node = GraphNode(f"L{latent_id}") - l_node.set_node_type(NodeType.LATENT) - G.add_node(l_node) - for l in l_nodes: - G.add_directed_edge(l, l_node) - - for l in undirected_l_nodes: - G.add_edge(Edge(l, l_node, Endpoint.TAIL, Endpoint.TAIL)) - - undirected_l_nodes.append(l_node) - - for o in cluster: - o_node = GraphNode(f"X{o + 1}") - G.add_node(o_node) - G.add_directed_edge(l_node, o_node) - latent_id += 1 - - return G, causal_order - - -def GIN_MI(data): - ''' - Learning causal structure of Latent Variables for Linear Non-Gaussian Latent Variable Model - with Generalized Independent Noise Condition - - Parameters - ---------- - data : numpy ndarray - data set - - Returns - ------- - G : general graph - causal graph - causal_order : list - causal order - ''' - v_labels = list(range(data.shape[1])) - v_set = set(v_labels) - cov = np.cov(data.T) - - # Step 1: Finding Causal Clusters - cluster_list = [] - min_cluster = {i: set() for i in v_set} - min_dep_score = {i: 1e9 for i in v_set} - for (x1, x2) in combinations(v_set, 2): - x_set = {x1, x2} - z_set = v_set - x_set - dep_statistic = cal_dep_for_gin(data, cov, list(x_set), list(z_set)) - for i in x_set: - if min_dep_score[i] > dep_statistic: - min_dep_score[i] = dep_statistic - min_cluster[i] = x_set - for i in v_labels: - cluster_list.append(list(min_cluster[i])) - - cluster_list = merge_overlaping_cluster(cluster_list) - - # Step 2: Learning the Causal Order of Latent Variables - causal_order = [] # this variable corresponds to K in paper - while (len(cluster_list) != 0): - root = find_root(data, cov, cluster_list, causal_order) - causal_order.append(root) - cluster_list.remove(root) - - latent_id = 1 - l_nodes = [] - G = GeneralGraph([]) - for cluster in causal_order: - l_node = GraphNode(f"L{latent_id}") - l_node.set_node_type(NodeType.LATENT) - l_nodes.append(l_node) - G.add_node(l_node) - for l in l_nodes: - if l != l_node: - G.add_directed_edge(l, l_node) - for o in cluster: - o_node = GraphNode(f"X{o + 1}") - G.add_node(o_node) - G.add_directed_edge(l_node, o_node) - latent_id += 1 - - return G, causal_order - - -def cal_e_with_gin(data, cov, X, Z): - cov_m = cov[np.ix_(Z, X)] - _, _, v = np.linalg.svd(cov_m) - omega = v.T[:, -1] - return np.dot(data[:, X], omega) - - -def cal_dep_for_gin(data, cov, X, Z): - ''' - Calculate the statistics of dependence via Generalized Independent Noise Condition - - Parameters - ---------- - data : data set (numpy ndarray) - cov : covariance matrix - X : test set variables - Z : condition set variables - - Returns - ------- - sta : test statistic - ''' - - e_xz = 
cal_e_with_gin(data, cov, X, Z) - - sta = 0 - for i in Z: - sta += hsic_test_gamma(e_xz, data[:, i])[0] - sta /= len(Z) - return sta - - -def find_root(data, cov, clusters, causal_order): - ''' - Find the causal order by statistics of dependence - Parameters - ---------- - data : data set (numpy ndarray) - cov : covariance matrix - clusters : clusters of observed variables - causal_order : causal order - Returns - ------- - root : latent root cause - ''' - if len(clusters) == 1: - return clusters[0] - root = clusters[0] - dep_statistic_score = 1e30 - for i in clusters: - for j in clusters: - if i == j: - continue - X = [i[0], j[0]] - Z = [] - for k in range(1, len(i)): - Z.append(i[k]) - - if causal_order: - for k in causal_order: - X.append(k[0]) - Z.append(k[1]) - - dep_statistic = cal_dep_for_gin(data, cov, X, Z) - if dep_statistic < dep_statistic_score: - dep_statistic_score = dep_statistic - root = i - - return root - - -def _get_all_elements(S): - result = set() - for i in S: - for j in i: - result |= {j} - return result - - -# merging cluster -def merge_overlaping_cluster(cluster_list): - v_labels = _get_all_elements(cluster_list) - if len(v_labels) == 0: - return [] - cluster_dict = {i: -1 for i in v_labels} - cluster_b = {i: [] for i in v_labels} - cluster_len = 0 - for i in range(len(cluster_list)): - for j in cluster_list[i]: - cluster_b[j].append(i) - - visited = [False] * len(cluster_list) - cont = True - while cont: - cont = False - q = deque() - for i, val in enumerate(visited): - if not val: - q.append(i) - visited[i] = True - break - while q: - top = q.popleft() - for i in cluster_list[top]: - cluster_dict[i] = cluster_len - for j in cluster_b[i]: - if not visited[j]: - q.append(j) - visited[j] = True - - for i in visited: - if not i: - cont = True - break - cluster_len += 1 - - cluster = [[] for _ in range(cluster_len)] - for i in v_labels: - cluster[cluster_dict[i]].append(i) - - return cluster - - -def array_split(x, k): - x_len = len(x) - # div_points = [] - sub_arys = [] - start = 0 - section_len = x_len // k - extra = x_len % k - for i in range(extra): - sub_arys.append(x[start:start + section_len + 1]) - start = start + section_len + 1 - - for i in range(k - extra): - sub_arys.append(x[start:start + section_len]) - start = start + section_len - return sub_arys \ No newline at end of file From 89b80f1c8952cfcba73300544c0ff57a82e405e9 Mon Sep 17 00:00:00 2001 From: Feng <33390790+fengxie009@users.noreply.github.com> Date: Wed, 7 Dec 2022 10:25:08 +0800 Subject: [PATCH 02/14] Add files via upload --- .../search/HiddenCausal/GIN/FisherTest.py | 45 +++ causallearn/search/HiddenCausal/GIN/GIN.py | 68 +++++ causallearn/search/HiddenCausal/GIN/GIN2.py | 94 ++++++ causallearn/search/HiddenCausal/GIN/HSIC2.py | 101 +++++++ .../GIN/Identifying_Causal_Clusters.py | 70 +++++ .../HiddenCausal/GIN/Infer_Causal_Order.py | 70 +++++ .../search/HiddenCausal/GIN/Utils_V2.py | 127 ++++++++ .../search/HiddenCausal/GIN/independence.py | 38 +++ causallearn/search/HiddenCausal/GIN/mutual.py | 276 ++++++++++++++++++ 9 files changed, 889 insertions(+) create mode 100644 causallearn/search/HiddenCausal/GIN/FisherTest.py create mode 100644 causallearn/search/HiddenCausal/GIN/GIN.py create mode 100644 causallearn/search/HiddenCausal/GIN/GIN2.py create mode 100644 causallearn/search/HiddenCausal/GIN/HSIC2.py create mode 100644 causallearn/search/HiddenCausal/GIN/Identifying_Causal_Clusters.py create mode 100644 causallearn/search/HiddenCausal/GIN/Infer_Causal_Order.py create mode 100644 
causallearn/search/HiddenCausal/GIN/Utils_V2.py create mode 100644 causallearn/search/HiddenCausal/GIN/independence.py create mode 100644 causallearn/search/HiddenCausal/GIN/mutual.py diff --git a/causallearn/search/HiddenCausal/GIN/FisherTest.py b/causallearn/search/HiddenCausal/GIN/FisherTest.py new file mode 100644 index 00000000..0e1de1b8 --- /dev/null +++ b/causallearn/search/HiddenCausal/GIN/FisherTest.py @@ -0,0 +1,45 @@ +#------------------------------------------------------------------------------- +# Name: 模块1 +# Purpose: +# +# Author: YY +# +# Created: 03/03/2021 +# Copyright: (c) YY 2021 +# Licence: +#------------------------------------------------------------------------------- +import math +from scipy.stats import chi2 + +def FisherTest(pvals,alph=0.01): + Fisher_Stat=0 + L = len(pvals) + for i in range(0,L): + if pvals[i] ==0: + TP = 1e-05 + else: + TP = pvals[i] + + Fisher_Stat = Fisher_Stat-2*math.log(TP) + + + Fisher_pval = 1-chi2.cdf(Fisher_Stat, 2*L) #自由度2*L + + #print(Fisher_pval) + + if Fisher_pval >alph: + return True,Fisher_pval + else: + return False,Fisher_pval + + + + + + +def main(): + pvals = [0.01,0.9] + FisherTest(pvals,0.1) + +if __name__ == '__main__': + main() diff --git a/causallearn/search/HiddenCausal/GIN/GIN.py b/causallearn/search/HiddenCausal/GIN/GIN.py new file mode 100644 index 00000000..5e0963e2 --- /dev/null +++ b/causallearn/search/HiddenCausal/GIN/GIN.py @@ -0,0 +1,68 @@ +from causallearn.graph.GeneralGraph import GeneralGraph +from causallearn.graph.GraphNode import GraphNode +from causallearn.graph.NodeType import NodeType +from causallearn.search.FCMBased.lingam.hsic import hsic_test_gamma +from causallearn.utils.KCI.KCI import KCI_UInd + +import causallearn.search.HiddenCausal.GIN.Infer_Causal_Order as ICO +import causallearn.search.HiddenCausal.GIN.Identifying_Causal_Clusters as ICC + + + +def GIN(data, indep_test_method='kerpy', alpha=0.05): + + if indep_test_method not in ['kci', 'hsic', 'kerpy']: + raise NotImplementedError((f"Independent test method {indep_test_method} is not implemented.")) + + if indep_test_method == 'kci': + kci = KCI_UInd() + + if indep_test_method == 'kerpy': + from kerpy.GaussianKernel import GaussianKernel + from independence_testing.HSICSpectralTestObject import HSICSpectralTestObject + kernelX = GaussianKernel(float(0.1)) + kernelY = GaussianKernel(float(0.1)) + + num_samples = data.shape[0] + + myspectralobject = HSICSpectralTestObject(num_samples, kernelX=kernelX, kernelY=kernelY, + kernelX_use_median=False, kernelY_use_median=False, + rff=True, num_rfx=20, num_rfy=20, num_nullsims=500) + + + if indep_test_method == 'kci': + def indep_test(x, y): + return kci.compute_pvalue(x, y)[0] + elif indep_test_method == 'hsic': + def indep_test(x, y): + return hsic_test_gamma(x, y)[1] + elif indep_test_method == 'kerpy': + def indep_test(x, y): + return myspectralobject.compute_pvalue(x, y) + else: + raise NotImplementedError((f"Independent test method {indep_test_method} is not implemented.")) + + + #identifying causal cluster by using fast HSIC, set Signification Level(alhpa) =0.05 + Cluster=ICC.FindCluser(data,indep_test,alpha) + #identifying causal order by using mutual information, (k nearest neighbors (method='1') or sklearn package (method='2') + CausalOrder=ICO.LearnCausalOrder(Cluster,data,method='1') + + + latent_id = 1 + l_nodes = [] + G = GeneralGraph([]) + for cluster in CausalOrder: + l_node = GraphNode(f"L{latent_id}") + l_node.set_node_type(NodeType.LATENT) + l_nodes.append(l_node) + 
G.add_node(l_node) + for l in l_nodes: + if l != l_node: + G.add_directed_edge(l, l_node) + for o in cluster: + o_node = GraphNode(f"X{o + 1}") + G.add_node(o_node) + G.add_directed_edge(l_node, o_node) + latent_id += 1 + return G, CausalOrder diff --git a/causallearn/search/HiddenCausal/GIN/GIN2.py b/causallearn/search/HiddenCausal/GIN/GIN2.py new file mode 100644 index 00000000..97aac2ac --- /dev/null +++ b/causallearn/search/HiddenCausal/GIN/GIN2.py @@ -0,0 +1,94 @@ +import numpy as np +import pandas as pd +import causallearn.search.HiddenCausal.GIN.FisherTest as FisherTest +import causallearn.search.HiddenCausal.GIN.independence as ID + +#GIN by +#X=['X1','X2'] +#Z=['X3'] +#data.type=Pandas.DataFrame +def GIN(X, Z, data, test_function, alpha=0.05): + omega = getomega(data,X,Z) + tdata= data[:, X] + #print(tdata.T) + result = np.dot(tdata, omega)[:,None] + for i in Z: + temp = np.array(data[:, [i]]) + pval =test_function(result, temp) + if pval > alpha: + flag = True + else: + flag = False + + if not flag:#not false == ture ---> if false + #print(X,Z,flag) + return False + + return True + + + +#GIN by Fisher's method +def FisherGIN(X,Z,data,test_function,alpha=0.01): + omega = getomega(data,X,Z) + tdata= data[:,X] + result = np.dot(tdata, omega)[:,None] + pvals=[] + + for i in Z: + temp = np.array(data[:, [i]]) + pval=test_function(result, temp) + pvals.append(pval) + flag,fisher_pval=FisherTest.FisherTest(pvals,alpha) + + return flag + + + + + + +#mthod 1: estimating mutual information by k nearest neighbors (density estimation) +#mthod 2: estimating mutual information by sklearn package +def GIN_MI(X,Z,data,method='1'): + omega = getomega(data,X,Z) + tdata= data[:, X] + result = np.dot(tdata, omega) + MIS=0 + for i in Z: + + temp = np.array(data[:, i]) + if method =='1': + mi=ID.independent(result,temp) + else: + mi=ID.independent11(result,temp) + MIS+=mi + MIS = MIS/len(Z) + + return MIS + + + + + +def getomega(data,X,Z): + cov_m =np.cov(data,rowvar=False) + # print(f'{cov_m.shape = }') + col = list(range(data.shape[1])) + Xlist = [] + Zlist = [] + for i in X: + t = col.index(i) + Xlist.append(t) + for i in Z: + t = col.index(i) + Zlist.append(t) + B = cov_m[Xlist] + B = B[:,Zlist] + A = B.T + u,s,v = np.linalg.svd(A) + lens = len(X) + omega =v.T[:,lens-1] + + return omega + diff --git a/causallearn/search/HiddenCausal/GIN/HSIC2.py b/causallearn/search/HiddenCausal/GIN/HSIC2.py new file mode 100644 index 00000000..8f04c3ca --- /dev/null +++ b/causallearn/search/HiddenCausal/GIN/HSIC2.py @@ -0,0 +1,101 @@ +from kerpy.GaussianKernel import GaussianKernel +from numpy import shape,savetxt,loadtxt,transpose,shape,reshape,concatenate +from independence_testing.HSICSpectralTestObject import HSICSpectralTestObject +from independence_testing.HSICBlockTestObject import HSICBlockTestObject +import numpy as np +import pandas as pd + +#Fast HSIC, refer to ./kerpy-master/independence_testing/ExampleHSIC.py + +def test(x,y,alph=0.05): + lens = len(x) + x=x.reshape(lens,1) + y=y.reshape(lens,1) +## +## kernelX=GaussianKernel() +## kernelY=GaussianKernel() + + kernelY = GaussianKernel(float(0.1)) + kernelX=GaussianKernel(float(0.1)) + + + num_samples = lens + + myspectralobject = HSICSpectralTestObject(num_samples, kernelX=kernelX, kernelY=kernelY, + kernelX_use_median=False, kernelY_use_median=False, + rff=True, num_rfx=20, num_rfy=20, num_nullsims=500) + + pvalue = myspectralobject.compute_pvalue(x, y) + + #print(pvalue) + if pvalue >alph: + return True + else: + return False + #return pvalue + 
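# Illustrative sketch only, not part of the committed file: the spectral
# test above accepts independence when the p-value exceeds alph. Assuming
# kerpy is installed and test() is in scope, a smoke check might read:
def _spectral_hsic_smoke_check():
    x = np.random.uniform(size=2000)
    y = 0.5 * x + 0.1 * np.random.uniform(size=2000)   # y depends on x
    z = np.random.uniform(size=2000)                   # z independent of x
    print(test(x, y))   # expected False: small p-value, dependence detected
    print(test(x, z))   # expected True: p-value above alph, independence kept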
+def test2(x,y,alph=0.01): + lens = len(x) + x=x.reshape(lens,1) + y=y.reshape(lens,1) + kernelX=GaussianKernel() + kernelY=GaussianKernel() + + num_samples = lens + + myblockobject = HSICBlockTestObject(num_samples, kernelX=kernelX, kernelY=kernelY, + kernelX_use_median=True, kernelY_use_median=True, + blocksize=80, nullvarmethod='permutation') + + pvalue = myblockobject.compute_pvalue(x, y) + #print(pvalue) + if pvalue >alph: + return True + else: + return False + + + +def INtest(x,y,alph=0.01): + lens = len(x) + x=x.reshape(lens,1) + y=y.reshape(lens,1) + kernelX=GaussianKernel() + kernelY=GaussianKernel() +## kernelY = GaussianKernel(float(0.3)) +## kernelX=GaussianKernel(float(0.3)) + num_samples = lens + + myspectralobject = HSICSpectralTestObject(num_samples, kernelX=kernelX, kernelY=kernelY, + kernelX_use_median=True, kernelY_use_median=True, + rff=True, num_rfx=20, num_rfy=20, num_nullsims=1000) + + pvalue = myspectralobject.compute_pvalue(x, y) + + return pvalue + +def INtest2(x,y,alph=0.01): + lens = len(x) + x=x.reshape(lens,1) + y=y.reshape(lens,1) + kernelX=GaussianKernel() + kernelY=GaussianKernel() + num_samples = lens + + myblockobject = HSICBlockTestObject(num_samples, kernelX=kernelX, kernelY=kernelY, + kernelX_use_median=True, kernelY_use_median=True, + blocksize=200, nullvarmethod='permutation') + + pvalue = myblockobject.compute_pvalue(x, y) + + return pvalue + + + +def main(): + x=np.random.uniform(size=10000) + y=np.random.uniform(size=10000)+x + test2(x,y) + +if __name__ == '__main__': + main() diff --git a/causallearn/search/HiddenCausal/GIN/Identifying_Causal_Clusters.py b/causallearn/search/HiddenCausal/GIN/Identifying_Causal_Clusters.py new file mode 100644 index 00000000..6052b7cd --- /dev/null +++ b/causallearn/search/HiddenCausal/GIN/Identifying_Causal_Clusters.py @@ -0,0 +1,70 @@ +import causallearn.search.HiddenCausal.GIN.GIN2 as GIN2 #based on hsic to find cluster +import causallearn.search.HiddenCausal.GIN.Utils_V2 as Utils #overlap merge utils +import itertools + +#return the combination of list +def FindCombination(Lists,N): + return itertools.combinations(Lists,N) + + +#GIN test by fast HSIC +#X and Z are list, e.g., X=['X1','X2'] Z=['X3'] +#Data.type=Pandas.DataFrame, where data.columns=['x1','x2',...] 
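#The GIN condition behind this wrapper: omega is chosen via the SVD in
#GIN2.getomega so that E = omega'X is uncorrelated with Z by construction;
#a cluster is accepted only when E additionally passes the supplied
#test_function for statistical independence from Z.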
+def GIN(X,Z,data,test_function,alpha=0.05): + return GIN2.GIN(X,Z,data,test_function,alpha) + #HSIC with fisher method + #return GIN2.FisherGIN(X,Z,data,alpha) + + +#identifying causal cluster from 1-factor to n-factor +#limit=N is to limited the n-factor we found +def FindCluser(data, test_function, alhpa=0.05,limit=0): + indexs=list(range(data.shape[1])) + B = indexs.copy() + Cluster={} + Grlen=2 + + #finding causal cluster, 1-factor to n-factor, recorded by dic type, e.g., {'1':[['x1','x2']],'2':[['x4','x5','x6']} + while len(B) >= Grlen and len(indexs) >=2*Grlen-1: + LatentNum=Grlen-1 + Set_P=FindCombination(B,Grlen) + print('identifying causal cluster: '+str(LatentNum)+'-factor model:') + for P in Set_P: + tind=indexs.copy() + for t in P: + tind.remove(t) # tind= ALLdata\P + if GIN(list(P),tind,data,test_function,alhpa): + key=Cluster.keys() + key = list(key) + if (LatentNum) in key: + temp =Cluster[LatentNum] + temp.append(list(P)) + Cluster[LatentNum]=temp + else: + Cluster[LatentNum]=[list(P)] + key=Cluster.keys() + if LatentNum in key: + Tclu=Cluster[LatentNum] + Tclu=Utils.merge_list(Tclu) + Cluster[LatentNum]=Tclu + #update the B + for i in Tclu: + for j in i: + if j in B: + B.remove(j) + + + Grlen+=1 + print('The identified cluster in identifying '+ str(LatentNum)+ '-factor model:') + print(Cluster) + + #limit the n-factor we found + if limit !=0 and (Grlen-1)>limit: + break + + + + return Cluster + + + diff --git a/causallearn/search/HiddenCausal/GIN/Infer_Causal_Order.py b/causallearn/search/HiddenCausal/GIN/Infer_Causal_Order.py new file mode 100644 index 00000000..514489c1 --- /dev/null +++ b/causallearn/search/HiddenCausal/GIN/Infer_Causal_Order.py @@ -0,0 +1,70 @@ +import causallearn.search.HiddenCausal.GIN.GIN2 as GIN #GIN based on mutual information + + +# identifying causal order by identifying root node recursively. 
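# at each step FindRoot scores every remaining cluster by the averaged mutual
# information between its GIN surrogate and the conditioning set, takes the
# cluster with the smallest score as the next root, appends it to K and
# removes it from the pool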
+#cluster.type=dic, which is learned by phase I {identifying causal cluster} +#method 1 = estimating mutual information by k nearest neighbors +def LearnCausalOrder(cluster,data,method='1'): + Cluster=GetCluster(cluster) + #print(Cluster) + + K=[] #causal order + while(len(Cluster) >0): + root = FindRoot(Cluster,data,K,method='1') + K.append(root) + Cluster.remove(root) + return K + +#finding root node by mutual information based GIN +def FindRoot(clu1,data1,K1,method='1'): + cluster=clu1.copy() + data =data1.copy() + K=K1.copy() + if len(cluster) ==1: + return cluster[0] + + MIS=[] + for clu in cluster: + other=cluster.copy() + other.remove(clu) + TempM=0 + for o in other: + X=[] + Z=[] + + for i in range(0,len(clu)): + if i < len(clu)/2: + X.append(clu[i]) + else: + Z.append(clu[i]) + for i in range(0,len(o)): + if i < len(o)/2: + X.append(o[i]) + + + if len(K) != 0: + for i in K: + for j in range(0,len(i)): + if j < int(len(i)/2): + X.append(i[j]) + else: + Z.append(i[j]) + + #print(X,Z) + mi = GIN.GIN_MI(X,Z,data,method='1') + TempM+=mi + TempM =TempM + MIS.append(TempM) + + mins=MIS.index(min(MIS)) + root = cluster[mins] + return root + +def GetCluster(cluster): + Clu=[] + key=cluster.keys() + for i in key: + C=cluster[i] + for j in C: + Clu.append(j) + return Clu diff --git a/causallearn/search/HiddenCausal/GIN/Utils_V2.py b/causallearn/search/HiddenCausal/GIN/Utils_V2.py new file mode 100644 index 00000000..04513229 --- /dev/null +++ b/causallearn/search/HiddenCausal/GIN/Utils_V2.py @@ -0,0 +1,127 @@ +import numpy as np +import pandas as pd +from itertools import combinations +import itertools +import networkx as nx + + + + + + + +def bfs(graph, start): + visited, queue = set(), [start] + while queue: + vertex = queue.pop(0) + if vertex not in visited: + visited.add(vertex) + queue.extend(graph[vertex] - visited) + return visited + +def connected_components(G): + seen = set() + for v in G: + if v not in seen: + c = set(bfs(G, v)) + yield c + seen.update(c) + +def graph(edge_list): + result = {} + for source, target in edge_list: + result.setdefault(source, set()).add(target) + result.setdefault(target, set()).add(source) + return result + + +def merge_list(L2): + l=L2.copy() + + edges = [] + s = list(map(set, l)) + for i, j in combinations(range(len(s)), r=2): + + if s[i].intersection(s[j]): + edges.append((i, j)) + + G = graph(edges) + + result = [] + unassigned = list(range(len(s))) + + for component in connected_components(G): + + union = set().union(*(s[i] for i in component)) + + result.append(sorted(union)) + + unassigned = [i for i in unassigned if i not in component] + + + result.extend(map(sorted, (s[i] for i in unassigned))) + + return result + + +#merge withour change any order +def merge_listNew(L2): + + + + l=L2.copy() + + + edges = [] + s = list(map(set, l)) + for i, j in combinations(range(len(s)), r=2): + if s[i].intersection(s[j]): + edges.append((i, j)) + + G = graph(edges) + + result = [] + unassigned = list(range(len(s))) + + for component in connected_components(G): + + component=list(component) + + Cluster=l[component[0]] + + adj= s[0].intersection(s[1]) + for i in component: + if i == 0: + continue + temp=l[i] + for j in list(adj): + if j in temp: + temp.remove(j) + + for j in range(0,len(temp)): + if j < (len(temp)/2): + Cluster.insert(0,temp[j]) + else: + Cluster.append(temp[j]) + + result.append(Cluster) + + + + unassigned = [i for i in unassigned if i not in component] + + + result.extend(map(sorted, (s[i] for i in unassigned))) + + return result + + + + 
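# Illustrative sketch only, not part of the committed file: merge_list
# collapses clusters that share any element into one connected component.
def _merge_list_demo():
    clusters = [['x1', 'x2'], ['x3', 'x4'], ['x5', 'x6', 'x2'], ['x1', 'x9']]
    # 'x2' joins the first and third lists, 'x1' joins the first and fourth,
    # so three of the four lists collapse while ['x3', 'x4'] stays separate.
    print(merge_list(clusters))
    # expected, up to component order: [['x1', 'x2', 'x5', 'x6', 'x9'], ['x3', 'x4']]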
+def main(): + L=[['x1','x2'],['x3','x4'],['x5','x6','x2'],['x1','x9']] + print(merge_list(L)) + + +if __name__ == '__main__': + main() diff --git a/causallearn/search/HiddenCausal/GIN/independence.py b/causallearn/search/HiddenCausal/GIN/independence.py new file mode 100644 index 00000000..dd64ef6b --- /dev/null +++ b/causallearn/search/HiddenCausal/GIN/independence.py @@ -0,0 +1,38 @@ +import numpy as np +import pandas as pd +import causallearn.search.HiddenCausal.GIN.mutual as MI +from sklearn import metrics + +#estimating mutual information by sklearn +def independent11(x1,y1): + x=x1.copy() + y=y1.copy() + length=len(x) +## x=x.reshape(length,1) +## y=y.reshape(length,1) + x = list(x) + y = list(y) + + result_NMI=metrics.normalized_mutual_info_score(x, y) + print (result_NMI) + return result_NMI + +#estimating mutual information by Non-parametric computation of entropy and mutual-information +def independent(x1,y1): + x=x1.copy() + y=y1.copy() + length=len(x) + x=x.reshape(length,1) + y=y.reshape(length,1) + + if length >3000: + k=15 + else: + k=10 + + mi= MI.mutual_information((x,y),k) + return abs(mi) + + + + diff --git a/causallearn/search/HiddenCausal/GIN/mutual.py b/causallearn/search/HiddenCausal/GIN/mutual.py new file mode 100644 index 00000000..35c4cb8b --- /dev/null +++ b/causallearn/search/HiddenCausal/GIN/mutual.py @@ -0,0 +1,276 @@ + + +''' +Non-parametric computation of entropy and mutual-information +Adapted by G Varoquaux for code created by R Brette, itself +from several papers (see in the code). +These computations rely on nearest-neighbor statistics +''' +import numpy as np + +from scipy.special import gamma,psi +from scipy import ndimage +from scipy.linalg import det +from numpy import pi + +from sklearn.neighbors import NearestNeighbors + +__all__=['entropy', 'mutual_information', 'entropy_gaussian'] + +EPS = np.finfo(float).eps + + +def nearest_distances(X, k=1): + ''' + X = array(N,M) + N = number of points + M = number of dimensions + returns the distance to the kth nearest neighbor for every point in X + ''' + knn = NearestNeighbors(n_neighbors=k + 1) + knn.fit(X) + d, _ = knn.kneighbors(X) # the first nearest neighbor is itself + return d[:, -1] # returns the distance to the kth nearest neighbor + + +def entropy_gaussian(C): + ''' + Entropy of a gaussian variable with covariance matrix C + ''' + if np.isscalar(C): # C is the variance + return .5*(1 + np.log(2*pi)) + .5*np.log(C) + else: + n = C.shape[0] # dimension + return .5*n*(1 + np.log(2*pi)) + .5*np.log(abs(det(C))) + + +def entropy2(u,k=1): + k1 = 79.047 + k2 = 7.4129 + gamma = 0.37457 + return (1 + np.log(2 * np.pi)) / 2 - k1 * (np.mean(np.log(np.cosh(u))) - gamma)**2 - k2 * (np.mean(u * np.exp((-u**2) / 2)))**2 + + +def entropy(X, k=1): + ''' Returns the entropy of the X. + Parameters + =========== + X : array-like, shape (n_samples, n_features) + The data the entropy of which is computed + k : int, optional + number of nearest neighbors for density estimation + Notes + ====== + Kozachenko, L. F. & Leonenko, N. N. 1987 Sample estimate of entropy + of a random vector. Probl. Inf. Transm. 23, 95-101. + See also: Evans, D. 2008 A computationally efficient estimator for + mutual information, Proc. R. Soc. A 464 (2093), 1203-1215. + and: + Kraskov A, Stogbauer H, Grassberger P. (2004). Estimating mutual + information. Phys Rev E 69(6 Pt 2):066138. 
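    The estimator implemented below is
    H = d * mean_i log r_i(k) + log V_d + psi(n) - psi(k),
    where r_i(k) is the distance from sample i to its k-th nearest
    neighbour and V_d is the volume of the d-dimensional unit ball.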
+ ''' + + # Distance to kth nearest neighbor + r = nearest_distances(X, k) # squared distances + n, d = X.shape + volume_unit_ball = (pi**(.5*d)) / gamma(.5*d + 1) + ''' + F. Perez-Cruz, (2008). Estimation of Information Theoretic Measures + for Continuous Random Variables. Advances in Neural Information + Processing Systems 21 (NIPS). Vancouver (Canada), December. + return d*mean(log(r))+log(volume_unit_ball)+log(n-1)-log(k) + ''' + return (d*np.mean(np.log(r + np.finfo(X.dtype).eps)) + + np.log(volume_unit_ball) + psi(n) - psi(k)) + + +def mutual_information(variables, k=1): + ''' + Returns the mutual information between any number of variables. + Each variable is a matrix X = array(n_samples, n_features) + where + n = number of samples + dx,dy = number of dimensions + Optionally, the following keyword argument can be specified: + k = number of nearest neighbors for density estimation + Example: mutual_information((X, Y)), mutual_information((X, Y, Z), k=5) + ''' + if len(variables) < 2: + raise AttributeError( + "Mutual information must involve at least 2 variables") + all_vars = np.hstack(variables) + return (sum([entropy(X, k=k) for X in variables]) + - entropy(all_vars, k=k)) + + +def mutual_information_2d(x, y, sigma=1, normalized=False): + """ + Computes (normalized) mutual information between two 1D variate from a + joint histogram. + Parameters + ---------- + x : 1D array + first variable + y : 1D array + second variable + sigma: float + sigma for Gaussian smoothing of the joint histogram + Returns + ------- + nmi: float + the computed similariy measure + """ + bins = (256, 256) + + jh = np.histogram2d(x, y, bins=bins)[0] + + # smooth the jh with a gaussian filter of given sigma + ndimage.gaussian_filter(jh, sigma=sigma, mode='constant', + output=jh) + + # compute marginal histograms + jh = jh + EPS + sh = np.sum(jh) + jh = jh / sh + s1 = np.sum(jh, axis=0).reshape((-1, jh.shape[0])) + s2 = np.sum(jh, axis=1).reshape((jh.shape[1], -1)) + + # Normalised Mutual Information of: + # Studholme, jhill & jhawkes (1998). + # "A normalized entropy measure of 3-D medical image alignment". + # in Proc. Medical Imaging 1998, vol. 3338, San Diego, CA, pp. 132-143. 
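    # With normalized=True this returns the Studholme overlap measure
    # (H(x) + H(y)) / H(x, y) - 1; otherwise it returns plain mutual
    # information in nats, MI = H(x) + H(y) - H(x, y), computed from the
    # smoothed joint histogram jh and its marginals s1, s2.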
+ if normalized: + mi = ((np.sum(s1 * np.log(s1)) + np.sum(s2 * np.log(s2))) + / np.sum(jh * np.log(jh))) - 1 + else: + mi = ( np.sum(jh * np.log(jh)) - np.sum(s1 * np.log(s1)) + - np.sum(s2 * np.log(s2))) + + return mi + + + +############################################################################### +# Tests + +def test_entropy(): + # Testing against correlated Gaussian variables + # (analytical results are known) + # Entropy of a 3-dimensional gaussian variable + rng = np.random.RandomState(0) + n = 50000 + d = 3 + P = np.array([[1, 0, 0], [0, 1, .5], [0, 0, 1]]) + C = np.dot(P, P.T) + Y = rng.randn(d, n) + X = np.dot(P, Y) + H_th = entropy_gaussian(C) + H_est = entropy(X.T, k=5) + # Our estimated entropy should always be less that the actual one + # (entropy estimation undershoots) but not too much + np.testing.assert_array_less(H_est, H_th) + np.testing.assert_array_less(.9*H_th, H_est) + + +def test_mutual_information(): + # Mutual information between two correlated gaussian variables + # Entropy of a 2-dimensional gaussian variable + n = 50000 + rng = np.random.RandomState(0) + #P = np.random.randn(2, 2) + P = np.array([[1, 0], [0.5, 1]]) + C = np.dot(P, P.T) + U = rng.randn(2, n) + Z = np.dot(P, U).T + X = Z[:, 0] + X = X.reshape(len(X), 1) + Y = Z[:, 1] + Y = Y.reshape(len(Y), 1) + # in bits + MI_est = mutual_information((X, Y), k=5) + MI_th = (entropy_gaussian(C[0, 0]) + + entropy_gaussian(C[1, 1]) + - entropy_gaussian(C) + ) + # Our estimator should undershoot once again: it will undershoot more + # for the 2D estimation that for the 1D estimation + print((MI_est, MI_th)) + np.testing.assert_array_less(MI_est, MI_th) + np.testing.assert_array_less(MI_th, MI_est + .3) + + +def test_degenerate(): + # Test that our estimators are well-behaved with regards to + # degenerate solutions + rng = np.random.RandomState(0) + x = rng.randn(50000) + X = np.c_[x, x] + assert np.isfinite(entropy(X)) + assert np.isfinite(mutual_information((x[:, np.newaxis], + x[:, np.newaxis]))) + assert 2.9 < mutual_information_2d(x, x) < 3.1 + + +def test_mutual_information_2d(): + # Mutual information between two correlated gaussian variables + # Entropy of a 2-dimensional gaussian variable + n = 50000 + rng = np.random.RandomState(0) + #P = np.random.randn(2, 2) + P = np.array([[1, 0], [.9, .1]]) + C = np.dot(P, P.T) + U = rng.randn(2, n) + Z = np.dot(P, U).T + X = Z[:, 0] + X = X.reshape(len(X), 1) + Y = Z[:, 1] + Y = Y.reshape(len(Y), 1) + # in bits + MI_est = mutual_information_2d(X.ravel(), Y.ravel()) + MI_th = (entropy_gaussian(C[0, 0]) + + entropy_gaussian(C[1, 1]) + - entropy_gaussian(C) + ) + print((MI_est, MI_th)) + # Our estimator should undershoot once again: it will undershoot more + # for the 2D estimation that for the 1D estimation + np.testing.assert_array_less(MI_est, MI_th) + np.testing.assert_array_less(MI_th, MI_est + .2) + +def _entropy(u): + """Calculate entropy using the maximum entropy approximations.""" + k1 = 79.047 + k2 = 7.4129 + gamma = 0.37457 + return (1 + np.log(2 * np.pi)) / 2 - k1 * (np.mean(np.log(np.cosh(u))) - gamma)**2 - k2 * (np.mean(u * np.exp((-u**2) / 2)))**2 + +def main(): + x=np.random.uniform(size=1000) + y=np.random.uniform(size=1000)*0.2+0.5*x + + mi=mutual_information_2d(x,y,normalized=True) + + x=x.reshape(1000,1) + y=y.reshape(1000,1) + + mi2=mutual_information((x,y),k=10) + print(mi) + print(mi2) +## +## x=x.reshape(10000,1) +## y=y.reshape(10000,1) +## a =entropy(x) +## b=_entropy(x) +## +## print(a) +## print(b) + + + +if __name__ == '__main__': + # 
Run our tests +## test_entropy() +## test_mutual_information() +## test_degenerate() +## test_mutual_information_2d() + main() \ No newline at end of file From fcbafd3b75c95bfa97e5028b1f94dc573d0d2383 Mon Sep 17 00:00:00 2001 From: Feng Xie <33390790+fengxie009@users.noreply.github.com> Date: Sat, 17 Dec 2022 09:13:29 +0800 Subject: [PATCH 03/14] Delete FisherTest.py --- .../search/HiddenCausal/GIN/FisherTest.py | 45 ------------------- 1 file changed, 45 deletions(-) delete mode 100644 causallearn/search/HiddenCausal/GIN/FisherTest.py diff --git a/causallearn/search/HiddenCausal/GIN/FisherTest.py b/causallearn/search/HiddenCausal/GIN/FisherTest.py deleted file mode 100644 index 0e1de1b8..00000000 --- a/causallearn/search/HiddenCausal/GIN/FisherTest.py +++ /dev/null @@ -1,45 +0,0 @@ -#------------------------------------------------------------------------------- -# Name: 模块1 -# Purpose: -# -# Author: YY -# -# Created: 03/03/2021 -# Copyright: (c) YY 2021 -# Licence: -#------------------------------------------------------------------------------- -import math -from scipy.stats import chi2 - -def FisherTest(pvals,alph=0.01): - Fisher_Stat=0 - L = len(pvals) - for i in range(0,L): - if pvals[i] ==0: - TP = 1e-05 - else: - TP = pvals[i] - - Fisher_Stat = Fisher_Stat-2*math.log(TP) - - - Fisher_pval = 1-chi2.cdf(Fisher_Stat, 2*L) #自由度2*L - - #print(Fisher_pval) - - if Fisher_pval >alph: - return True,Fisher_pval - else: - return False,Fisher_pval - - - - - - -def main(): - pvals = [0.01,0.9] - FisherTest(pvals,0.1) - -if __name__ == '__main__': - main() From 200d8339c6287d3034185e41c016c354686fecf9 Mon Sep 17 00:00:00 2001 From: Feng Xie <33390790+fengxie009@users.noreply.github.com> Date: Sat, 17 Dec 2022 09:13:56 +0800 Subject: [PATCH 04/14] Add files via upload --- .../search/HiddenCausal/GIN/FisherTest.py | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 causallearn/search/HiddenCausal/GIN/FisherTest.py diff --git a/causallearn/search/HiddenCausal/GIN/FisherTest.py b/causallearn/search/HiddenCausal/GIN/FisherTest.py new file mode 100644 index 00000000..46b9893b --- /dev/null +++ b/causallearn/search/HiddenCausal/GIN/FisherTest.py @@ -0,0 +1,21 @@ +import math +from scipy.stats import chi2 + +def FisherTest(pvals, alpha = 0.01): + Fisher_Stat = 0 + L = len(pvals) + for i in range(0,L): + if pvals[i] == 0: + TP = 1e-05 + else: + TP = pvals[i] + + Fisher_Stat = Fisher_Stat - 2*math.log(TP) + + Fisher_pval = 1 - chi2.cdf(Fisher_Stat, 2*L) + + if Fisher_pval >alpha: + return True, Fisher_pval + else: + return False, Fisher_pval + From 0db446d381bd7c1ee4771da58722ae82d8cab279 Mon Sep 17 00:00:00 2001 From: Feng Xie <33390790+fengxie009@users.noreply.github.com> Date: Sat, 17 Dec 2022 09:14:15 +0800 Subject: [PATCH 05/14] Delete GIN.py --- causallearn/search/HiddenCausal/GIN/GIN.py | 68 ---------------------- 1 file changed, 68 deletions(-) delete mode 100644 causallearn/search/HiddenCausal/GIN/GIN.py diff --git a/causallearn/search/HiddenCausal/GIN/GIN.py b/causallearn/search/HiddenCausal/GIN/GIN.py deleted file mode 100644 index 5e0963e2..00000000 --- a/causallearn/search/HiddenCausal/GIN/GIN.py +++ /dev/null @@ -1,68 +0,0 @@ -from causallearn.graph.GeneralGraph import GeneralGraph -from causallearn.graph.GraphNode import GraphNode -from causallearn.graph.NodeType import NodeType -from causallearn.search.FCMBased.lingam.hsic import hsic_test_gamma -from causallearn.utils.KCI.KCI import KCI_UInd - -import causallearn.search.HiddenCausal.GIN.Infer_Causal_Order as ICO 
-import causallearn.search.HiddenCausal.GIN.Identifying_Causal_Clusters as ICC - - - -def GIN(data, indep_test_method='kerpy', alpha=0.05): - - if indep_test_method not in ['kci', 'hsic', 'kerpy']: - raise NotImplementedError((f"Independent test method {indep_test_method} is not implemented.")) - - if indep_test_method == 'kci': - kci = KCI_UInd() - - if indep_test_method == 'kerpy': - from kerpy.GaussianKernel import GaussianKernel - from independence_testing.HSICSpectralTestObject import HSICSpectralTestObject - kernelX = GaussianKernel(float(0.1)) - kernelY = GaussianKernel(float(0.1)) - - num_samples = data.shape[0] - - myspectralobject = HSICSpectralTestObject(num_samples, kernelX=kernelX, kernelY=kernelY, - kernelX_use_median=False, kernelY_use_median=False, - rff=True, num_rfx=20, num_rfy=20, num_nullsims=500) - - - if indep_test_method == 'kci': - def indep_test(x, y): - return kci.compute_pvalue(x, y)[0] - elif indep_test_method == 'hsic': - def indep_test(x, y): - return hsic_test_gamma(x, y)[1] - elif indep_test_method == 'kerpy': - def indep_test(x, y): - return myspectralobject.compute_pvalue(x, y) - else: - raise NotImplementedError((f"Independent test method {indep_test_method} is not implemented.")) - - - #identifying causal cluster by using fast HSIC, set Signification Level(alhpa) =0.05 - Cluster=ICC.FindCluser(data,indep_test,alpha) - #identifying causal order by using mutual information, (k nearest neighbors (method='1') or sklearn package (method='2') - CausalOrder=ICO.LearnCausalOrder(Cluster,data,method='1') - - - latent_id = 1 - l_nodes = [] - G = GeneralGraph([]) - for cluster in CausalOrder: - l_node = GraphNode(f"L{latent_id}") - l_node.set_node_type(NodeType.LATENT) - l_nodes.append(l_node) - G.add_node(l_node) - for l in l_nodes: - if l != l_node: - G.add_directed_edge(l, l_node) - for o in cluster: - o_node = GraphNode(f"X{o + 1}") - G.add_node(o_node) - G.add_directed_edge(l_node, o_node) - latent_id += 1 - return G, CausalOrder From 9ad04fafb1d831fcf1b8e4bcb65e73a1c43ea73f Mon Sep 17 00:00:00 2001 From: Feng Xie <33390790+fengxie009@users.noreply.github.com> Date: Sat, 17 Dec 2022 09:14:23 +0800 Subject: [PATCH 06/14] Delete GIN2.py --- causallearn/search/HiddenCausal/GIN/GIN2.py | 94 --------------------- 1 file changed, 94 deletions(-) delete mode 100644 causallearn/search/HiddenCausal/GIN/GIN2.py diff --git a/causallearn/search/HiddenCausal/GIN/GIN2.py b/causallearn/search/HiddenCausal/GIN/GIN2.py deleted file mode 100644 index 97aac2ac..00000000 --- a/causallearn/search/HiddenCausal/GIN/GIN2.py +++ /dev/null @@ -1,94 +0,0 @@ -import numpy as np -import pandas as pd -import causallearn.search.HiddenCausal.GIN.FisherTest as FisherTest -import causallearn.search.HiddenCausal.GIN.independence as ID - -#GIN by -#X=['X1','X2'] -#Z=['X3'] -#data.type=Pandas.DataFrame -def GIN(X, Z, data, test_function, alpha=0.05): - omega = getomega(data,X,Z) - tdata= data[:, X] - #print(tdata.T) - result = np.dot(tdata, omega)[:,None] - for i in Z: - temp = np.array(data[:, [i]]) - pval =test_function(result, temp) - if pval > alpha: - flag = True - else: - flag = False - - if not flag:#not false == ture ---> if false - #print(X,Z,flag) - return False - - return True - - - -#GIN by Fisher's method -def FisherGIN(X,Z,data,test_function,alpha=0.01): - omega = getomega(data,X,Z) - tdata= data[:,X] - result = np.dot(tdata, omega)[:,None] - pvals=[] - - for i in Z: - temp = np.array(data[:, [i]]) - pval=test_function(result, temp) - pvals.append(pval) - 
flag,fisher_pval=FisherTest.FisherTest(pvals,alpha) - - return flag - - - - - - -#mthod 1: estimating mutual information by k nearest neighbors (density estimation) -#mthod 2: estimating mutual information by sklearn package -def GIN_MI(X,Z,data,method='1'): - omega = getomega(data,X,Z) - tdata= data[:, X] - result = np.dot(tdata, omega) - MIS=0 - for i in Z: - - temp = np.array(data[:, i]) - if method =='1': - mi=ID.independent(result,temp) - else: - mi=ID.independent11(result,temp) - MIS+=mi - MIS = MIS/len(Z) - - return MIS - - - - - -def getomega(data,X,Z): - cov_m =np.cov(data,rowvar=False) - # print(f'{cov_m.shape = }') - col = list(range(data.shape[1])) - Xlist = [] - Zlist = [] - for i in X: - t = col.index(i) - Xlist.append(t) - for i in Z: - t = col.index(i) - Zlist.append(t) - B = cov_m[Xlist] - B = B[:,Zlist] - A = B.T - u,s,v = np.linalg.svd(A) - lens = len(X) - omega =v.T[:,lens-1] - - return omega - From 1b2d4c7b2936f887288ba2cfbe854af19c6c0307 Mon Sep 17 00:00:00 2001 From: Feng Xie <33390790+fengxie009@users.noreply.github.com> Date: Sat, 17 Dec 2022 09:15:16 +0800 Subject: [PATCH 07/14] Delete HSIC2.py --- causallearn/search/HiddenCausal/GIN/HSIC2.py | 101 ------------------- 1 file changed, 101 deletions(-) delete mode 100644 causallearn/search/HiddenCausal/GIN/HSIC2.py diff --git a/causallearn/search/HiddenCausal/GIN/HSIC2.py b/causallearn/search/HiddenCausal/GIN/HSIC2.py deleted file mode 100644 index 8f04c3ca..00000000 --- a/causallearn/search/HiddenCausal/GIN/HSIC2.py +++ /dev/null @@ -1,101 +0,0 @@ -from kerpy.GaussianKernel import GaussianKernel -from numpy import shape,savetxt,loadtxt,transpose,shape,reshape,concatenate -from independence_testing.HSICSpectralTestObject import HSICSpectralTestObject -from independence_testing.HSICBlockTestObject import HSICBlockTestObject -import numpy as np -import pandas as pd - -#Fast HSIC, refer to ./kerpy-master/independence_testing/ExampleHSIC.py - -def test(x,y,alph=0.05): - lens = len(x) - x=x.reshape(lens,1) - y=y.reshape(lens,1) -## -## kernelX=GaussianKernel() -## kernelY=GaussianKernel() - - kernelY = GaussianKernel(float(0.1)) - kernelX=GaussianKernel(float(0.1)) - - - num_samples = lens - - myspectralobject = HSICSpectralTestObject(num_samples, kernelX=kernelX, kernelY=kernelY, - kernelX_use_median=False, kernelY_use_median=False, - rff=True, num_rfx=20, num_rfy=20, num_nullsims=500) - - pvalue = myspectralobject.compute_pvalue(x, y) - - #print(pvalue) - if pvalue >alph: - return True - else: - return False - #return pvalue - -def test2(x,y,alph=0.01): - lens = len(x) - x=x.reshape(lens,1) - y=y.reshape(lens,1) - kernelX=GaussianKernel() - kernelY=GaussianKernel() - - num_samples = lens - - myblockobject = HSICBlockTestObject(num_samples, kernelX=kernelX, kernelY=kernelY, - kernelX_use_median=True, kernelY_use_median=True, - blocksize=80, nullvarmethod='permutation') - - pvalue = myblockobject.compute_pvalue(x, y) - #print(pvalue) - if pvalue >alph: - return True - else: - return False - - - -def INtest(x,y,alph=0.01): - lens = len(x) - x=x.reshape(lens,1) - y=y.reshape(lens,1) - kernelX=GaussianKernel() - kernelY=GaussianKernel() -## kernelY = GaussianKernel(float(0.3)) -## kernelX=GaussianKernel(float(0.3)) - num_samples = lens - - myspectralobject = HSICSpectralTestObject(num_samples, kernelX=kernelX, kernelY=kernelY, - kernelX_use_median=True, kernelY_use_median=True, - rff=True, num_rfx=20, num_rfy=20, num_nullsims=1000) - - pvalue = myspectralobject.compute_pvalue(x, y) - - return pvalue - -def 
INtest2(x,y,alph=0.01): - lens = len(x) - x=x.reshape(lens,1) - y=y.reshape(lens,1) - kernelX=GaussianKernel() - kernelY=GaussianKernel() - num_samples = lens - - myblockobject = HSICBlockTestObject(num_samples, kernelX=kernelX, kernelY=kernelY, - kernelX_use_median=True, kernelY_use_median=True, - blocksize=200, nullvarmethod='permutation') - - pvalue = myblockobject.compute_pvalue(x, y) - - return pvalue - - - -def main(): - x=np.random.uniform(size=10000) - y=np.random.uniform(size=10000)+x - test2(x,y) - -if __name__ == '__main__': - main() From 5229b0682a1ff3eca0326e5027c706ea0f01040d Mon Sep 17 00:00:00 2001 From: Feng Xie <33390790+fengxie009@users.noreply.github.com> Date: Sat, 17 Dec 2022 09:15:24 +0800 Subject: [PATCH 08/14] Delete Identifying_Causal_Clusters.py --- .../GIN/Identifying_Causal_Clusters.py | 70 ------------------- 1 file changed, 70 deletions(-) delete mode 100644 causallearn/search/HiddenCausal/GIN/Identifying_Causal_Clusters.py diff --git a/causallearn/search/HiddenCausal/GIN/Identifying_Causal_Clusters.py b/causallearn/search/HiddenCausal/GIN/Identifying_Causal_Clusters.py deleted file mode 100644 index 6052b7cd..00000000 --- a/causallearn/search/HiddenCausal/GIN/Identifying_Causal_Clusters.py +++ /dev/null @@ -1,70 +0,0 @@ -import causallearn.search.HiddenCausal.GIN.GIN2 as GIN2 #based on hsic to find cluster -import causallearn.search.HiddenCausal.GIN.Utils_V2 as Utils #overlap merge utils -import itertools - -#return the combination of list -def FindCombination(Lists,N): - return itertools.combinations(Lists,N) - - -#GIN test by fast HSIC -#X and Z are list, e.g., X=['X1','X2'] Z=['X3'] -#Data.type=Pandas.DataFrame, where data.columns=['x1','x2',...] -def GIN(X,Z,data,test_function,alpha=0.05): - return GIN2.GIN(X,Z,data,test_function,alpha) - #HSIC with fisher method - #return GIN2.FisherGIN(X,Z,data,alpha) - - -#identifying causal cluster from 1-factor to n-factor -#limit=N is to limited the n-factor we found -def FindCluser(data, test_function, alhpa=0.05,limit=0): - indexs=list(range(data.shape[1])) - B = indexs.copy() - Cluster={} - Grlen=2 - - #finding causal cluster, 1-factor to n-factor, recorded by dic type, e.g., {'1':[['x1','x2']],'2':[['x4','x5','x6']} - while len(B) >= Grlen and len(indexs) >=2*Grlen-1: - LatentNum=Grlen-1 - Set_P=FindCombination(B,Grlen) - print('identifying causal cluster: '+str(LatentNum)+'-factor model:') - for P in Set_P: - tind=indexs.copy() - for t in P: - tind.remove(t) # tind= ALLdata\P - if GIN(list(P),tind,data,test_function,alhpa): - key=Cluster.keys() - key = list(key) - if (LatentNum) in key: - temp =Cluster[LatentNum] - temp.append(list(P)) - Cluster[LatentNum]=temp - else: - Cluster[LatentNum]=[list(P)] - key=Cluster.keys() - if LatentNum in key: - Tclu=Cluster[LatentNum] - Tclu=Utils.merge_list(Tclu) - Cluster[LatentNum]=Tclu - #update the B - for i in Tclu: - for j in i: - if j in B: - B.remove(j) - - - Grlen+=1 - print('The identified cluster in identifying '+ str(LatentNum)+ '-factor model:') - print(Cluster) - - #limit the n-factor we found - if limit !=0 and (Grlen-1)>limit: - break - - - - return Cluster - - - From ebb808b9f442fa39d1d7327d7ad75acb79ad45c0 Mon Sep 17 00:00:00 2001 From: Feng Xie <33390790+fengxie009@users.noreply.github.com> Date: Sat, 17 Dec 2022 09:15:31 +0800 Subject: [PATCH 09/14] Delete Infer_Causal_Order.py --- .../HiddenCausal/GIN/Infer_Causal_Order.py | 70 ------------------- 1 file changed, 70 deletions(-) delete mode 100644 causallearn/search/HiddenCausal/GIN/Infer_Causal_Order.py 
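The module removed below is built around the GIN surrogate E_{X||Z} = omega'X, where omega is the right singular vector of cov(Z, X) for the smallest singular value, exactly as computed by getomega. A self-contained numpy sketch under an assumed one-latent model (the coefficients and the cube-of-uniform noise are illustrative only):

    import numpy as np

    rng = np.random.default_rng(0)
    n = 20000
    lat = rng.uniform(-1, 1, n) ** 3                  # non-Gaussian latent
    noise = 0.2 * rng.uniform(-1, 1, (n, 3)) ** 3
    x1 = 2.0 * lat + noise[:, 0]
    x2 = -1.5 * lat + noise[:, 1]
    x3 = 0.7 * lat + noise[:, 2]
    data = np.column_stack([x1, x2, x3])

    cov = np.cov(data, rowvar=False)
    A = cov[np.ix_([2], [0, 1])]                      # cov(Z, X), X={x1,x2}, Z={x3}
    omega = np.linalg.svd(A)[2].T[:, -1]              # smallest singular direction
    e_surrogate = data[:, [0, 1]] @ omega             # E_{X||Z} = omega'X
    print(np.corrcoef(e_surrogate, x3)[0, 1])         # close to 0 when the cluster holds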
diff --git a/causallearn/search/HiddenCausal/GIN/Infer_Causal_Order.py b/causallearn/search/HiddenCausal/GIN/Infer_Causal_Order.py deleted file mode 100644 index 514489c1..00000000 --- a/causallearn/search/HiddenCausal/GIN/Infer_Causal_Order.py +++ /dev/null @@ -1,70 +0,0 @@ -import causallearn.search.HiddenCausal.GIN.GIN2 as GIN #GIN based on mutual information - - -# identifying causal order by identifying root node recursively. -#cluster.type=dic, which is learned by phase I {identifying causal cluster} -#method 1 = estimating mutual information by k nearest neighbors -def LearnCausalOrder(cluster,data,method='1'): - Cluster=GetCluster(cluster) - #print(Cluster) - - K=[] #causal order - while(len(Cluster) >0): - root = FindRoot(Cluster,data,K,method='1') - K.append(root) - Cluster.remove(root) - return K - -#finding root node by mutual information based GIN -def FindRoot(clu1,data1,K1,method='1'): - cluster=clu1.copy() - data =data1.copy() - K=K1.copy() - if len(cluster) ==1: - return cluster[0] - - MIS=[] - for clu in cluster: - other=cluster.copy() - other.remove(clu) - TempM=0 - for o in other: - X=[] - Z=[] - - for i in range(0,len(clu)): - if i < len(clu)/2: - X.append(clu[i]) - else: - Z.append(clu[i]) - for i in range(0,len(o)): - if i < len(o)/2: - X.append(o[i]) - - - if len(K) != 0: - for i in K: - for j in range(0,len(i)): - if j < int(len(i)/2): - X.append(i[j]) - else: - Z.append(i[j]) - - #print(X,Z) - mi = GIN.GIN_MI(X,Z,data,method='1') - TempM+=mi - TempM =TempM - MIS.append(TempM) - - mins=MIS.index(min(MIS)) - root = cluster[mins] - return root - -def GetCluster(cluster): - Clu=[] - key=cluster.keys() - for i in key: - C=cluster[i] - for j in C: - Clu.append(j) - return Clu From 9299ea2ccf3b638ef45c79c65e7cfdf8abe63f46 Mon Sep 17 00:00:00 2001 From: Feng Xie <33390790+fengxie009@users.noreply.github.com> Date: Sat, 17 Dec 2022 09:15:38 +0800 Subject: [PATCH 10/14] Delete Utils_V2.py --- .../search/HiddenCausal/GIN/Utils_V2.py | 127 ------------------ 1 file changed, 127 deletions(-) delete mode 100644 causallearn/search/HiddenCausal/GIN/Utils_V2.py diff --git a/causallearn/search/HiddenCausal/GIN/Utils_V2.py b/causallearn/search/HiddenCausal/GIN/Utils_V2.py deleted file mode 100644 index 04513229..00000000 --- a/causallearn/search/HiddenCausal/GIN/Utils_V2.py +++ /dev/null @@ -1,127 +0,0 @@ -import numpy as np -import pandas as pd -from itertools import combinations -import itertools -import networkx as nx - - - - - - - -def bfs(graph, start): - visited, queue = set(), [start] - while queue: - vertex = queue.pop(0) - if vertex not in visited: - visited.add(vertex) - queue.extend(graph[vertex] - visited) - return visited - -def connected_components(G): - seen = set() - for v in G: - if v not in seen: - c = set(bfs(G, v)) - yield c - seen.update(c) - -def graph(edge_list): - result = {} - for source, target in edge_list: - result.setdefault(source, set()).add(target) - result.setdefault(target, set()).add(source) - return result - - -def merge_list(L2): - l=L2.copy() - - edges = [] - s = list(map(set, l)) - for i, j in combinations(range(len(s)), r=2): - - if s[i].intersection(s[j]): - edges.append((i, j)) - - G = graph(edges) - - result = [] - unassigned = list(range(len(s))) - - for component in connected_components(G): - - union = set().union(*(s[i] for i in component)) - - result.append(sorted(union)) - - unassigned = [i for i in unassigned if i not in component] - - - result.extend(map(sorted, (s[i] for i in unassigned))) - - return result - - -#merge withour 
change any order -def merge_listNew(L2): - - - - l=L2.copy() - - - edges = [] - s = list(map(set, l)) - for i, j in combinations(range(len(s)), r=2): - if s[i].intersection(s[j]): - edges.append((i, j)) - - G = graph(edges) - - result = [] - unassigned = list(range(len(s))) - - for component in connected_components(G): - - component=list(component) - - Cluster=l[component[0]] - - adj= s[0].intersection(s[1]) - for i in component: - if i == 0: - continue - temp=l[i] - for j in list(adj): - if j in temp: - temp.remove(j) - - for j in range(0,len(temp)): - if j < (len(temp)/2): - Cluster.insert(0,temp[j]) - else: - Cluster.append(temp[j]) - - result.append(Cluster) - - - - unassigned = [i for i in unassigned if i not in component] - - - result.extend(map(sorted, (s[i] for i in unassigned))) - - return result - - - - -def main(): - L=[['x1','x2'],['x3','x4'],['x5','x6','x2'],['x1','x9']] - print(merge_list(L)) - - -if __name__ == '__main__': - main() From 36d5d4bca8cb5d59e6a095bd44b74d2f97825b32 Mon Sep 17 00:00:00 2001 From: Feng Xie <33390790+fengxie009@users.noreply.github.com> Date: Sat, 17 Dec 2022 09:15:45 +0800 Subject: [PATCH 11/14] Delete __init__.py --- causallearn/search/HiddenCausal/GIN/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 causallearn/search/HiddenCausal/GIN/__init__.py diff --git a/causallearn/search/HiddenCausal/GIN/__init__.py b/causallearn/search/HiddenCausal/GIN/__init__.py deleted file mode 100644 index e69de29b..00000000 From c92cb8f6ebf4b63fc2e14f30e6e81338c0e3db34 Mon Sep 17 00:00:00 2001 From: Feng Xie <33390790+fengxie009@users.noreply.github.com> Date: Sat, 17 Dec 2022 09:15:52 +0800 Subject: [PATCH 12/14] Delete independence.py --- .../search/HiddenCausal/GIN/independence.py | 38 ------------------- 1 file changed, 38 deletions(-) delete mode 100644 causallearn/search/HiddenCausal/GIN/independence.py diff --git a/causallearn/search/HiddenCausal/GIN/independence.py b/causallearn/search/HiddenCausal/GIN/independence.py deleted file mode 100644 index dd64ef6b..00000000 --- a/causallearn/search/HiddenCausal/GIN/independence.py +++ /dev/null @@ -1,38 +0,0 @@ -import numpy as np -import pandas as pd -import causallearn.search.HiddenCausal.GIN.mutual as MI -from sklearn import metrics - -#estimating mutual information by sklearn -def independent11(x1,y1): - x=x1.copy() - y=y1.copy() - length=len(x) -## x=x.reshape(length,1) -## y=y.reshape(length,1) - x = list(x) - y = list(y) - - result_NMI=metrics.normalized_mutual_info_score(x, y) - print (result_NMI) - return result_NMI - -#estimating mutual information by Non-parametric computation of entropy and mutual-information -def independent(x1,y1): - x=x1.copy() - y=y1.copy() - length=len(x) - x=x.reshape(length,1) - y=y.reshape(length,1) - - if length >3000: - k=15 - else: - k=10 - - mi= MI.mutual_information((x,y),k) - return abs(mi) - - - - From 4f168e8d235cd4b6deb460b7cd5325811e57d038 Mon Sep 17 00:00:00 2001 From: Feng Xie <33390790+fengxie009@users.noreply.github.com> Date: Sat, 17 Dec 2022 09:15:58 +0800 Subject: [PATCH 13/14] Delete mutual.py --- causallearn/search/HiddenCausal/GIN/mutual.py | 276 ------------------ 1 file changed, 276 deletions(-) delete mode 100644 causallearn/search/HiddenCausal/GIN/mutual.py diff --git a/causallearn/search/HiddenCausal/GIN/mutual.py b/causallearn/search/HiddenCausal/GIN/mutual.py deleted file mode 100644 index 35c4cb8b..00000000 --- a/causallearn/search/HiddenCausal/GIN/mutual.py +++ /dev/null @@ -1,276 +0,0 @@ - - -''' -Non-parametric 
computation of entropy and mutual-information -Adapted by G Varoquaux for code created by R Brette, itself -from several papers (see in the code). -These computations rely on nearest-neighbor statistics -''' -import numpy as np - -from scipy.special import gamma,psi -from scipy import ndimage -from scipy.linalg import det -from numpy import pi - -from sklearn.neighbors import NearestNeighbors - -__all__=['entropy', 'mutual_information', 'entropy_gaussian'] - -EPS = np.finfo(float).eps - - -def nearest_distances(X, k=1): - ''' - X = array(N,M) - N = number of points - M = number of dimensions - returns the distance to the kth nearest neighbor for every point in X - ''' - knn = NearestNeighbors(n_neighbors=k + 1) - knn.fit(X) - d, _ = knn.kneighbors(X) # the first nearest neighbor is itself - return d[:, -1] # returns the distance to the kth nearest neighbor - - -def entropy_gaussian(C): - ''' - Entropy of a gaussian variable with covariance matrix C - ''' - if np.isscalar(C): # C is the variance - return .5*(1 + np.log(2*pi)) + .5*np.log(C) - else: - n = C.shape[0] # dimension - return .5*n*(1 + np.log(2*pi)) + .5*np.log(abs(det(C))) - - -def entropy2(u,k=1): - k1 = 79.047 - k2 = 7.4129 - gamma = 0.37457 - return (1 + np.log(2 * np.pi)) / 2 - k1 * (np.mean(np.log(np.cosh(u))) - gamma)**2 - k2 * (np.mean(u * np.exp((-u**2) / 2)))**2 - - -def entropy(X, k=1): - ''' Returns the entropy of the X. - Parameters - =========== - X : array-like, shape (n_samples, n_features) - The data the entropy of which is computed - k : int, optional - number of nearest neighbors for density estimation - Notes - ====== - Kozachenko, L. F. & Leonenko, N. N. 1987 Sample estimate of entropy - of a random vector. Probl. Inf. Transm. 23, 95-101. - See also: Evans, D. 2008 A computationally efficient estimator for - mutual information, Proc. R. Soc. A 464 (2093), 1203-1215. - and: - Kraskov A, Stogbauer H, Grassberger P. (2004). Estimating mutual - information. Phys Rev E 69(6 Pt 2):066138. - ''' - - # Distance to kth nearest neighbor - r = nearest_distances(X, k) # squared distances - n, d = X.shape - volume_unit_ball = (pi**(.5*d)) / gamma(.5*d + 1) - ''' - F. Perez-Cruz, (2008). Estimation of Information Theoretic Measures - for Continuous Random Variables. Advances in Neural Information - Processing Systems 21 (NIPS). Vancouver (Canada), December. - return d*mean(log(r))+log(volume_unit_ball)+log(n-1)-log(k) - ''' - return (d*np.mean(np.log(r + np.finfo(X.dtype).eps)) - + np.log(volume_unit_ball) + psi(n) - psi(k)) - - -def mutual_information(variables, k=1): - ''' - Returns the mutual information between any number of variables. - Each variable is a matrix X = array(n_samples, n_features) - where - n = number of samples - dx,dy = number of dimensions - Optionally, the following keyword argument can be specified: - k = number of nearest neighbors for density estimation - Example: mutual_information((X, Y)), mutual_information((X, Y, Z), k=5) - ''' - if len(variables) < 2: - raise AttributeError( - "Mutual information must involve at least 2 variables") - all_vars = np.hstack(variables) - return (sum([entropy(X, k=k) for X in variables]) - - entropy(all_vars, k=k)) - - -def mutual_information_2d(x, y, sigma=1, normalized=False): - """ - Computes (normalized) mutual information between two 1D variate from a - joint histogram. 
- Parameters - ---------- - x : 1D array - first variable - y : 1D array - second variable - sigma: float - sigma for Gaussian smoothing of the joint histogram - Returns - ------- - nmi: float - the computed similariy measure - """ - bins = (256, 256) - - jh = np.histogram2d(x, y, bins=bins)[0] - - # smooth the jh with a gaussian filter of given sigma - ndimage.gaussian_filter(jh, sigma=sigma, mode='constant', - output=jh) - - # compute marginal histograms - jh = jh + EPS - sh = np.sum(jh) - jh = jh / sh - s1 = np.sum(jh, axis=0).reshape((-1, jh.shape[0])) - s2 = np.sum(jh, axis=1).reshape((jh.shape[1], -1)) - - # Normalised Mutual Information of: - # Studholme, jhill & jhawkes (1998). - # "A normalized entropy measure of 3-D medical image alignment". - # in Proc. Medical Imaging 1998, vol. 3338, San Diego, CA, pp. 132-143. - if normalized: - mi = ((np.sum(s1 * np.log(s1)) + np.sum(s2 * np.log(s2))) - / np.sum(jh * np.log(jh))) - 1 - else: - mi = ( np.sum(jh * np.log(jh)) - np.sum(s1 * np.log(s1)) - - np.sum(s2 * np.log(s2))) - - return mi - - - -############################################################################### -# Tests - -def test_entropy(): - # Testing against correlated Gaussian variables - # (analytical results are known) - # Entropy of a 3-dimensional gaussian variable - rng = np.random.RandomState(0) - n = 50000 - d = 3 - P = np.array([[1, 0, 0], [0, 1, .5], [0, 0, 1]]) - C = np.dot(P, P.T) - Y = rng.randn(d, n) - X = np.dot(P, Y) - H_th = entropy_gaussian(C) - H_est = entropy(X.T, k=5) - # Our estimated entropy should always be less that the actual one - # (entropy estimation undershoots) but not too much - np.testing.assert_array_less(H_est, H_th) - np.testing.assert_array_less(.9*H_th, H_est) - - -def test_mutual_information(): - # Mutual information between two correlated gaussian variables - # Entropy of a 2-dimensional gaussian variable - n = 50000 - rng = np.random.RandomState(0) - #P = np.random.randn(2, 2) - P = np.array([[1, 0], [0.5, 1]]) - C = np.dot(P, P.T) - U = rng.randn(2, n) - Z = np.dot(P, U).T - X = Z[:, 0] - X = X.reshape(len(X), 1) - Y = Z[:, 1] - Y = Y.reshape(len(Y), 1) - # in bits - MI_est = mutual_information((X, Y), k=5) - MI_th = (entropy_gaussian(C[0, 0]) - + entropy_gaussian(C[1, 1]) - - entropy_gaussian(C) - ) - # Our estimator should undershoot once again: it will undershoot more - # for the 2D estimation that for the 1D estimation - print((MI_est, MI_th)) - np.testing.assert_array_less(MI_est, MI_th) - np.testing.assert_array_less(MI_th, MI_est + .3) - - -def test_degenerate(): - # Test that our estimators are well-behaved with regards to - # degenerate solutions - rng = np.random.RandomState(0) - x = rng.randn(50000) - X = np.c_[x, x] - assert np.isfinite(entropy(X)) - assert np.isfinite(mutual_information((x[:, np.newaxis], - x[:, np.newaxis]))) - assert 2.9 < mutual_information_2d(x, x) < 3.1 - - -def test_mutual_information_2d(): - # Mutual information between two correlated gaussian variables - # Entropy of a 2-dimensional gaussian variable - n = 50000 - rng = np.random.RandomState(0) - #P = np.random.randn(2, 2) - P = np.array([[1, 0], [.9, .1]]) - C = np.dot(P, P.T) - U = rng.randn(2, n) - Z = np.dot(P, U).T - X = Z[:, 0] - X = X.reshape(len(X), 1) - Y = Z[:, 1] - Y = Y.reshape(len(Y), 1) - # in bits - MI_est = mutual_information_2d(X.ravel(), Y.ravel()) - MI_th = (entropy_gaussian(C[0, 0]) - + entropy_gaussian(C[1, 1]) - - entropy_gaussian(C) - ) - print((MI_est, MI_th)) - # Our estimator should undershoot once again: it will 
undershoot more - # for the 2D estimation that for the 1D estimation - np.testing.assert_array_less(MI_est, MI_th) - np.testing.assert_array_less(MI_th, MI_est + .2) - -def _entropy(u): - """Calculate entropy using the maximum entropy approximations.""" - k1 = 79.047 - k2 = 7.4129 - gamma = 0.37457 - return (1 + np.log(2 * np.pi)) / 2 - k1 * (np.mean(np.log(np.cosh(u))) - gamma)**2 - k2 * (np.mean(u * np.exp((-u**2) / 2)))**2 - -def main(): - x=np.random.uniform(size=1000) - y=np.random.uniform(size=1000)*0.2+0.5*x - - mi=mutual_information_2d(x,y,normalized=True) - - x=x.reshape(1000,1) - y=y.reshape(1000,1) - - mi2=mutual_information((x,y),k=10) - print(mi) - print(mi2) -## -## x=x.reshape(10000,1) -## y=y.reshape(10000,1) -## a =entropy(x) -## b=_entropy(x) -## -## print(a) -## print(b) - - - -if __name__ == '__main__': - # Run our tests -## test_entropy() -## test_mutual_information() -## test_degenerate() -## test_mutual_information_2d() - main() \ No newline at end of file From e30742605f70fc0b70964f5d408f2d460d7d6f65 Mon Sep 17 00:00:00 2001 From: Feng Xie <33390790+fengxie009@users.noreply.github.com> Date: Sat, 17 Dec 2022 09:16:19 +0800 Subject: [PATCH 14/14] Add files via upload --- causallearn/search/HiddenCausal/GIN/GIN.py | 75 +++++++ .../GIN/Identifying_Causal_Clusters.py | 72 +++++++ .../HiddenCausal/GIN/Infer_Causal_Order.py | 101 ++++++++++ .../search/HiddenCausal/GIN/Merge_Cluster.py | 62 ++++++ .../HiddenCausal/GIN/Mutual_Information.py | 185 ++++++++++++++++++ .../HiddenCausal/GIN/Test_GIN_Condition.py | 81 ++++++++ 6 files changed, 576 insertions(+) create mode 100644 causallearn/search/HiddenCausal/GIN/GIN.py create mode 100644 causallearn/search/HiddenCausal/GIN/Identifying_Causal_Clusters.py create mode 100644 causallearn/search/HiddenCausal/GIN/Infer_Causal_Order.py create mode 100644 causallearn/search/HiddenCausal/GIN/Merge_Cluster.py create mode 100644 causallearn/search/HiddenCausal/GIN/Mutual_Information.py create mode 100644 causallearn/search/HiddenCausal/GIN/Test_GIN_Condition.py diff --git a/causallearn/search/HiddenCausal/GIN/GIN.py b/causallearn/search/HiddenCausal/GIN/GIN.py new file mode 100644 index 00000000..f0a762a3 --- /dev/null +++ b/causallearn/search/HiddenCausal/GIN/GIN.py @@ -0,0 +1,75 @@ +from causallearn.graph.GeneralGraph import GeneralGraph +from causallearn.graph.GraphNode import GraphNode +from causallearn.graph.NodeType import NodeType +from causallearn.search.FCMBased.lingam.hsic import hsic_test_gamma +from causallearn.utils.KCI.KCI import KCI_UInd + +import causallearn.search.HiddenCausal.GIN.Infer_Causal_Order as ICO +import causallearn.search.HiddenCausal.GIN.Identifying_Causal_Clusters as ICC + + + +def GIN(data, indep_test_method='kci', alpha=0.05, MAX_Factor = 2): + ''' + Learning causal structure of Latent Variables for Linear Non-Gaussian Latent Variable Model + with Generalized Independent Noise Condition + Parameters + ---------- + data : numpy ndarray + data set + indep_test_method : str, default='kci' + the name of the independence test being used + alpha : float, default=0.05 + desired significance level of independence tests (p_value) in (0,1) + MAX_Factor : int + Prior maximum factor number + Returns + ------- + G : general graph + causal graph + causal_order : list + causal order + ''' + if indep_test_method not in ['kci', 'hsic']: + raise NotImplementedError((f"Independent test method {indep_test_method} is not implemented.")) + + if indep_test_method == 'kci': + kci = KCI_UInd() + + if indep_test_method == 'kci': + 
def indep_test(x, y):
+            return kci.compute_pvalue(x, y)[0]
+    elif indep_test_method == 'hsic':
+        def indep_test(x, y):
+            return hsic_test_gamma(x, y)[1]
+    else:
+        raise NotImplementedError(f"Independent test method {indep_test_method} is not implemented.")
+
+    # Phase I: identify the causal clusters with the GIN condition (KCI or HSIC test)
+    Cluster = ICC.FindCluster(data, indep_test, alpha, MAX_Factor)
+    # Phase II: infer the causal order of the clusters via mutual information (k-nearest-neighbor estimate)
+    CausalOrder = ICO.LearnCausalOrder(Cluster, data)
+
+    latent_id = 1
+    l_nodes = []
+    G = GeneralGraph([])
+    for cluster in CausalOrder:
+        l_node = GraphNode(f"L{latent_id}")
+        l_node.set_node_type(NodeType.LATENT)
+        l_nodes.append(l_node)
+        G.add_node(l_node)
+        # every earlier latent is a cause of the current latent
+        for l in l_nodes:
+            if l != l_node:
+                G.add_directed_edge(l, l_node)
+        # each latent points to the observed variables in its cluster
+        for o in cluster:
+            o_node = GraphNode(f"X{o + 1}")
+            G.add_node(o_node)
+            G.add_directed_edge(l_node, o_node)
+        latent_id += 1
+
+    return G, CausalOrder
+
diff --git a/causallearn/search/HiddenCausal/GIN/Identifying_Causal_Clusters.py b/causallearn/search/HiddenCausal/GIN/Identifying_Causal_Clusters.py
new file mode 100644
index 00000000..d7f8170f
--- /dev/null
+++ b/causallearn/search/HiddenCausal/GIN/Identifying_Causal_Clusters.py
@@ -0,0 +1,72 @@
+import itertools
+
+import causallearn.search.HiddenCausal.GIN.Test_GIN_Condition as Test_GIN_Condition  # HSIC/KCI-based cluster test
+import causallearn.search.HiddenCausal.GIN.Merge_Cluster as Merge_Cluster  # utilities for merging overlapping clusters
+
+
+def FindCluster(data, test_function, alpha=0.05, Max_Factor=2):
+    '''
+    Learn the causal clusters of a Linear Non-Gaussian Latent Variable Model with the Generalized Independent Noise (GIN) condition
+    Parameters
+    ----------
+    data : numpy ndarray
+        data set
+    test_function : callable
+        the independence test being used; takes two samples and returns a p-value
+    alpha : float, default=0.05
+        desired significance level of independence tests (p_value) in (0, 1)
+    Max_Factor : int
+        maximum number of factors (latent variables) per cluster
+    Returns
+    -------
+    Cluster : dict
+        causal clusters keyed by factor number, e.g., {1: [['x1', 'x2']], 2: [['x4', 'x5', 'x6']]}
+    '''
+    # Initialize the variable set
+    indexs = list(range(data.shape[1]))
+    B = indexs.copy()
+    Cluster = {}
+    Grlen = 2
+
+    while len(B) >= Grlen and len(indexs) >= 2 * Grlen - 1:
+        LatentNum = Grlen - 1
+        Set_P = itertools.combinations(B, Grlen)
+        print('Identifying causal clusters with the ' + str(LatentNum) + '-factor model:')
+        for P in Set_P:
+            tind = indexs.copy()
+            for t in P:
+                tind.remove(t)  # tind = all variables \ P
+            if GIN(list(P), tind, data, test_function, alpha):
+                key = list(Cluster.keys())
+                if LatentNum in key:
+                    temp = Cluster[LatentNum]
+                    temp.append(list(P))
+                    Cluster[LatentNum] = temp
+                else:
+                    Cluster[LatentNum] = [list(P)]
+
+        print('Candidate clusters:', Cluster)
+        # Merge overlapping clusters and remove their variables from the search set
+        key = Cluster.keys()
+        if LatentNum in key:
+            Tclu = Merge_Cluster.merge_list(Cluster[LatentNum])
+            Cluster[LatentNum] = Tclu
+            for i in Tclu:
+                for j in i:
+                    if j in B:
+                        B.remove(j)
+
+        Grlen += 1
+        print('The identified clusters for the ' + str(LatentNum) + '-factor model:', Cluster)
+
+        if Max_Factor != 0 and (Grlen - 1) > Max_Factor:
+            break
+
+    return Cluster
+
+
+def GIN(X, Z, data, test_function, alpha=0.05):
+    # To pool the p-values with Fisher's method instead, swap in Test_GIN_Condition.FisherGIN
+    return Test_GIN_Condition.GIN(X, Z, data, test_function, alpha)
+
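Before the patch moves on to Phase II, it may help to see the Phase-I test in isolation. The sketch below is not part of the patch: it simulates a two-latent model, rebuilds the GIN residual with the same SVD null-space construction as getomega in Test_GIN_Condition.py (inlined here as the hypothetical helper gin_residual), and checks it with hsic_test_gamma, which the patch already imports in GIN.py. In a typical run the true cluster {X1, X2} yields large p-values while a wrong grouping yields small ones.

    import numpy as np
    from causallearn.search.FCMBased.lingam.hsic import hsic_test_gamma

    rng = np.random.default_rng(0)
    n = 1500
    L1 = rng.uniform(-1, 1, n)              # non-Gaussian root latent
    L2 = 0.7 * L1 + rng.uniform(-1, 1, n)   # L1 -> L2
    data = np.column_stack([
        1.0 * L1 + 0.2 * rng.uniform(-1, 1, n),   # X1, X2 measure L1
        0.8 * L1 + 0.2 * rng.uniform(-1, 1, n),
        1.0 * L2 + 0.2 * rng.uniform(-1, 1, n),   # X3, X4 measure L2
        0.9 * L2 + 0.2 * rng.uniform(-1, 1, n),
    ])

    def gin_residual(data, X, Z):
        # same construction as getomega: omega spans the null space of cov(Z, X)
        cov = np.cov(data, rowvar=False)
        _, _, v = np.linalg.svd(cov[np.ix_(Z, X)])
        return data[:, X] @ v.T[:, -1]

    for X in ([0, 1], [0, 2]):              # a true cluster vs. a wrong grouping
        Z = [i for i in range(4) if i not in X]
        e = gin_residual(data, X, Z)
        pvals = [hsic_test_gamma(e[:, None], data[:, [z]])[1] for z in Z]
        print(X, [round(p, 3) for p in pvals])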
diff --git a/causallearn/search/HiddenCausal/GIN/Infer_Causal_Order.py b/causallearn/search/HiddenCausal/GIN/Infer_Causal_Order.py
new file mode 100644
index 00000000..cc3813d8
--- /dev/null
+++ b/causallearn/search/HiddenCausal/GIN/Infer_Causal_Order.py
@@ -0,0 +1,101 @@
+import causallearn.search.HiddenCausal.GIN.Test_GIN_Condition as GIN  # GIN statistic based on mutual information
+
+
+def LearnCausalOrder(cluster, data):
+    '''
+    Learn the causal order of a Linear Non-Gaussian Latent Variable Model with the Generalized Independent Noise (GIN) condition
+    Parameters
+    ----------
+    cluster : dict
+        causal clusters learned in Phase I
+    data : numpy ndarray
+        data set
+    Returns
+    -------
+    causal_order : list
+        causal order of the clusters
+    '''
+    Cluster = GetCluster(cluster)
+    K = []  # Initialize the causal order (K in the paper)
+    while len(Cluster) > 0:
+        root = FindRoot(Cluster, data, K)
+        K.append(root)
+        Cluster.remove(root)
+    return K
+
+
+def FindRoot(clusters, data, causal_order):
+    '''
+    Find the root cluster, i.e., the cluster whose GIN residual carries the least dependence
+    Parameters
+    ----------
+    clusters : list
+        clusters of observed variables
+    data : numpy ndarray
+        data set
+    causal_order : list
+        causal order identified so far
+    Returns
+    -------
+    root : list
+        cluster of the latent root cause
+    '''
+    if len(clusters) == 1:
+        return clusters[0]
+    root = clusters[0]
+    MI_lists = []
+    for i in clusters:
+        MI = 0
+        for j in clusters:
+            if i == j:
+                continue
+
+            # Test whether cluster i precedes cluster j:
+            # one variable of j plus half of i against the other half of i
+            X = [j[0]]
+            Z = []
+            r_1, r_2 = array_split(i, 2)
+            X = X + r_1
+            Z = Z + r_2
+
+            # Condition on the clusters that are already ordered
+            if causal_order:
+                for k in causal_order:
+                    k_1, k_2 = array_split(k, 2)
+                    X = X + k_1
+                    Z = Z + k_2
+
+            tmi = GIN.GIN_MI(X, Z, data)
+            MI += tmi
+
+        MI_lists.append(MI)
+
+    # The root cluster minimizes the accumulated mutual information
+    mins = MI_lists.index(min(MI_lists))
+    root = clusters[mins]
+    return root
+
+
+def GetCluster(cluster):
+    '''Flatten the Phase-I dictionary into a plain list of clusters.'''
+    Clu = []
+    key = cluster.keys()
+    for i in key:
+        C = cluster[i]
+        for j in C:
+            Clu.append(j)
+    return Clu
+
+
+def array_split(x, k):
+    '''Split the list x into k contiguous parts of (nearly) equal size.'''
+    x_len = len(x)
+    sub_arys = []
+    start = 0
+    section_len = x_len // k
+    extra = x_len % k
+    for i in range(extra):
+        sub_arys.append(x[start:start + section_len + 1])
+        start = start + section_len + 1
+
+    for i in range(k - extra):
+        sub_arys.append(x[start:start + section_len])
+        start = start + section_len
+    return sub_arys
\ No newline at end of file
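The X/Z surrogates above hinge on array_split halving each cluster, so its exact splitting rule matters. A quick, runnable check of that rule once the patch is applied (the printed lists are what the function returns):

    from causallearn.search.HiddenCausal.GIN.Infer_Causal_Order import array_split

    print(array_split(['x1', 'x2', 'x3', 'x4', 'x5'], 2))
    # [['x1', 'x2', 'x3'], ['x4', 'x5']]  -- the odd element lands in the first half
    print(array_split([0, 1, 2, 3], 2))
    # [[0, 1], [2, 3]]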
diff --git a/causallearn/search/HiddenCausal/GIN/Merge_Cluster.py b/causallearn/search/HiddenCausal/GIN/Merge_Cluster.py
new file mode 100644
index 00000000..37242783
--- /dev/null
+++ b/causallearn/search/HiddenCausal/GIN/Merge_Cluster.py
@@ -0,0 +1,62 @@
+from itertools import combinations
+
+
+def merge_list(L2):
+    '''Merge overlapping causal clusters.'''
+    l = L2.copy()
+
+    # Build an overlap graph: one node per cluster, an edge whenever two clusters share a variable
+    edges = []
+    s = list(map(set, l))
+    for i, j in combinations(range(len(s)), r=2):
+        if s[i].intersection(s[j]):
+            edges.append((i, j))
+
+    G = graph(edges)
+
+    result = []
+    unassigned = list(range(len(s)))
+
+    # Each connected component of the overlap graph becomes one merged cluster
+    for component in connected_components(G):
+        union = set().union(*(s[i] for i in component))
+        result.append(sorted(union))
+        unassigned = [i for i in unassigned if i not in component]
+
+    # Clusters that overlap with nothing are kept as they are
+    result.extend(map(sorted, (s[i] for i in unassigned)))
+
+    return result
+
+
+def bfs(graph, start):
+    '''Breadth-first search; returns the set of vertices reachable from start.'''
+    visited, queue = set(), [start]
+    while queue:
+        vertex = queue.pop(0)
+        if vertex not in visited:
+            visited.add(vertex)
+            queue.extend(graph[vertex] - visited)
+    return visited
+
+
+def connected_components(G):
+    '''Yield the connected components of G as sets of vertices.'''
+    seen = set()
+    for v in G:
+        if v not in seen:
+            c = set(bfs(G, v))
+            yield c
+            seen.update(c)
+
+
+def graph(edge_list):
+    '''Build an undirected adjacency-set representation from an edge list.'''
+    result = {}
+    for source, target in edge_list:
+        result.setdefault(source, set()).add(target)
+        result.setdefault(target, set()).add(source)
+    return result
+
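A short usage check of merge_list, runnable once the patch is applied: overlapping clusters collapse into the sorted union of their connected component, and disjoint clusters pass through untouched.

    from causallearn.search.HiddenCausal.GIN.Merge_Cluster import merge_list

    # [1, 2] and [2, 3] share the variable 2, so they merge; [4, 5] is kept as is
    print(merge_list([[1, 2], [2, 3], [4, 5]]))
    # [[1, 2, 3], [4, 5]]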
diff --git a/causallearn/search/HiddenCausal/GIN/Mutual_Information.py b/causallearn/search/HiddenCausal/GIN/Mutual_Information.py
new file mode 100644
index 00000000..037aa0e8
--- /dev/null
+++ b/causallearn/search/HiddenCausal/GIN/Mutual_Information.py
@@ -0,0 +1,185 @@
+'''
+Non-parametric computation of entropy and mutual-information
+Adapted by G Varoquaux for code created by R Brette, itself
+from several papers (see in the code).
+These computations rely on nearest-neighbor statistics
+'''
+import numpy as np
+
+from scipy.special import gamma, psi
+from scipy import ndimage
+from scipy.linalg import det
+from numpy import pi
+
+from sklearn import metrics
+from sklearn.neighbors import NearestNeighbors
+
+__all__ = ['entropy', 'mutual_information', 'entropy_gaussian']
+
+EPS = np.finfo(float).eps
+
+
+def nearest_distances(X, k=1):
+    '''
+    X = array(N,M)
+    N = number of points
+    M = number of dimensions
+    Returns the distance to the kth nearest neighbor for every point in X
+    '''
+    knn = NearestNeighbors(n_neighbors=k + 1)
+    knn.fit(X)
+    d, _ = knn.kneighbors(X)  # the first nearest neighbor of a point is the point itself
+    return d[:, -1]  # distance to the kth (true) nearest neighbor
+
+
+def entropy_gaussian(C):
+    '''
+    Entropy of a Gaussian variable with covariance matrix C
+    '''
+    if np.isscalar(C):  # C is the variance
+        return .5 * (1 + np.log(2 * pi)) + .5 * np.log(C)
+    else:
+        n = C.shape[0]  # dimension
+        return .5 * n * (1 + np.log(2 * pi)) + .5 * np.log(abs(det(C)))
+
+
+def entropy(X, k=1):
+    '''Returns the entropy of X.
+    Parameters
+    ===========
+    X : array-like, shape (n_samples, n_features)
+        The data the entropy of which is computed
+    k : int, optional
+        number of nearest neighbors for density estimation
+    Notes
+    ======
+    Kozachenko, L. F. & Leonenko, N. N. 1987 Sample estimate of entropy
+    of a random vector. Probl. Inf. Transm. 23, 95-101.
+    See also: Evans, D. 2008 A computationally efficient estimator for
+    mutual information, Proc. R. Soc. A 464 (2093), 1203-1215.
+    and:
+    Kraskov A, Stogbauer H, Grassberger P. (2004). Estimating mutual
+    information. Phys Rev E 69(6 Pt 2):066138.
+    '''
+    # Distance to the kth nearest neighbor
+    r = nearest_distances(X, k)
+    n, d = X.shape
+    volume_unit_ball = (pi**(.5*d)) / gamma(.5*d + 1)
+    '''
+    F. Perez-Cruz, (2008). Estimation of Information Theoretic Measures
+    for Continuous Random Variables. Advances in Neural Information
+    Processing Systems 21 (NIPS). Vancouver (Canada), December.
+    return d*mean(log(r))+log(volume_unit_ball)+log(n-1)-log(k)
+    '''
+    return (d * np.mean(np.log(r + np.finfo(X.dtype).eps))
+            + np.log(volume_unit_ball) + psi(n) - psi(k))
+
+
+def mutual_information(variables, k=1):
+    '''
+    Returns the mutual information between any number of variables.
+    Each variable is a matrix X = array(n_samples, n_features)
+    where
+      n = number of samples
+      dx,dy = number of dimensions
+    Optionally, the following keyword argument can be specified:
+      k = number of nearest neighbors for density estimation
+    Example: mutual_information((X, Y)), mutual_information((X, Y, Z), k=5)
+    '''
+    if len(variables) < 2:
+        raise AttributeError(
+            "Mutual information must involve at least 2 variables")
+    all_vars = np.hstack(variables)
+    return (sum([entropy(X, k=k) for X in variables])
+            - entropy(all_vars, k=k))
+
+
+def mutual_information_2d(x, y, sigma=1, normalized=False):
+    """
+    Computes (normalized) mutual information between two 1D variates from a
+    joint histogram.
+    Parameters
+    ----------
+    x : 1D array
+        first variable
+    y : 1D array
+        second variable
+    sigma : float
+        sigma for Gaussian smoothing of the joint histogram
+    Returns
+    -------
+    nmi : float
+        the computed similarity measure
+    """
+    bins = (256, 256)
+
+    jh = np.histogram2d(x, y, bins=bins)[0]
+
+    # smooth the joint histogram with a Gaussian filter of given sigma
+    ndimage.gaussian_filter(jh, sigma=sigma, mode='constant',
+                            output=jh)
+
+    # compute marginal histograms
+    jh = jh + EPS
+    sh = np.sum(jh)
+    jh = jh / sh
+    s1 = np.sum(jh, axis=0).reshape((-1, jh.shape[0]))
+    s2 = np.sum(jh, axis=1).reshape((jh.shape[1], -1))
+
+    # Normalised Mutual Information of:
+    # Studholme, jhill & jhawkes (1998).
+    # "A normalized entropy measure of 3-D medical image alignment".
+    # in Proc. Medical Imaging 1998, vol. 3338, San Diego, CA, pp. 132-143.
+    if normalized:
+        mi = ((np.sum(s1 * np.log(s1)) + np.sum(s2 * np.log(s2)))
+              / np.sum(jh * np.log(jh))) - 1
+    else:
+        mi = (np.sum(jh * np.log(jh)) - np.sum(s1 * np.log(s1))
+              - np.sum(s2 * np.log(s2)))
+
+    return mi
+
+
+def _entropy(u):
+    """Calculate entropy using the maximum-entropy approximation."""
+    k1 = 79.047
+    k2 = 7.4129
+    gamma = 0.37457
+    return (1 + np.log(2 * np.pi)) / 2 - k1 * (np.mean(np.log(np.cosh(u))) - gamma)**2 - k2 * (np.mean(u * np.exp((-u**2) / 2)))**2
+
+
+def MI_2(x1, y1):
+    """Estimate mutual information with sklearn's normalized score
+    (note: the inputs are treated as discrete labels)."""
+    x = list(x1.copy())
+    y = list(y1.copy())
+
+    result_NMI = metrics.normalized_mutual_info_score(x, y)
+
+    return result_NMI
+
+
+def MI_1(x1, y1):
+    """Estimate mutual information by the non-parametric (nearest-neighbor)
+    computation of entropy and mutual information above."""
+    x = x1.reshape(len(x1), 1)
+    y = y1.reshape(len(y1), 1)
+
+    # choose the number of nearest neighbors according to the sample size
+    if len(x) > 3000:
+        k = 15
+    else:
+        k = 10
+
+    mi = mutual_information((x, y), k)
+    return abs(mi)
\ No newline at end of file
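The deleted mutual.py shipped self-tests comparing the k-NN estimator against the closed form for correlated Gaussians; the same sanity check still applies to the re-added module. A sketch mirroring those tests, with the sample size reduced (the estimate is expected to undershoot the analytic value slightly):

    import numpy as np
    from causallearn.search.HiddenCausal.GIN.Mutual_Information import (
        entropy_gaussian, mutual_information)

    rng = np.random.RandomState(0)
    P = np.array([[1, 0], [0.5, 1]])
    C = np.dot(P, P.T)                     # covariance of the simulated pair
    Z = np.dot(P, rng.randn(2, 10000)).T
    X, Y = Z[:, [0]], Z[:, [1]]

    MI_est = mutual_information((X, Y), k=5)
    MI_th = (entropy_gaussian(C[0, 0]) + entropy_gaussian(C[1, 1])
             - entropy_gaussian(C))        # analytic mutual information
    print(MI_est, MI_th)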
diff --git a/causallearn/search/HiddenCausal/GIN/Test_GIN_Condition.py b/causallearn/search/HiddenCausal/GIN/Test_GIN_Condition.py
new file mode 100644
index 00000000..5625257b
--- /dev/null
+++ b/causallearn/search/HiddenCausal/GIN/Test_GIN_Condition.py
@@ -0,0 +1,81 @@
+import numpy as np
+import causallearn.search.HiddenCausal.GIN.FisherTest as FisherTest
+import causallearn.search.HiddenCausal.GIN.Mutual_Information as ID
+
+
+def GIN(X, Z, data, test_function, alpha=0.05):
+    '''
+    Test the Generalized Independent Noise (GIN) condition
+    Parameters
+    ----------
+    data : numpy ndarray
+        data set
+    X, Z : list
+        indices of the observed variables in the two variable sets
+    test_function : callable
+        the independence test being used; takes two samples and returns a p-value
+    alpha : float, default=0.05
+        desired significance level of independence tests (p_value) in (0,1)
+    Returns
+    -------
+    Boolean : True if the GIN condition holds, False otherwise
+    '''
+    omega = getomega(data, X, Z)
+    tdata = data[:, X]
+    result = np.dot(tdata, omega)[:, None]
+    # the condition holds only if the residual is independent of every variable in Z
+    for i in Z:
+        temp = np.array(data[:, [i]])
+        pval = test_function(result, temp)
+        if pval <= alpha:
+            return False
+
+    return True
+
+
+def GIN_MI(X, Z, data):
+    '''Measure the dependence of the GIN residual on Z by the average
+    k-nearest-neighbor mutual information.'''
+    omega = getomega(data, X, Z)
+    tdata = data[:, X]
+    result = np.dot(tdata, omega)
+    MIS = 0
+    for i in Z:
+        temp = np.array(data[:, i])
+        mi = ID.MI_1(result, temp)
+        MIS += mi
+    MIS = MIS / len(Z)
+
+    return MIS
+
+
+def FisherGIN(X, Z, data, test_function, alpha=0.01):
+    '''Test the GIN condition by pooling the p-values with Fisher's method.'''
+    omega = getomega(data, X, Z)
+    tdata = data[:, X]
+    result = np.dot(tdata, omega)[:, None]
+    pvals = []
+
+    for i in Z:
+        temp = np.array(data[:, [i]])
+        pval = test_function(result, temp)
+        pvals.append(pval)
+
+    flag, fisher_pval = FisherTest.FisherTest(pvals, alpha)
+
+    return flag
+
+
+def getomega(data, X, Z):
+    '''Return omega spanning the null space of cov(Z, X), i.e., the last
+    right-singular vector, so that cov(Z, X omega) is (approximately) zero.'''
+    cov = np.cov(data, rowvar=False)
+    cov_m = cov[np.ix_(Z, X)]
+    _, _, v = np.linalg.svd(cov_m)
+    omega = v.T[:, -1]
+    return omega
+
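With all six files in place, the public entry point can be exercised end to end. A final sketch, not part of the patch (the coefficients, sample size, and the expected printout are illustrative, and KCI on a few thousand samples can take a while):

    import numpy as np
    from causallearn.search.HiddenCausal.GIN.GIN import GIN

    rng = np.random.default_rng(0)
    n = 1000
    L1 = rng.uniform(-1, 1, n)                  # non-Gaussian latents, L1 -> L2
    L2 = 0.8 * L1 + rng.uniform(-1, 1, n)
    noise = lambda: 0.2 * rng.uniform(-1, 1, n)
    data = np.column_stack([
        L1 + noise(), 0.9 * L1 + noise(),       # X1, X2 measure L1
        L2 + noise(), 0.7 * L2 + noise(),       # X3, X4 measure L2
    ])

    g, causal_order = GIN(data, indep_test_method='kci', alpha=0.05)
    print(causal_order)   # in a typical run: [[0, 1], [2, 3]], the L1 cluster first
    print(g)              # GeneralGraph with L1 -> L2 and latent -> indicator edges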