ParametricEM

This pyAgrum notebook is licensed under a Creative Commons Attribution-NonCommercial 4.0 International License.

In [1]:
import pyAgrum as gum
import pyAgrum.lib.notebook as gnb

from pyAgrum.lib._utils.oslike import head

import os
# the generated databases will be saved in "out/*.csv"
os.makedirs("out",exist_ok=True)  # make sure the output directory exists
EMnomissing=os.path.join("out","EM_nomissing.csv")
EMmissing=os.path.join("out","EM_missing.csv")

generating data with missing values (at random)

In [2]:
src=gum.fastBN("A->B<-C->D->E<-B;D->F")
gum.generateCSV(src,EMnomissing,5000,random_order=False)
src
Out[2]:
(rendering of src: the Bayesian network A->B<-C->D->E<-B, D->F)
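Since the graph rendering does not carry over to this text export, a textual summary of the generated network can be printed instead (a small sketch relying on the string representation of BayesNet):

print(src)   # textual summary of the network: nodes, arcs, domain size, number of parameters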
In [3]:
import pandas as pd
import numpy as np

def add_missing(src,dst,proba):
  # replace each cell of the csv file src by '?' with probability proba (missing completely at random)
  df=pd.read_csv(src)
  mask=np.random.choice([True, False], size=df.shape,p=[proba,1-proba])
  df.mask(mask).to_csv(dst,na_rep='?',index=False,float_format='%.0f')

gum.generateCSV(src,EMnomissing,5000,random_order=False)
add_missing(EMnomissing,EMmissing,proba=0.1)
In [4]:
print("No missing")
head(EMnomissing)
print("Missing")
head(EMmissing)
No missing
A,B,C,D,E,F
0,1,1,0,0,0
0,0,0,0,1,0
1,1,1,1,1,0
0,0,1,1,0,0
0,0,1,0,1,0
0,0,1,1,1,0
0,1,1,0,0,0
1,0,1,0,1,0
1,0,1,1,1,1

Missing
A,B,C,D,E,F
0,1,1,?,?,0
0,0,0,?,?,0
1,1,1,1,1,?
0,0,1,1,0,0
0,0,1,0,1,0
0,0,1,1,1,0
0,1,1,0,0,0
1,0,?,0,1,0
1,0,?,1,1,1
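
A quick sanity check (a sketch using pandas, already imported above) confirms that about 10% of the cells were replaced by '?':

dfm=pd.read_csv(EMmissing,na_values="?")
print(f"observed missing rate: {dfm.isna().mean().mean():.3f}")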

learning with missing data

In [5]:
learner = gum.BNLearner(EMmissing, ["?"])
print(f"Missing values in {EMmissing} : {learner.hasMissingValues()}")
Missing values in out/EM_missing.csv : True
In [6]:
# this would fail: the database contains missing values!
# learner.learnParameters(src.dag())
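To see the error message rather than simply skipping the call, it can be wrapped in a try/except (a minimal sketch: the exact exception class depends on the pyAgrum version, so the generic Exception is caught here):

try:
  learner.learnParameters(src.dag())   # raises because EM is not enabled yet
except Exception as e:
  print("learnParameters failed:",e)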
In [7]:
learner.useEM(1e-3)            # enable EM, with 1e-3 as the convergence threshold
learner.useAprioriSmoothing()  # add a smoothing prior on the counts
bn=learner.learnParameters(src.dag())
print(f"# iterations : {learner.nbrIterations()}")
gnb.sideBySide(gnb.getInference(src),gnb.getInference(bn))
# iterations : 7
(side-by-side inference displays: the original BN src on the left, the BN learned by EM on the right)
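Beyond the visual comparison, the learned parameters can be inspected directly. For instance, a sketch comparing the CPT of D in the original and learned networks (assuming gnb.sideBySide accepts Potentials and a captions argument):

d=src.idFromName("D")
gnb.sideBySide(src.cpt(d),bn.cpt(d),
               captions=["original CPT of D","CPT of D estimated by EM"])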

learning with smaller error (and no smoothing)

In [8]:
learner = gum.BNLearner(EMmissing, ["?"])
learner.setVerbosity(True)
learner.useEM(1e-8)
bn2=learner.learnParameters(src.dag())
print(f"# iterations : {learner.nbrIterations()}")
gnb.sideBySide(gnb.getInference(src),gnb.getInference(bn2))
# iterations : 15
(side-by-side inference displays: the original BN src on the left, the BN bn2 learned with the tighter threshold on the right)
In [9]:
print(learner.history())
(0.4822113567457864, 0.19167208534569385, 0.0570599311574769, 0.015319003698147725, 0.0040198457557376, 0.00105433890106329, 0.00027768138315214516, 7.345289203230893e-05, 1.9502375337583795e-05, 5.193776337099988e-06, 1.3865808773904608e-06, 3.709140893017355e-07, 9.938224682675875e-08, 2.6663963178139166e-08, 7.161741559269573e-09)
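The history shows how fast the EM stopping criterion decreases from one iteration to the next. A minimal sketch to visualize it, assuming matplotlib is installed:

import matplotlib.pyplot as plt

plt.semilogy(learner.history(),marker="o")   # log scale: the decrease is roughly geometric
plt.xlabel("EM iteration")
plt.ylabel("convergence criterion")
plt.show()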