Use Case: Logic Test

These tests were set by a senior data scientist and researcher to test the logic of correlations observed across distributions. They apply both to the preparation of features for machine learning and to distribution techniques for synthetic data. The logic tests were treated as challenges for the data science team when preparing their data for consumption by their models.

import pandas as pd
from ds_discovery import Wrangle
# Build a Wrangle component through its `from_memory` constructor.
wr = Wrangle.from_memory()
# Shortcut to the component's `tools` object; every `select2dict` and
# `correlate_selection` call below goes through it.
tools = wr.tools

Logic Tests

  1. (A AND B) OR C

  2. !A AND B

  3. !(A AND B)

  4. A AND !B

  5. (A OR B) AND (C OR D)

# Sample frame shared by all logic tests:
#   s1 - each letter 'A'..'D' repeated four times
#   s2 - the sequence 'A'..'D' cycled four times (so every s1/s2 pairing occurs once)
#   s3 - the integers 1..8, repeated twice
df = pd.DataFrame({
    's1': pd.Series(list('AAAABBBBCCCCDDDD')),
    's2': pd.Series(list('ABCDABCDABCDABCD')),
    's3': pd.Series(list(range(1, 9)) * 2),
})

(A AND B) OR C

Single column

# (A AND B) OR C, all on column 's3'.
# Intended predicate: (s3 > 2 AND s3 < 5) OR s3 == 8
# NOTE(review): '@' appears to be the placeholder for the column value - confirm.
A = tools.select2dict(column='s3', condition="(@ > 2)", logic='AND')
B = tools.select2dict(column='s3', condition="(@ < 5)", logic='AND')
C = tools.select2dict(column='s3', condition="@ == 8", logic='OR')

# The nested list groups A and B, mirroring the parentheses in the expression.
selection = [[A, B], C]

# Write the result as a flag column (action=1, default_action=0).
df['l1'] = tools.correlate_selection(
    df,
    selection=selection,
    action=1,
    default_action=0,
)
# Display the 's3' values of the rows flagged with 1.
df.loc[df['l1'] == 1, ['s3']]
../../_images/log_img01.png

Multi column

# (A AND B) OR C across columns 's1' and 's2'.
# Intended predicate: (s1 == 'A' AND s2 == 'B') OR s1 == 'C'
A = tools.select2dict(column='s1', condition="@ == 'A'")
B = tools.select2dict(column='s2', condition="@ == 'B'", logic='AND')
C = tools.select2dict(column='s1', condition="@ == 'C'", logic='OR')

# The nested list groups A and B, mirroring the parentheses in the expression.
selection = [[A, B], C]

# Write the result as a flag column (action=1, default_action=0).
df['l1'] = tools.correlate_selection(
    df,
    selection=selection,
    action=1,
    default_action=0,
)
# Display the 's1'/'s2' values of the rows flagged with 1.
df.loc[df['l1'] == 1, ['s1', 's2']]
../../_images/log_img02.png

!A AND B

Single column

# !A AND B, all on column 's3'.
# Intended predicate: NOT (s3 == 7) AND s3 > 4
A = tools.select2dict(column='s3', condition="@ == 7", logic='NOT')
B = tools.select2dict(column='s3', condition="@ > 4", logic='AND')

# Flat list: the conditions are passed in order, with no grouping.
selection = [A, B]

# Write the result as a flag column (action=1, default_action=0).
df['l2'] = tools.correlate_selection(
    df,
    selection=selection,
    action=1,
    default_action=0,
)
# Display the 's3' values of the rows flagged with 1.
df.loc[df['l2'] == 1, ['s3']]
../../_images/log_img03.png

Multi column

# !A AND B across columns 's1' and 's2'.
# Intended predicate: NOT (s1 == 'A') AND s2 == 'B'
A = tools.select2dict(column='s1', condition="@ == 'A'", logic='NOT')
B = tools.select2dict(column='s2', condition="@ == 'B'", logic='AND')

# Flat list: the conditions are passed in order, with no grouping.
selection = [A, B]

# Write the result as a flag column (action=1, default_action=0).
df['l2'] = tools.correlate_selection(
    df,
    selection=selection,
    action=1,
    default_action=0,
)
# Display the 's1'/'s2' values of the rows flagged with 1.
df.loc[df['l2'] == 1, ['s1', 's2']]
../../_images/log_img04.png

!(A AND B)

Single column

# !(A AND B), all on column 's3'.
# Intended predicate: NOT (s3 < 8 AND s3 > 3)
A = tools.select2dict(column='s3', condition="@ < 8")
B = tools.select2dict(column='s3', condition="@ > 3", logic='AND')

# [A, B] is grouped first; the trailing 'NOT' is meant to negate that group.
selection = [[A, B], 'NOT']

# Stored in 'l3' (numbered after this logic test) so the 'l1' result of the
# first logic test is not overwritten.
df['l3'] = tools.correlate_selection(df, selection=selection, action=1, default_action=0)
# Display the 's3' values of the rows flagged with 1.
df.loc[df['l3'] == 1, ['s3']]
../../_images/log_img05.png

Multi column

# !(A AND B) across columns 's1' and 's2'.
# Intended predicate: NOT (s1 == 'A' AND s2 == 'B')
A = tools.select2dict(column='s1', condition="@ == 'A'")
B = tools.select2dict(column='s2', condition="@ == 'B'", logic='AND')

# [A, B] is grouped first; the trailing 'NOT' is meant to negate that group.
# (The original line assigned `selection` twice in a chained assignment.)
selection = [[A, B], 'NOT']

# Write the result as a flag column (action=1, default_action=0).
df['l3'] = tools.correlate_selection(df, selection=selection, action=1, default_action=0)
# Display the 's1'/'s2' values of the rows flagged with 1.
df.loc[df['l3'] == 1, ['s1', 's2']]
../../_images/log_img06.png

A AND !B

Single column

# A AND !B, all on column 's3'.
# Intended predicate: s3 > 5 AND NOT (s3 == 7)
A = tools.select2dict(column='s3', condition="@ > 5")
B = tools.select2dict(column='s3', condition="@ == 7", logic='NOT')

# Flat list: the conditions are passed in order, with no grouping.
selection = [A, B]

# Stored in 'l4' (numbered after this logic test) so the 'l1' result of the
# first logic test is not overwritten.
df['l4'] = tools.correlate_selection(df, selection=selection, action=1, default_action=0)
# Display the 's3' values of the rows flagged with 1.
df.loc[df['l4'] == 1, ['s3']]
../../_images/log_img07.png

Multi column

# A AND !B across columns 's1' and 's2'.
# Intended predicate: s1 == 'A' AND NOT (s2 == 'B')
A = tools.select2dict(column='s1', condition="@ == 'A'")
B = tools.select2dict(column='s2', condition="@ == 'B'", logic='NOT')

# Flat list: the conditions are passed in order, with no grouping.
selection = [A, B]

# Write the result as a flag column (action=1, default_action=0).
df['l4'] = tools.correlate_selection(
    df,
    selection=selection,
    action=1,
    default_action=0,
)
# Display the 's1'/'s2' values of the rows flagged with 1.
df.loc[df['l4'] == 1, ['s1', 's2']]
../../_images/log_img08.png

(A OR B) AND (C OR D)

Single column

# (A OR B) AND (C OR D), all on column 's3'.
# Intended predicate: (s3 < 3 OR s3 > 5) AND (s3 == 2 OR s3 > 7)
A = tools.select2dict(column='s3', condition="(@ < 3)")
B = tools.select2dict(column='s3', condition="(@ > 5)", logic='OR')
C = tools.select2dict(column='s3', condition="@ == 2")
D = tools.select2dict(column='s3', condition="@ > 7", logic='OR')

# Two grouped sub-selections joined by an explicit 'AND', mirroring the
# two parenthesised terms of the expression.
selection = [[A, B], 'AND', [C, D]]

# Stored in 'l5' (numbered after this logic test) so the 'l1' result of the
# first logic test is not overwritten.
df['l5'] = tools.correlate_selection(df, selection=selection, action=1, default_action=0)
# Display the 's3' values of the rows flagged with 1.
df.loc[df['l5'] == 1, ['s3']]
../../_images/log_img09.png

Multi column

# (A OR B) AND (C OR D) across columns 's1' and 's2'.
# Intended predicate: (s1 == 'A' OR s2 == 'B') AND (s1 == 'C' OR s2 == 'D')
A = tools.select2dict(column='s1', condition="@ == 'A'")
B = tools.select2dict(column='s2', condition="@ == 'B'", logic='OR')
C = tools.select2dict(column='s1', condition="@ == 'C'")
D = tools.select2dict(column='s2', condition="@ == 'D'", logic='OR')

# Two grouped sub-selections joined by an explicit 'AND', mirroring the
# two parenthesised terms of the expression.
selection = [[A, B], 'AND', [C, D]]

# Stored in 'l5' (numbered after this logic test); the original reused 'l4',
# which overwrote the result column of the "A AND !B" test.
df['l5'] = tools.correlate_selection(df, selection=selection, action=1, default_action=0)
# Display the 's1'/'s2' values of the rows flagged with 1.
df.loc[df['l5'] == 1, ['s1', 's2']]
../../_images/log_img10.png