Introduction to dataSDA

Po-Wei Chen

2023-06-13

Example of Interval-Valued Data

Built-in Data

# Load the bundled mushroom data set and preview its first six rows.
data("mushroom")
head(mushroom, n = 6L)
#>     Species Pileus.Cap.Width_min Pileus.Cap.Width_max Stipe.Length_min
#> 1    arorae                    3                    8                4
#> 2   arvenis                    6                   21                4
#> 3    benesi                    4                    8                5
#> 4 bernardii                    7                    6                4
#> 5  bisporus                    5                   12                2
#> 6 bitorquis                    5                   15                4
#>   Stipe.Length_max Stipe.Thickness_min Stipe.Thickness_max Edibility
#> 1                9                 0.5                 2.5         U
#> 2               14                 1.0                 3.5         Y
#> 3               11                 1.0                 2.0         Y
#> 4                7                 3.0                 4.5         Y
#> 5                5                 1.5                 2.5         Y
#> 6               10                 2.0                 4.0         Y

Set variable format

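Convert the set-valued variables in the data to the layout the RSDA format expects: each set variable is replaced by a column holding its number of categories, followed by one 0/1 indicator column per category.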

# Expand the set-valued variables into a category count plus one indicator
# column per category. Column 8 is Edibility in the preview above; Species is
# selected by name.
mushroom.set <- set_variable_format(
  data = mushroom,
  location = 8,
  var = "Species"
)
head(mushroom.set, n = 6L)
#>   Species arorae arvenis benesi bernardii bisporus bitorquis califorinus
#> 1      23      1       0      0         0        0         0           0
#> 2      23      0       1      0         0        0         0           0
#> 3      23      0       0      1         0        0         0           0
#> 4      23      0       0      0         1        0         0           0
#> 5      23      0       0      0         0        1         0           0
#> 6      23      0       0      0         0        0         1           0
#>   campestris comtulus cupreo-brunneus diminutives fuseo-fibrillosus
#> 1          0        0               0           0                 0
#> 2          0        0               0           0                 0
#> 3          0        0               0           0                 0
#> 4          0        0               0           0                 0
#> 5          0        0               0           0                 0
#> 6          0        0               0           0                 0
#>   fuscovelatus hondensis lilaceps micromegathus praeclaresquamosus pattersonae
#> 1            0         0        0             0                  0           0
#> 2            0         0        0             0                  0           0
#> 3            0         0        0             0                  0           0
#> 4            0         0        0             0                  0           0
#> 5            0         0        0             0                  0           0
#> 6            0         0        0             0                  0           0
#>   perobscurus semotus silvicola subrutilescens xanthodermus
#> 1           0       0         0              0            0
#> 2           0       0         0              0            0
#> 3           0       0         0              0            0
#> 4           0       0         0              0            0
#> 5           0       0         0              0            0
#> 6           0       0         0              0            0
#>   Pileus.Cap.Width_min Pileus.Cap.Width_max Stipe.Length_min Stipe.Length_max
#> 1                    3                    8                4                9
#> 2                    6                   21                4               14
#> 3                    4                    8                5               11
#> 4                    7                    6                4                7
#> 5                    5                   12                2                5
#> 6                    5                   15                4               10
#>   Stipe.Thickness_min Stipe.Thickness_max Edibility U Y T
#> 1                 0.5                 2.5         3 1 0 0
#> 2                 1.0                 3.5         3 0 1 0
#> 3                 1.0                 2.0         3 0 1 0
#> 4                 3.0                 4.5         3 0 1 0
#> 5                 1.5                 2.5         3 0 1 0
#> 6                 2.0                 4.0         3 0 1 0

RSDA format

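Convert the data to the RSDA format by inserting a type-marker column (`$I` for an interval variable, `$S` for a set variable) in front of each symbolic variable.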

# Insert the RSDA type markers. Variables can be addressed by position
# (`location`, typed by `sym_type1`) or by name (`var`, typed by `sym_type2`).
# In mushroom.set, column 25 is Pileus.Cap.Width_min and column 31 is
# Edibility.
mushroom.tmp <- RSDA_format(
  data = mushroom.set,
  sym_type1 = c("I", "S"),
  location = c(25, 31),
  sym_type2 = c("S", "I", "I"),
  var = c("Species", "Stipe.Length_min", "Stipe.Thickness_min")
)
head(mushroom.tmp, n = 6L)
#>   $S Species arorae arvenis benesi bernardii bisporus bitorquis califorinus
#> 1 $S      23      1       0      0         0        0         0           0
#> 2 $S      23      0       1      0         0        0         0           0
#> 3 $S      23      0       0      1         0        0         0           0
#> 4 $S      23      0       0      0         1        0         0           0
#> 5 $S      23      0       0      0         0        1         0           0
#> 6 $S      23      0       0      0         0        0         1           0
#>   campestris comtulus cupreo-brunneus diminutives fuseo-fibrillosus
#> 1          0        0               0           0                 0
#> 2          0        0               0           0                 0
#> 3          0        0               0           0                 0
#> 4          0        0               0           0                 0
#> 5          0        0               0           0                 0
#> 6          0        0               0           0                 0
#>   fuscovelatus hondensis lilaceps micromegathus praeclaresquamosus pattersonae
#> 1            0         0        0             0                  0           0
#> 2            0         0        0             0                  0           0
#> 3            0         0        0             0                  0           0
#> 4            0         0        0             0                  0           0
#> 5            0         0        0             0                  0           0
#> 6            0         0        0             0                  0           0
#>   perobscurus semotus silvicola subrutilescens xanthodermus $I
#> 1           0       0         0              0            0 $I
#> 2           0       0         0              0            0 $I
#> 3           0       0         0              0            0 $I
#> 4           0       0         0              0            0 $I
#> 5           0       0         0              0            0 $I
#> 6           0       0         0              0            0 $I
#>   Pileus.Cap.Width_min Pileus.Cap.Width_max $I Stipe.Length_min
#> 1                    3                    8 $I                4
#> 2                    6                   21 $I                4
#> 3                    4                    8 $I                5
#> 4                    7                    6 $I                4
#> 5                    5                   12 $I                2
#> 6                    5                   15 $I                4
#>   Stipe.Length_max $I Stipe.Thickness_min Stipe.Thickness_max $S Edibility U Y
#> 1                9 $I                 0.5                 2.5 $S         3 1 0
#> 2               14 $I                 1.0                 3.5 $S         3 0 1
#> 3               11 $I                 1.0                 2.0 $S         3 0 1
#> 4                7 $I                 3.0                 4.5 $S         3 0 1
#> 5                5 $I                 1.5                 2.5 $S         3 0 1
#> 6               10 $I                 2.0                 4.0 $S         3 0 1
#>   T
#> 1 0
#> 2 0
#> 3 0
#> 4 0
#> 5 0
#> 6 0

Clean the column names

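Clean up the variable names to conform to the RSDA format by removing the `_min` and `_max` suffixes, so that both bounds of an interval share one name. Note that in the output below the category column `diminutives` is also shortened to `dutives`.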

# Rename the columns of the marker-annotated table, then preview the result.
mushroom.clean <- clean_colnames(
  data = mushroom.tmp
)
head(mushroom.clean, n = 6L)
#>   $S Species arorae arvenis benesi bernardii bisporus bitorquis califorinus
#> 1 $S      23      1       0      0         0        0         0           0
#> 2 $S      23      0       1      0         0        0         0           0
#> 3 $S      23      0       0      1         0        0         0           0
#> 4 $S      23      0       0      0         1        0         0           0
#> 5 $S      23      0       0      0         0        1         0           0
#> 6 $S      23      0       0      0         0        0         1           0
#>   campestris comtulus cupreo-brunneus dutives fuseo-fibrillosus fuscovelatus
#> 1          0        0               0       0                 0            0
#> 2          0        0               0       0                 0            0
#> 3          0        0               0       0                 0            0
#> 4          0        0               0       0                 0            0
#> 5          0        0               0       0                 0            0
#> 6          0        0               0       0                 0            0
#>   hondensis lilaceps micromegathus praeclaresquamosus pattersonae perobscurus
#> 1         0        0             0                  0           0           0
#> 2         0        0             0                  0           0           0
#> 3         0        0             0                  0           0           0
#> 4         0        0             0                  0           0           0
#> 5         0        0             0                  0           0           0
#> 6         0        0             0                  0           0           0
#>   semotus silvicola subrutilescens xanthodermus $I Pileus.Cap.Width
#> 1       0         0              0            0 $I                3
#> 2       0         0              0            0 $I                6
#> 3       0         0              0            0 $I                4
#> 4       0         0              0            0 $I                7
#> 5       0         0              0            0 $I                5
#> 6       0         0              0            0 $I                5
#>   Pileus.Cap.Width $I Stipe.Length Stipe.Length $I Stipe.Thickness
#> 1                8 $I            4            9 $I             0.5
#> 2               21 $I            4           14 $I             1.0
#> 3                8 $I            5           11 $I             1.0
#> 4                6 $I            4            7 $I             3.0
#> 5               12 $I            2            5 $I             1.5
#> 6               15 $I            4           10 $I             2.0
#>   Stipe.Thickness $S Edibility U Y T
#> 1             2.5 $S         3 1 0 0
#> 2             3.5 $S         3 0 1 0
#> 3             2.0 $S         3 0 1 0
#> 4             4.5 $S         3 0 1 0
#> 5             2.5 $S         3 0 1 0
#> 6             4.0 $S         3 0 1 0

Write the symbolic data table to a CSV file

# Save the cleaned table to a CSV file in the current working directory.
write_csv_table(
  data = mushroom.clean,
  file = "mushroom_interval.csv"
)

Read the symbolic data table and check the format

# Read the semicolon-separated file back in as a symbolic data table and
# preview it. `TRUE` is spelled out because `T` is an ordinary variable that
# can be reassigned, which would silently change `header`.
mushroom.int <- read.sym.table(
  file = "mushroom_interval.csv",
  header = TRUE,
  sep = ";",
  dec = ".",
  row.names = 1
)
head(mushroom.int)
#> # A tibble: 6 × 5
#>       Species Pileus.Cap.Width   Stipe.Length Stipe.Thickness  Edibility
#>    <symblc_s>       <symblc_n>     <symblc_n>      <symblc_n> <symblc_s>
#> 1    {arorae}    [3.00 : 8.00]  [4.00 : 9.00]   [0.50 : 2.50]        {U}
#> 2   {arvenis}   [6.00 : 21.00] [4.00 : 14.00]   [1.00 : 3.50]        {Y}
#> 3    {benesi}    [4.00 : 8.00] [5.00 : 11.00]   [1.00 : 2.00]        {Y}
#> 4 {bernardii}    [7.00 : 6.00]  [4.00 : 7.00]   [3.00 : 4.50]        {Y}
#> 5  {bisporus}   [5.00 : 12.00]  [2.00 : 5.00]   [1.50 : 2.50]        {Y}
#> 6 {bitorquis}   [5.00 : 15.00] [4.00 : 10.00]   [2.00 : 4.00]        {Y}

Example of iGAP format Data

Built-in Data

# Load the bundled abalone data in iGAP format and preview its first six rows.
data("Abalone.iGAP")
head(Abalone.iGAP, n = 6L)
#>                 Length       Diameter          Height           Whole
#> F-10-12  0.1275,0.9975   0.075, 0.815 -0.0175, 0.3125   -1.021, 3.883
#> F-13-15  0.1775,1.0275    0.125,0.825    0.025, 0.325 -0.8567, 3.6303
#> F-16-18      0.22,0.92 0.1725, 0.7425  0.0375, 0.3075 -0.5725, 3.1235
#> F-19-21 0.3725, 0.8425 0.2575, 0.6875  0.0825, 0.2525 -0.0368, 2.8443
#> F-23-24   0.275, 0.975   0.255, 0.755      0.09, 0.27   -0.303, 3.469
#> F-25-29   0.475, 0.775   0.405, 0.645  0.1625, 0.2325    0.915, 2.105
#>                 Shucked         Viscera           Shell
#> F-10-12 -0.6322, 2.1948 -0.2077, 0.7712   -0.258, 1.054
#> F-13-15 -0.4548, 1.7942 -0.1905, 0.7555   -0.269, 1.153
#> F-16-18   -0.244, 1.206 -0.1037, 0.6752 -0.3233, 1.4477
#> F-19-21     -0.16, 1.14   -0.033, 0.615 -0.1175, 1.1725
#> F-23-24 -0.2295, 1.3205     -0.13, 0.83    0.005, 0.945
#> F-25-29    0.134, 0.896  0.1467, 0.3798      0.45, 0.55

iGAP to MM

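Convert iGAP-format data, in which each interval is stored as a single "min,max" entry, to the MM format, in which each interval variable is split into separate `_min` and `_max` columns.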

# Convert the interval columns at positions 1 through 7 into paired
# *_min / *_max columns, then preview the result.
Abalone <- iGAP_to_MM(Abalone.iGAP, seq(1, 7, by = 1))
head(Abalone, n = 6L)
#>         Length_min Length_max Diameter_min Diameter_max Height_min Height_max
#> F-10-12     0.1275     0.9975        0.075        0.815    -0.0175     0.3125
#> F-13-15     0.1775     1.0275        0.125        0.825      0.025      0.325
#> F-16-18       0.22       0.92       0.1725       0.7425     0.0375     0.3075
#> F-19-21     0.3725     0.8425       0.2575       0.6875     0.0825     0.2525
#> F-23-24      0.275      0.975        0.255        0.755       0.09       0.27
#> F-25-29      0.475      0.775        0.405        0.645     0.1625     0.2325
#>         Whole_min Whole_max Shucked_min Shucked_max Viscera_min Viscera_max
#> F-10-12    -1.021     3.883     -0.6322      2.1948     -0.2077      0.7712
#> F-13-15   -0.8567    3.6303     -0.4548      1.7942     -0.1905      0.7555
#> F-16-18   -0.5725    3.1235      -0.244       1.206     -0.1037      0.6752
#> F-19-21   -0.0368    2.8443       -0.16        1.14      -0.033       0.615
#> F-23-24    -0.303     3.469     -0.2295      1.3205       -0.13        0.83
#> F-25-29     0.915     2.105       0.134       0.896      0.1467      0.3798
#>         Shell_min Shell_max
#> F-10-12    -0.258     1.054
#> F-13-15    -0.269     1.153
#> F-16-18   -0.3233    1.4477
#> F-19-21   -0.1175    1.1725
#> F-23-24     0.005     0.945
#> F-25-29      0.45      0.55