Adding conditions to loop to generate class intervals using ClassInt in R for areas with fewer n classes

Question

I have issues with assigning classes to intervals that dont meet the criteria in my loop function using the classInt library in R. How do a include a condition for an automatic class if the rows in the dataframe subset are less than the number of classes (n) defined for class intervals, please? In my example, n=3. Here is an example with a sample dataframe you can create in R

library(classInt)
library(rlist)
library(dplyr)

##Create dataframe 
Country <- c('Australia', 'Italy', 'Peru', 'China','Australia', 'Italy', 'Peru', 
'China','Australia', 'Italy', 'Peru', 'China','Nigeria','Australia', 'Italy', 'Peru', 
'China')
Time <- c(21, 18, 17, 10,10,15,27,0,2,4,5,7,4,8,9,10,5)
Area <- c("A","A","A","A","B","B","B","B","C","C","C","C","D","D","D","D","D")
DF  <- data.frame(Country, Time, Area)

This should produce this dataframe:


      Country Time Area
 1  Australia   21    A
 2      Italy   18    A
 3       Peru   17    A
 4      China   10    A
 5  Australia   10    B
 6      Italy   15    B
 7       Peru   27    B
 8      China    0    B
 9  Australia    2    C
 10     Italy    4    C
 11      Peru    5    C
 12     China    7    C
 13   Nigeria    4    D
 14 Australia    8    D
 15     Italy    9    D
 16      Peru   10    D
 17     China    5    D


## Split by Country
NewXL <- split(DF,DF$Country)

## Generate the ranges and category/classes for each country
NewXL2 <- list()
for (i in 1:length(NewXL)) { AB <- NewXL[[i]]
#Create condition:
skip_to_next <- FALSE
tryCatch(Classes <- classIntervals(AB$Time, n=3, 
cutlabels=F,style='fisher',factor=F,warnSmallN=F,warnLargeN=F), error = function(e) { 
skip_to_next <<- TRUE})
if(skip_to_next) { next } 
## Classify
# Range and Class for each Absolute population exposed
AB$Range_Abs <- classify_intervals(AB$Time, 3, "fisher", factor = T)
AB$Class_Abs <- classify_intervals(AB$Time,3, "fisher", factor = FALSE)

NewXL2[[i]] <-AB }

This results is a list of 5 countries with Nigeria being Null because it only has one row (at least 3 could have been ideal to create intervals). Is there a way to write the code for the loop such that I can define a class and range minimum to be added for any dataframe in the loop with only one row? In this case, Nigeria should have only one row so I could have a class of 3 (the maximum) assigned automatically to the single row and the range would be [0,4). Below is what the loop output looks like.

  NewXL2
  [[1]]
       Country Time Area Range_Abs Class_Abs
  1  Australia   21    A [15.5,21]         3
  5  Australia   10    B  [5,15.5)         2
  9  Australia    2    C     [2,5)         1
  14 Australia    8    D  [5,15.5)         2

  [[2]]
     Country Time Area Range_Abs Class_Abs
  4    China   10    A  [8.5,10]         3
  8    China    0    B   [0,2.5)         1
  12   China    7    C [2.5,8.5)         2
  17   China    5    D [2.5,8.5)         2

  [[3]]
     Country Time Area Range_Abs Class_Abs
  2    Italy   18    A   [12,18]         3
  6    Italy   15    B   [12,18]         3
  10   Italy    4    C   [4,6.5)         1
  15   Italy    9    D  [6.5,12)         2

  [[4]]
  NULL

  [[5]]
     Country Time Area Range_Abs Class_Abs
  3     Peru   17    A [13.5,22)         2
  7     Peru   27    B   [22,27]         3
  11    Peru    5    C  [5,13.5)         1
  16    Peru   10    D  [5,13.5)         1

This is what the Nigeria dataframe should look like after the loop:

[[4]]
    Country Time Area Range_Abs Class_Abs
 13 Nigeria   10    D     [0,4)         3  

 #Merge all lists into long dataframe with class intervals
 NewXL2b <- list.rbind(NewXL2)

the-mad-statter · Accepted Answer · 2024-07-03 21:02:37Z

2

You could make use of an if/if else/else in your loop:

library(classInt)

data <- data.frame(
  country = c('Australia', 'Italy', 'Peru', 'China','Australia', 'Italy', 'Peru', 'China','Australia', 'Italy', 'Peru', 'China','Nigeria','Australia', 'Italy', 'Peru', 'China'), 
  time = c(21, 18, 17, 10, 10, 15, 27, 0, 2, 4, 5, 7, 4, 8, 9, 10, 5), 
  area = c("A", "A", "A", "A", "B", "B", "B", "B", "C", "C", "C", "C", "D", "D", "D", "D", "D")
)

split_data <- split(data, data$country)

result <- list()

for (i in 1:length(split_data)) {
  split <- split_data[[i]]
  
  if(nrow(split) == 1) {
    # add a made up lower level less than the given time
    # so that the given time is the second level
    levels <- c(-1, split$time)
    labels <- sprintf("[%s]", levels)
    split$range_abs = factor(split$time, levels, labels)
    split$class_abs = as.numeric(split$range_abs)
  } else if(nrow(split) == 2) {
    levels <- sort(split$time)
    labels <- sprintf("[%s]", levels)
    split$range_abs = factor(split$time, levels, labels)
    split$class_abs = as.numeric(split$range_abs)
  } else {
    skip_to_next <- FALSE
  
    tryCatch(
      Classes <- classIntervals(
        split$time, 
        n = 5, 
        cutlabels = FALSE,
        style = 'fisher',
        factor = FALSE,
        warnSmallN = FALSE,
        warnLargeN = FALSE
      ), 
      error = function(e) { 
        skip_to_next <<- TRUE
      }
    )
  
    if(skip_to_next) { next } 
  
    split$range_abs <- classify_intervals(split$time, 3, "fisher", factor = TRUE)
    split$class_abs <- classify_intervals(split$time, 3, "fisher", factor = FALSE)
  }

  result[[i]] <- split
}

result
#> [[1]]
#>      country time area range_abs class_abs
#> 1  Australia   21    A [15.5,21]         3
#> 5  Australia   10    B  [5,15.5)         2
#> 9  Australia    2    C     [2,5)         1
#> 14 Australia    8    D  [5,15.5)         2
#> 
#> [[2]]
#>    country time area range_abs class_abs
#> 4    China   10    A  [8.5,10]         3
#> 8    China    0    B   [0,2.5)         1
#> 12   China    7    C [2.5,8.5)         2
#> 17   China    5    D [2.5,8.5)         2
#> 
#> [[3]]
#>    country time area range_abs class_abs
#> 2    Italy   18    A   [12,18]         3
#> 6    Italy   15    B   [12,18]         3
#> 10   Italy    4    C   [4,6.5)         1
#> 15   Italy    9    D  [6.5,12)         2
#> 
#> [[4]]
#>    country time area range_abs class_abs
#> 13 Nigeria    4    D       [4]         2
#> 
#> [[5]]
#>    country time area range_abs class_abs
#> 3     Peru   17    A [13.5,22)         2
#> 7     Peru   27    B   [22,27]         3
#> 11    Peru    5    C  [5,13.5)         1
#> 16    Peru   10    D  [5,13.5)         1

^{Created on 2024-07-03 with reprex v2.1.0.9000}

_{Reprex files hosted with on GitHub}

edited Jul 3, 2024 at 21:02

answered Jul 3, 2024 at 19:49

the-mad-statter

9,4261 gold badge16 silver badges31 bronze badges

Sign up to request clarification or add additional context in comments.

4 Comments

Joke O. Over a year ago

Thank you! This works well. What if I want to assign a middle class e.g. 2 (if there are 3 classes) to such dataframes with one row?

the-mad-statter Over a year ago

I'm not sure I understand the question. If there is only one row and therefore a single value of time in your case, how do you want to split that single value into more than one class?

Joke O. Over a year ago

Not split per se. Just assign a class value. In my case, this has to do with health risk so if an island for example has only one area, a medium risk level should be assigned so that when a global view is pulled from the combined dataframes, it isn't assigned minimum risk for that area with one row.

the-mad-statter Over a year ago

You would have to manipulate what you pass as levels and labels to the factor() call. See my edit to add a lower class to the one-row case.

Collectives™ on Stack Overflow

Adding conditions to loop to generate class intervals using ClassInt in R for areas with fewer n classes

1 Answer 1

4 Comments

Your Answer

Hot Network Questions

Collectives™ on Stack Overflow

1 Answer 1

4 Comments

Your Answer

Sign up or log in

Post as a guest

Related