1

I have issues assigning classes to intervals that don't meet the criteria in my loop using the classInt library in R. How do I include a condition that assigns an automatic class when the number of rows in a dataframe subset is less than the number of classes (n) defined for the class intervals? In my example, n = 3. Here is an example with a sample dataframe you can create in R:

library(classInt)
library(rlist)
library(dplyr)

## Create the sample data frame: one row per (Country, Area) observation.
## Nigeria appears only once (in area D); every other country appears in A-D.
core_countries <- c("Australia", "Italy", "Peru", "China")
Country <- c(rep(core_countries, times = 3), "Nigeria", core_countries)
Time <- c(
  21, 18, 17, 10,
  10, 15, 27, 0,
  2, 4, 5, 7,
  4, 8, 9, 10, 5
)
Area <- rep(c("A", "B", "C", "D"), times = c(4, 4, 4, 5))
DF <- data.frame(Country = Country, Time = Time, Area = Area)

This should produce this dataframe:


      Country Time Area
 1  Australia   21    A
 2      Italy   18    A
 3       Peru   17    A
 4      China   10    A
 5  Australia   10    B
 6      Italy   15    B
 7       Peru   27    B
 8      China    0    B
 9  Australia    2    C
 10     Italy    4    C
 11      Peru    5    C
 12     China    7    C
 13   Nigeria    4    D
 14 Australia    8    D
 15     Italy    9    D
 16      Peru   10    D
 17     China    5    D


## Split by Country: one data frame per country
NewXL <- split(DF, DF$Country)

## Generate the ranges and category/classes for each country
NewXL2 <- list()
for (i in seq_along(NewXL)) {
  AB <- NewXL[[i]]

  # Probe whether class intervals can be computed for this country.
  # The handler turns any error from classIntervals() into a skip flag.
  skip_to_next <- tryCatch(
    {
      Classes <- classIntervals(
        AB$Time,
        n = 3,
        cutlabels = FALSE,
        style = "fisher",
        factor = FALSE,
        warnSmallN = FALSE,
        warnLargeN = FALSE
      )
      FALSE
    },
    error = function(e) TRUE
  )
  if (skip_to_next) {
    # Nothing is stored for this country.
    next
  }

  ## Classify
  # Range (interval label) and class (interval index) for each Time value
  AB$Range_Abs <- classify_intervals(
    AB$Time,
    n = 3, style = "fisher", factor = TRUE
  )
  AB$Class_Abs <- classify_intervals(
    AB$Time,
    n = 3, style = "fisher", factor = FALSE
  )

  NewXL2[[i]] <- AB
}

This results in a list of 5 countries, with Nigeria being NULL because it only has one row (at least 3 would have been ideal to create intervals). Is there a way to write the loop so that I can define a default class and range to be added for any dataframe in the loop with only one row? In this case, Nigeria has only one row, so I would like a class of 3 (the maximum) to be assigned automatically to that single row, with a range of [0,4). Below is what the loop output looks like.

  NewXL2
  [[1]]
       Country Time Area Range_Abs Class_Abs
  1  Australia   21    A [15.5,21]         3
  5  Australia   10    B  [5,15.5)         2
  9  Australia    2    C     [2,5)         1
  14 Australia    8    D  [5,15.5)         2

  [[2]]
     Country Time Area Range_Abs Class_Abs
  4    China   10    A  [8.5,10]         3
  8    China    0    B   [0,2.5)         1
  12   China    7    C [2.5,8.5)         2
  17   China    5    D [2.5,8.5)         2

  [[3]]
     Country Time Area Range_Abs Class_Abs
  2    Italy   18    A   [12,18]         3
  6    Italy   15    B   [12,18]         3
  10   Italy    4    C   [4,6.5)         1
  15   Italy    9    D  [6.5,12)         2

  [[4]]
  NULL

  [[5]]
     Country Time Area Range_Abs Class_Abs
  3     Peru   17    A [13.5,22)         2
  7     Peru   27    B   [22,27]         3
  11    Peru    5    C  [5,13.5)         1
  16    Peru   10    D  [5,13.5)         1

This is what the Nigeria dataframe should look like after the loop:

[[4]]
    Country Time Area Range_Abs Class_Abs
 13 Nigeria    4    D     [0,4)         3

 # Row-bind the per-country data frames into one long data frame
 # that carries the class-interval columns.
 NewXL2b <- do.call("rbind", NewXL2)

1 Answer 1

2

You could make use of an if / else if / else chain in your loop:

library(classInt)

# Sample data: every country appears in areas A-D except Nigeria,
# which has a single observation (area D).
core_countries <- c("Australia", "Italy", "Peru", "China")

data <- data.frame(
  country = c(rep(core_countries, times = 3), "Nigeria", core_countries),
  time = c(
    21, 18, 17, 10,
    10, 15, 27, 0,
    2, 4, 5, 7,
    4, 8, 9, 10, 5
  ),
  area = rep(c("A", "B", "C", "D"), times = c(4, 4, 4, 5))
)

# One data frame per country.
split_data <- split(data, data$country)

# Collects the classified per-country data frames.
result <- list()

# Number of classes requested from the Fisher classification. The same value
# drives both the "too few rows" check and the classify_intervals() calls.
n_classes <- 3

# Add `range_abs` (interval label, factor) and `class_abs` (class index,
# numeric) to each per-country data frame and store it in `result`.
for (i in seq_along(split_data)) {
  country_df <- split_data[[i]]
  n_rows <- nrow(country_df)

  if (n_rows >= 1 && n_rows < n_classes) {
    # Too few rows to build n_classes intervals: every distinct time becomes
    # its own level. unique() guards against tied times, which would make
    # factor() fail on duplicated levels.
    time_levels <- sort(unique(country_df$time))

    if (length(time_levels) == 1) {
      # add a made up lower level just below the given time
      # so that the given time is the second level (class 2);
      # deriving it from the value avoids colliding with a real time
      time_levels <- c(time_levels - 1, time_levels)
    }

    time_labels <- sprintf("[%s]", time_levels)
    country_df$range_abs <- factor(country_df$time, time_levels, time_labels)
    country_df$class_abs <- as.numeric(country_df$range_abs)
  } else {
    # classify_intervals() computes the class intervals itself, so no
    # separate classIntervals() probe is needed. A failure is reported as a
    # warning and the country is skipped instead of aborting the whole loop.
    classified <- tryCatch(
      list(
        range_abs = classify_intervals(
          country_df$time, n_classes, "fisher",
          factor = TRUE
        ),
        class_abs = classify_intervals(
          country_df$time, n_classes, "fisher",
          factor = FALSE
        )
      ),
      error = function(e) {
        warning(
          "Skipping '", names(split_data)[i], "': ", conditionMessage(e),
          call. = FALSE
        )
        NULL
      }
    )

    if (is.null(classified)) {
      next
    }

    country_df$range_abs <- classified$range_abs
    country_df$class_abs <- classified$class_abs
  }

  result[[i]] <- country_df
}

result
#> [[1]]
#>      country time area range_abs class_abs
#> 1  Australia   21    A [15.5,21]         3
#> 5  Australia   10    B  [5,15.5)         2
#> 9  Australia    2    C     [2,5)         1
#> 14 Australia    8    D  [5,15.5)         2
#> 
#> [[2]]
#>    country time area range_abs class_abs
#> 4    China   10    A  [8.5,10]         3
#> 8    China    0    B   [0,2.5)         1
#> 12   China    7    C [2.5,8.5)         2
#> 17   China    5    D [2.5,8.5)         2
#> 
#> [[3]]
#>    country time area range_abs class_abs
#> 2    Italy   18    A   [12,18]         3
#> 6    Italy   15    B   [12,18]         3
#> 10   Italy    4    C   [4,6.5)         1
#> 15   Italy    9    D  [6.5,12)         2
#> 
#> [[4]]
#>    country time area range_abs class_abs
#> 13 Nigeria    4    D       [4]         2
#> 
#> [[5]]
#>    country time area range_abs class_abs
#> 3     Peru   17    A [13.5,22)         2
#> 7     Peru   27    B   [22,27]         3
#> 11    Peru    5    C  [5,13.5)         1
#> 16    Peru   10    D  [5,13.5)         1

Created on 2024-07-03 with reprex v2.1.0.9000

Reprex files hosted with ❤ on GitHub

Sign up to request clarification or add additional context in comments.

4 Comments

Thank you! This works well. What if I want to assign a middle class e.g. 2 (if there are 3 classes) to such dataframes with one row?
I'm not sure I understand the question. If there is only one row and therefore a single value of time in your case, how do you want to split that single value into more than one class?
Not split per se. Just assign a class value. In my case, this has to do with health risk so if an island for example has only one area, a medium risk level should be assigned so that when a global view is pulled from the combined dataframes, it isn't assigned minimum risk for that area with one row.
You would have to manipulate what you pass as levels and labels to the factor() call. See my edit to add a lower class to the one-row case.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.