1

I have the following data frame df:

 Index col_a col_b col_c col_d
1     4     c     v     g     j
2     1     x  <NA>     z     s
3  1, 3     k     j     n     y
4     2     q     t     o     i
5  2, 3     y     m     w     r
6     2     d     u     x     a
7  3, 4     n     y     k     g
8     4     h     d  <NA>     u
9     1     s     x     j     m
df <- structure(list(Index = c("4", "1", "1, 3", "2", "2, 3", "2", 
"3, 4", "4", "1"), col_a = c("c", "x", "k", "q", "y", "d", "n", 
"h", "s"), col_b = c("v", NA, "j", "t", "m", "u", "y", "d", "x"
), col_c = c("g", "z", "n", "o", "w", "x", "k", NA, "j"), col_d = c("j", 
"s", "y", "i", "r", "a", "g", "u", "m")), class = "data.frame", row.names = c(NA, 
-9L))

I wish to recode/update the values in the df to "Yes"/"No" based on the Index column, where 1 = col_a, 2 = col_b and so on.

This is my desired output:

  Index col_a col_b col_c col_d
1     4    No    No    No   Yes
2     1   Yes    No    No    No
3  1, 3   Yes    No   Yes    No
4     2    No   Yes    No    No
5  2, 3    No   Yes   Yes    No
6     2    No   Yes    No    No
7  3, 4    No    No   Yes   Yes
8     4    No    No    No   Yes
9     1   Yes    No    No    No

Currently I'm using a lapply approach to solve the problem, but I wonder if there's any simpler solution in just one or two lines of code.

do.call(rbind, lapply(1:nrow(df), \(x) {
  vec <- as.integer(unlist(strsplit(df[x, 1], ","))) + 1
  df[x, vec] <- "Yes"
  df[x, -c(1, vec)] <- "No"
  df[x, ]
}))

2 Answers 2

1

A base R option is to use row/column indexing should be faster for assignment than with rowwise looping

m1 <- cbind(rep(seq_len(nrow(df)), nchar(gsub("\\D+", "", df$Index))), 
    scan(text = df$Index, what = numeric(), sep=","))
df[-1][m1] <- "Yes"
df[-1][df[-1] != "Yes"|is.na(df[-1])] <- "No"

-output

> df
  Index col_a col_b col_c col_d
1     4    No    No    No   Yes
2     1   Yes    No    No    No
3  1, 3   Yes    No   Yes    No
4     2    No   Yes    No    No
5  2, 3    No   Yes   Yes    No
6     2    No   Yes    No    No
7  3, 4    No    No   Yes   Yes
8     4    No    No    No   Yes
9     1   Yes    No    No    No

Or another option is

i1 <- Reduce(`|`, lapply(read.csv(text = df$Index, 
    header = FALSE),\(x) col(df[-1]) == x))
df[-1] <- c("No", "Yes")[1+(i1 & !is.na(i1))]

-output

> df
  Index col_a col_b col_c col_d
1     4    No    No    No   Yes
2     1   Yes    No    No    No
3  1, 3   Yes    No   Yes    No
4     2    No   Yes    No    No
5  2, 3    No   Yes   Yes    No
6     2    No   Yes    No    No
7  3, 4    No    No   Yes   Yes
8     4    No    No    No   Yes
9     1   Yes    No    No    No

Or with tidyverse

library(dplyr)
library(stringr)
df %>%
  mutate(across(starts_with("col_"),
   ~ case_when(str_detect(Index, as.character( match(cur_column(), 
      names(df))-1)) ~ "Yes", TRUE ~ "No")))

-output

  Index col_a col_b col_c col_d
1     4    No    No    No   Yes
2     1   Yes    No    No    No
3  1, 3   Yes    No   Yes    No
4     2    No   Yes    No    No
5  2, 3    No   Yes   Yes    No
6     2    No   Yes    No    No
7  3, 4    No    No   Yes   Yes
8     4    No    No    No   Yes
9     1   Yes    No    No    No
Sign up to request clarification or add additional context in comments.

Comments

1

What you have already seems pretty good. We could make a few minor improvements as shown:

Index <- strsplit(df[[1]], ",")
do.call("rbind", lapply(1:nrow(df), function(i) {
  dfi <- df[i, ]
  dfi[-1] <- "No"
  replace(dfi, as.integer(Index[[i]]) + 1L, "Yes")
}))

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.