3

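I have a data frame `df` and I want to do a simple calculation, `test * factor`, where some values in `test` carry a comparison sign (such as `>=` or `<`) in front of the number. How can I multiply the numeric part and keep the sign in front of the result?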

enter image description here

# Example data: `test` is a factor whose labels are numbers with an optional
# comparison prefix; `factor` is the numeric multiplier for each row.
# Levels are spelled out so they do not depend on locale-specific sorting.
df <- data.frame(
  test = factor(
    c(">=1", "<5", "6", ">7.8", "<=1.5"),
    levels = c("<=1.5", "<5", ">=1", ">7.8", "6")
  ),
  factor = c(2, 4, 6, 9, 8)
)

6 Answers 6

6

Another possible solution:

library(tidyverse)

# Drop the first run of digits to keep the comparison prefix, pull that same
# run out as a number, scale it by `factor`, and join prefix and product.
df %>%
  mutate(
    outcome = str_c(
      str_remove(test, "\\d+"),
      as.numeric(str_extract(test, "\\d+")) * factor
    )
  )

#>   test factor outcome
#> 1  >=1      2     >=2
#> 2   <5      4     <20
#> 3    6      6      36
#> 4  >78      9    >702
#> 5 <=15      8   <=120

Or with parse_number:

library(tidyverse)

# Same idea, but let readr::parse_number() pull the numeric value out of the
# label; `test` is a factor, so it is converted to character first.
df %>%
  mutate(
    outcome = str_c(
      str_remove(test, "\\d+"),
      parse_number(as.character(test)) * factor
    )
  )

#>   test factor outcome
#> 1  >=1      2     >=2
#> 2   <5      4     <20
#> 3    6      6      36
#> 4  >78      9    >702
#> 5 <=15      8   <=120

With decimals:

library(tidyverse)

# Decimal-aware variant: the removal pattern also consumes an optional
# fractional part, so ">7.8" leaves just ">" as the prefix.
# `test` is referenced through the data mask (not `df$test`) so the snippet
# keeps working if the pipeline is filtered, grouped or reordered upstream.
df %>%
  mutate(
    outcome = str_c(
      str_remove(test, "\\d+(\\.\\d+)?"),
      factor * parse_number(as.character(test))
    )
  )

#>    test factor outcome
#> 1   >=1      2     >=2
#> 2    <5      4     <20
#> 3     6      6      36
#> 4  >7.8      9   >70.2
#> 5 <=1.5      8    <=12
Sign up to request clarification or add additional context in comments.

Comments

6

We can try

# Base R version. The prefix is whatever remains after deleting digits and
# the decimal point; the number is whatever remains after deleting everything
# else. Treating "." as part of the number keeps values such as ">7.8"
# intact instead of collapsing them to 78.
transform(
    df,
    outcome = paste0(
        gsub("[0-9.]", "", test),
        as.numeric(gsub("[^0-9.]", "", test)) * factor
    )
)

which gives

  test factor outcome
1  >=1      2     >=2
2   <5      4     <20
3    6      6      36
4  >78      9    >702
5 <=15      8   <=120

Comments

5

One more. Here we use parse_number to multiply and then paste after removing the numbers from test column:

library(dplyr)
library(readr)
# Remove the numeric part (including any decimals) to keep the comparison
# prefix, then paste it in front of factor * parsed number.
# Base sub() is used for the removal because this snippet attaches only
# dplyr and readr, so stringr's str_replace() would not be found.
df %>%
  mutate(
    outcome = paste0(
      sub("\\d+(\\.\\d+)?", "", test),
      factor * parse_number(as.character(test))
    )
  )
  test factor outcome
1  >=1      2     >=2
2   <5      4     <20
3    6      6      36
4  >78      9    >702
5 <=15      8   <=120

Comments

5

We could use str_replace: the pattern matches one or more digits (\\d+), and the replacement is a function that converts the match to numeric and multiplies it by the factor column.

library(stringr)
# Replace the first run of digits in each label with that number multiplied
# by the matching `factor` value; the comparison prefix is left untouched.
df$outcome <- str_replace(
  string = df$test,
  pattern = "\\d+",
  replacement = function(digits) as.numeric(digits) * df$factor
)

-output

df$outcome
[1] ">=2"   "<20"   "36"    ">702"  "<=120"

For the updated data, change the \\d+ pattern so that it also matches the decimal point

# Decimal-aware variant: the character class matches digits and the decimal
# point, so the whole number (e.g. "7.8") is handed to the replacement.
str_replace(
  string = df$test,
  pattern = "[0-9.]+",
  replacement = function(number) as.numeric(number) * df$factor
)
[1] ">=2"   "<20"   "36"    ">70.2" "<=12" 

Comments

4

And another solution using separate:

library(tidyr)
library(stringr)
library(dplyr)
# Split `test` once, at the boundary between the comparison prefix and the
# number. The look-behind requires the preceding character to be neither a
# digit nor ".", so the split cannot happen again inside a decimal such as
# "7.8" (the original boundary split after the "." and dropped the
# fractional part).
df %>%
  separate(
    test,
    into = c("temp1", "temp2"),
    sep = "(?<=[^0-9.]|^)(?=[0-9.])",
    remove = FALSE
  ) %>%
  mutate(
    temp3 = as.numeric(temp2) * factor,
    outcome = str_c(temp1, temp3)
  ) %>%
  select(-matches("temp"))  # drop the helper columns
  test factor outcome
1  >=1      2     >=2
2   <5      4     <20
3    6      6      36
4  >78      9    >702
5 <=15      8   <=120

Comments

1

Here is an option using data.table and stringr:

library(data.table)

dt <- as.data.table(df)

# Add `outcome` by reference: comparison prefix + (number * factor).
# The stringr calls are namespace-qualified because this snippet attaches
# only data.table; unqualified str_c()/str_remove()/str_extract() would not
# be found in a fresh session.
dt[, outcome := stringr::str_c(
  stringr::str_remove(test, "[0-9.]+"),
  as.numeric(stringr::str_extract(test, "[0-9.]+")) * factor
)]

Output

    test factor outcome
1:   >=1      2     >=2
2:    <5      4     <20
3:     6      6      36
4:  >7.8      9   >70.2
5: <=1.5      8    <=12

Or another option using tidyr::extract:

library(tidyverse)

# Capture the non-digit prefix and the numeric remainder into two columns.
# convert = TRUE turns the captured number into a numeric column so it can
# be multiplied by `factor`. TRUE/FALSE are spelled out because T and F are
# ordinary variables that can be reassigned.
df %>%
  tidyr::extract(
    "test", c("sign", "outcome"), "(\\D*)(\\d.*)",
    remove = FALSE, convert = TRUE
  ) %>%
  mutate(outcome = paste0(sign, outcome * factor)) %>%
  # all_of() marks names(df) as an external character vector of column names
  select(all_of(names(df)), outcome)

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.