R Solution for Excel Puzzles

[This article was first published on Numbers around us - Medium, and kindly contributed to R-bloggers]. (You can report issue about the content on this page here)
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.

Week 47 — Puzzles no. 329–333

Puzzles

Author: ExcelBI

Puzzles:
# 329: content file
# 330: content file
# 331: content file
# 332: content file
# 333: content file

Lets dive into solutions!

Puzzle 329

In this puzzle we were asked to generate first 50 elements of Iccanobif sequence. Wait what? It is Fibonacci backwords… and that is exactly our goal to generate Fibonacci like sequence with one constraint: next element is a sum of previous two but with digits in reversed order.
As it looks hard, it really isn’t. And this solution is so short in base R, that translating it to another syntax would be just over complicating.

Loading data and libraries

library(tidyverse)
library(readxl)
library(stringi)

test = read_excel("Iccanobif Numbers.xlsx", range = "A2:A51", col_names = FALSE) %>% pull()

Approach: Base R

reverse_digits <- function(n) {
  as.numeric(stri_reverse(as.character(n)))
}

generate_iccanobif <- function(N) {
  iccanobif <- c(0, 1)
  for (i in 3:N) {
    next_term <- reverse_digits(iccanobif[i - 1]) + reverse_digits(iccanobif[i - 2])
    iccanobif <- c(iccanobif, next_term)
  }
  iccanobif
}

iccanobif_50 <- generate_iccanobif(50)

Validation

identical(iccanobif_50, test)
#> [1] TRUE

Puzzle 330

In puzzle #330 we have sentences in table and our goal is to return matrix populated rowwise containing only words that were longer than average word in original sentence.

Load data and libraries

library(tidyverse)
library(tidytext)
library(readxl)
library(data.table)

input = read_excel("Average Word Length.xlsx", range= "A1:A10")
test = read_excel("Average Word Length.xlsx", range = "B2:D5", col_names = F)
colnames(test) = c("1", "2", "3")

Approach 1: tidyverse

result = input %>%
  mutate(number = row_number()) %>%
  unnest_tokens(word, Books) %>%
  group_by(number) %>%
  mutate(word_len = nchar(word)) %>%
  reframe(number, word, word_len, avg_len = mean(word_len)) %>%
  ungroup() %>%
  filter(word_len > avg_len) %>% 
  select(word) %>%
  mutate(group = rep(1:4, each = 3)) %>%
  group_by(group) %>%
  mutate(row = row_number()) %>%
  pivot_wider(names_from = row, values_from = word) %>%
  ungroup() %>%
  select(-group) %>%
  mutate(across(everything(), ~ str_to_title(.x)))

Approach 2: data.table

input_dt <- setDT(copy(input))

input_dt[, number := .I]
input_dt_long <- input_dt[, .(word = unlist(strsplit(Books, " "))), by = number]
input_dt_long[, word_len := nchar(word)]
input_dt_long[, avg_len := mean(word_len), by = number]

input_dt_filtered <- input_dt_long[word_len > avg_len, .(word, number)]
input_dt_filtered[, group := rep(1:4, each = 3, length.out = .N)]
input_dt_filtered[, row := rep(1:3, times = 4, length.out = .N)]
wider_dt <- dcast(input_dt_filtered, group ~ row, value.var = "word")
setnames(wider_dt, old = names(wider_dt), new = c("group", "1", "2", "3"))

wider_dt[, group := NULL]

Approach 3: base R

input_base <- input
input_base$number = seq_along(input_base$Books)
words_list_base = strsplit(input_base$Books, " ")
words_df_base = setNames(data.frame(do.call(rbind, lapply(words_list_base, function(x) data.frame(word = x))), row.names = NULL), c("word"))
words_df_base$number = rep(input_base$number, sapply(words_list_base, length))
words_df_base$word_len = nchar(as.character(words_df_base$word))
words_df_base$avg_len = ave(words_df_base$word_len, words_df_base$number, FUN = mean)
filtered_df_base = subset(words_df_base, word_len > avg_len, select = c("word", "number"))
filtered_df_base$group = rep(1:4, each = 3, length.out = nrow(filtered_df_base))
mat = matrix(filtered_df_base$word, nrow = 4, ncol = 3, byrow = TRUE)
wider_df_base = as_tibble(as.data.frame(mat))
colnames(wider_df_base) = c("1", "2", "3")
wider_df_base[] = lapply(wider_df_base, function(x) if(is.character(x)) tools::toTitleCase(x) else x)

Validation

identical(test, result)
#> [1] TRUE

identical(test, wider_dt)
#> [1] TRUE

identical(test, wider_df_base)
#> [1] TRUE

Puzzle 331

In this puzzle we were asked to find which of given numbers can be represented as sum of two cubes.

Load data and libraries

library(tidyverse)
library(readxl)
library(data.table)

input = read_excel("Sum of Cube of Two Numbers.xlsx", range = "A1:A10")
test = read_excel("Sum of Cube of Two Numbers.xlsx", 
                  range = "B2:D6", 
                  col_names = c("Number", "Factor1", "Factor2"))

Approach 1: tidyverse

check_if_sum_of_cubes = function(number) {
  x = floor(number^(1/3))
  
  range = data.frame(Number = number, Factor1 = 1:x) %>%
    mutate(diff = number - Factor1^3, 
           is_cube = round(diff^(1/3))^3 == diff) %>%
    filter(is_cube) %>%
    mutate(Factor2 = diff^(1/3)) %>%
    slice(1) %>%
    select(Number, Factor1, Factor2) 
  return(range)
}

result = map_dfr(input$Number, check_if_sum_of_cubes) %>% as_tibble()

Approach 2: data.table

check_if_sum_of_cubes_dt <- function(number) {
  x <- floor(number^(1/3))
  
  DT <- CJ(Factor1 = 1:x, Number = number)
  DT[, `:=`(diff = Number - Factor1^3)]
  DT[, `:=`(is_cube = round((diff)^(1/3))^3 == diff,
            Factor2 = (diff)^(1/3))]
  
  result <- DT[is_cube == TRUE, .(Number, Factor1, Factor2)]
  result <- result[order(Factor1)][1]
  
  return(result)
}

input_numbers_dt <- as.vector(input$Number)
result_dt <- rbindlist(lapply(input_numbers_dt, check_if_sum_of_cubes_dt))
result_dt <- as.data.frame(result_dt) %>% drop_na()

Approach 3: base R

check_if_sum_of_cubes_base <- function(number) {
  x <- floor(number^(1/3))
  
  df <- expand.grid(Factor1 = 1:x, Number = number)
  df$diff <- df$Number - df$Factor1^3
  df$is_cube <- round(df$diff^(1/3))^3 == df$diff
  df$Factor2 <- df$diff^(1/3)
  
  result <- df[df$is_cube, ]
  result <- result[order(result$Factor1), ]
  result <- result[1, c("Number", "Factor1", "Factor2")]
  
  return(result)
}

input_numbers <- as.vector(input$Number)
result_base <- do.call("rbind", lapply(input_numbers, check_if_sum_of_cubes_base))
result_base <- as.data.frame(result_base) %>% drop_na()

Validation

Sometimes aligning all structures to be identical generate to much code. Then I we can also check it visually.

> test
# A tibble: 5 × 3
    Number Factor1 Factor2
     <dbl>   <dbl>   <dbl>
1       35     2         3
2      855     7         8
3     3744    10        14
4   300827     4        67
5 90000576    44.0     448

> result
# A tibble: 5 × 3
    Number Factor1 Factor2
     <dbl>   <int>   <dbl>
1       35       2       3
2      855       7       8
3     3744      10      14
4   300827       4      67
5 90000576      44     448

> result_base
     Number Factor1 Factor2
2        35       2       3
7       855       7       8
10     3744      10      14
4    300827       4      67
44 90000576      44     448

> result_dt
    Number Factor1 Factor2
1       35       2       3
2      855       7       8
3     3744      10      14
4   300827       4      67
5 90000576      44     448

Puzzle 332

One of the shortest solutions which I solved so far, but it is linked to the fact that there were new functions introduced in MS Excel. We had to find maximal and minimal value per group.

Load data and libraries

library(tidyverse)
library(readxl)
library(data.table)

input = read_excel("Max Min.xlsx", range = "A1:B20")
test = read_excel("Max Min.xlsx", range = "D2:F6")

Approach 1: tidyverse

result = input %>%
  group_by(Zone) %>%
  summarise(Max = max(Sales), Min = min(Sales)) 

Approach 2: data.table

input_dt = setDT(input)

result_dt = input_dt[, .(Max = max(Sales), Min = min(Sales)), by = Zone][order(Zone)]
result_dt = as_tibble(result_dt)

Approach 3: base R

result_base <- do.call(data.frame, aggregate(Sales ~ Zone, data = input, FUN = function(x) c(Max = max(x), Min = min(x))))
names(result_base) <- c("Zone","Max", "Min")

Validation

identical(test, result)
#> [1] TRUE

identical(test, result_dt)
#> [1] TRUE

identical(test, as_tibble(result_base))
#> [1] TRUE

Puzzle 333

As we all know words consists of letters, and in many words letters repeat itself. And that is the topic in this puzzle. We have words and we need to cut them… before second occurence of first repeating letter (bull becomes bul, excel becomes exc and so on).

Load data and libraries

library(tidyverse)
library(readxl)
library(data.table)

input = read_excel("Extract String Before a Repeated Character.xlsx", range = "A1:A10")
test = read_excel("Extract String Before a Repeated Character.xlsx", range = "B1:B10")

Approach 1: tidyverse

result = input %>%
  mutate(lett = str_split(str_to_lower(String), pattern = ""),
         reps = map(lett, ~ duplicated(.x)),
         first_reps = map_int(reps, ~ which(.x)[1]),
         `Answer Expected` = ifelse(!is.na(first_reps), 
                                    str_sub(String, 1, first_reps-1), 
                                    String)) %>%
  select(`Answer Expected`)

Approach 2: data.table

input_dt <- as.data.table(input)

input_dt[, lett := strsplit(tolower(String), "")]

input_dt[, reps := lapply(lett, function(x) duplicated(x))]
input_dt[, first_reps := sapply(reps, function(x) { pos <- which(x)[1]; if (length(pos) > 0) pos else NA })]
input_dt[, `Answer Expected` := ifelse(!is.na(first_reps), substr(String, 1, first_reps - 1), String)]

result_dt <- input_dt$`Answer Expected`

Approach 3: base R

input_b <- input
input_b$lett <- strsplit(tolower(input_b$String), "")
input_b$reps <- lapply(input_b$lett, function(x) duplicated(x))
input_b$first_reps <- sapply(input_b$reps, function(x) { pos <- which(x)[1]; if (length(pos) > 0) pos else NA })
input_b$`Answer Expected` <- ifelse(!is.na(input_b$first_reps), substr(input_b$String, 1, input_b$first_reps - 1), input_b$String)
result_b <- input_b$`Answer Expected`

Validation

identical(result, test)
#> [1] TRUE

identical(result_dt, test$`Answer Expected`)
#> [1] TRUE

identical(result_b, test$`Answer Expected`)
#> [1] TRUE

Conclussion

Feel free to comment, share and contact me with advices, questions and your ideas how to improve anything.


R Solution for Excel Puzzles was originally published in Numbers around us on Medium, where people are continuing the conversation by highlighting and responding to this story.

To leave a comment for the author, please follow the link and comment on their blog: Numbers around us - Medium.

R-bloggers.com offers daily e-mail updates about R news and tutorials about learning R and many other topics. Click here if you're looking to post or find an R/data-science job.
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.

Never miss an update!
Subscribe to R-bloggers to receive
e-mails with the latest R posts.
(You will not see this message again.)

Click here to close (This popup will not appear again)