score成绩单
install.packages("dplyr")
library(dplyr)
install.packages("tibble")
library(tibble)
install.packages("stringr")
library(stringr)
# Build a demo grade sheet for nine students. Every score column is drawn
# from runif(min = 80, max = 90) and rounded to a whole number, so the values
# change on every run. The ninth english_score is NA.
# NOTE(review): `match_score` and `musci_score` look like typos for
# math/music; they are left unchanged because later code selects columns by
# these exact names.
score <- tibble(
  ID = c(
    "1222-1", "2001-0", "3321-1", "4898-0", "2782-0",
    "1002-8", "4211-0", "1023-1", "3325-1"
  ),
  gender = c(
    "female", "male", "male", "male", "female",
    "female", "male", "female", "male"
  ),
  chinese_mid_score = round(runif(9, min = 80, max = 90)),
  chinese_final_score = round(runif(9, min = 80, max = 90)),
  # Only eight random scores are generated; the last entry is missing.
  english_score = c(round(runif(8, min = 80, max = 90)), NA),
  match_score = round(runif(9, min = 80, max = 90)),
  musci_score = round(runif(9, min = 80, max = 90))
)
view(score)

rowSums
 
注意:rowSums 中的 S 是大写的
计算每个学生的总分
# Per-student total: add up every column of `score` whose name contains
# "_score". na.rm = TRUE skips missing values instead of letting a single NA
# turn the whole row total into NA.
score_sum <- mutate(
  score,
  total_score = score |>
    select(contains("_score")) |>
    rowSums(na.rm = TRUE)
)
view(score_sum)
将 rowSums、select 和 contains 结合使用,可以灵活处理较复杂的数据

如果数据量特别大且列名的命名没有规律该怎么办呢?
这种情况下 contains 就不好用了,可以改用 across
# Per-student total over every numeric column. Columns are picked by type
# (is.numeric) rather than by name, and missing values are skipped.
score_across <- mutate(
  score,
  total_score = rowSums(
    across(where(is.numeric)),
    na.rm = TRUE
  )
)
view(score_across)
任务:这次考试总体偏难,为了鼓励学生,把每门成绩都除以0.95
# Curve the grades: divide every numeric column by 0.95 and store the result
# in a new column named "curve_<original column>". The original columns are
# kept as they are.
score_change <- mutate(
  score,
  across(
    .cols = where(is.numeric),
    .fns = ~ .x / 0.95,
    .names = "curve_{col}"
  )
)
view(score_change)
注意 across 函数的参数命名规则:参数名前面都需要加点(.),例如 .cols、.fns、.names。
 其中 .fns(functions,函数)后面的 ~ 表示自定义的匿名函数,
 .names 命名时记得加引号 "",{col} 表示每一列的列名
 
设置 成绩>=80 为 pass,低于80为 fail
# Label every numeric column as "pass" or "fail" and store the labels in new
# columns named "pass_<original column>".
# The rule is: score >= 80 is a pass, anything below 80 is a fail. The
# comparison must be `>=` so that a score of exactly 80 counts as a pass.
# if_else() returns NA where the condition is NA, so a missing score stays
# missing instead of being labelled.
score_pass <- score |>
  mutate(
    across(
      .cols = where(is.numeric),
      .fns = ~ if_else(.x >= 80, "pass", "fail"),
      .names = "pass_{col}"
    )
  )
view(score_pass)
通过在.fns中加入条件判断即可
 
最后一个综合任务
只考虑数学成绩,先将每个学生的成绩除以0.95,再判断修改后的成绩是否>=85分,且新的列分别用 curve_{col} 及 pass_{col} 的形式命名
# Curve match_score by dividing it by 0.95 (stored as curve_match_score),
# then mark the curved score as "pass" when it is at least 85 and "fail"
# otherwise. mutate() evaluates its arguments in order, so the curved column
# is already available when the pass/fail label is computed.
# Only the columns whose name contains "match" are kept in the result.
score_match <- score |>
  mutate(
    across(
      .cols = match_score,
      .fns = ~ .x / 0.95,
      .names = "curve_{col}"
    ),
    pass_match_score = if_else(curve_match_score >= 85, "pass", "fail")
  ) |>
  select(contains("match"))
view(score_match)

 以上代码略显累赘;如果只根据原来的成绩来决定 pass 或 fail,代码就会简单许多
# Apply two named functions to match_score in a single across() call:
#   curve - the score divided by 0.95
#   pass  - "pass" when the score is at least 85, otherwise "fail"
# Both functions receive the original match_score, so pass/fail is judged on
# the uncurved score. "{fn}_{col}" builds the new column names from the
# function name and the source column name.
# Only the columns whose name contains "match" are kept in the result.
score_match <- score |>
  mutate(
    across(
      .cols = match_score,
      .fns = c(
        curve = ~ .x / 0.95,
        pass = ~ if_else(.x >= 85, "pass", "fail")
      ),
      .names = "{fn}_{col}"
    )
  ) |>
  select(contains("match"))
view(score_match)
其中,.fns = c(curve = , pass = ) 为两个 function 分别起了名字,
 .names = "{fn}_{col}" 中的 {fn} 表示 .fns 里新加的这两个名字
 
计算各科的平均分、方差,以及第 5、第 80 百分位数
# One-row summary of every numeric column: mean, variance, and the 5th and
# 80th percentiles, each computed with missing values removed. Result columns
# are named "<statistic>_<original column>".
score_summary <- summarise(
  score,
  across(
    .cols = where(is.numeric),
    .fns = list(
      mean = ~ mean(.x, na.rm = TRUE),
      var = ~ var(.x, na.rm = TRUE),
      q5 = ~ quantile(.x, 0.05, na.rm = TRUE),
      q80 = ~ quantile(.x, 0.8, na.rm = TRUE)
    ),
    .names = "{fn}_{col}"
  )
)
view(score_summary)
平均值 mean、方差 var、第 5 百分位数 q5 和第 80 百分位数 q80。
 



















