Skip to contents

Scale probabilities so that they are consistent with pre-specified benchmarks, such as totals obtained from a national survey.

Usage

scale_prob(unscaled, benchmark, wt = NULL)

Arguments

unscaled

Reported probability of having the attribute of interest. A numeric vector with values between 0 and 1.

benchmark

Benchmark probability. A number between 0 and 1.

wt

Weights to use when calculating the overall unscaled probability from the individual unscaled probabilities. Optional (default NULL).

Value

A numeric vector of scaled probabilities, with the same length as unscaled.

See also

Examples

set.seed(10)
p1 <- runif(n = 10)
p2 <- 0.6
p1_scaled <- scale_prob(unscaled = p1,
                        benchmark = p2)
rbind(p1, p1_scaled)
#>                [,1]      [,2]      [,3]      [,4]       [,5]      [,6]
#> p1        0.5074782 0.3067685 0.4269077 0.6931021 0.08513597 0.2254366
#> p1_scaled 0.6803277 0.5500567 0.6280333 0.8008073 0.40620568 0.4972681
#>                [,7]      [,8]      [,9]     [,10]
#> p1        0.2745305 0.2723051 0.6158293 0.4296715
#> p1_scaled 0.5291326 0.5276882 0.7506532 0.6298272
mean(p1)
#> [1] 0.3837165
mean(p1_scaled)
#> [1] 0.6

## use tidyverse functions to scale separately
## within age-sex groups
census <- smoothscale::syn_census
survey <- smoothscale::syn_survey
library(dplyr)
#> 
#> Attaching package: ‘dplyr’
#> The following objects are masked from ‘package:stats’:
#> 
#>     filter, lag
#> The following objects are masked from ‘package:base’:
#> 
#>     intersect, setdiff, setequal, union

census |>
  left_join(survey, by = c("age", "sex")) |>
  mutate(prob_direct = child_labour / all_children) |>
  group_by(age, sex) |>
  mutate(prob_scaled = scale_prob(unscaled = prob_direct,
                                  benchmark = prob_child_labour))
#> # A tibble: 100 × 8
#> # Groups:   age, sex [4]
#>    area    age   sex    child_labour all_children prob_child_labour prob_direct
#>    <chr>   <chr> <chr>         <int>        <dbl>             <dbl>       <dbl>
#>  1 Area 01 5-9   Female          134          372             0.297      0.360 
#>  2 Area 02 5-9   Female           14           35             0.297      0.4   
#>  3 Area 03 5-9   Female           92          388             0.297      0.237 
#>  4 Area 04 5-9   Female           46          345             0.297      0.133 
#>  5 Area 05 5-9   Female           25          102             0.297      0.245 
#>  6 Area 06 5-9   Female            2            5             0.297      0.4   
#>  7 Area 07 5-9   Female            4           13             0.297      0.308 
#>  8 Area 08 5-9   Female           10           34             0.297      0.294 
#>  9 Area 09 5-9   Female            2           52             0.297      0.0385
#> 10 Area 10 5-9   Female          578         2087             0.297      0.277 
#> # ℹ 90 more rows
#> # ℹ 1 more variable: prob_scaled <dbl>