TP Noté

Auteur·rice

Paul Géhin

Date de publication

24 avr. 2026

## Init
rm(list = ls())

library(sampling)
library(dplyr)
library(logger)

bds <- read.csv("bds.csv")
ech_sas <- read.csv("echantillon_sas.csv")
ech_strat <- read.csv("echantillon_stratifie.csv")

## Question 1
log_info("1. Population : Ensemble des menages des departements 75, 92, 13, 33, 69, 59, 31, 67, 34 et 44")
log_info("Variable d'interet : niveau de patrimoine du menage")
log_info("Fonction d'interet : moyenne sur la population entiere")

## Question 2
N <- nrow(bds)
log_info("2. N = {N}")

## Question 3
log_info("3. ^r_1 = ^t~HT,patrimoine~ / N")

## Question 4
n_sas <- nrow(ech_sas)

HTestimator_sas <- function(y, n_i = n_sas, N_i = N) {
  pik <- rep(x = n_i/N_i, times = n_i)
  HTestimator(y = y, pik = pik)
}

r_1 <- HTestimator_sas(y = ech_sas$patrimoine) / N
log_info("4. ^r_1 = {r_1}")

## Question 5

log_info("5. ^V(^r_1) = ^V(^t~HT,patrimoine~) / N^2")

## Question 6

varHT_sas <- function(y, n_i = n_sas, N_i= N, method = 1) {
  pikl <- matrix(data = (n_i*(n_i-1))/(N_i*(N_i-1)),
                 nrow = n_i,
                 ncol = n_i)
  diag(pikl) <- n_i/N_i
  varHT(y = y, pikl = pikl, method = method)
}

var_r_1 <- varHT_sas(ech_sas$patrimoine) / (N^2)
log_info("6. ^V(^r_1) = {var_r_1}")

## Question 7

interval_confiance <- function(estimation, variance, niveau = 0.99) {
  alpha <- 1 - niveau
  q <- qnorm(1 - alpha/2)

  sd <- sqrt(variance)

  list(
    inf = estimation - q*sd,
    sup = estimation + q*sd
  )
}

IC_r_1 <- interval_confiance(r_1, var_r_1, niveau = 0.99)

log_info("7. Intervalle de confiance de r_1 au niveau 99% :")
log_info("[ {IC_r_1$inf} ; {IC_r_1$sup} ]")

## Question 8

# R de base
allocation <- table(ech_strat$departement)
# Avec dplyr
allocation <- ech_strat |>
  group_by(departement) |>
  summarise(n_h = n())
log_info("8. Allocation :")
log_info("n_h du departement {allocation$departement} : {allocation$n_h}")

taille_strates <- bds |>
  group_by(departement) |>
  summarise(N_h = n())

# R de base
is_allocation_proportionnelle <- length(unique(table(ech_strat$departement)/table(bds$departement))) == 1
# Avec dplyr
is_allocation_proportionnelle <- allocation |>
  left_join(taille_strates, by = "departement") |>
  mutate(fraction = n_h/N_h) |>
  group_by(fraction) |>
  summarise(n = n()) |>
  nrow() == 1
log_info("L'allocation est proportionnelle : {is_allocation_proportionnelle}")


## Question 9

## r_2 = sum_h t~HT,patrimoine,h~/N
log_info("9. ^r_2 = sum_departement ^t~HT,patrimoine,departement~ / N")

## Question 10

df_r_2 <- ech_strat |>
  left_join(allocation, by = "departement") |>
  left_join(taille_strates, by = "departement") |>
  group_by(departement) |>
  summarise(estim_tot_HT = HTestimator_sas(y = patrimoine, n = unique(n_h), N = unique(N_h)),
            var_tot_HT = varHT_sas(y = patrimoine, n = unique(n_h), N = unique(N_h)))

r_2 <- sum(df_r_2$estim_tot_HT)/N

log_info("10. ^r_2 = {r_2}")

## Question 11

log_info("11. ^V(^r_2) = sum_departement ^V(^t~HT,patrimoine,departement~) / N^2")

## Question 12
var_r_2 <- sum(df_r_2$var_tot_HT)/(N^2)
log_info("12. ^V(^r_2) = {var_r_2}")

## Question 13

IC_r_2 <- interval_confiance(estimation = r_2, variance = var_r_2, niveau = 0.99)
log_info("13. Intervalle de confiance de r_2 au niveau 99% :")
log_info("[ {IC_r_2$inf} ; {IC_r_2$sup} ]")

## Question 14
log_info("14. Pour rappel")
log_info("IC 99% r_1 : [ {IC_r_1$inf} ; {IC_r_1$sup} ]")
log_info("IC 99% r_2 : [ {IC_r_2$inf} ; {IC_r_2$sup} ]")
log_info("IC_r_2 est inclus dans IC_r_1")
log_info("La stratification est efficace car permet une estimation plus precise de notre moyenne.")

## Question 15
log_info("15. ^l_1 = log(^t~HT,revenu~)")

## Question 16
t_1 <- HTestimator_sas(y = ech_sas$revenu_mensuel)
l_1 <- log(t_1)
log_info("16. ^l_1 = {l_1}")

## Question
log_info("17. ^V(^l_1) = ^V(^t~HT,revenu/^t~HT,revenu~~)")

## Question 18
var_l_1 <- varHT_sas(y = ech_sas$revenu/as.vector(t_1))
IC_l_1 <- interval_confiance(estimation = l_1, variance = var_l_1, niveau = 0.99)
log_info("18. Intervalle de confiance de l_1 au niveau 99% :")
log_info("[ {IC_l_1$inf} ; {IC_l_1$sup} ]")
log_info("Pour info : ^V(^l_1) = {var_l_1}")

## Question 19
l_1_exact <- log(sum(bds$revenu_mensuel))
is_l_1_in_IC <- l_1_exact > IC_l_1$inf && l_1_exact < IC_l_1$sup
log_info("19. l_1 = {l_1_exact}")
log_info("l_1 dans IC 99% : {is_l_1_in_IC}")
log_info("l_1 - ^l_1 = {l_1_exact - l_1}")

## Question 20
log_info("20. ^l_2 = ^t~HT,patrimoine~ * log(^t~HT,patrimoine~)")

## Question 21
t_2 <- HTestimator_sas(y = ech_sas$patrimoine)
l_2 <- t_2 * log(t_2)
log_info("21. ^l_2 = {l_2}")

## Question 22
log_info("22. ^V(^l_2) = ^V(^t~HT,patrimoine * (log(^t~HT,patrimoine~) + 1)~)")

## Question 23
var_l_2 <- varHT_sas(ech_sas$patrimoine * as.vector(log(t_2) + 1))

IC_l_2 <- interval_confiance(estimation = l_2, variance = var_l_2, niveau = 0.99)
log_info("23. Intervalle de confiance de l_2 au niveau 99% :")
log_info("[ {IC_l_2$inf} ; {IC_l_2$sup} ]")
log_info("Pour info : ^V(^l_2) = {var_l_2}")

## Question 24
ech_sas_avec_tranche <- ech_sas |>
  mutate(tranche_patrimoine = case_when(
    patrimoine < 121000 ~ "1",
    patrimoine < 627000 ~ "2",
    patrimoine < 906000 ~ "3",
    patrimoine < 2341000 ~ "4",
    patrimoine < 4352000 ~ "5",
    .default = "6"
  ))
log_info("24. table 'ech_sas_avec_tranche'")


## Question 25
log_info('25. ^p_1 = ^t~HT,tranche_patrimoine == "1"~ / N')

## Question 26
ech_sas_avec_tranche <- ech_sas_avec_tranche |>
  mutate(is_tranche_1 = if_else(tranche_patrimoine == "1", 1, 0))
p_1 <- HTestimator_sas(y = ech_sas_avec_tranche$is_tranche_1) / N
log_info("26. ^p_1 = {p_1}")

## Question 27
log_info('27. ^V(^p_1) = ^V(^t~HT,tranche_patrimoine == "1"~) / N^2')

## Question 28
var_p_1 <- varHT_sas(y = ech_sas_avec_tranche$is_tranche_1) / N^2

IC_p_1 <- interval_confiance(estimation = p_1, variance = var_p_1, niveau = 0.95)
log_info("28. Intervalle de confiance de p_1 au niveau 95% :")
log_info("[ {IC_p_1$inf} ; {IC_p_1$sup} ]")
log_info("Pour info : ^V(^p_1) = {var_p_1}")

## Question 29
ech_sas_is_tranche <- function(tranche) {
   ech_sas_avec_tranche |>
     mutate(is_tranche = if_else(tranche_patrimoine == tranche, 1, 0)) |>
     select(is_tranche) |>
     pull()
}

IC_tranches <- ech_sas_avec_tranche |>
  group_by(tranche_patrimoine) |>
  summarise(estimation = HTestimator_sas(y = ech_sas_is_tranche(unique(tranche_patrimoine))) / N,
            variance = varHT_sas(y = ech_sas_is_tranche(unique(tranche_patrimoine))) / N^2,
            IC_95_inf = interval_confiance(estimation = estimation, variance = variance, niveau = 0.95)$inf,
            IC_95_sup = interval_confiance(estimation = estimation, variance = variance, niveau = 0.95)$sup)
log_info("29. Intervales des confiances au niveau 95% de proportion des individus de la population dans la tranche")
log_info("Tranche {IC_tranches$tranche_patrimoine} : [ {IC_tranches$IC_95_inf * 100}% ; {IC_tranches$IC_95_sup * 100}% ]")