# Chapter 2 ----
# Worked solutions to the chapter's descriptive-statistics exercises.
# Each section is self-contained; note that `x`, `n`, `percent` and `tableau`
# are deliberately re-used (overwritten) from one exercise to the next.

# Exercise 1 ----
x <- c(3.88, 3.9, 3.93, 3.9, 3.93, 3.96, 3.88, 3.94, 3.96, 3.88, 3.94, 3.99,
       3.98)
n <- length(x)
# Relative frequency of each distinct value.
tableau <- table(x) / n
plot(tableau, ylab = "relative frequency")
plot(tableau, type = "l", ylab = "relative frequency")

# Exercise 3 ----
pie(c(38.7, 22.6, 8.8, 60),
    labels = c("United States", "South America", "Canada", "Mexico"))
title("Estimated Oil Reserves")

# Exercise 4 ----
# (a)
time <- c(19, 21.9, 18, 22.7, 19.2, 21.1, 28.6, 25.3, 21.6, 20.7, 20.4, 25.1,
          21.2, 18.3, 19.1, 16.2, 21.6, 13, 13.8, 15.8, 17.2, 20, 27, 24, 21,
          19.8, 20.5, 22.7, 21.8, 20.7, 21.5, 21.2, 20.6, 19, 22.3, 19.3, 22.2,
          14.8, 17.3, 15.4, 20.7, 19.1, 21.6, 18.9, 19.8, 22, 19.6, 24.6, 16.7,
          23.8)
length(time)
# NOTE(review): hist() draws counts by default when the breaks are equally
# spaced, so the y-axis label below does not match what is plotted.
hist(time, main = "Average Travel Time to Work (minutes)",
     ylab = "Relative frequency")
plot(ecdf(time), verticals = TRUE)

# (b)
percent <- c(0.9, 0.7, 0.7, 8.3, 2.5, 3.9, 24.8, 8.8, 6.4, 2.5, 1.3, 10.1, 1.6,
             2.5, 3.6, 1.2, 2, 0.6, 0.3, 1.2, 0.6, 2.4, 8.1, 4, 1.1, 1, 1.1,
             2.8, 2, 1.6, 1.3, 0.8, 0.8, 0.5, 3, 0.6, 2.2, 0.6, 1.9, 1.4, 2.9,
             1, 2.1, 2.3, 2.7, 4.5, 3.4, 4.9, 2.4, 7.4)
length(percent)
stem(percent)
plot(time, percent)
# Fixed: the second argument was misspelled `precent` (undefined object).
cor(time, percent)

# Exercise 6 ----
# (a)
accidents <- c(4, 2, 4, 3, 11, 6, 4, 4, 1, 4, 2, 3, 3, 1, 2, 2, 6, 0, 2, 1, 3,
               2)
tableau <- table(accidents)
# (b)
plot(tableau)              # frequency plot
plot(tableau, type = "l")  # frequency polygon
# (c)
plot(ecdf(accidents), verticals = TRUE)
# (d)
mean(accidents)
# (e)
n <- length(accidents)
n / 2
sort(accidents)
median(accidents)
# (f)
# From the table, the mode is 2.
# (g)
sd(accidents)
sqrt(var(accidents))

# Exercise 7 ----
fatalities <- c(197, 5, 231, 285, 278, 39, 62, 33, 1, 239, 166, 342, 3, 1, 12,
                89, 531, 0, 22, 13, 22, 50)
n <- length(fatalities)
# (a)
hist(fatalities, main = "Yearly Airline Fatalities")
# (b)
sort(fatalities)
stem(fatalities)
# (c)
mean(fatalities)
# (d)
sort(fatalities)
n / 2
median(fatalities)
# (e)
sd(fatalities)
sqrt(var(fatalities))

# Exercise 10 ----
x <- runif(99, 0, 100)    # 99 observations, uniform between $0 and $100
x <- c(x, 1000000)        # salaries at company A
y <- runif(110, 0, 1000)  # salaries at company B
sum(x)  # total payroll of A
sum(y)  # total payroll of B
median(x)
median(y)
# The median at A can be smaller than the median at B even though the total
# payroll at A is larger.

# Exercise 11 ----
age <- c(2.5, 7.5, 12.5, 17.5, 25, 35, 45, 55, 65, 75, 90)
freq.male <- c(120, 184, 44, 24, 23, 50, 60, 102, 167, 150, 49)
freq.fem <- c(67, 120, 22, 15, 25, 22, 40, 76, 104, 90, 27)
# (a)
n.male <- sum(freq.male)
sum(age * freq.male) / n.male
weighted.mean(age, freq.male)
# (b)
n.fem <- sum(freq.fem)
sum(age * freq.fem) / n.fem
weighted.mean(age, freq.fem)
# (c)
# Expand the grouped data to one age per individual, then read the quartiles
# off the sorted values at the positions suggested by n.male * c(.25, .5, .75).
age.all.male <- rep(age, freq.male)
n.male * c(0.25, 0.5, 0.75)
sort(age.all.male)[c(244, 487, 730)]
quantile(age.all.male, probs = c(0.25, 0.5, 0.75))
# (d)
# Same for females; each quartile is the average of the two sorted values
# at the hard-coded adjacent positions below.
age.all.fem <- rep(age, freq.fem)
n.fem * c(0.25, 0.5, 0.75)
mean(sort(age.all.fem)[c(152, 153)])
mean(sort(age.all.fem)[c(304, 305)])
mean(sort(age.all.fem)[c(456, 457)])
quantile(age.all.fem, probs = c(0.25, 0.5, 0.75))

# Exercise 15 ----
pay1992 <- c(22340, 31825, 23153, 20108, 28902, 25040, 32603, 26596, 23145,
             24373, 25538, 20649, 27910, 23570, 20937, 21982, 21858, 22342,
             21808, 27145, 29664, 27463, 25324, 19237, 23550, 19378, 20355,
             24743, 24866, 32073, 21051, 32399, 22249, 18945, 24845, 21698,
             23514, 25785, 24315, 21398, 18016, 22807, 25088, 21976, 22360,
             24940, 25553, 22168, 23008, 21215)
# The Washington D.C. observation is dropped because it is not a state.
# (a)
mean(pay1992)
# This differs from the overall average for the United States.
# (b)
# We would need the relative weight (population) of each state in the country.
# (c)
sort(pay1992)
n <- length(pay1992)
n / 2
median(pay1992)
# (d)
mean(c(22340, 31825, 23153, 20108, 28902, 25040, 32603, 26596, 23145, 24373))
mean(pay1992[1:10])

# Exercise 17 ----
percent <- c(18.2, 20.8, 16.4, 16.6, 17.4, 19.3, 20.5, 21.2, 19.6, 18, 21.2,
             19.4, 18.7, 24, 23.6, 18.5, 19, 20.4, 20.6, 20.8, 23.1, 15.4,
             18.2, 17.6, 17.5, 19.3, 17.6, 21.4, 14.8, 15.8, 18.5, 21.2, 19.6,
             17.8, 20.3, 21.2, 22.3, 20.3, 19.7, 23.1, 15.6, 13.4, 14.3, 20.2,
             16.6, 13.9, 18.4, 20.1, 20.5, 17)
# (a)
stem(percent)
# (b)
mean(percent)
median(percent)
table(percent)  # mode = 21.2
# (c)
var(percent)
# (d)
hist(percent, 13:24)
# (e)
# Grouped-data approximation: class midpoints with hand-entered frequencies.
mid.point <- (13:23) + 0.5
freq <- c(2, 2, 3, 4, 6, 7, 6, 10, 5, 1, 4)
length(mid.point)
length(freq)
sum(freq)
length(percent)
weighted.mean(mid.point, freq)
var(rep(mid.point, freq))

# Exercise 18 ----
# (a)
# Compute the sample mean and sample variance of `x` with a one-pass
# recursive update.
#
# Arguments:
#   x  numeric vector of observations.
#
# Returns a list with components `moyenne` (the mean) and `variance` (the
# sample variance, divisor n - 1). For a single observation the recursion's
# starting values are returned (mean x[1], variance 0); for an empty vector
# `moyenne` is NaN and `variance` is NA, mirroring mean() and var().
#
# Side effect: prints the running c(mean, variance) pair before each update,
# so the final pair is not printed, only returned.
mv <- function(x) {
  n <- length(x)
  if (n == 0L) {
    return(list(moyenne = NaN, variance = NA_real_))
  }
  m <- x[1]
  v <- 0
  # seq_len() rather than 1:(n - 1): the latter yields c(1, 0) when n == 1
  # and would index past the end of x.
  for (i in seq_len(n - 1L)) {
    print(c(m, v))
    mp <- m
    # Mean of the first i + 1 observations from the mean of the first i.
    m <- mp + (x[i + 1] - mp) / (i + 1)
    # Matching update of the sample variance.
    v <- (1 - 1 / i) * v + (i + 1) * (m - mp)^2
  }
  list(moyenne = m, variance = v)
}
x <- c(3, 4, 7, 2, 9, 6)
mv(x)
# (b)
mean(x)
var(x)

# Exercise 19 ----
score <- c(283, 278, 276, 271, 276, 277, 280, 275, 280, 284, 280, 277, 282,
           279, 285, 281, 283, 278, 277, 275, 277, 279, 274, 276, 270, 279,
           280, 278, 272, 276, 281, 279, 276, 281, 289, 280)
quantile(score, probs = c(0.1, 0.9))