******************
* LMUP score calculation and imputation of missing data
* JENNIFER HALL
* DECEMBER 2013
******************
* This do file calculates the London Measure of Unplanned Pregnancy score, checks missing values and imputes the mean item score to replace the missing values
* variable names are as follows:
* LMUP_q1 - contraception question
* LMUP_q2 - timing question
* LMUP_q3 - planning question
* LMUP_q4 - desire question
* LMUP_q5 - partner discussion question
* LMUP_q6 - pre-pregnancy preparation question
* some of the commands used are not in the standard Stata package and may need to be downloaded before running this file, e.g. mvpatterns
* it assumes that each variable LMUP_q1 - 6 is already scored 0,1,2
******** calculate LMUP scores *************
* Sum the six item scores into a single total.
* egen's rowtotal() counts a missing item as 0, so every case receives a score even when some items are unanswered.
local lmup_items LMUP_q1 LMUP_q2 LMUP_q3 LMUP_q4 LMUP_q5 LMUP_q6
egen byte LMUP_score = rowtotal(`lmup_items')
label variable LMUP_score "LMUP score no imputation"
* basic summary first, then the detailed one (percentiles, skewness, kurtosis)
summarize LMUP_score
summarize LMUP_score, detail
* frequency of each total score; the missing option lists any missing scores as their own row
tabulate LMUP_score, missing
******** check for missing data *************
* tabulate each of questions 1-6 in turn, showing missing values as a separate category
local lmup_items LMUP_q1 LMUP_q2 LMUP_q3 LMUP_q4 LMUP_q5 LMUP_q6
foreach item of varlist `lmup_items' {
    tabulate `item', missing
}
* list the patterns of missing answers across the six items, to see whether any woman has missing data for more than 2 questions
mvpatterns `lmup_items'
******** impute missing data using the mean of the items scored *************
* Impute the missing values for questions 1 - 6 by mean-item substitution:
*   1. the total of the items that have been answered is already held in LMUP_score (missing items were summed as 0)
*   2. count the number of items missing (nmis_lmup) and hence the number answered (lmup_total_answered)
*   3. mean item score = LMUP_score / lmup_total_answered
*   4. imputed total = LMUP_score + (mean item score * nmis_lmup), rounded to the nearest whole number
* count number missing across q1-6
* rowmiss() is a built-in egen function, so this step does not need a user-written package to be installed
egen nmis_lmup = rowmiss(LMUP_q1 LMUP_q2 LMUP_q3 LMUP_q4 LMUP_q5 LMUP_q6)
label variable nmis_lmup "Number of missing answers on LMUP questions"
tab nmis_lmup
* count total number answered (there are 6 items in total)
gen lmup_total_answered=6-nmis_lmup
label variable lmup_total_answered "Total number of LMUP questions answered"
tab lmup_total_answered
* cross-check: answered and missing should always add up to 6
tab lmup_total_answered nmis_lmup
* any women with more than 2 missing answers must be excluded
* NB this drops those observations from the dataset in memory; the counts before and after show how many were removed
count
drop if nmis_lmup >2
count
* calculate the mean of the items answered (at least 4 items are answered for every remaining woman, so the divisor is never 0)
gen lmup_mean_score=LMUP_score/lmup_total_answered
label variable lmup_mean_score "Mean LMUP item score"
summarize lmup_mean_score
* Calculate the imputation value, which is the mean of the items (lmup_mean_score) multiplied by the number of missing items (nmis_lmup).
gen imputation_value = lmup_mean_score*nmis_lmup
label variable imputation_value "Imputation value"
* Add the imputation value (imputation_value) to the total of the answered items (LMUP_score) and round to the nearest whole number.
gen lmup_imp = LMUP_score +imputation_value
replace lmup_imp=round(lmup_imp)
label variable lmup_imp "Total LMUP score with imputation"
* compare the imputed score against the unimputed score
tab lmup_imp LMUP_score
* Summarize the LMUP scores containing the imputed missing values - this is the variable to be used for analysis
summarize lmup_imp, detail
*** depending on the level and pattern of missing data you have you may wish to look at other methods of handling missing data and how this affects the LMUP score
* calculate LMUP summary statistics using casewise deletion, i.e. women with any missing data are excluded
summarize LMUP_score if nmis_lmup==0, detail
* don't forget to save!