Exploratory analysis of CMS Medicare Part D claims data

Read in the data

require(data.table)

## Loading required package: data.table

require(dplyr)

## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:data.table':
## 
##     between, last
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

require(ggplot2)

## Loading required package: ggplot2

df <- tbl_df(fread("pp_ccwid_bn_2012_r.csv", header=T))

## 
Read 0.0% of 21970751 rows
Read 6.2% of 21970751 rows
Read 12.3% of 21970751 rows
Read 18.6% of 21970751 rows
Read 24.7% of 21970751 rows
Read 30.9% of 21970751 rows
Read 37.0% of 21970751 rows
Read 43.2% of 21970751 rows
Read 49.4% of 21970751 rows
Read 55.7% of 21970751 rows
Read 61.9% of 21970751 rows
Read 68.1% of 21970751 rows
Read 74.3% of 21970751 rows
Read 80.5% of 21970751 rows
Read 86.6% of 21970751 rows
Read 92.5% of 21970751 rows
Read 98.3% of 21970751 rows
Read 21970751 rows and 9 (of 9) columns from 1.370 GB file in 00:00:20

Find top 50 drugs by claim count

d <- df %>% group_by(BN) %>% summarize(tot_claim = sum(CLAIM_COUNT)) %>% arrange(desc(tot_claim))
qplot(x=reorder(BN, -tot_claim), y=tot_claim, geom="bar", stat="identity", data=d[1:50,]) + 
  labs(title="Top 50 drugs by claim count", x="Drug", y="Claims") +
  theme_bw() + 
  theme(axis.text.x = element_text(angle=45, hjust=1))

Find the top 50 drugs by gross expense

d <- df %>% group_by(BN) %>%  summarize(tot_cost = sum(GROSS_DRUG_COST_SUM1)/1e6) %>% 
  arrange(desc(tot_cost))
qplot(x=reorder(BN, -tot_cost), y=tot_cost, geom="bar", stat="identity", data=d[1:50,]) +
  labs(title="Top 50 drugs by cost", x="Drug", y="Cost in millions ($)") +
  theme_bw() + 
   theme(axis.text.x = element_text(angle=45, hjust=1))

Find top 50 drug classes by claims count

df <- tbl_df(fread("pp_part_d_with_category.csv", header=T))

## 
Read 2.6% of 10186671 rows
Read 20.1% of 10186671 rows
Read 34.8% of 10186671 rows
Read 50.5% of 10186671 rows
Read 66.6% of 10186671 rows
Read 82.4% of 10186671 rows
Read 98.0% of 10186671 rows
Read 10186671 rows and 10 (of 10) columns from 0.692 GB file in 00:00:09

d <- df %>% group_by(CATEGORY) %>% summarize(tot_claim = sum(CLAIM_COUNT)) %>%
  arrange(desc(tot_claim))

qplot(x=reorder(CATEGORY, -tot_claim), y=tot_claim, geom="bar", stat="identity", data=d[1:50,]) + 
  labs(title="Top 50 drug categories by claim count", x="Drug category", y="Claims") +
  theme_bw() + 
  theme(axis.text.x = element_text(angle=45, hjust=1))

pdf("test.pdf", width=6, height=4)
qplot(x=CATEGORY, y=CLAIM_COUNT, geom="boxplot", data=df %>% filter(CATEGORY %in% d$CATEGORY[1:50]), outlier.size=.8) + 
  labs(title="Top 50 drug categories by claim count", x="Drug category", y="Claims") +
  theme_bw() + 
  theme(axis.text.x = element_text(angle=45, hjust=1))
dev.off()

## pdf 
##   2

Find top 50 drug classes by gross cost

d <- df %>% group_by(CATEGORY) %>% summarize(tot_cost = sum(GROSS_DRUG_COST_SUM1)/1e6) %>%
  arrange(desc(tot_cost))

qplot(x=reorder(CATEGORY, -tot_cost), y=tot_cost, geom="bar", stat="identity", data=d[1:50,]) +
  labs(title="Top 50 drug categories by total cost", x="Drug category", y="Cost in millions ($)") +
  theme_bw() + 
  theme(axis.text.x = element_text(angle=45, hjust=1))

d <- df %>% filter(CATEGORY %in% d$CATEGORY[1:50])

qplot(x=CATEGORY, y=GROSS_DRUG_COST_SUM1, geom="boxplot", data=d, outlier.size=0.8) +
  labs(title="Top 50 drug categories by total cost", x="Drug category", y="Cost in dollars ($)") + 
  theme_bw() + 
  theme(axis.text.x = element_text(angle=45, hjust=1))

d <- df %>% group_by(CATEGORY) %>% summarize(tot_cost = sum(GROSS_DRUG_COST_SUM1)) %>%
  arrange(desc(tot_cost))
qplot(x=tot_cost, y=reorder(CATEGORY, tot_cost), geom="point", data=d) +
  geom_point(shape=5) +
  labs(title="Total cost for each drug category", x="Cost ($)", y="Drug category") +
  theme_bw()

Exploratory analysis of CMS Medicare Part D claims data

by Michael L. Bernauer

06/08/2015

Read in the data

Find top 50 drugs by claim count

Find the top 50 drugs by gross expense

Find top 50 drug classes by claims count

Find top 50 drug classes by gross cost