\documentclass[a4paper,12pt,titlepage]{article}
\usepackage{Sweave}
\usepackage{setspace}
\onehalfspacing
\begin{document}
<>=
require(ws.data)
library(backtest)
data(secref)
data(yearly)
@
<>=
#####################################
##
## Functions
##
#####################################
grab.data <- function(symbols, years){
  ## Fetch daily data for the given symbols across multiple years.
  ##
  ## symbols - character vector of ticker symbols to keep
  ## years   - numeric vector of years; each year's "daily.YYYY" data
  ##           set from the ws.data package is loaded and subset
  ##
  ## Returns (invisibly) one data.frame containing the rows for all
  ## requested symbols across all requested years.
  ## All data come from the ws.data package; load it if needed.
  require(ws.data)
  ## Collect one data.frame per year and bind once at the end, rather
  ## than growing a data.frame inside the loop (which copies on every
  ## iteration).
  pieces <- vector("list", length(years))
  for (i in seq_along(years)) {
    file.name <- paste("daily", years[i], sep = ".")
    ## data() loads the object named "daily.YYYY"; get() then
    ## retrieves it by name.  This replaces the original
    ## eval(parse(text = ...)), which is fragile and hard to read.
    data(list = file.name)
    pieces[[i]] <- subset(get(file.name), symbol %in% symbols)
  }
  ## The leading empty data.frame preserves the original behavior of
  ## returning an empty data.frame when `years` is empty.
  grabbed <- do.call(rbind, c(list(data.frame()), pieces))
  ## Return the combined data.frame without printing it.
  invisible(grabbed)
}
eom <- function(d){
  ## Given a vector of Date objects, return the last calendar day of
  ## the month each date falls in.  This lets callers derive month-end
  ## anchor dates directly from an input data frame instead of
  ## computing them by hand.
  stopifnot(is(d, "Date"))
  ## Approach: snap each date to the first of its month, add 40 days
  ## (guaranteed to land somewhere in the following month), snap again
  ## to the first of THAT month, and step back one day.  The result is
  ## the last day of the month we started in.
  first.of.month <- as.Date(paste(format(d, "%Y-%m"), "01", sep = "-"))
  in.next.month <- first.of.month + 40
  first.of.next <- as.Date(paste(format(in.next.month, "%Y-%m"), "01", sep = "-"))
  invisible(first.of.next - 1)
}
calc.returns <- function(the.data,
                         date = NULL,
                         d.before = 30,
                         d.after = 0){
  ## Compound total returns (tret) over a calendar-day window around
  ## each anchor date, for every stock in `the.data`.
  ##
  ## the.data - data.frame with at least symbol, v.date (Date), tret
  ## date     - optional character vector of anchor dates; when NULL,
  ##            the unique end-of-month dates implied by
  ##            the.data$v.date are used (see eom())
  ## d.before - calendar days before the anchor to include (exclusive)
  ## d.after  - calendar days after the anchor to include (inclusive)
  ##
  ## Returns (invisibly) a data.frame with columns symbol, v.date, and
  ## ret.<d.before>.<d.after>.d; rows whose return is NA are dropped.
  ## Returns NULL when no anchor produced any data.
  stopifnot(all(c("symbol", "v.date", "tret") %in% names(the.data)))
  if(is.null(date)){
    ## Default anchors: every distinct month-end present in the data.
    anchors <- unique(eom(the.data$v.date))
  } else {
    stopifnot(is.character(date))
    anchors <- as.Date(date)
  }
  ## Output column is named after the window, e.g. ret.30.0.d (we only
  ## allow calendar-day arguments, hence the "d" suffix).
  out.name <- paste("ret", d.before, d.after, "d", sep = ".")
  res <- NULL
  ## seq_along() (rather than 1:length()) behaves correctly when
  ## anchors is empty; indexing (rather than `for (a in anchors)`)
  ## preserves the Date class.
  for (i in seq_along(anchors)) {
    anchor <- anchors[i]
    ## Window is approximate: plain calendar-day arithmetic.
    start.date <- anchor - d.before
    end.date <- anchor + d.after
    ## Keep only rows inside (start.date, end.date]; aggregate() will
    ## use everything it is given, so subset first.
    data.sub <- the.data[start.date < the.data$v.date & the.data$v.date <= end.date, ]
    ## Skip anchors with no data, e.g. windows before the sample starts.
    if (nrow(data.sub) == 0) next
    ## aggregate() takes its output column name from its input, so
    ## rename before aggregating.
    sub.tret <- data.sub["tret"]
    names(sub.tret) <- out.name
    ## Compound the per-period returns into one window return.
    this.res <- aggregate(sub.tret,
                          by = data.sub["symbol"],
                          function(x){ prod(1 + x) - 1 })
    ## Tag each row with its anchor date.
    this.res$v.date <- anchor
    res <- rbind(this.res, res)
  }
  ## If every anchor was skipped, return NULL rather than erroring on
  ## the NA filter below (the original crashed here).
  if (is.null(res)) return(invisible(res))
  ## Drop rows where the window return could not be computed.
  res <- res[!is.na(res[[out.name]]), ]
  invisible(res)
}
calc.industry.returns <- function(x, ret.var, date.var = "v.date"){
  ## Average the return column `ret.var` by industry and date.
  ## Industry membership comes from the ws.data `secref` data set,
  ## merged in by symbol.
  ##
  ## x        - data.frame with symbol, date.var, and ret.var columns
  ## ret.var  - name of the return column to average
  ## date.var - name of the date column (default "v.date")
  ##
  ## Returns (invisibly) a data.frame with the date column (named
  ## after date.var), "m.ind", and the averaged ret.var column.
  stopifnot(all(c("symbol", date.var, ret.var) %in% names(x)))
  ## All calculations require the ws.data package; load if needed.
  require(ws.data)
  ## secref maps each symbol to its industry (m.ind).
  data(secref)
  merged <- merge(x, secref[c("m.ind", "symbol")])
  ## Name the grouping columns dynamically so the output respects
  ## date.var (the original hard-coded "v.date" regardless of the
  ## argument).
  groups <- list(merged[[date.var]], merged$m.ind)
  names(groups) <- c(date.var, "m.ind")
  ## Average returns by industry within each date.
  a <- aggregate(x = merged[ret.var], by = groups, FUN = "mean")
  invisible(a)
}
calc.52wh <- function(x, date = NULL){
  ## 52-week high price for every stock in x, as of each anchor date.
  ##
  ## x    - data.frame with symbol, v.date (Date), tret, and price
  ## date - optional character vector of anchor dates; defaults to the
  ##        unique end-of-month dates implied by x$v.date (see eom())
  ##
  ## Returns (invisibly) a data.frame with columns symbol,
  ## week.52.high, and v.date (the last TRADING day on or before each
  ## anchor).  Returns NULL if no anchor produced any data.
  ## "price" is used below, so validate it too (the original only
  ## checked symbol/v.date/tret and failed later with a worse error).
  stopifnot(all(c("symbol", "v.date", "tret", "price") %in% names(x)))
  if(is.null(date)){
    ## Default anchors: every distinct month-end present in the data.
    anchors <- unique(eom(x$v.date))
  } else {
    stopifnot(is.character(date))
    anchors <- as.Date(date)
  }
  res <- NULL
  ## seq_along() handles the empty-anchor case correctly; indexing
  ## preserves the Date class of the anchors.
  for (i in seq_along(anchors)) {
    day <- anchors[i]
    ## 52-week window: the 365 calendar days ending on the anchor.
    start.date <- day - 365
    end.date <- day
    ## Subset first; aggregate() uses everything it is given.
    x.sub <- x[start.date < x$v.date & x$v.date <= end.date, ]
    ## Skip anchors with no data in the window.
    if (nrow(x.sub) == 0) next
    ## aggregate() names its output column from the input, so rename
    ## before aggregating.
    sub.price <- x.sub["price"]
    names(sub.price) <- "week.52.high"
    ## The 52-week high is simply the max price in the window.
    this.res <- aggregate(sub.price,
                          by = x.sub["symbol"],
                          max)
    ## Tag rows with the last TRADING day of the window (not the last
    ## calendar day): the most recent v.date actually observed.
    this.res$v.date <- max(x.sub$v.date)
    res <- rbind(this.res, res)
  }
  invisible(res)
}
calc.52wh.proximity <- function(x, date = NULL){
  ## For each stock and anchor date, measure how recently the 52-week
  ## high was set.  Output values are NEGATED day counts, so larger
  ## (less negative) values mean a more recent high; this makes the
  ## column sort the same way as other "higher is better" signals.
  ##
  ## x    - data.frame with symbol, v.date (Date), tret, and price
  ## date - optional character vector of anchor dates; defaults to the
  ##        unique end-of-month dates implied by x$v.date (see eom())
  ##
  ## Returns (invisibly) a data.frame with columns symbol,
  ## proximity.52w.high, and v.date (the last TRADING day of each
  ## window).  Returns NULL if no anchor produced any data.
  ## "price" is used below, so validate it too.
  stopifnot(all(c("symbol", "v.date", "tret", "price") %in% names(x)))
  if(is.null(date)){
    anchors <- unique(eom(x$v.date))
  } else {
    stopifnot(is.character(date))
    anchors <- as.Date(date)
  }
  res <- NULL
  for (i in seq_along(anchors)) {
    day <- anchors[i]
    ## 52-week window: the 365 calendar days ending on the anchor.
    start.date <- day - 365
    end.date <- day
    ## Keep only the columns we need in the window.
    x.sub <- x[start.date < x$v.date & x$v.date <= end.date,
               c("v.date", "price", "symbol")]
    ## Skip anchors with no data in the window.
    if (nrow(x.sub) == 0) next
    symbols <- unique(x.sub$symbol)
    ## Build the result frame directly.  The original went through
    ## data.frame(cbind(...)), which coerces everything to a character
    ## matrix and then needs an as.numeric() repair.
    prox <- data.frame(symbol = symbols,
                       proximity.52w.high = NA_real_,
                       stringsAsFactors = FALSE)
    for (sym in symbols) {
      sub.symbol <- x.sub[x.sub$symbol == sym, ]
      high.price <- max(sub.symbol$price)
      ## If the high was hit on several days, use the most recent.
      high.date <- max(sub.symbol[sub.symbol$price == high.price, "v.date"])
      prox$proximity.52w.high[prox$symbol == sym] <-
        as.numeric(end.date - high.date)
    }
    ## Tag rows with the last TRADING day in the window (not the last
    ## calendar day of the month).
    prox$v.date <- max(x.sub$v.date)
    res <- rbind(prox, res)
  }
  ## Nothing to negate if every anchor was skipped (the original
  ## crashed on the line below when res was NULL).
  if (is.null(res)) return(invisible(res))
  ## Negate so that more recent highs get larger values.
  res$proximity.52w.high <- -res$proximity.52w.high
  invisible(res)
}
calc.52w.ratio <- function(x) {
  ## Compute the George-and-Hwang ratio P_{i,t-1} / high_{i,t-1}: each
  ## stock's price relative to its 52-week high.
  ##
  ## x - the full daily data.frame as obtained with grab.data (must
  ##     contain price, symbol, v.date, plus whatever calc.52wh needs)
  ##
  ## Returns (invisibly) the month-end rows with an added `ratio`
  ## column.
  highs <- calc.52wh(x)
  ## Merge on symbol and v.date so each month-end high is paired with
  ## the price observed on that same (trading) day.
  merged <- merge(highs, x[c("price", "symbol", "v.date")])
  merged$ratio <- merged$price / merged$week.52.high
  invisible(merged)
}
make.industry.portfolio <- function(x, ret.var = "ret.182.0.d", date.var="v.date",
                                    pct.keep = .333){
  ## Rank industries by their past 6-month return each month and label
  ## each industry row "winner", "loser", or "neutral".
  ##
  ## x        - daily data.frame (input to calc.returns)
  ## ret.var  - name of the industry-return column to rank on
  ## date.var - name of the date column
  ## pct.keep - fraction of industries in each of the winner and loser
  ##            buckets (default one third)
  ##
  ## Returns (invisibly) the industry-return rows with an added
  ## industry.rank column; ret.var is renamed "ret.6m.0.ind".
  stopifnot(all(date.var %in% names(x)))
  ## Industry returns per month, built from 182-day backward returns.
  x.ind <- calc.industry.returns(calc.returns(x, d.before = 182, d.after = 0),
                                 ret.var, date.var = date.var)
  ## Drop months where the industry return could not be computed.
  x.ind <- x.ind[!is.na(x.ind[[ret.var]]), ]
  ## Portfolios are re-formed every month.
  unique.dates <- unique(x.ind[[date.var]])
  ## Accumulate per-date pieces in a list; binding once at the end
  ## avoids repeated rbind copies.
  pieces <- vector("list", length(unique.dates))
  for (k in seq_along(unique.dates)) {
    date <- unique.dates[k]
    ## Rows for this one date, sorted best-to-worst so head()/tail()
    ## pick winners/losers.
    sample <- x.ind[x.ind$v.date == date, ]
    sample <- sample[order(sample[[ret.var]], decreasing = TRUE), ]
    ## Winner and loser buckets each hold pct.keep of the industries.
    num <- round(nrow(sample) * pct.keep)
    winners <- head(sample, num)
    losers <- tail(sample, num)
    ## Guard the middle slice: when 2*num >= nrow(sample) the original
    ## (num+1):(nrow-num) sequence ran BACKWARDS and duplicated rows.
    if (num + 1 <= nrow(sample) - num) {
      neutral <- sample[(num + 1):(nrow(sample) - num), ]
    } else {
      neutral <- sample[0, ]
    }
    ## Label each bucket.
    winners$industry.rank <- "winner"
    losers$industry.rank <- "loser"
    neutral$industry.rank <- "neutral"
    pieces[[k]] <- rbind(winners, neutral, losers)
  }
  portfolios <- do.call(rbind, c(list(data.frame()), pieces))
  ## Standardize the return column name for downstream merges.
  names(portfolios)[names(portfolios) == ret.var] <- "ret.6m.0.ind"
  invisible(portfolios)
}
make.52wh.portfolio <- function(x, sort.var="ratio",
                                date.var="v.date", pct.keep = .333){
  ## Rank stocks by their 52-week-high ratio each month and label each
  ## row "winner", "loser", or "neutral".
  ##
  ## x        - daily data.frame (input to calc.52w.ratio)
  ## sort.var - column to rank on (default the 52-week-high "ratio")
  ## date.var - name of the date column
  ## pct.keep - fraction of stocks in each of the winner and loser
  ##            buckets (default one third)
  ##
  ## Returns (invisibly) the ratio rows with an added ratio.52w.rank
  ## column.
  stopifnot(all(date.var %in% names(x)))
  ## Price / 52-week-high ratios, one row per stock per month-end.
  x.ratio <- calc.52w.ratio(x)
  ## Drop rows where the ratio could not be computed.
  x.ratio <- x.ratio[!is.na(x.ratio[[sort.var]]), ]
  ## Portfolios are re-formed every month.
  unique.dates <- unique(x.ratio[[date.var]])
  ## Accumulate per-date pieces in a list; bind once at the end.
  pieces <- vector("list", length(unique.dates))
  for (k in seq_along(unique.dates)) {
    date <- unique.dates[k]
    ## Rows for this one date, sorted best-to-worst so head()/tail()
    ## pick winners/losers.
    sample <- x.ratio[x.ratio$v.date == date, ]
    sample <- sample[order(sample[[sort.var]], decreasing = TRUE), ]
    ## Winner and loser buckets each hold pct.keep of the stocks.
    num <- round(nrow(sample) * pct.keep)
    winners <- head(sample, num)
    losers <- tail(sample, num)
    ## Guard the middle slice: when 2*num >= nrow(sample) the original
    ## (num+1):(nrow-num) sequence ran BACKWARDS and duplicated rows.
    if (num + 1 <= nrow(sample) - num) {
      neutral <- sample[(num + 1):(nrow(sample) - num), ]
    } else {
      neutral <- sample[0, ]
    }
    ## Label each bucket.
    winners$ratio.52w.rank <- "winner"
    losers$ratio.52w.rank <- "loser"
    neutral$ratio.52w.rank <- "neutral"
    pieces[[k]] <- rbind(winners, neutral, losers)
  }
  portfolios <- do.call(rbind, c(list(data.frame()), pieces))
  invisible(portfolios)
}
pairwise.industry.portfolio <- function(big.table, ind.returns, rank.52="winner", date.var="v.date"){
  ## Within one 52-week-high bucket (rank.52), re-rank industries by
  ## their past 6-month industry return and label each stock's
  ## industry winner / neutral / loser.  This supports the pairwise
  ## test of industry momentum inside 52-week-high portfolios.
  ##
  ## big.table   - merged table with ratio.52w.rank, m.ind,
  ##               ret.6m.0.ind, symbol, and date.var columns
  ## ind.returns - unused; retained so existing call sites keep
  ##               working.  NOTE(review): consider removing.
  ## rank.52     - which 52-week-high bucket to analyze
  ## date.var    - name of the date column
  ##
  ## Returns (invisibly) the rank.52 subset of big.table with an added
  ## pw.52.ind.rank column; rows whose industry got no label dropped.
  sub.table <- big.table[big.table$ratio.52w.rank == rank.52, ]
  dates <- unique(sub.table[[date.var]])
  ## Accumulate per-date pieces in a list; bind once at the end.
  pieces <- vector("list", length(dates))
  for (k in seq_along(dates)) {
    date <- dates[k]
    ## Rows for this one date.
    x <- sub.table[sub.table[[date.var]] == date, ]
    ## One row per industry (first occurrence), sorted best-to-worst.
    ind.list <- unique(x$m.ind)
    returns <- x[match(ind.list, x$m.ind), c("ret.6m.0.ind", "m.ind", "v.date")]
    returns <- returns[order(returns$ret.6m.0.ind, decreasing = TRUE), ]
    ## Top/bottom third of the industries become winners/losers.
    num <- round(length(ind.list) * .333)
    winners <- head(returns, num)
    losers <- tail(returns, num)
    ## Guard the middle slice: when 2*num >= nrow(returns) the
    ## original (num+1):(nrow-num) sequence ran BACKWARDS and
    ## duplicated rows.
    if (num + 1 <= nrow(returns) - num) {
      neutral <- returns[(num + 1):(nrow(returns) - num), ]
    } else {
      neutral <- returns[0, ]
    }
    ## Label each bucket.
    winners$pw.52.ind.rank <- "winner"
    losers$pw.52.ind.rank <- "loser"
    neutral$pw.52.ind.rank <- "neutral"
    pieces[[k]] <- rbind(winners, neutral, losers)
  }
  ind.ranks <- do.call(rbind, c(list(data.frame()), pieces))
  ## The return column would collide in the merge below; drop it.
  ind.ranks$ret.6m.0.ind <- NULL
  ## Attach the per-industry label back onto every stock row.
  pw <- merge(x = sub.table, y = ind.ranks, by = c("v.date", "m.ind"), all.x = TRUE)
  ## Industries that received no label are excluded.
  pw <- pw[!is.na(pw$pw.52.ind.rank), ]
  invisible(pw)
}
month.to.end.of.month <- function(y.m){
  ## Convert a "YYYY-MM" string to the last calendar day of that
  ## month, using the same add-40-days trick as eom().  Useful when a
  ## date has been round-tripped through a character month label.
  first.day <- as.Date(paste(y.m, "01", sep = "-"))
  somewhere.next.month <- first.day + 40
  first.of.next <- as.Date(paste(format(somewhere.next.month, "%Y-%m"), "01", sep = "-"))
  return(first.of.next - 1)
}
#####################################
##
## Data generation / cleanup
##
#####################################
## library() (rather than require()) errors immediately if a package
## is missing instead of silently returning FALSE.
library(ws.data)
library(backtest)
data(secref)
data(yearly)
## Grab daily data for every security in secref, 1998-2007.  The
## argument is spelled out as `symbols`; the original relied on
## partial argument matching of `symbol`.
x <- grab.data(symbols = secref$symbol, years = 1998:2007)
## save(x, file = "grabbed.Rdata")
## f <- file.choose() ## when prompted, grabbed.Rdata
## load(f)
## Remove erroneous data: any single-day return of 200% or more is
## treated as a data error or an unusual corporate action.  Such
## returns dominate the results and create meaningless statistics.
clean <- subset(x, tret < 2)
## Forward 6-month (182 calendar day) returns.  Returns are
## "calculated" even when there is not sufficient future data, so rows
## within 6 months of the end of the sample must be dropped later.
forward <- calc.returns(clean, d.before = 0, d.after = 182)
## save(forward, file = "forward.Rdata")
## f <- file.choose() ##forward.Rdata
## load(f)
## Backward 6-month returns; the symmetric caveat applies to the
## first 6 months of the sample.
backward <- calc.returns(clean, d.before = 182, d.after = 0)
## save(backward, file = "backward.Rdata")
## f <- file.choose() ##backward.Rdata
## load(f)
## Trim the data: only keep stocks that were in the top 1500 by
## market cap during the given year.
z <- subset(clean, paste(clean$symbol, format(clean$v.date, "%Y")) %in% paste(yearly$symbol, yearly$year)[yearly$top.1500])
## Winner/loser/neutral portfolios for the 52-week-high ratio strategy.
ratios <- make.52wh.portfolio(z)
## save(ratios, file = "ratios.Rdata")
## f <- file.choose() ##ratios.Rdata
## load(f)
## Recency of the 52-week high, in (negated) days.
recency <- calc.52wh.proximity(z)
## save(recency, file = "recency.Rdata")
## f <- file.choose() ##recency.Rdata
## load(f)
## Combine into one table.  The data contain the last TRADING day of
## the month while eom() produces the last CALENDAR day, so we merge
## on the %Y-%m month instead of the exact date.
forward$v.date <- format(forward$v.date, "%Y-%m")
ratios$v.date <- format(ratios$v.date, "%Y-%m")
backward$v.date <- format(backward$v.date, "%Y-%m")
recency$v.date <- format(recency$v.date, "%Y-%m")
## Keep the %Y-%m formatted month as its own column for later use.
ratios$month <- ratios$v.date
## Merge the pieces into a single large table.
merged <- merge(x = ratios, y = forward, by = c("v.date", "symbol"))
merged <- merge(x = merged, y = backward, by = c("v.date", "symbol"))
merged <- merge(x = merged, y = recency, by = c("v.date", "symbol"))
## Add industry membership by merging on symbol.
merged <- merge(merged, secref[c("m.ind", "symbol")])
## Convert the %Y-%m month back to a real end-of-month Date.
merged$v.date <- month.to.end.of.month(merged$v.date)
## Industry returns, ranked into winner/loser/neutral portfolios by
## date.
ind.portfolios <- make.industry.portfolio(z)
## Merge industry-portfolio membership into the final table.
merged <- merge(x = merged, y = ind.portfolios, by = c("v.date", "m.ind"), all.x = TRUE)
## Alternative (unused in the paper): convert recencies into fixed
## categories rather than a straight percentage sort.  Kept for
## reference; may yield interesting results.
## merged$recency <- "intermediate(3to6months)"
## merged[merged$proximity.52w.high > 180, names(merged)=="recency"] <- "old(>6months)"
## merged[merged$proximity.52w.high < 90, names(merged)=="recency"] <- "close(<3months)"
## Remove rows generated without sufficient data: anything within one
## year of the start of the sample (no 52-week-high statistics) and
## anything within 6 months of the end (no 6-month forward returns).
big.table <- subset(merged, (merged$v.date > (min(merged$v.date) + 365)) &
                    (merged$v.date < (max(merged$v.date) - 182))
                    )
## save(big.table, file = "bigtable.Rdata")
## f <- file.choose()##bigtable.Rdata
## load(f)
#####################################
##
## Analyze data, generate stats / figures
##
#####################################
## Summary quantities referenced by \Sexpr{} expressions in the text.
num.stocks <- length(unique(big.table$symbol))
begin.date <- min(big.table$v.date)
end.date <- max(big.table$v.date)
## JT refers to Jegadeesh and Titman, MG to Moskowitz and Grinblatt, and GH to George and Hwang.
## EXT refers to our extension of their research, ranking by proximity to the 52-week
## high date (in number of days)
## bt* objects are backtest objects; stats* objects are the
## data.frames returned by summaryStats(backtest object).
## Single-signal backtests with 3 buckets; the high bucket holds
## winners, the low bucket losers.
btJT <- backtest(big.table, id.var = "symbol", date.var = "v.date", in.var = "ret.182.0.d", ret.var = "ret.0.182.d", natural = TRUE, buckets = 3)
btMG <- backtest(big.table, id.var = "symbol", date.var = "v.date", in.var = "industry.rank", ret.var = "ret.0.182.d", natural = TRUE, buckets = 3)
btGH <- backtest(big.table, id.var = "symbol", date.var = "v.date", in.var = "ratio.52w.rank", ret.var = "ret.0.182.d", natural = TRUE, buckets = 3)
btEXT <- backtest(big.table, id.var = "symbol", date.var = "v.date", in.var = "proximity.52w.high", ret.var = "ret.0.182.d", natural = TRUE, buckets = 3)
## Multiple-in.var backtests (one backtest object covering several
## signals), same bucket convention.
btJT.MG.GH <- backtest(big.table, id.var = "symbol", date.var = "v.date", in.var = c("ret.182.0.d","ret.6m.0.ind","ratio"), ret.var = "ret.0.182.d", natural = TRUE, buckets = 3)
btEXT.GH <- backtest(big.table, id.var = "symbol", date.var = "v.date", in.var = c("proximity.52w.high","ratio"), ret.var = "ret.0.182.d", natural = TRUE, buckets = 3)
## summaryStats() figures below are per 6-month period; conversion to
## monthly/annual rates happens in the \Sexpr{} expressions in the
## text.  NOTE(review): we assume the LAST row of the summaryStats
## data.frame holds the overall averages and that its columns include
## "low", "high", and "spread" — confirm against the backtest package.
statsJT <- summaryStats(btJT)
JT.mean.spread <- statsJT[nrow(statsJT),names(statsJT) == "spread"]
JT.avg.loser.return <- statsJT[nrow(statsJT),names(statsJT) == "low"]
JT.avg.winner.return <- statsJT[nrow(statsJT),names(statsJT) == "high"]
statsMG <- summaryStats(btMG)
MG.mean.spread <- statsMG[nrow(statsMG),names(statsMG) == "spread"]
MG.avg.loser.return <- statsMG[nrow(statsMG),names(statsMG) == "low"]
MG.avg.winner.return <- statsMG[nrow(statsMG),names(statsMG) == "high"]
statsGH <- summaryStats(btGH)
GH.mean.spread <- statsGH[nrow(statsGH),names(statsGH) == "spread"]
GH.avg.loser.return <- statsGH[nrow(statsGH),names(statsGH) == "low"]
GH.avg.winner.return <- statsGH[nrow(statsGH),names(statsGH) == "high"]
## Same extraction for our extension (52-week-high recency).
statsEXT <- summaryStats(btEXT)
EXT.mean.spread <- statsEXT[nrow(statsEXT),names(statsEXT) == "spread"]
EXT.avg.loser.return <- statsEXT[nrow(statsEXT),names(statsEXT) == "low"]
## The "== 2" comparison coerces 2 to "2", i.e. it selects the middle
## bucket's column.  NOTE(review): presumably summaryStats names the
## middle of 3 buckets "2" — confirm against the backtest package.
EXT.avg.neutral.return <- statsEXT[nrow(statsEXT),names(statsEXT) == 2]
EXT.avg.winner.return <- statsEXT[nrow(statsEXT),names(statsEXT) == "high"]
## Subsets of big.table by portfolio label, used for the pairwise
## (one-strategy-within-another) backtests below.
MG.winners <- big.table[big.table$industry.rank == "winner",]
MG.losers <- big.table[big.table$industry.rank == "loser",]
MG.neutral <- big.table[big.table$industry.rank == "neutral",]
GH.winners <- big.table[big.table$ratio.52w.rank == "winner",]
GH.losers <- big.table[big.table$ratio.52w.rank == "loser",]
GH.neutral <- big.table[big.table$ratio.52w.rank == "neutral",]
## Pairwise backtests of industry momentum WITHIN each 52-week-high
## bucket (industries are re-ranked inside each bucket).
pw.w <- pairwise.industry.portfolio(big.table, rank.52="winner")
pw.n <- pairwise.industry.portfolio(big.table, rank.52="neutral")
pw.l <- pairwise.industry.portfolio(big.table, rank.52="loser")
pw.GH.MG.w <- backtest(pw.w, id.var = "symbol", date.var = "v.date", in.var = "pw.52.ind.rank", ret.var = "ret.0.182.d", natural = TRUE, buckets = 3)
pw.GH.MG.n <- backtest(pw.n, id.var = "symbol", date.var = "v.date", in.var = "pw.52.ind.rank", ret.var = "ret.0.182.d", natural = TRUE, buckets = 3)
pw.GH.MG.l <- backtest(pw.l, id.var = "symbol", date.var = "v.date", in.var = "pw.52.ind.rank", ret.var = "ret.0.182.d", natural = TRUE, buckets = 3)
## Spread and bucket returns for the pairwise-within-52wh tables.
stats.GH.MG.w <- summaryStats(pw.GH.MG.w)
GH.MG.w.spread <- stats.GH.MG.w[nrow(stats.GH.MG.w),names(stats.GH.MG.w) == "spread"]
GH.MG.w.loser.ret <- stats.GH.MG.w[nrow(stats.GH.MG.w),names(stats.GH.MG.w) == "low"]
GH.MG.w.winner.ret <- stats.GH.MG.w[nrow(stats.GH.MG.w),names(stats.GH.MG.w) == "high"]
stats.GH.MG.n <- summaryStats(pw.GH.MG.n)
GH.MG.n.spread <- stats.GH.MG.n[nrow(stats.GH.MG.n),names(stats.GH.MG.n) == "spread"]
GH.MG.n.loser.ret <- stats.GH.MG.n[nrow(stats.GH.MG.n),names(stats.GH.MG.n) == "low"]
GH.MG.n.winner.ret <- stats.GH.MG.n[nrow(stats.GH.MG.n),names(stats.GH.MG.n) == "high"]
stats.GH.MG.l <- summaryStats(pw.GH.MG.l)
GH.MG.l.spread <- stats.GH.MG.l[nrow(stats.GH.MG.l),names(stats.GH.MG.l) == "spread"]
GH.MG.l.loser.ret <- stats.GH.MG.l[nrow(stats.GH.MG.l),names(stats.GH.MG.l) == "low"]
GH.MG.l.winner.ret <- stats.GH.MG.l[nrow(stats.GH.MG.l),names(stats.GH.MG.l) == "high"]
## The reverse pairwise test: 52-week-high ranking WITHIN each
## industry-momentum bucket (using the MG.* subsets made above).
pw.MG.GH.w <- backtest(MG.winners, id.var = "symbol", date.var = "v.date", in.var = "ratio.52w.rank", ret.var = "ret.0.182.d", natural = TRUE, buckets = 3)
pw.MG.GH.n <- backtest(MG.neutral, id.var = "symbol", date.var = "v.date", in.var = "ratio.52w.rank", ret.var = "ret.0.182.d", natural = TRUE, buckets = 3)
pw.MG.GH.l <- backtest(MG.losers, id.var = "symbol", date.var = "v.date", in.var = "ratio.52w.rank", ret.var = "ret.0.182.d", natural = TRUE, buckets = 3)
## Spread and bucket returns for the 52wh-within-industry tables.
stats.MG.GH.w <- summaryStats(pw.MG.GH.w)
MG.GH.w.spread <- stats.MG.GH.w[nrow(stats.MG.GH.w),names(stats.MG.GH.w) == "spread"]
MG.GH.w.loser.ret <- stats.MG.GH.w[nrow(stats.MG.GH.w),names(stats.MG.GH.w) == "low"]
MG.GH.w.winner.ret <- stats.MG.GH.w[nrow(stats.MG.GH.w),names(stats.MG.GH.w) == "high"]
stats.MG.GH.n <- summaryStats(pw.MG.GH.n)
MG.GH.n.spread <- stats.MG.GH.n[nrow(stats.MG.GH.n),names(stats.MG.GH.n) == "spread"]
MG.GH.n.loser.ret <- stats.MG.GH.n[nrow(stats.MG.GH.n),names(stats.MG.GH.n) == "low"]
MG.GH.n.winner.ret <- stats.MG.GH.n[nrow(stats.MG.GH.n),names(stats.MG.GH.n) == "high"]
stats.MG.GH.l <- summaryStats(pw.MG.GH.l)
MG.GH.l.spread <- stats.MG.GH.l[nrow(stats.MG.GH.l),names(stats.MG.GH.l) == "spread"]
MG.GH.l.loser.ret <- stats.MG.GH.l[nrow(stats.MG.GH.l),names(stats.MG.GH.l) == "low"]
MG.GH.l.winner.ret <- stats.MG.GH.l[nrow(stats.MG.GH.l),names(stats.MG.GH.l) == "high"]
## Axis ticks/labels for the figures.  NOTE(review): assumes the
## plotted date axis has roughly 10 portfolio dates per year so ticks
## 20..110 line up with the year labels 1998..2007 — confirm against
## the plot.
date.index <- seq(20,110,10)
date.labels <- as.character(1998:2007)
@
%% Title to change once we have analyzed our results
\title{52 Week High and Momentum Investing: A Partial Replication of George and Hwang's Results
\footnotemark[1]\footnotetext[1]{ For access to the data used in
this paper, contact David Kane of Kane Capital Management. A very
special thanks to David Kane, David Phillips, and the rest of
Economics 18 during the 2009 winter study at Williams College. The code which replicates the results in this paper is written in R \cite{aRticle} and is available from the authors.} }
\author{Bill Jannen and Vincent Pham}
\maketitle
\begin{abstract}
We replicate the momentum strategies in George and Hwang (2004)
using large cap US stocks from 1998 through 2007. We examine their
findings that the ratio of current stock price to 52-week high is
a useful tool in forecasting future returns over 6 month horizons
and receive discouraging results. We test a new strategy, ranking
based on the recency of the 52-week high, and find it to be a much
stronger indicator of portfolio performance. Portfolios formed using
52-week high recency have an average return spread of
\Sexpr{formatC(100*(EXT.mean.spread) / 6, digits=3,format="f")}$\%$
per month over 6 months, outperforming portfolios formed on the
basis of 52-week high ratio by \Sexpr{formatC((100*(EXT.mean.spread - GH.mean.spread) / 6), digits=3,format="f")}$\%$
per month on average.
\end{abstract}
\section*{ Introduction }
We compare one new and three previously discussed momentum-based portfolio formation strategies over six-month horizons. Jegadeesh and Titman (1993) (hereafter JT) investigates the effect of past returns on the future prices of individual stocks over short-term horizons \cite{IndividualMomentum}. Moskowitz and Grinblatt (1999) (hereafter MG) documents the effect of industry trends on individual stock returns \cite{industrymomentum}. George and Hwang (2004) (hereafter GH) shows that the ratio of current price to the 52-week high price is largely responsible for momentum investing profits. In doing so, GH performs pairwise comparisons of different momentum strategies, including those proposed in JT and MG. We partially replicate the pairwise comparisons from GH, focusing on 6-month future returns.
JT's methodology ranks stocks according to their past 6-month individual returns. Winner and loser portfolios are formed from the top and bottom deciles. The strategy is then to short the losers and go long the winners, with the winner-loser spread generating profits. As such, it is independent of overall market performance. MG's instead ranks industries based on their past 6-month value-weighted returns. It then forms portfolios that include all stocks in the top (bottom) $30\%$ of the winner (loser) industries. GH's strategy ranks stocks based on the proximity of their current price to their 52-week high price. Stocks are ordered according to the ratio $\frac{P_{i,t-1}}{high_{i,t-1}}$ where $P_{i,t-1}$ is the price of stock $i$ at the end of month $t-1$ and $high_{i,t-1}$ is the highest price of stock $i$ during the 12-month period ending on the last day of month $t-1$. We standardize these three portfolio formation strategies as in GH, forming winner and loser portfolios of comparable size and performing comparisons using identical data.
Specifically, we partially replicate the methods of Tables $I$ and $IV$ from George and Hwang (2004). They can be found on pages $2148$ and $2153$ respectively. GH Table I compares average monthly returns for three momentum investing strategies: JT's individual stock momentum, MG's industry momentum, and GH's 52-week high ratio. It reports nearly identical monthly return spreads for all three strategies when independently employed; JT portfolios net a $0.48\%$ monthly spread with both MG and the 52-week high ratio returning $0.45\%$ monthly. GH Table IV documents the pairwise comparisons of MG's industry momentum and GH's 52-week high ratio. Excluding January returns, when the 52-week high ratio is applied within industry momentum groupings, profits are two to four times as large as profits from industry momentum alone. Yet when January returns are included, both using GH's strategy within MG groups and using MG's strategy within GH groups are almost equally profitable \cite{52weekhigh}.
%fix this citation: suppress
In our replication, we use different data than that used in GH. While GH uses CRSP data from 1963 to 2001, we restrict our focus to the $1,500$ largest cap US stocks yearly, spanning the ten years from 1998 to 2007. We form portfolios based on 6-month past returns and 52-week high measures, and evaluate performance through 6-month future returns; so our universe is further limited to \Sexpr{num.stocks} unique securities traded between \Sexpr{begin.date} and \Sexpr{end.date}. The modernity of our data offers a current perspective on momentum investing strategies, and we hope to achieve real-world applicability by using only large cap US stocks. Other authors have similarly applied GH's strategy to unique data sets with favorable results. Marshall and Cahan (2005) applies 52-week high momentum investing to Australian stocks, achieving an average monthly return of $2.14\%$ \cite{Australianmomentum}.
%fix this citation: suppress
%These next two sections will likely change when the data is cleaned. Make sure you update it.
Comparisons of the JT, MG, and GH strategies show significant differences in performance. We find that the ratio of a current stock price to its 52-week high is a significantly weaker predictor of future returns than either industry or individual stock momentum. Of the three strategies, MG's industry momentum is the strongest, with an average \Sexpr{formatC(100*MG.mean.spread / 6,digits=3,format="f")}$\%$ return per month over the next 6 months. JT's individual stock momentum performs second best with an average spread of \Sexpr{formatC(100*JT.mean.spread / 6,digits=3,format="f")}$\%$ per month over 6 months. GH is by far the weakest, with an average monthly spread of \Sexpr{formatC(100 * GH.mean.spread / 6,digits=3,format="f")}$\%$, less than one fourth of JT and one seventh of MG.
%fix. rewrite later.
We analyze a new portfolio strategy that ranks stocks by the recency of the date on which their 52-week high was recorded, achieving strong results. We sort stocks based on 52-week high recency and form three portfolios: close, medium, and far. Close outperforms medium, and medium outperforms far. The average spread between the close and far portfolios is \Sexpr{formatC(100 * EXT.mean.spread,digits=3,format="f")}$\%$. 52-week high recency applies the same principles as the 52-week high ratio; it capitalizes on the market's slow reaction to new information. Unlike the 52-week high ratio, it accounts for smaller volatility in the market. If a stock rarely fluctuates, it will always be near its 52-week high and will always be placed in the winner portfolio according to GH's strategy. However, our strategy takes into account {\it when} the 52-week high was last recorded, and so takes advantage of the market's slow reaction to new information.
\section*{ Data and Methods }
<>=
total.securities <- length(unique(x$symbol))
@
%fix. rewrite later.
The core of our data consists of daily statistics for \Sexpr{total.securities} securities, spanning from \Sexpr{begin.date} to \Sexpr{end.date}. To form this core, we go to the December $31^{st}$ preceding each year from 1998 to 2007, and we list the $1,500$ largest cap US stocks. All daily information on any stock that appeared in at least one of those lists is part of the core of our universe. All return calculations are done on all stocks in our core. However, in order to be included in a portfolio on a given date, a stock must be in the top $1,500$ largest in terms of market capitalization as of the last trading day of the previous year.
%This is done, but we need to include the code in our script...
We eliminate stocks with returns exceeding $200\%$ over consecutive trading days. Such excessive returns are the result of data errors and/or abnormal corporate actions. These data points dominated our results in initial testing, leading to less meaningful conclusions. We remove any rows that correspond to returns exceeding $200\%$ rather than removing all data for those stocks. In total, we remove $1293$ rows, less than $0.02\%$ of the rows from our original data set.
%Check the code to make sure this is the case. It does not currently appear to be so...
Once we have the core, we must calculate 6-month past and future returns for both individual stocks and by industry, as well as the 52-week high. Any portfolios formed on days for which we do not have data spanning both the previous year and the next 6 months cannot be evaluated for all strategies, so those dates are dropped. For this reason, our first portfolios are not formed until \Sexpr{as.character(min(big.table$v.date))}, and our last portfolios are formed on \Sexpr{as.character(max(big.table$v.date))}.
For every stock and every date in our universe, we calculate past 1-month returns, past 6-month returns, forward 6-month returns, and the 52-week high price. We also calculate the past 6-month industry returns by aggregating the returns of all stocks within each industry, regardless of whether each stock is in the top $1,500$ market cap at the time. For both JT's and MG's strategies, we form portfolios based on past 6-month data and hold those positions for 6 months. The strategy is known as (6,6) hereafter. For JT portfolios, our (6,6) positions are formed by sorting stocks in ascending order based on individual past 6-month returns. We place the top (bottom) $33\%$ into the winner (loser) portfolio, and the remaining stocks form the neutral portfolio. Within any month, each JT winner, loser, and neutral portfolio is approximately equal in size. Winner portfolios are approximately equal in size across months as well. The same is true for loser and neutral portfolios.
Under MG's strategy, our (6,6) positions are formed through ordering industries by past 6-month industry returns. For each industry in the top (bottom) $33\%$ of all industries, we put its member stocks into the winner (loser) portfolio, and the remaining stocks into the neutral portfolio. In any given month, the distribution of stocks within winner, loser, and neutral portfolios are unbalanced. Portfolios vary in size across months as well. This methodology is consistent with the strategy as outlined in MG, but our data has great variation in industry size. %include code to calculate information about average industry sizes, and extremes. from jimmy.
To replicate GH's strategy, we form portfolios based on the relation of current stock prices to 52-week highs and hold the positions for 6 months. This relation can be expressed as $\frac{P_{i,t-1}}{high_{i,t-1}}$, where $P_{i,t-1}$ is the price of stock $i$ at the end of month $t-1$ and $high_{i,t-1}$ is the highest price of stock $i$ during the 12-month period ending on the last day of month $t-1$. Much like JT, we rank and divide stocks into winner, loser, and neutral portfolios that are approximately equally sized within and across months.
%%=check this section for accuracy. change pct.keep =0.3 to =0.33
We equal-weight the three portfolios each month. By shorting the loser portfolio and using the money received from our short positions to go long on the winners, our strategy is market-neutral. We report the spread between the winner and the loser portfolios in Table \ref{table:singlecomp}.
%Add a paragraph about means, modes, and outliers
\section*{ Results }
Table \ref{table:singlecomp} presents average 6-month returns for the winner and loser portfolios of the three momentum investing strategies. The rightmost column shows the spread, the return achieved by shorting the loser stocks and going long the winner portfolio. This is a measurement of overall strategy performance. The first row of Table \ref{table:singlecomp} presents JT's individual stock momentum results. The second row is for MG's industry momentum statistics, and the last row displays the returns for GH's 52-week high ratio portfolios.
\begin{table}[h]
\caption{ {\bf Profits from Momentum Strategies}
\newline This is a replication of GH's Table I on page $2148$ \cite{52weekhigh}. This table reports the average monthly returns over 6-month holding periods for three different momentum investing strategies, spanning \Sexpr{begin.date} to \Sexpr{end.date}. JT portfolios are based on the past 6-month returns of individual stocks. MG portfolios are formed through sorting stocks by past 6-month industry returns. GH portfolios rank the ratio of current stock prices to their 52-week highs. In all cases, portfolios are held for 6 months. For each strategy, winner and loser portfolios are formed from the top and bottom $33\%$. Note that the top (bottom) $33\%$ of industries does not necessarily contain the top (bottom) $33\%$ of all stocks.}
\begin{tabular}[t]{l c c c }
\hline
& Winner & Loser & Winner-Loser \\
\hline
JT's individual stock momentum & \Sexpr{formatC(JT.avg.winner.return/6, digits=3,format="f")} &
\Sexpr{formatC(JT.avg.loser.return/6, digits=3,format="f")} &
\Sexpr{formatC(JT.mean.spread/6, digits=3,format="f")} \\
MG's industry momentum & \Sexpr{formatC(MG.avg.winner.return/6, digits=3,format="f")} &
\Sexpr{formatC(MG.avg.loser.return/6, digits=3,format="f")} &
\Sexpr{formatC(MG.mean.spread/6, digits=3,format="f")} \\
GH's 52-week high & \Sexpr{formatC(GH.avg.winner.return/6, digits=3,format="f")} &
\Sexpr{formatC(GH.avg.loser.return/6, digits=3,format="f")} &
\Sexpr{formatC(GH.mean.spread/6, digits=3,format="f")} \\
\end{tabular}
\label{table:singlecomp}
\end{table}
On average, JT's individual stock momentum returns
a monthly spread of \Sexpr{formatC(100*(JT.mean.spread)/6, digits=3,format="f")}$\%$ per month over a 6 month period; MG's industry momentum \Sexpr{formatC(100*(MG.mean.spread)/6, digits=3,format="f")}$\%$; and GH's 52-week high ratio \Sexpr{formatC(100*(GH.mean.spread)/6, digits=3,format="f")}$\%$. The most profitable is MG's industry momentum, outperforming JT's individual stock momentum by \Sexpr{formatC(100*(MG.mean.spread - JT.mean.spread)/6,digits=3,format="f")}$\%$, and GH's 52-week high ratio by \Sexpr{formatC(100*(MG.mean.spread - GH.mean.spread)/6,digits=3,format="f")}$\%$.
\begin{figure}[h]
\centering
\vspace*{.1in}
<>=
## Lattice plot of the monthly return spreads for the GH (52-week high
## ratio) backtest object; print() is required for lattice graphics to
## render inside a Sweave figure chunk.  date.index and date.labels are
## presumably defined in an earlier chunk -- TODO confirm.
print(plot(btGH, type="return", scales = list(x=list(at=date.index, labels=date.labels, rot=45))))
@
\caption{Monthly return spreads for 52-week high ratio}
\label{fig:52wratio}
\end{figure}
Figure \ref{fig:52wratio} reveals that returns from GH's 52-week high ratio exhibit a certain pattern where gains (losses) in any given month are carried onto subsequent months. This is because we report the future 6-month returns of portfolios. Because portfolios overlap with their neighboring portfolios, they all exhibit relatively similar return patterns.
To test the dominance of GH's 52-week high ratio over MG's industry momentum, we conduct two pairwise comparisons as documented in Table \ref{table:pairwise}. In panel A, we divide all the stocks into three categories corresponding to the winner, neutral and loser portfolios as determined by GH's 52-week high ratio. Then, we form three sub-portfolios within each of these categories by applying MG's industry momentum to them separately. In panel B, we reverse the process and form three sub-portfolios within each winner, neutral, and loser portfolio as determined by MG's industry momentum. Neither panel suggests dominance of GH's 52-week high ratio over MG's industry momentum.
\begin{table}[h]
\caption{ {\bf Pairwise Comparisons of the 52-Week High and Industry Momentum Strategies} \newline This is a partial replication of GH's Table IV on page $2153$ \cite{52weekhigh}. Stocks are sorted independently by the ratio of their current price to the previous 52-week high price, and by the past 6-month industry returns. Industry momentum winners (losers) are stocks that fall within the top (bottom) $33\%$ of the industries with the highest (lowest) 6-month returns. 52-week high winners (losers) are stocks that fall within the top (bottom) $33\%$ of all ratios of current stock price to 52-week high price. Panel A reports the average monthly returns over 6-month holding periods from \Sexpr{begin.date} through \Sexpr{end.date} for equal-weighted portfolios that are long 52-week high ratio winners and short 52-week losers {\it within} winner, neutral, and loser portfolios as identified by industry momentum rankings. Panel B reports the average monthly returns over 6-month holding periods from \Sexpr{begin.date} through \Sexpr{end.date} for equal-weighted portfolios that are long industry momentum winners and short industry momentum losers {\it within} winner, neutral, and loser portfolios as identified by 52-week high ratio rankings. }
\begin{tabular}[t]{ l l c }
\hline
Panel A: & 52-week high {\it within} & \\
& industry momentum & \\
\hline
Winner & Winner & \Sexpr{formatC(MG.GH.w.winner.ret/6,digits=3,format="f")}\\
& Loser & \Sexpr{formatC(MG.GH.w.loser.ret/6,digits=3,format="f")}\\
& Spread & \Sexpr{formatC(MG.GH.w.spread/6,digits=3,format="f")}\\
Neutral& Winner & \Sexpr{formatC(MG.GH.n.winner.ret/6,digits=3,format="f")}\\
& Loser & \Sexpr{formatC(MG.GH.n.loser.ret/6,digits=3,format="f")}\\
& Spread & \Sexpr{formatC(MG.GH.n.spread/6,digits=3,format="f")}\\
Loser & Winner & \Sexpr{formatC(MG.GH.l.winner.ret/6,digits=3,format="f")}\\
& Loser & \Sexpr{formatC(MG.GH.l.loser.ret/6,digits=3,format="f")}\\
& Spread & \Sexpr{formatC(MG.GH.l.spread/6,digits=3,format="f")}\\
\hline
Panel B: & Industry momentum {\it within}& \\
& 52-week high & \\
\hline
Winner & Winner & \Sexpr{formatC(GH.MG.w.winner.ret/6,digits=3,format="f")}\\
& Loser & \Sexpr{formatC(GH.MG.w.loser.ret/6,digits=3,format="f")}\\
& Spread & \Sexpr{formatC(GH.MG.w.spread/6,digits=3,format="f")}\\
Neutral& Winner & \Sexpr{formatC(GH.MG.n.winner.ret/6,digits=3,format="f")}\\
& Loser & \Sexpr{formatC(GH.MG.n.loser.ret/6,digits=3,format="f")}\\
& Spread & \Sexpr{formatC(GH.MG.n.spread/6,digits=3,format="f")}\\
Loser & Winner & \Sexpr{formatC(GH.MG.l.winner.ret/6,digits=3,format="f")}\\
& Loser & \Sexpr{formatC(GH.MG.l.loser.ret/6,digits=3,format="f")}\\
& Spread & \Sexpr{formatC(GH.MG.l.spread/6,digits=3,format="f")}\\
\hline
\end{tabular}
\label{table:pairwise}
\end{table}
% paragraph to compare our results to GH's
The results are striking, especially in comparison to GH's. One explanation for these differences is the way we calculate monthly returns. In GH, the return for any given month $t$ is calculated as the equal-weighted average of the returns from the six separate self-financing portfolios formed in months $t-6$ to $t-1$. We calculate the overall return for any given month $t$ as one sixth of the 6-month equal-weighted return of the portfolio formed in that specific month. However, we believe this difference in calculation methods is insufficient to explain the difference in our results. One hypothesis is that the market has become more efficient due to faster diffusion of information, especially information that is readily available like the 52-week high price. This results in quicker market reassessment and a quicker price correction process. For this reason, strategies that employ ranking parameters less readily available to the public might return higher profit.
Figure \ref{fig:cumspread} documents the cumulative spread return and the cumulative quantile return of three momentum investing strategies. Winner portfolios from MG's industry momentum exhibit the strongest growth pattern, followed by winner portfolios from JT's individual stock momentum and GH's 52-week high ratio in that order. The order is reversed for the loser portfolios. In terms of sorting stocks into proper portfolios, MG's industry momentum is most effective as evident in the wide spreads between its winner, neutral and loser portfolios. In the case of JT's individual stock momentum, neutral and loser portfolios exhibit an identical growth pattern though winner portfolios show a considerably stronger growth pattern. The least effective is GH's 52-week high ratio. Its loser and neutral portfolios perform almost identically during the whole 10 years, and its winner portfolios only perform slightly better.
\begin{figure}[h]
\centering
\vspace*{.1in}
<>=
## Cumulative spread and quantile return plot comparing the JT, MG, and
## GH backtests; print() is required for lattice output in Sweave.
print(plot(btJT.MG.GH, type="cumreturn.split"))
@
\caption{Cumulative Spread Return and Cumulative Quantile Return for JT, MG and GH }
\label{fig:cumspread}
\end{figure}
\section*{ Extensions}
%used to be in data/methods, needs to be put in the right place
We employ a new portfolio formation strategy based on the recency of the 52-week high. Instead of calculating the ratio $\frac{P_{i,t-1}}{high_{i,t-1}}$ as in GH, we calculate the number of days that have passed since the latest occurrence of the 52-week high (in cases where the 52-week high is recorded on multiple days, we use the most recent date). We then rank stocks, sorting into winner, neutral, and loser portfolios. The $33\%$ of stocks whose 52-week high occurred most (least) recently are placed in the winner (loser) portfolio. The remaining stocks form the neutral portfolio. We form a self-financing market-neutral portfolio by going long on the winners and shorting the loser portfolios. The strategy is known as 52-week high recency hereafter.
The strategy grows out of an assumption that the market moves as a whole. Moreover, strong stocks tend to grow faster in bull markets and to hold up better in bear markets. It follows that strong stocks would stand a better chance than weak stocks to make a new 52-week high in both markets. Subsequently, stocks that have recently passed their 52-week high prices are more likely to be strong stocks than those that have not passed their 52-week high prices for a while. As a result, we believe factoring 52-week high recency into the 52-week high momentum strategy would improve its forecasting power.
\begin{table}[h]
\caption{ {\bf Proximity to 52-week high} \newline Stocks are sorted independently by the number of days since their most recent 52-week high was recorded. Winners (losers) are stocks that fall within the top (bottom) $33\%$ among stocks with the fewest (most) days since their 52-week high was recorded. Each column represents a separate portfolio, and returns are average monthly returns over a 6-month holding period.}
\begin{tabular}[t]{l c c c c }
\hline
& Winner & Neutral & Loser & Winner-Loser \\
\hline
Proximity to 52-week high (days) &
\Sexpr{formatC(EXT.avg.winner.return/6, digits=3,format="f")} &
\Sexpr{formatC(EXT.avg.neutral.return/6, digits=3,format="f")} &
\Sexpr{formatC(EXT.avg.loser.return/6, digits=3,format="f")} &
\Sexpr{formatC(EXT.mean.spread/6, digits=3,format="f")} \\
\end{tabular}
\label{table:singleext}
\end{table}
\begin{figure}[h]
\centering
\vspace*{.1in}
<>=
## Lattice plot of the monthly return spreads for the 52-week high
## recency (extension) backtest object; print() is required for lattice
## output in Sweave.  date.index and date.labels are presumably defined
## in an earlier chunk -- TODO confirm.
print(plot(btEXT, type="return", scales = list(x=list(at=date.index, labels=date.labels, rot=45))))
@
\caption{Monthly return spreads for 52-week high proximity}
\label{fig:spread}
\end{figure}
Empirically, we find 52-week high recency to be a strong measure of portfolio performance. 52-week high recency realizes an average profit of \Sexpr{formatC(100*EXT.mean.spread/6,digits=3,format="f")}$\%$ per month over 6 months, almost 7 times the average returns of GH. Table \ref{table:singleext} shows the average monthly returns of the winner, loser, and self-financing portfolio generated by going long the winner portfolio and shorting the losers. The average spread compares favorably to the strategies proposed in JT, MG, and GH as seen in Table \ref{table:singlecomp}.
Figure \ref{fig:cumspread2} documents the cumulative spread return and the cumulative quantile return of both 52-week high ratio and 52-week high recency. It is important to note that, under both strategies, portfolios are constructed in a separate and equal-weighted manner so that no gain (loss) of one portfolio is carried onto that of another. In terms of cumulative returns, winner and neutral portfolios from 52-week high recency considerably outperform those from 52-week high ratio. Nonetheless, the loser portfolios from 52-week high ratio outperform those from 52-week high recency, especially during the period between 1999-2002. More importantly, 52-week high recency is much better at sorting stocks into proper winner, neutral and loser portfolios, as shown in the wide spreads between their returns. 52-week high ratio, on the other hand, is not as effective. Stocks in loser and neutral portfolios perform almost identically during the whole 10 years while winner stocks only perform slightly better.
\begin{figure}[h]
\centering
\vspace*{.1in}
<>=
## Cumulative spread and quantile return plot comparing the 52-week
## high recency (extension) and 52-week high ratio (GH) backtests;
## print() is required for lattice output in Sweave.
print(plot(btEXT.GH, type="cumreturn.split"))
@
\caption{Cumulative Spread Return and Cumulative Quantile Return for the 52-week high recency (proximity.52w.high) and 52-week high ratio (ratio) }
\label{fig:cumspread2}
\end{figure}
%suppress citation
Our extension is partially motivated by Wang (2008) \cite{reversalfearmomentum}, which documents a reversal fear in momentum investing. Wang (2008) suggests an important case of underreaction that follows large price surges, which is sometimes associated with new 52-week highs. After such occurrences, people tend to question whether the positive information is already factored into prices and whether prices are sustainable at the new high. Traders are consequentially unwilling to bid a stock as high as its fundamental values suggest. Meanwhile, speculation investors are more likely to cash out. Altogether, they create a temporary sell pressure that pushes down the prices. The more recent the latest 52-week high is, the stronger the pressure is, especially when the sustainability of a recent price surge is uncertain. However, such pressure is only temporary, and the prices eventually rise as good news prevails. By going long on those stocks that have recently passed their 52-week highs, we can capture the momentum effect created by such irrational behavior.
\section*{ Conclusion }
We compare the profitability of three momentum investing strategies: JT's individual stock momentum, MG's industry momentum and GH's 52-week high ratio. JT's individual stock momentum takes long (short) positions on the top (bottom) $33\%$ of stocks based on their past 6-month returns. The strategy was proposed by Jegadeesh and Titman (1993) and realizes an average monthly profit of \Sexpr{formatC(100*JT.mean.spread/6,digits=3,format="f")}$\%$ during the period between
<>=
## First and last portfolio-formation dates in the backtest results.
## NOTE(review): begin.date and end.date are referenced by \Sexpr{}
## much earlier in the document (Data and Methods, table captions);
## Sweave evaluates chunks in order, so this chunk likely needs to be
## moved before the first use -- verify the build does not error.
begin.date <- min(big.table$v.date)
end.date <- max(big.table$v.date)
@
\Sexpr{begin.date} and \Sexpr{end.date}.
MG's Industry momentum, which was proposed by Moskowitz and Grinblatt (1999), takes long (short) positions on stocks on the top (bottom) $33\%$ industries based on the past 6-month returns of the industries. The strategy performs much better than JT's individual stock momentum and realizes an average monthly profit of \Sexpr{formatC(100*MG.mean.spread/6,digits=3,format="f")}$\%$ during the same period. GH's 52-week high ratio bases their measurement on the proximity of stock prices to their 52-week high prices, and takes long (short) positions on the top (bottom) 33$\%$ stocks ranked by that criteria. The strategy returns an average monthly profit of \Sexpr{formatC(100*GH.mean.spread/6,digits=3,format="f")}$\%$ over a 6-month period.
In contradiction of George and Hwang (2004), our results show that proximity to the 52-week high is a weaker indicator of future returns than past returns of either individual stocks or of the whole industry. Our results are actually consistent with the results of Alsubaie and Najand (2008) on momentum investing in the Saudi stock market, the largest market in the Middle East \cite{saudi}. Their results on the 52-week high strategy document a reversal in stocks that have reached their 52-week high. Alsubaie and Najand attribute the differences to the diffusion of information and investor overreaction. They also suggest that investor speculation can move stocks to their 52-week high price. The stocks, however, fall back below their 52-week high price once more accurate news emerges in the market.
Nonetheless, our modified version of GH's 52-week high momentum strategy, which measures stock performance according to the recency of the 52-week high, realizes an average 6-month profit of \Sexpr{formatC(100*EXT.mean.spread/6,digits=3,format="f")}$\%$, almost 7 times the returns from GH. To the best of our knowledge, there is no existing paper dedicated to this version of the 52-week high momentum strategy. Hence, a more in-depth study on the topic is very promising. Possible directions of future work include setting ranges for categorizing recency instead of ranking stocks according to percentage. We could use 3 month, 3-6 month, and 6-12 month periods as recency categories. Pairwise testing of 52-week high recency within other momentum strategies would provide further insight.
\bibliography{JPpaper}
\bibliographystyle{alpha}
\end{document}