A gallery view for Craigslist
Want to share your content on R-bloggers? Click here if you have a blog, or here if you don't.
As much as I love Craigslist, I sometimes find the interface a bit limited.
My biggest wish? That there was an option for showing the search results as an image gallery, like eBay has. This could prove quite useful for browsing things like antiques, furniture, art: categories where I only need a quick glance at an object’s picture and I immediately know if I am interested or not. Craigslist does have an option for embedding pictures in the search results, but their size and resolution are so small that it is quite impractical.
Here is a short R program I have put together that does the job. As much as possible, I have tried to break it into small and meaningful modules so it will be easy to maintain and build upon.
# First, a function for turning a query into a Craigslist URL:
search.url <- function(query,
                       site.url = "http://atlanta.craigslist.org",
                       category = "sss",
                       title.only = TRUE,
                       min.price = integer(0),
                       max.price = integer(0),
                       pic.only = TRUE) {
  ## This function creates a search URL on Craigslist.
  ##
  ## Inputs:
  ## - query: search string; spaces are replaced by "+"
  ## - site.url: Craigslist site URL
  ## - category: a three-letter code for the category; some examples
  ##   "sss": all for sale/wanted, "zip": free stuff
  ## - title.only: boolean for restricting the search to ad titles
  ## - min.price: minimum price; integer(0) or NA means "no minimum"
  ## - max.price: maximum price; integer(0) or NA means "no maximum"
  ## - pic.only: boolean for restricting the search to ads with pics
  ##
  ## Output: a string representing a Craigslist search URL
  search <- list(query = gsub(" ", "+", query),
                 catAbb = category,
                 srchType = if (isTRUE(title.only)) "T" else "A",
                 minAsk = as.integer(min.price),
                 maxAsk = as.integer(max.price),
                 hasPic = as.numeric(pic.only))
  ## Keep only the parameters that were actually supplied. NA has length 1,
  ## so it must be rejected explicitly; otherwise a blank price (which
  ## as.integer("") turns into NA) would be sent as "minAsk=NA".
  valid <- vapply(search,
                  function(value) length(value) > 0L && !anyNA(value),
                  logical(1))
  search.str <- paste(names(search)[valid], search[valid],
                      sep = "=", collapse = "&")
  ## The path segment is fixed to "sss"; the category travels in catAbb.
  paste0(site.url, "/search/sss?", search.str)
}
Then a function that reads the search results and recombines them into an image gallery:
image.gallery <- function(url, ncol = 3L) {
  ## This function reformats the contents of a Craigslist search result URL
  ## into an image gallery, opened into the default browser
  ##
  ## Inputs:
  ## - url: a Craigslist search URL as created by search.url
  ## - ncol: the number of columns for the output image gallery
  ##
  ## Output: none. As a side effect, a browser is opened.
  ## library() instead of require(): fail right away with a clear error if a
  ## package is missing, rather than later with "could not find function".
  library(RCurl)
  library(stringr)
  library(R2HTML)
  scrap <- getURL(url)
  adds <- str_extract_all(scrap, '<p class="row">.*?</p>')[[1]]
  pic.str <- str_extract(adds, 'id="images:.*?jpg"')
  ## str_extract yields NA for ads without a picture id; only those with a
  ## match get an <img> tag, so no ".../NA" image URL is ever generated.
  has.pic <- !is.na(pic.str)
  cells <- adds
  if (any(has.pic)) {
    pic.base <- str_replace(pic.str[has.pic], 'id="images:(.*jpg)"', "\\1")
    pic.url <- paste0("http://images.craigslist.org/", pic.base)
    pic.tags <- paste0('<img src="', pic.url, '" alt="', pic.url, '" ',
                       'style="width:100%"/>')
    cells[has.pic] <- paste(pic.tags, '<BR>', adds[has.pic])
  }
  ## Pad the last row with empty cells; letting matrix() recycle would
  ## repeat the first ads at the end of the gallery.
  n.pad <- (-length(cells)) %% ncol
  add.mat <- matrix(c(cells, rep("", n.pad)), ncol = ncol, byrow = TRUE)
  html <- HTMLInitFile()
  HTML(add.mat, innerBorder = 1, file = html)
  HTMLEndFile(html)
  browseURL(paste0("file://", html))
}
Finally, an interactive function for wrapping everything together:
search.craigslist <- function(site.url = "http://atlanta.craigslist.org",
                              ncol = 3L) {
  ## This function prompts the user with questions for searching Craigslist
  ## (search items, price, etc.) and displays the search results into an image
  ## gallery.
  ##
  ## Inputs:
  ## - site.url: Craigslist site URL
  ## - ncol: the number of columns for the output image gallery
  ##
  ## Output: none. As a side effect, a browser is opened.

  ## Turn a typed price into an integer. A blank or non-numeric answer
  ## becomes integer(0), the "not set" value that search.url expects, rather
  ## than NA, which would end up in the URL as "minAsk=NA".
  parse.price <- function(answer) {
    value <- suppressWarnings(as.integer(answer))
    if (length(value) != 1L || is.na(value)) integer(0) else value
  }

  query <- readline("search Craigslist for: ")
  category <- .pick.category()
  title.only <- readline("search titles only (press 1 or ENTER to skip): ")
  min.price <- readline("min price (press ENTER to skip): ")
  max.price <- readline("max price (press ENTER to skip): ")
  pic.only <- readline("pic only (press 1 or ENTER to skip): ")

  url <- search.url(query = query,
                    site.url = site.url,
                    category = category,
                    title.only = title.only == "1",
                    min.price = parse.price(min.price),
                    max.price = parse.price(max.price),
                    pic.only = pic.only == "1")
  image.gallery(url, ncol = ncol)
}
.pick.category <- function() {
  ## This function prompts a menu for selecting a search category and returns
  ## the corresponding 3-letter code used by Craigslist.
  ##
  ## Output: the selected category code as a string; character(0) if the
  ## user exits the menu without choosing (menu() returns 0).
  ##
  ## The table is parsed with read.table(text = ...) rather than through an
  ## explicit textConnection(), which was never closed and so leaked a
  ## connection on every call.
  categories <- read.table(text = "
    CODE = DESCRIPTION
    sss = all for sale / wanted
    ata = antiques
    ppa = appliances
    ara = arts+crafts
    pta = auto parts
    baa = baby+kids
    bar = barter
    haa = beauty+health
    bia = bikes
    boo = boats
    bka = books
    bfa = business
    cta = cars+trucks
    ema = cd/dvd/vhs
    moa = cell phones
    cla = clothing+accessories
    cba = collectibles
    sya = computers
    ela = electronics
    gra = farm+garden
    zip = free stuff
    fua = furniture
    gms = garage sales
    foa = general for sale
    hsa = household
    wan = items wanted
    jwa = jewelry
    maa = materials
    mca = motorcycles
    msa = musical instruments
    pha = photo+video
    rva = recreational vehicles
    sga = sporting goods
    tia = tickets
    tla = tools
    taa = toys+games
    vga = video gaming", header = TRUE, sep = "=", strip.white = TRUE,
    stringsAsFactors = FALSE)
  selected.idx <- menu(categories$DESCRIPTION, title = "pick a category:")
  return(categories$CODE[selected.idx])
}
R-bloggers.com offers daily e-mail updates about R news and tutorials about learning R and many other topics. Click here if you're looking to post or find an R/data-science job.
Want to share your content on R-bloggers? Click here if you have a blog, or here if you don't.