#!/usr/bin/python
# -*- coding: utf-8 -*-
# For Automator, use:
# PYTHONIOENCODING=UTF-8 bin/goodreads… etc.

import argparse, sqlite3
import re
import sys
from datetime import date, timedelta
import dateutil.parser
from csv import DictReader
from calendar import month_name
# Lower-cased month names, January first; used as the --month choices and to
# turn a chosen name back into its month number.
months = [name.lower() for name in month_name[1:]]

# Maps each --order choice to the SQL ORDER BY expression it selects.
sortFields = dict([
	('title', 'title ASC'),
	('author', 'authorLastFirst ASC'),
	('rating', 'rating DESC'),
	('date', 'dateRead DESC'),
	('recent', 'dateRead ASC'),
])

# Maps every 'Binding' spelling seen in the export (typos and stray spaces
# included) to one normalized kind. Written grouped by kind so a new spelling
# is added next to its siblings.
bookKindFixes = {
	binding: kind
	for kind, bindings in (
		('unknown', ('', 'Unknown Binding')),
		('paperback', (
			'6x9 Paperback',
			'Mass Market Paperback',
			'Paberback',
			'Paperback',
			'Perfect Paperback',
			'Staple Bound',
			'paper',
			'paperback',
			'paperback ',
		)),
		('audio', ('Audio CD', 'Audiobook')),
		('gaming', ('Dungeons & Dragons softcover adventure',)),
		('ebook', ('Electronic', 'Kindle Edition', 'Nook', 'ebook')),
		('hardcover', ('Hardcover', 'Library Binding', 'Textbook Binding')),
		('spiral-bound', ('Plastic Comb', 'Spiral-bound')),
	)
	for binding in bindings
}
# The distinct normalized kinds, alphabetical; these are the --kind choices.
bookKinds = sorted(set(bookKindFixes.values()))

# Text shown for each star rating in markdown output, one star first;
# looked up as ratings[stars - 1].
ratings = [
	'I did not like it.', 'It was okay.', 'I liked it.',
	'I really liked it.', 'It was amazing.',
]

def makeDateFromString(text):
	"""Parse date text with dateutil and return just the date part.

	Used as the argparse type for --since.
	"""
	return dateutil.parser.parse(text).date()

# Command-line interface. Parsing happens at import time and the result is
# kept in the module-level `args`, which the rest of the script reads.
parser = argparse.ArgumentParser(description='Search Goodreads export file.')
parser.add_argument('search', metavar='TERM', nargs='?', help='search through titles and authors')
# FileType opens the file during parsing; the string default is opened the same way
parser.add_argument('--csvfile', metavar='CSVFILE', type=argparse.FileType('r'), nargs='?', help='a Goodreads file to search', default='/Users/USER/Documents/FOLDER/goodreads_library_export.csv')
# 'list' must be among the choices: argparse does not check the default against
# them, so without it the default format could never be requested explicitly
parser.add_argument('--format', choices=['list', 'alarums', 'markdown', 'table'], default='list')
parser.add_argument('--author')
parser.add_argument('--bookshelf', help='limit to bookshelf starting with text')
parser.add_argument('--bookshelves', action='store_true', help='show all bookshelf names')
parser.add_argument('--kind', choices=bookKinds)
parser.add_argument('--links', action='store_true')
# --rating is a minimum; --rating-exact must match exactly
parser.add_argument('--rating', type=int, choices=range(1, 6))
parser.add_argument('--rating-exact', type=int, choices=range(1, 6))
parser.add_argument('--reverse', action='store_true')
parser.add_argument('--review', action='store_true')
parser.add_argument('--since', metavar='DATE', type=makeDateFromString, help="from this date on")
parser.add_argument('--title')
# week/month/year mean the previous complete period; the -to-date forms mean the current one so far
parser.add_argument('--period', choices=['week', 'month', 'year', 'week-to-date', 'month-to-date', 'year-to-date'])
parser.add_argument('--days', type=int)
# month names are lower-cased before being checked against the choices
parser.add_argument('--month', type=str.lower, choices=months)
parser.add_argument('--year', type=int)
parser.add_argument('--order', default='date', choices=sortFields.keys())
parser.add_argument('--showId', action='store_true')
parser.add_argument('--verbose', action='store_true')
args = parser.parse_args()

# import data
# Python 2's csv module reads byte strings, not unicode, so values are decoded after parsing
#http://stackoverflow.com/questions/5004687/python-csv-dictreader-with-utf-8-data
def UnicodeDictReader(utf8_data):
	csvReader = DictReader(utf8_data)
	if args.verbose:
		print csvReader.fieldnames

	for row in csvReader:
		yield {key: massageValues(key, value) for key, value in row.iteritems()}

def massageValues(key, value):
	"""Convert one raw CSV cell into the value stored for its column.

	key is the column name; value is the cell as a UTF-8 byte string.

	Returns a datetime.date for a non-empty 'Date Read' cell (expected as
	year/month/day), None for an empty one, and a unicode string for every
	other column. Raises ValueError when a 'Date Read' cell does not have
	three numeric parts.
	"""
	if key == 'Date Read':
		if value:
			(year, month, day) = value.split('/')
			return date(int(year), int(month), int(day))
		else:
			return None

	# Both ISBN columns get the ="..." wrapper removed: ISBN13 is used as the
	# fallback when ISBN is empty, so leaving it wrapped would store ="978..."
	if key in ('ISBN', 'ISBN13'):
		value = value.strip('="')

	#Goodreads uses extra spaces to differentiate different authors
	#The extra spaces in titles may be an attempt at the same thing, or it may be just typos
	if key in ['Title', 'Author', 'Additional Authors']:
		value = re.sub(r'  +', ' ', value)

	return unicode(value, 'utf-8')

def testQuery(query):
	results = cursor.execute(query)
	for row in results:
		print row

# The reader is a generator: no row is read until the import loop iterates it.
csvfile = args.csvfile
goodreads = UnicodeDictReader(csvfile)
# shelf name -> id of its row in the shelves table
allShelves = {}

# create database
# In-memory only; rows come back as sqlite3.Row so columns can be read by name.
sq3connection = sqlite3.connect(':memory:')
sq3connection.row_factory = sqlite3.Row
cursor = sq3connection.cursor()
for tableDefinition in (
	'CREATE TABLE books (id INTEGER PRIMARY KEY, title, author, authorLastFirst, additionalAuthors, review, rating int, dateRead date, kind, goodreadsId, ISBN)',
	'CREATE TABLE shelves (id INTEGER PRIMARY KEY, shelf)',
	'CREATE TABLE bookshelves (bookId int, shelfId int)',
):
	cursor.execute(tableDefinition)

for row in goodreads:
	if row['Binding'] not in bookKindFixes:
		print 'NEW BINDING FOUND:', row['Binding']
		print 'In row:', row
		kind = 'unknown'
	else:
		kind = bookKindFixes[row['Binding']]

	review = row['My Review']
	if row['Private Notes']:
		review += '\n\n<hr />\n\n' + row['Private Notes']

	book = {
		'title': row['Title'],
		'author': row['Author'],
		'authorLastFirst': row['Author l-f'],
		'additionalAuthors': row['Additional Authors'],
		'kind': kind,
		'review': review,
		'rating': row['My Rating'],
		'dateRead': row['Date Read'],
		'goodreadsId': row['Book Id'],
		'isbn': row['ISBN'] or row['ISBN13'],
	}

	cursor.execute('INSERT INTO books VALUES (NULL, :title, :author, :authorLastFirst, :additionalAuthors, :review, :rating, :dateRead, :kind, :goodreadsId, :isbn)', book)
	bookId = cursor.lastrowid

	shelves = [row['Exclusive Shelf']]
	if row['Bookshelves']:
		extraShelves = row['Bookshelves'].split(', ')
		for extraShelf in extraShelves:
			if extraShelf not in shelves:
				shelves.append(extraShelf)
	
	for shelf in shelves:
		if shelf not in allShelves:
			cursor.execute('INSERT INTO shelves VALUES (NULL, ?)', (shelf,))
			shelfId = cursor.lastrowid
			allShelves[shelf] = shelfId
			if args.verbose:
				print 'Found bookshelf:', shelf, shelfId
		shelfId = allShelves[shelf]
		cursor.execute('INSERT INTO bookshelves VALUES (?, ?)', (bookId, shelfId))

if args.bookshelves:
	print "\n".join(allShelves.keys())
	sys.exit()

# a book class to make it easier to access and display book data
class Book:
	"""Wrap one row of the books table for attribute access and display.

	Column values are read as attributes (book.title, book.rating, ...); a
	name that is not a column of the row reads as None. The display methods
	consult the module-level `args` for the output format and options, and
	shelves() queries the module-level `cursor`.
	"""

	def __init__(self, row):
		# row must offer keys() and lookup by column name, as sqlite3.Row does
		self.data = row

	def __getattr__(self, name):
		"""Return the row's value for column name, or None if there is no such column."""
		# Only reached when normal lookup fails. 'data' itself must not be
		# resolved here (it would recurse forever on an instance whose
		# __init__ has not run), and dunder names must fail normally so that
		# protocol lookups do not receive None instead of "not defined".
		if name == 'data' or name.startswith('__'):
			raise AttributeError(name)
		if name in self.data.keys():
			return self.data[name]
		return None

	def rawstars(self):
		"""Return one '*' per rating point; '' when the rating is 0 or missing."""
		return (self.rating or 0) * '*'

	def stars(self):
		"""Return rawstars() right-aligned in a five-character column."""
		return self.rawstars().rjust(5)

	def authors(self):
		"""Return the author followed by any additional authors, comma-separated."""
		authors = self.author
		if self.additionalAuthors:
			authors += ', ' + self.additionalAuthors
		return authors

	def shelves(self):
		"""Return this book's shelf names, except 'read', in display form.

		Hyphens become spaces and the name is title-cased.
		"""
		# 'read' is a single-quoted SQL string literal; double quotes denote
		# identifiers and only work as strings through a SQLite compatibility quirk
		query = "SELECT shelf FROM shelves LEFT JOIN bookshelves ON bookshelves.shelfId=shelves.id WHERE shelves.shelf != 'read' AND bookshelves.bookId=?"
		results = cursor.execute(query, (self.id,))
		return [shelf[0].replace('-', ' ').title() for shelf in results]

	def links(self, prefix="\n- "):
		"""Return the book's Goodreads URL and its [book:title|id] reference.

		Each of the two entries is preceded by prefix.
		"""
		links = prefix + 'https://www.goodreads.com/book/show/%s' % self.goodreadsId
		links += prefix + '[book:%s|%s]' % (self.title, self.goodreadsId)
		return links

	def line(self):
		"""Return the heading text for this book in the format chosen by --format."""
		if args.format == 'list':
			line = '%s %s' % (self.stars(), self.title)
		elif args.format in ['markdown', 'alarums']:
			line = self.title

		if args.format == 'markdown':
			# a heading followed by a bullet list of the book's details
			line = '# ' + line
			line += '\n- %s' % self.author
			if self.additionalAuthors:
				line += '\n- %s' % self.additionalAuthors
			if self.rating:
				line += '\n- %s' % ratings[self.rating-1]
			for shelf in self.shelves():
				line += '\n- %s' % shelf
			if self.dateRead:
				line += '\n- %s' % self.dateRead
			line += self.links()
			if args.showId:
				line += '\n- %s' % self.goodreadsId
			line += '\n'
			if self.review:
				line += '\n'
		elif args.format == 'table':
			# tab-separated cells; missing values become empty cells
			cells = [self.title, self.rating, self.authors(), self.dateRead]
			if args.showId:
				cells.append(self.goodreadsId)
			cells = ['' if cell is None else unicode(cell) for cell in cells]
			line = "\t".join(cells)
		elif self.author and self.author not in self.title:
			# list and alarums: name the author unless the title already does
			if args.format == 'alarums':
				line += ' (%s):' % self.author
			else:
				line += ' by %s' % self.author
		else:
			line += ':'
		if args.format == 'list':
			# one parenthesized detail: ids if requested, else the date read
			if args.showId:
				idList = [self.goodreadsId]
				if self.ISBN:
					idList.append(self.ISBN)
				line += ' (' + ', '.join(idList) + ')'
			elif self.dateRead:
				line +=  ' (%s)' % self.dateRead
			elif self.bookshelf and not args.bookshelf:
				line += ' (%s)' % self.bookshelf

			if args.links:
				line += self.links(prefix='\n\t')

		return line

	def markdownQuotes(self, quote):
		"""re.sub callback: turn a <blockquote> body (group 1) into '> ' lines.

		Leading whitespace is dropped from each line and blank lines stay blank.
		"""
		quote = quote.group(1)
		quote = quote.strip()
		lines = quote.splitlines()
		quoted = []
		for line in lines:
			line = line.lstrip()
			if line:
				quoted.append('> ' + line)
			else:
				quoted.append('')
		return "\n".join(quoted)

	def markdownGoodreads(self, reference):
		"""re.sub callback: turn a [kind:text|id] reference into a markdown link.

		reference is the match for the whole bracketed reference, with
		'text|id' in group 1. Without an id the text is only emphasized.
		"""
		# the kind is the word between '[' and ':' in the full match
		kind = reference.group(0)[1:].partition(':')[0]
		reference = reference.group(1)
		parts = reference.split('|')
		if len(parts) == 1:
			#not enough info to go on
			return '*' + reference + '*'
		# author ids belong to author pages; every other kind is linked as a book
		section = 'author' if kind.startswith('author') else 'book'
		href = 'https://www.goodreads.com/%s/show/%s' % (section, parts[1])
		return '[' + parts[0] + '](' + href + ')'

	def _emphasizeReferences(self, review):
		"""Replace each [kind:text|id] reference in review with *text*."""
		# [^\]]* stops at the reference's own closing bracket, so the text
		# between two references on one line is kept
		return re.sub(r'\[[a-z]+:([^\]|]+)[^\]]*\]', '*\\1*', review)

	def formattedReview(self):
		"""Return the review with its HTML converted for the chosen --format.

		<br/> always becomes a newline; for markdown and alarums the inline
		tags, rules, links and Goodreads references are converted as well.
		With --links the book's links are appended after a rule.
		"""
		# a missing review is treated as empty rather than failing on None
		review = self.review or ''
		review = review.replace('<br/>', "\n")
		review = review.strip()

		if args.format in ['markdown', 'alarums']:
			#emphasis
			review = review.replace('<em>', '*')
			review = review.replace('</em>', '*')
			review = review.replace('<strong>', '**')
			review = review.replace('</strong>', '**')
			review = review.replace('<sup>', '^')
			review = review.replace('</sup>', '')
			review = review.replace('<i>', '*')
			review = review.replace('</i>', '*')
			review = review.replace('<b>', '**')
			review = review.replace('</b>', '**')
			# rules already surrounded by newlines first, so they do not gain extra ones
			review = review.replace('\n<hr />\n', '\n* * *\n')
			review = review.replace('<hr />', '\n* * *\n')

			#line breaks
			review = review.replace('<br />', "  \n")

			#blockquotes
			if args.format == 'markdown':
				review = re.sub(r'<blockquote>(.*?)</blockquote>', self.markdownQuotes, review, flags=re.DOTALL)

			#links
			if args.format == 'markdown':
				review = re.sub(r'<a href="([^"]+)">(.*?)</a>', '[\\2](\\1)', review)
			else:
				#replace links with just the emphasized text
				review = re.sub(r'<a href="[^"]+">(.*?)</a>', '*\\1*', review)

			#book and author references
			if args.format == 'markdown':
				#link Goodreads references
				review = re.sub(r'\[[a-z]+:(.*?)]', self.markdownGoodreads, review)
			else:
				#replace Goodreads references with just the emphasized text
				review = self._emphasizeReferences(review)

		if args.links:
			review += "\n\n* * *\n" + self.links(prefix="\n")
		return review

# construct query
# Each active option adds one WHERE condition to `searches` and its bound
# value(s), in the same order, to `searchValues`.
query = 'SELECT * FROM books'
searches = []
searchValues = []
if args.author:
	searches.append('author LIKE ?')
	searchValues.append('%'+args.author+'%')

if args.bookshelf:
	# Filter through a subquery instead of joining the shelf tables: a join
	# returns a book once per matching shelf (so it would be listed and
	# counted more than once) and adds a second `id` column to the row.
	searches.append('books.id IN (SELECT bookshelves.bookId FROM bookshelves JOIN shelves ON bookshelves.shelfId=shelves.id WHERE shelves.shelf LIKE ?)')
	searchValues.append(args.bookshelf+'%')

if args.kind:
	searches.append('kind=?')
	searchValues.append(args.kind)

if args.rating:
	searches.append('rating >= ?')
	searchValues.append(args.rating)

if args.rating_exact:
	searches.append('rating IS ?')
	searchValues.append(args.rating_exact)

if args.search:
	searches.append('(title LIKE ? OR author LIKE ?)')
	searchValues.append('%'+args.search+'%')
	searchValues.append('%'+args.search+'%')

if args.title:
	searches.append('title LIKE ?')
	searchValues.append('%'+args.title+'%')

if args.days:
	searches.append('dateRead >= ?')
	searchValues.append(date.today() - timedelta(days=args.days))

if args.period:
	# BETWEEN is inclusive at both ends, so startDate and endDate are the
	# first and last day of the period.
	searches.append('dateRead BETWEEN ? AND ?')
	today = date.today()
	if args.period.endswith('-to-date'):
		endDate = today
		if args.period.startswith('year-'):
			startDate = today.replace(month=1, day=1)
		elif args.period.startswith('month-'):
			startDate = today.replace(day=1)
		else:
			# weeks start on Sunday: days elapsed since the latest Sunday
			# (weekday() is 0 for Monday, 6 for Sunday)
			weekSoFar = today.weekday()+1
			if weekSoFar == 7:
				weekSoFar = 0
			startDate = today - timedelta(days=weekSoFar)
	else:
		if args.period == 'year':
			startDate = date(today.year-1, 1, 1)
			endDate = date(today.year-1, 12, 31)
		elif args.period == 'month':
			month = today.month - 1
			year = today.year
			if month == 0:
				month = 12
				year = year - 1
			startDate = date(year, month, 1)
			# last day of the previous month is the day before this month's first
			endDate = today.replace(day=1) - timedelta(days=1)
		else:
			# previous complete week: ends on the latest Saturday before today
			weekSoFar = today.weekday()+2
			if weekSoFar == 8:
				weekSoFar = 1
			endDate = today - timedelta(days=weekSoFar)
			# Sunday through Saturday is seven days inclusive, so the start
			# is six days before the end (seven would include the prior Saturday)
			startDate = endDate - timedelta(days=6)

	searchValues.append(startDate)
	searchValues.append(endDate)

if args.year:
	# the strftime format is a single-quoted SQL string literal
	searches.append("CAST(strftime('%Y', dateRead) as INTEGER) = ?")
	searchValues.append(args.year)

if args.month:
	month = months.index(args.month)+1
	searches.append("CAST(strftime('%m', dateRead) as INTEGER) = ?")
	searchValues.append(month)

if args.since:
	searches.append('dateRead >= ?')
	searchValues.append(args.since.strftime('%Y-%m-%d'))

if searches:
	query += ' WHERE ' + ' AND '.join(searches)

# --order only applies to the list format; every other format is sorted by
# date read, oldest first
if args.format != 'list':
	sortField = sortFields['recent']
else:
	sortField = sortFields[args.order]

query += ' ORDER BY ' + sortField

# perform search
if args.verbose:
	print query
	print

resultCount = 0
results = cursor.execute(query, searchValues)
results = list(results)
if args.reverse:
	results.reverse()

for row in results:
	book = Book(row)
	resultCount += 1
	if args.format in ['markdown', 'alarums']:
		print book.line(), book.formattedReview()
		if book.review:
			print
	else:
		print book.line()
		if args.review and book.review:
			print
			print book.formattedReview()
			print

if args.format != 'table':
	print "Found %i book%s." % (resultCount, 's'[resultCount==1:])

# clean up
# commit is necessary for testing, if writing to a real file
sq3connection.commit()
sq3connection.close()
