Browse Source

backfill movie data

tmdb
sipp11 10 years ago
parent
commit
9ccf8a19dd
  1. 3
      flasky.py
  2. 0
      modules/__init__.py
  3. 146
      movies.py
  4. 1
      pip_requirements.txt
  5. 3
      settings.py

3
flasky.py

@ -7,6 +7,7 @@ import bson
from datetime import timedelta
from functools import update_wrapper
from settings import DATABASE
def crossdomain(origin=None, methods=None, headers=None,
@ -52,7 +53,7 @@ def crossdomain(origin=None, methods=None, headers=None,
app = Flask(__name__)
client = MongoClient(**{'host': 'localhost', 'port': 27017})
client = MongoClient(**DATABASE)
db = client.showtimes
miscObjHandler = lambda obj: (

0
modules/__init__.py

146
movies.py

@ -0,0 +1,146 @@
from __future__ import absolute_import, print_function
from pymongo import MongoClient
from settings import DATABASE, TMDB_APIKEY
from datetime import date
import tmdbsimple as tmdb
import sys
import time
# Shared MongoDB connection, configured from settings.DATABASE.
client = MongoClient(**DATABASE)
# Handle on the "showtimes" database used by the functions below.
db = client.showtimes
# Hand the API key from settings to tmdbsimple.
tmdb.API_KEY = TMDB_APIKEY
def update_showtimes(_id, movie):
    '''
    Attach movie identifiers to one showtimes document.

    Sets ``movie_id`` and ``poster_path`` on the db.showtimes document whose
    ``_id`` matches (the update is issued with the upsert flag on), then makes
    sure the movie is also stored in db.movies via ``add_movie``.

    _id   -- ``_id`` of the showtimes document to update
    movie -- mapping that provides at least ``id`` and ``poster_path``

    Returns whatever ``db.showtimes.update`` returns.
    '''
    fields = {
        'movie_id': movie['id'],
        'poster_path': movie['poster_path'],
    }
    # The third positional argument of ``update`` is the upsert flag.
    outcome = db.showtimes.update({'_id': _id}, {'$set': fields}, True)
    # Store the movie in db.movies as well if it is not there yet.
    add_movie(movie)
    return outcome
def add_movie(movie):
    '''
    Make sure the given movie is stored in db.movies.

    The movie is looked up by its ``id``. An existing document is returned
    untouched; otherwise the full record is fetched with
    ``tmdb.Movies(id).info()`` and inserted.

    Returns the existing document when there is one, else the result of
    ``insert`` for the freshly fetched record.

    Sample of a full record as fetched:
    {
        u'poster_path': u'/otcv3wWKz0vJyicOZgz2AheJ5UV.jpg',
        u'production_countries': [
            {u'iso_3166_1': u'US', u'name': u'United States of America'}
        ],
        u'revenue': 0,
        u'overview': u'........................',
        u'id': 245891,
        u'genres': [
            {u'id': 28, u'name': u'Action'},
            {u'id': 53, u'name': u'Thriller'}],
        u'title': u'John Wick',
        u'tagline': u'',
        u'vote_count': 5,
        u'homepage': u'',
        u'belongs_to_collection': None,
        u'status': u'Released',
        u'spoken_languages': [],
        u'imdb_id': u'tt2911666',
        u'adult': False,
        u'backdrop_path': u'/umC04Cozevu8nn3JTDJ1pc7PVTn.jpg',
        u'production_companies': [],
        u'release_date': u'2014-10-24',
        u'popularity': 3.044875809,
        u'original_title': u'John Wick',
        u'budget': 0,
        u'vote_average': 8.4,
        u'runtime': 0}
    '''
    movies = db.movies
    existing = movies.find_one({'id': movie['id']})
    if not existing:
        # Not stored yet -- fetch the full info and insert it.
        full_info = tmdb.Movies(movie['id']).info()
        return movies.insert(full_info)
    return existing
def find_movie(qword):
    '''
    Find a movie by title: look in db.movies first, then fall back to tmdb.

    qword -- movie title; matched exactly against ``title`` in db.movies and
             used as the query string for the tmdb search.

    Returns the db.movies document when the title is already stored.
    Otherwise returns the first tmdb search result, or [] when the search
    yields nothing. Either way a truthy result provides the ``id`` and
    ``poster_path`` keys, e.g. a search hit looks like:
    {
        u'poster_path': u'/otcv3wWKz0vJyicOZgz2AheJ5UV.jpg',
        u'title': u'John Wick',
        u'release_date': u'2014-10-24',
        u'popularity': 3.044875809,
        u'original_title': u'John Wick',
        u'backdrop_path': u'/umC04Cozevu8nn3JTDJ1pc7PVTn.jpg',
        u'vote_count': 5,
        u'adult': False,
        u'vote_average': 8.4,
        u'id': 245891
    }
    '''
    cached = db.movies.find_one({'title': qword})
    if cached:
        print('FOUND')
        print(cached)
        # Already stored locally -- no need to query tmdb again.
        return cached

    print('NOT FOUND')
    # Fall back to tmdb; the call fills in ``search.results``.
    search = tmdb.Search()
    search.movie(query=qword)
    if not search.results:
        return []
    # Multiple matches are not disambiguated yet, so take the first one.
    return search.results[0]
def main(argv):
    '''
    Backfill movie information on db.showtimes documents.

    Walks every showtimes document that has no ``movie_id`` or has an empty
    ``movie_id`` / ``poster_path``, looks its ``movie`` title up with
    ``find_movie`` and, on a hit, stores the result via ``update_showtimes``.
    Titles that could not be resolved are remembered so they are not looked
    up twice in the same run.

    argv -- command-line arguments; not used at the moment.
    '''
    print('Fetching Movie without information')
    unresolved = []
    pending = db.showtimes.find({
        '$or': [
            {'movie_id': {'$exists': False}},
            {'movie_id': ''},
            {'poster_path': ''},
        ]
    })
    for showtime in pending:
        title = showtime['movie']
        # A previous lookup for this title already failed; do not retry.
        if title in unresolved:
            print('Nah -- you will not find this: ', title)
            continue
        # Pause before each lookup (presumably to go easy on the tmdb API).
        time.sleep(1)
        match = find_movie(title)
        if not match:
            unresolved.append(title)
            continue
        update_showtimes(showtime['_id'], match)
if __name__ == '__main__':
    # Run the backfill when executed as a script; drop the program name.
    main(sys.argv[1:])

1
pip_requirements.txt

@ -2,3 +2,4 @@ requests==2.4.0
tornado>=4.0
flask
python-dateutil
tmdbsimple

3
settings.py

@ -5,7 +5,8 @@ from tornado.options import define, options
import os
import logconfig
# TMDB API key. The TMDB_APIKEY environment variable takes precedence so the
# credential does not have to live in source control; the literal is kept only
# as a backward-compatible fallback.
# NOTE(review): this key has been committed to the repository -- consider
# rotating it and removing the fallback.
TMDB_APIKEY = os.environ.get('TMDB_APIKEY', '4885bf6b6de87bc56599ad0147a0818f')
# Keyword arguments for the MongoDB client (used as MongoClient(**DATABASE)).
DATABASE = {'host': 'localhost', 'port': 27017}
# Make filepaths relative to settings.
# path(root, *parts) joins the parts onto root with os.path.join.
path = lambda root, *a: os.path.join(root, *a)
# Absolute path of the directory that contains this settings file.
ROOT = os.path.dirname(os.path.abspath(__file__))

Loading…
Cancel
Save