sqlite> .tables akas crew episodes people ratings titles
为每个表创建索引
1 2 3 4 5 6 7 8
CREATE INDEX ix_people_name ON people (name); CREATE INDEX ix_titles_type ON titles (type); CREATE INDEX ix_titles_primary_title ON titles (primary_title); CREATE INDEX ix_titles_original_title ON titles (original_title); CREATE INDEX ix_akas_title_id ON akas (title_id); CREATE INDEX ix_akas_title ON akas (title); CREATE INDEX ix_crew_title_id ON crew (title_id); CREATE INDEX ix_crew_person_id ON crew (person_id);
检查数据库模式
people表的属性有身份证号,名字,出生日期,死亡日期,索引为姓名。
1 2 3 4 5 6 7 8
sqlite> .schema people CREATE TABLE people ( person_id VARCHAR PRIMARY KEY, name VARCHAR, born INTEGER, died INTEGER ); CREATE INDEX ix_people_name ON people (name);
其中的一项为
1
nm0000003|Brigitte Bardot|1934|
title表为电影/电视剧/短视频/视频的具体信息
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
sqlite> .schema titles CREATE TABLE titles ( title_id VARCHAR PRIMARY KEY, type VARCHAR, primary_title VARCHAR, original_title VARCHAR, is_adult INTEGER, premiered INTEGER, ended INTEGER, runtime_minutes INTEGER, genres VARCHAR ); CREATE INDEX ix_titles_type ON titles (type); CREATE INDEX ix_titles_primary_title ON titles (primary_title); CREATE INDEX ix_titles_original_title ON titles (original_title);
akas表为标题的语言,地区等信息,上映电影在其余国家的信息,标题可能会翻译成上映国家的版本。
1 2 3 4 5 6 7 8 9 10 11
CREATE TABLE akas ( title_id VARCHAR, -- REFERENCES titles (title_id), title VARCHAR, region VARCHAR, language VARCHAR, types VARCHAR, attributes VARCHAR, is_original_title INTEGER ); CREATE INDEX ix_akas_title_id ON akas (title_id); CREATE INDEX ix_akas_title ON akas (title);
crew表为某剧对应的人物信息。
1 2 3 4 5 6 7 8
CREATE TABLE crew ( title_id VARCHAR, -- REFERENCES titles (title_id), person_id VARCHAR, -- REFERENCES people (person_id), category VARCHAR, job VARCHAR ); CREATE INDEX ix_crew_title_id ON crew (title_id); CREATE INDEX ix_crew_person_id ON crew (person_id);
WITH types(type, runtime_minutes) AS ( SELECT type, MAX(runtime_minutes) FROM titles GROUP BY type ) SELECT titles.type, titles.primary_title, titles.runtime_minutes FROM titles JOIN types ON titles.runtime_minutes == types.runtime_minutes AND titles.type == types.type ORDER BY titles.type, titles.primary_title ;
可通过with子句定义临时关系,再通过join运算限定title条目。
查询结果如下
1 2 3 4 5 6 7 8 9 10 11
movie|Logistics|51420 short|Kuriocity|461 tvEpisode|Téléthon 2012|1800 tvMiniSeries|Kôya no yôjinbô|1755 tvMovie|ArtQuench Presents Spirit Art|2112 tvSeries|The Sharing Circle|8400 tvShort|Paul McCartney Backstage at Super Bowl XXXIX|60 tvShort|The People Next Door|60 tvSpecial|Katy Perry Live: Witness World Wide|5760 video|Midnight Movie Madness: 50 Movie Mega Pack|5135 videoGame|Flushy Fish VR: Just Squidding Around|1500
q3
列出所有类型的title,并给出相关条目的数量
按照类型升序
1
SELECT type, count(*) AS title_count FROM titles GROUP BY type ORDER BY title_count ASC;
SELECT CAST(premiered/10*10 AS TEXT) || 's' AS decade, COUNT(*) AS num_movies FROM titles WHERE premiered is not null GROUP BY decade ORDER BY num_movies DESC ;
SELECT CAST(premiered/10*10 AS TEXT) || 's' AS decade, ROUND(CAST(COUNT(*) AS REAL) / (SELECT COUNT(*) FROM titles) * 100.0, 4) as percentage FROM titles WHERE premiered is not null GROUP BY decade ORDER BY percentage DESC, decade ASC ;
WITH translations AS ( SELECT title_id, count(*) as num_translations FROM akas GROUP BY title_id ORDER BY num_translations DESC, title_id LIMIT 10 ) SELECT titles.primary_title, translations.num_translations FROM translations JOIN titles ON titles.title_id == translations.title_id ORDER BY translations.num_translations DESC ;
查询结果如下
1 2 3 4 5 6 7 8 9 10
Mutant Virus: Vol. 1|126 The Good, the Bad and the Ugly|73 Star Wars: Episode V - The Empire Strikes Back|71 Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb|68 Raiders of the Lost Ark|62 Star Wars: Episode VII - The Force Awakens|62 The Shawshank Redemption|61 Once Upon a Time in the West|60 Indiana Jones and the Kingdom of the Crystal Skull|60 Airplane!|59
q7
列出IMDB中电影权重最高的250个
权重的计算方式为
1
Weighted rating (WR) = (v/(v+m)) * R + (m/(v+m)) * C
R为电影平均评分
v为评分人数
m为25000
C为所有电影的平均权重
weight的计算放入select语句中。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
WITH av(average_rating) AS ( SELECT SUM(rating * votes) / SUM(votes) FROM ratings JOIN titles ON titles.title_id == ratings.title_id AND titles.type == "movie" ), mn(min_rating) AS (SELECT 25000.0) SELECT primary_title, (votes / (votes + min_rating)) * rating + (min_rating / (votes + min_rating)) * average_rating as weighed_rating FROM ratings, av, mn JOIN titles ON titles.title_id == ratings.title_id and titles.type == "movie" ORDER BY weighed_rating DESC LIMIT 250 ;
The Shawshank Redemption|9.27408375213064 Pulp Fiction|8.87296068706845 The Lord of the Rings: The Return of the King|8.87023538818413 Schindler's List|8.85944837786486 12 Angry Men|8.8274654312365 Forrest Gump|8.77394665505175 The Good, the Bad and the Ugly|8.73410047935658 The Matrix|8.67379043068514 The Lord of the Rings: The Two Towers|8.67073273467584 Star Wars: Episode V - The Empire Strikes Back|8.66258036565461 Goodfellas|8.65679704067696 One Flew Over the Cuckoo's Nest|8.65294416076491 The Mountain II|8.61725087838868 City of God|8.54342805417796 Gladiator|8.47143742708625 Back to the Future|8.46332853886572 Léon: The Professional|8.46292056565729 Avengers: Infinity War|8.4506845289751 Psycho|8.4373696976068 Rear Window|8.41674755149826 Once Upon a Time in the West|8.38089735258473 WALL·E|8.36472733136252 Raiders of the Lost Ark|8.36071764723241 The Shining|8.35937143360229 Alien|8.35552530090484 Cinema Paradiso|8.3444698366033 Apocalypse Now|8.34271721766924 Grave of the Fireflies|8.34191265540178 Oldboy|8.33209348956415 Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb|8.3250687044724 Once Upon a Time in America|8.29149205920628 Spider-Man: Into the Spider-Verse|8.27944770565499 Inglourious Basterds|8.27361869242351 American Beauty|8.26993451202779 Reservoir Dogs|8.26439627161116 Requiem for a Dream|8.25798876833464 A Clockwork Orange|8.25742341547537 Toy Story 3|8.25734107724815 Scarface|8.25590438979589 Full Metal Jacket|8.25210853826772 Sunset Blvd.|8.23933898857354 Amadeus|8.21589004599775 Vertigo|8.21216347202948 Like Stars on Earth|8.20481373841464 Your Name.|8.19983566375586 North by Northwest|8.1975172347387 Batman Begins|8.17704665931913 V for Vendetta|8.17129916924205 Die Hard|8.16240649720068 Lock, Stock and Two Smoking Barrels|8.14585507926427 Casino|8.13629764272654 Unforgiven|8.12365074764615 The Apartment|8.12061011853697 Chinatown|8.10376481333931 My Neighbor Totoro|8.09442185609386 Green Book|8.09074160238658 Finding Nemo|8.07145216202776 Catch Me If You Can|8.06592479759894 The Grand Budapest Hotel|8.06129693202741 Blade Runner|8.06098971608469 Fargo|8.05647629910042 Prisoners|8.05390469414222 The Kid|8.05287532584739 Rush|8.03898582269203 Hacksaw Ridge|8.03551702468201 The Seventh Seal|8.03407390030423 The Princess Bride|8.03359206319394 Platoon|8.03211473247254 PK|8.02907703728921 Monty Python's Life of Brian|8.02901738104703 Gone with the Wind|8.01162552164356 Before Sunrise|8.00325016258364 Ben-Hur|7.98708858673506 The Elephant Man|7.98656566725627 The Treasure of the Sierra Madre|7.98183759819495 Aynabaji|7.97740941967294 Yojimbo|7.97326922828868 Star Wars: Episode VII - The Force Awakens|7.97147281541043 Slumdog Millionaire|7.97003387503103 Monsters, Inc.|7.96993549888079 Sin City|7.96864677307258 X-Men: Days of Future Past|7.96426564397762 Ratatouille|7.96198738026476 Jaws|7.95693515580935 Her|7.95337602084149 Cool Hand Luke|7.95284437581594 Akira|7.9466594640111 In the Name of the Father|7.94653731881204 Big Fish|7.94417520197508 Zootopia|7.94391037425039 Barry Lyndon|7.93804803727936 La Haine|7.93713874007203 Network|7.93673499353362 Ikiru|7.93587006088359 Harakiri|7.93442534185635 On the Waterfront|7.93384958766431 Drishyam|7.9316664137302 The Chaos Class Failed the Class|7.92209636316521 Magnolia|7.92151783450757 Swades|7.91791802696218 Children of Heaven|7.91488724265187 Tosun Pasa|7.91175384887872 Dog Day Afternoon|7.90497209143548 Before Sunset|7.90135907545106 Memories of Murder|7.89983909706083 Ip Man|7.89530989242938 Chak de! India|7.89380214739519 Munna Bhai M.B.B.S.|7.88766873353758 The 400 Blows|7.88236058087742 Elite Squad|7.88128931930257 Cinderella Man|7.87893854172733 Judgment at Nuremberg|7.87703872647609 Ayla: The Daughter of War|7.87111640888977 Toy Story 4|7.86727079414856 District 9|7.86716566170036 Edge of Tomorrow|7.86401183134919 Searching for Sugar Man|7.86361014282595 Thor: Ragnarok|7.86067171002219 Harry Potter and the Prisoner of Azkaban|7.86009831177006 The Handmaiden|7.86005695016839 Toy Story 2|7.85903363434152 The Bourne Identity|7.85871357953152 Three Colors: Red|7.8571771641382 Edward Scissorhands|7.85308436175456 Nightcrawler|7.85238182329928 Castle in the Sky|7.8494569785608 Paris, Texas|7.84540229570821 The Breakfast Club|7.84009539226878 Ghost in the Shell|7.8358170251987 Mulholland Dr.|7.83476463500115 Strangers on a Train|7.8340265331899 Papillon|7.83230035374453 The Raid 2|7.82177754544252 The Artist|7.81451845636192 The Man Who Shot Liberty Valance|7.8078751132418 True Romance|7.80358074659557 All the President's Men|7.80348137201329 A Streetcar Named Desire|7.80156298305092 My Name Is Khan|7.80054788180083 Patton|7.79504441225479 Tokyo Story|7.79422849952735 The Blues Brothers|7.79298260954459 The Man from Earth|7.78857212574483 Capernaum|7.78563829871188 Spider-Man: Far from Home|7.78561846386015 Vikram Vedha|7.78373473224346 Titanic|7.78157715805354 The Hobbit: An Unexpected Journey|7.77554863836764 The Passion of Joan of Arc|7.77408091487195 High and Low|7.77213704112255 Sing Street|7.76915812623775 The Legend of 1900|7.76815835496063 Elite Squad: The Enemy Within|7.76812044894699 Fanny and Alexander|7.76800472456941 Taken|7.76769012246336 The Straight Story|7.76752395340217 Kahaani|7.76720011579587 Ocean's Eleven|7.76364685071376 The Sea Inside|7.76289022497323 O.J.: Made in America|7.76250676256312 Before Midnight|7.76132220796731 Home|7.76025351500213 Back to the Future Part II|7.76009005121133 Spring, Summer, Fall, Winter... and Spring|7.7592446422894 Manhattan|7.7591232226754 Crash|7.757187612945 The Girl with the Dragon Tattoo|7.75600754037541 Big Hero 6|7.7544767707715 Predator|7.75096576853116 Spartacus|7.75079774492003 Isle of Dogs|7.7494869658895 The Fighter|7.74677127042658 The Game|7.7466716184388 The Big Short|7.7461656518056 Moonrise Kingdom|7.74269388072724 Pather Panchali|7.74145616936336 Doctor Zhivago|7.74112370242397 Apocalypto|7.73848649857271 About Time|7.7377698449424 Gattaca|7.73647859452984 What Ever Happened to Baby Jane?|7.73539364991903 The Marathon Family|7.73428987458458 Crouching Tiger, Hidden Dragon|7.73132118708057 Sunrise|7.72522570382843 Manchester by the Sea|7.72481321224648 La Dolce Vita|7.72473610906825 Halloween|7.72149520973176 Sherlock Jr.|7.72122901074544 What's Eating Gilbert Grape|7.71837046863796 Diabolique|7.71738924368521 Remember the Titans|7.7131373150204 Amour|7.71206424086562 Earthlings|7.71112168098164 Hidden Figures|7.70836846509536 The Boy in the Striped Pajamas|7.70750686722645 Throne of Blood|7.70486190012118 Nosferatu|7.70479615314119 The Best Years of Our Lives|7.70404091573498 La Strada|7.7035894202021 Blue Velvet|7.70348849922288 Ed Wood|7.70011913851453 Carry On, Munna Bhai|7.6960703370653 The Insider|7.69589363902872 Rio Bravo|7.69547902962826 Do the Right Thing|7.69470637286005 G.O.R.A.|7.69143691201405 John Wick: Chapter 3 - Parabellum|7.69042140474976 The Big Sleep|7.69022732193082 The Best of Youth|7.68939089676166 The Wild Bunch|7.68732321899666 Evil Dead II|7.68698809399903 Airlift|7.68482585880285 Who's Singin' Over There?|7.68275319170322 The King of the Street Cleaners|7.68007124396815 Blue Is the Warmest Color|7.67888277323536 300|7.67811080177943 The Hangover|7.67745136243525 The Chaos Class Is on Vacation|7.67701815540533 Kramer vs. Kramer|7.6734998783239 Special 26|7.67149062884843 Harold and Maude|7.67125075421178 Breathless|7.66994880180493 3-Iron|7.66894856428393 Glory|7.66830445780438 Awakenings|7.66753775055519 Star Trek Into Darkness|7.66655348768595 To Be or Not to Be|7.66618772896992 A Silent Voice|7.66616985054611 Get Out|7.66418042868228 The Fifth Element|7.66340814153364 Zodiac|7.66276470401316 Empire of the Sun|7.66123857830932 Lost in Translation|7.66075124608843 Man with a Movie Camera|7.66048361595785 Tangled|7.66012044434097 Once|7.65980150013791 Midnight in Paris|7.65893069763627 The King of Kong: A Fistful of Quarters|7.65818054427474 Wreck-It Ralph|7.6578968805438 The Salt of the Earth|7.6573311856226 The Fault in Our Stars|7.65410038215701 Time of the Gypsies|7.65354312829129 Lucky Number Slevin|7.64972806598217 Whisper of the Heart|7.64876091986559 O Brother, Where Art Thou?|7.64584931637816 Talvar|7.64560053612369 Donnie Brasco|7.6439766475949 As Good as It Gets|7.64393017265743 The Motorcycle Diaries|7.64367118785382 The Girl with the Red Scarf|7.64023075719264
榜单第一是肖申克的救赎,惊了!
q8
列出和Mark Hamill合作过的男演员/女演员的数量
仅输出数量
1 2 3 4 5 6 7 8 9 10
WITH hamill_titles AS ( SELECT DISTINCT(crew.title_id) FROM people JOIN crew ON crew.person_id == people.person_id AND people.name == "Mark Hamill" AND people.born == 1951 ) SELECT COUNT(DISTINCT(crew.person_id)) FROM crew WHERE (crew.category == "actor" OR crew.category == "actress") AND crew.title_id in hamill_titles ;
WITH hamill_movies(title_id) AS ( SELECT crew.title_id FROM crew JOIN people ON crew.person_id == people.person_id AND people.name == "Mark Hamill" AND people.born == 1951 ) SELECT titles.primary_title FROM crew JOIN people ON crew.person_id == people.person_id AND people.name == "George Lucas" AND people.born == 1944 AND crew.title_id IN hamill_movies JOIN titles ON crew.title_id == titles.title_id AND titles.type == "movie" ORDER BY titles.primary_title ;
查询结果为
1
Star Wars: Episode V - The Empire Strikes Back
仅有一部电影
q10
列出所有不同的标签和相关的titles数目
不同标签之间以逗号间隔,所以需要从genre中提取出对应的标签信息。
1 2 3 4 5 6 7 8 9 10 11 12 13
WITH RECURSIVE split(genre, rest) AS ( SELECT '', genres || ',' FROM titles WHERE genres != "\N" UNION ALL SELECT substr(rest, 0, instr(rest, ',')), substr(rest, instr(rest, ',')+1) FROM split WHERE rest != '' ) SELECT genre, count(*) as genre_count FROM split WHERE genre != '' GROUP BY genre ORDER BY genre_count DESC;