SQL Queries and Database Schema Examples

Q5 (WorldDB): Return the number of cities that are in Australian territories (i.e., cities
in countries where the form of government is ‘Territory of Australia’). 
select count(*)
from city
where city.countrycode  in
(select code
from country
where governmentform='Territory of Australia');

Q6 (WorldDB): Return the number of countries in South America that have no official
languages. 
select count(*) from(
select name, count(isofficial) 
from country left outer join countrylanguage
on code=countrycode and isofficial='T' 
where continent ='South America' 
group by name
having count(isofficial)=0) x;

elfrykBm8UAAAAASUVORK5CYII=

Q1) CREATE TABLE makes(pname varchar(10), 
                category varchar(10), 
                cname varchar(10) references company, 
                year varchar(10), 
                primary key(pname,category), 
                foreign key(pname,category) references product);

B5RbzQ+bYjfrAAAAAElFTkSuQmCC

Q2) CREATE TABLE article(id varchar(10) primary key references publication, 
                journal varchar(10), 
                volume varchar(10), 
                number varchar(10));

agcBEAABEAABEHA4gebmZioqKqLVq1fLGSVRUVGyxX19fXJ7YHaKdHV10fz58x1OAs0DARAAARAAAWsTgIPE2vqBdCAAAiAAAiAAAg4g0NnZSdnZ2ZSWlkZtbW30yCOPUFZWFqWkpFB7ezvFxMQ4oJVoAgiAAAiAAAjYmwAcJPbWH6QHARAAARAAARCwCYHe3l666KKL5DbALPKqVauoqamJeHkNEgiAAAiAAAiAgPkE4CAxXweQAARAAARAAARAwCUEBgcH6dJLL5WOkhtuuGFaTBKXYEAzQQAEQAAEQMCSBOAgsaRaIBQIgAAIgAAIgAAIgAAIgAAIgAAIgICRBOAgMZI26gIBEAABEAABEAABEAABEAABEAABELAkAThILKkWCAUCIAACIAACIAACIAACIAACIAACIGAkAThIjKSNukAABEAABEAABEAABEAABEAABEAABCxJAA4SS6oFQoEACIAACIAACIAACIAACIAACIAACBhJAA4SI2mjLhAAARAAARAAARAAARAAARAAARAAAUsSgIPEkmqBUCAAAiAAAiAAAiAAAiAAAiAAAiAAAkYS+B8DvwRDCB9yYgAAAABJRU5ErkJggg==

Q3) SELECT 5;

YnTj5QgggIBTAgRgTknSDgIIIIAAAggggAACCCCAAAIIIICAKwUIwFx5WugUAggggAACCCCAAAIIIIAAAggggIBTAgRgTknSDgIIIIAAAggggAACCCCAAAIIIICAKwUIwFx5WugUAggggAACCCCAAAIIIIAAAggggIBTAgRgTknSDgIIIIAAAggggAACCCCAAAIIIICAKwUIwFx5WugUAggggAACCCCAAAIIIIAAAggggIBTAgRgTknSDgIIIIAAAggggAACCCCAAAIIIICAKwUIwFx5WugUAggggAACCCCAAAIIIIAAAggggIBTAgRgTknSDgIIIIAAAggggAACCCCAAAIIIICAKwUIwFx5WugUAggggAACCCCAAAIIIIAAAggggIBTAgRgTknSDgIIIIAAAggggAACCCCAAAIIIICAKwUIwFx5WugUAggggAACCCCAAAIIIIAAAggggIBTAgRgTknSDgIIIIAAAggggAACCCCAAAIIIICAKwUIwFx5WugUAggggAACCCCAAAIIIIAAAggggIBTAgRgTknSDgIIIIAAAggggAACCCCAAAIIIICAKwX+H8ohcqATeBsqAAAAAElFTkSuQmCC

Q1)
create table B(c int, d int, primary key(c));
create table A(a int, b int, c int, primary key(a, c), foreign key(c) references B);
create table C(a int, c int, e int, primary key(a, c), foreign key(a, c) references A);
create table D(g int, h int, primary key(g));
create table S(a int, c int, g int, f int, primary key(a, c, g), foreign key(a,c) 
references c, foreign key(g) references D);

Q2) Assume two tables T1 and T2 defined with the following statements:
CREATE TABLE T1 (a int PRIMARY KEY, b int);
CREATE TABLE T2 (c int PRIMARY KEY,
                d int NOT NULL REFERENCES T1(a));

T1 has 10 tuples and T2 has 5 tuples. Answer the following questions:
How many tuples are in the result of the following query?
SELECT * FROM T1, T2 WHERE a=d;
select 5;

Q3) How many tuples are in the result of the following query?
SELECT * FROM T1 RIGHT OUTER JOIN T2 ON a=d;
select 5;

Q4) How many tuples are in the result of the following query?
SELECT a, sum(b) FROM T1 GROUP BY a;
select 10;

CREATE TABLE employees (id varchar(5),
                        fname varchar(15),
                        lname varchar(15),
                        dept varchar(15),
                        salary int,
                        rank int)

Q1) Create an index that will make the following query faster.
SELECT id FROM employees WHERE rank >=32;
create index emprank on employees(rank);

Q2) Create an index that will make the following query faster.
SELECT fname FROM employees WHERE lname = ’Smith’;
create index emplname on employees(lname);

Q3) Create an index that will make both of the following queries faster. Choose 
the best index for full credit.
SELECT id FROM employees WHERE lname = ’Blake’ and fname = ’Joe’;
SELECT id FROM employees WHERE lname = ’Crain’;
create index emplnamefname on employees(lname, fname);

Q4) Indicate whether the following statement is true or false: If you create two indexes
on a relation, you can cluster both of them.
SELECT false;

Q1) Consider a social network database about people and their relationships. The database
has two relations:
Person ( pid, name )
Relationship ( pid1, rel, pid2 )
The key fields are underlined: Person.pid is the key for Person, and Relationship.pid1
and Relationship.pid2 are foreign keys to Person. rel is a string representing the
relationship type, and it can only be ’friend’ or ’enemy’. So, a tuple (1,’friend’,2),
means that the person with id 1 considers the person with id 2 a friend. Note that the
relationship is not symmetric: if Alice is friend with Bob, it does not imply that Bob is
friend with Alice.

Write a SQL query that computes, for every person, the total number of the
people they consider friends. Your query should return results containing the pid,
the name, and the count. Note that your query must return exactly one answer for
every person in Person.

SELECT x.pid, x.name, count(pid2)
FROM Person x LEFT OUTER JOIN Relationship y
ON x.pid = y.pid1 AND y.rel = ’friend’
GROUP BY x.pid, x.name

Write a SQL query that returns pairs of people (A,B), such that A considers B a
friend, and B considers A an enemy. Your query should return results containing
pairs of names.

SELECT x.name, y.name
FROM Person x, Person y,
     Relationship r1, Relationship r2
WHERE r1.pid1 = x.pid AND r1.pid2 = y.pid
AND r1.rel = ’friend’
AND r2.pid1 = y.pid AND r2.pid2 = x.pid
AND r2.rel = ’enemy’

Q2) Consider the following schema:
Vehicle ( VIN, model, year )
Driver ( license, name, age )
Insured ( license, VIN, premium )
The key fields are underlined: VIN is the key for Vehicle, license is the key for Driver,
and VIN and license together form the key for Insured. Also, Insured.license is a
foreign key that references Driver.license, and Insured.VIN is a foreign key that
references Vehicle.VIN.

Find the VINs of vehicles that are insured for a driver between 20 and 30 years of age.
SELECT I.VIN
FROM Insured I, Driver D
WHERE D.age > 20 AND D.age < 30 AND D.license = I.license

Find the VINs of vehicles that are insured for some driver under 25 years of age
and another driver who is over 50 years of age.
SELECT I1.VIN
FROM Insured I1, Driver D1, Insured I2, Driver D2
WHERE D1.age < 25 AND D2.age > 50 AND D2.license = I2.license
      AND I1.VIN = I2.VIN AND D1.license = I1.license

Some vehicles are operated by more than one driver, and a different premium may
be charged for each driver for the same vehicle. Find pairs of license numbers such
that the driver with the first license number gets charged a higher premium for the
same vehicle compared to the driver with the second license number.
SELECT I1.license, I2.license
FROM Insured I1, Insured I2
WHERE I1.VIN = I2.VIN
      AND I1.premium > I2.premium

For every driver, find how many vehicles are insured under their license.
SELECT D.license, count(I.VIN)
FROM Driver D LEFT OUTER JOIN Insured I ON D.license = I.license
GROUP BY D.license

+opnq2nQQAAAAASUVORK5CYII=

CREATE TABLE Employee (eid int primary key,
name text);
CREATE TABLE Manager (eid int primary key references Employee,
unit int);
CREATE TABLE Reports (eid int PRIMARY KEY references Employee,
mid int references Manager(eid));

Indexes
Consider the following update:
UPDATE Part SET price = price * 1.1
WHERE pid in (SELECT U.pid
        FROM Supply U, Supplier S
        WHERE S.sid = U.sid AND S.city = ‘Toronto’)

a) The optimizer will consider the use of a clustered hash index on Supply(sid). True 
b) The optimizer will consider the use of a clustered B+tree index on Part(price). False
c) An index on Supply(sid) could slow down the update. False
d) An index on Part(price) could slow down the update. True


Company(cname varchar(30) primary key, country varchar(30)); 
Product(pname varchar(30) primary key, price float, category varchar(30), manufacturer varchar(30)
references Company); 
Sales(pname varchar(30) references Product, sold int, month varchar(10), primary key (pname, month));
Employees(empID integer primary key, name varchar(50), phone varchar(12), managerID integer);
Projects(empID integer, project varchar(40)); 

Q5) List all employee names and the projects that each is involved in. An employee may
appear multiple times in the result if they are involved in multiple projects. Order the
result alphabetically by employee name, and secondarily by project name. 
SELECT e.name, p.project
FROM Employees e, Projects p
WHERE e.empID = p.empID
ORDER BY e.name, p.project;

Q6) Return a list of all employee names who work on both the ‘Web archive’, and ‘Phone
app’ projects. Order the results alphabetically by employee name.
SELECT e.name
FROM Employees e, Projects p1, Projects p2
WHERE e.empid = p1.empid AND e.empid = p2.empid AND p1.project = 'Web archive' AND
p2.project = 'Phone app' 
ORDER BY e.name;

Q7) Retrieve the average price of all products. Your query should return a single value
SELECT avg(price)
FROM Product;

Q8) Retrieve the average price for each category of products. Your query should return
a list of categories and the average price of all products within each category
SELECT category, avg(price)
FROM Product
GROUP BY category;

Q9) Compute the average price of all products cheaper than $150, for each category of products.
SELECT category, avg(price)
FROM Product
WHERE price < 150 GROUP BY category;

Q10) Retrieve the list of countries with companies that have sold products during the
month of ‘June’. Make sure that the same country does not appear multiple times on your list.
SELECT DISTINCT c.country
FROM Company c, Product p, Sales s
WHERE p.manufacturer = c.cname AND p.pname = s.pname AND s.month = 'June';

Q11) Return a list of names of all employees who work on two (or more) distinct projects. != not equal to
SELECT DISTINCT e.name
FROM Employees e, Projects p1, Projects p2
WHERE e.empid = p1.empid AND e.empid = p2.empid AND p1.project != p2.project;

Q12) During which months of the year have companies from Korea sold products? Your
query should return a list of the appropriate months.
SELECT DISTINCT s.month
FROM Sales s, Product p, Company c
WHERE s.pname = p.pname AND p.manufacturer = c.cname AND c.country = 'Korea';

Q13) Return a list of all employee names and the names of their managers. The first
column in your answer should give the name of the employee, and the second column
the name of the corresponding manager. Do not omit any employees even if they have no manager.
SELECT e1.name, e2.name AS manager
FROM Employees e1 LEFT OUTER JOIN Employees e2 ON e1.managerid = e2.empid;

Q14) Compute the total number of sales for each product, only for those products that
have sold more than 2 items in total. In your result, the first column should have the
product name, and the second column should list the number of sold items for that product.
SELECT pname, sum(sold)
FROM sales 
GROUP BY pname
HAVING sum(sold)>2;

Q15) For each country, compute how many ‘Gadgets’ products it produces. In your
result, the first column should have the name of the country, and the second column the
number of Gadgets products for that country. 
SELECT country, count(*)
FROM product, company
WHERE cname = manufacturer AND category = 'Gadgets'
GROUP BY country;

Q16) For each category, compute how many countries produce products in that category.
In your result, the first column should have the category, and the second column should
have the appropriate number. 
SELECT p.category, count(distinct c.country)
FROM Product p, Company c
WHERE p.manufacturer = c.cname
GROUP BY p.category;

Q17) For each project, list how many employees are assigned to it. In your result, the
first column should list all projects, and the second column should have the appropriate numbers.
SELECT p.project, count(e.name)
FROM employees e RIGHT OUTER JOIN projects p ON e.empid = p.empid
GROUP BY p.project;

Q18) For each product, find the month during which the product had the most sales out
of the year. For example, if out of all the months of the year, Gizmo had the most sales
in February, then the tuple (Gizmo, February) should be in the result. 
SELECT s1.pname, s1.month
FROM sales s1
WHERE s1.sold = (SELECT max(s2.sold) 
                FROM sales s2 
                WHERE s2.pname = s1.pname);

actor (id, fname, lname, gender)
movie (id, name, year)
directors (id, fname, lname)
casts (pid, mid, role)
movie_directors (did, mid)
genre (mid, genre)
• actor.id, movie.id, and director.id are primary keys for the corresponding tables
• casts.pid is a foreign key to actor.id
• casts.mid is a foreign key to movie.id
• movie directors.did is a foreign key to directors.id
• movie directors.mid is a foreign key to movie.id
• genre.mid is a foreign key to movie.id

Q1) Return the names of movies directed by Frank Darabont. Eliminate any duplicates
in the result. 
select distinct m.name 
from directors d, movie_directors md, movie m
where d.id=md.did and md.mid=m.id and d.fname = 'Frank' and d.lname = 'Darabont';

Q2) Compute the number of roles that were cast in ‘The Shawshank Redemption’. Do
not eliminate duplicates in case of multiple roles with the same name, or multiple movies
with the same title.
select count(c.role)
from Casts c, Movie m
where c.mid = m.id AND m.name='The Shawshank Redemption';

Q3) Compute how many actors appeared in a movie released in 1995.
select count(distinct x.id)
from actor x, casts xy, movie y
where x.id = xy.pid AND xy.mid = y.id AND y.year = 1995;

Q4) Compute how many actors appeared in a movie released in 1995 and also in a movie
released in 2010.
select count(distinct x.id)
from actor x, casts xy, casts xz, movie y, movie z
where x.id = xy.pid AND x.id = xz.pid AND xy.mid = y.id
AND xz.mid = z.id AND y.year = 1995 AND z.year = 2010;

Q5) Return the number of actors who have played two (or more) roles in the same
movie. (If an actor has played multiple roles in several movies, they should be counted
only once.)
select count(distinct a.id) 
from actor a, casts c1, casts c2 
where a.id=c1.pid and a.id=c2.pid and c1.mid=c2.mid and c1.role != c2.role;

Q6) Compute how many movies were filmed in 1890. The output of your query should
be a number.
select count(*) 
from movie 
where year=1890;

Q7) For each year between 1890 and 1899 (including 1890 and 1899), compute how many
movies were filmed that year. Your query should return the year in the first column and
the appropriate number in the second column. Order the results in increasing order of the year.
select year, count(*) 
from movie 
where year>=1890 and year <= 1899 group by year 
order by year;

Q8) List all directors who directed 40 or more ‘Thriller’ movies, in descending order of
the number of thrillers they directed. Return the directors’ first and last names and the
number of thrillers each of them directed.
select x.fname, x.lname, count(*) as c
from directors x, movie_directors y, genre g
where x.id = y.did AND g.genre = 'Thriller' AND g.mid = y.mid
group by x.id, x.fname, x.lname
having count(*) >= 40
order by c desc;

Q9) List all actors who appeared in 40 or more ‘Thriller’ movies, in descending order of
the number of thrillers they appeared in. Return the actors’ first and last names and
the number of thrillers each of them appeared in. Output format is the same as Q3.
select x.fname, x.lname, count(distinct y.mid) as c
from actor x, casts y, genre g
where x.id = y.pid AND g.genre = 'Thriller' AND g.mid = y.mid
group by x.id, x.fname, x.lname
having count(distinct y.mid) >= 40
order by c desc;

Q10) Return the maximum number of thrillers that any director has directed. The output
of your query should be a number
select max(c) from
(select count(*) as c
from movie_directors y, genre g
where g.genre = 'Thriller' AND g.mid = y.mid
group by y.did) m;

Q11) We want to find actors who played exactly five distinct roles in the same movie
during the year 1990. Write a query that returns the actors’ first name, last name, and
movie name. 
select x.fname, x.lname, y.name
from actor x, casts xy, movie y
where x.id = xy.pid AND xy.mid = y.id
    AND y.year = 1990
group by x.id, x.fname, x.lname, y.id, y.name
having count(distinct xy.role) = 5;

vavBPwgEgSAQBP7ZCPichR1uWckZqi17SAoCQaCrgv2ggw7q1q9f323YsKHz+75IyxJIwwKJIxM2KQgEgf8JpIiijIQgEASCQBCYNQK4VwzUs0Y5+c8LArjXSneqW6yNKxZIi2Wac0EgCASBIBAEgkAQCAJBIAgEgXlEIAJpHnstdQ4CQSAIBIEgEASCQBAIAkFgJghEIM0E1mQaBIJAEAgCQSAIBIEgEASCwDwiEIE0j72WOgeBIBAEgkAQCAJBIAgEgSAwEwQikGYCazINAkEgCASBIBAEgkAQCAJBYB4RiECax15LnYNAEAgCQSAIBIEgEASCQBCYCQIRSDOBNZkGgSAQBIJAEAgCQSAIBIEgMI8IRCDNY6+lzkEgCASBIBAEgkAQCAJBIAjMBIEIpJnAmkyDQBAIAkEgCASBIBAEgkAQmEcEIpDmsddS5yAQBIJAEAgCQSAIBIEgEARmgkAE0kxgTaZBIAgEgSAQBIJAEAgCQSAIzCMCEUjz2GupcxAIAkEgCASBIBAEgkAQCAIzQSACaSawJtMgEASCQBAIAkEgCASBIBAE5hGBCKR57LXUOQgEgSAQBIJAEAgCQSAIBIGZIBCBNBNYk2kQCAJBIAgEgSAQBIJAEAgC84hABNI89lrqHASCQBAIAkEgCASBIBAEgsBMEIhAmgmsyTQIBIEgEASCQBAIAkEgCASBeUQgAmkeey11DgJBIAgEgSAQBIJAEAgCQWAmCEQgzQTWZBoEgkAQCAJBIAgEgSAQBILAPCIQgTSPvZY6B4EgEASCQBAIAkEgCASBIDATBCKQZgJrMg0CQSAIBIEgEASCQBAIAkFgHhH4L2pTUbpODQrWAAAAAElFTkSuQmCC

wNakFGexPztngAAAABJRU5ErkJggg==

ag4w3EAAAAAElFTkSuQmCC

 Q1 (IMDB): Return the year during which the most movies were released.
select year
from movie where year is not NULL
group by year
order by count(*) desc limit 1;

Q2 (DBLP): Return the number of authors have published an article in January but
don’t have a homepage.
select count(distinct author.id) 
from author, article, authored 
where authored.id = author.id and authored.pubid = article.pubid
and month = 'January' and homepage IS NULL;

Q3 (WorldDB): Return the name of the city that has the largest population in a country
which has exactly 11 languages. 
SELECT ct.name 
FROM (SELECT name, code FROM country, countrylanguage 
    WHERE country.code = countrylanguage.countrycode 
    GROUP BY name, code HAVING count(language) = 11) c, city ct 
WHERE ct.countrycode = c.code 
ORDER BY ct.population desc LIMIT 1;

Q4 (IMDB): Return the first and last name of the actor who appeared in the most movies
in the year 2000.
SELECT actor.fname, actor.lname
FROM actor
JOIN casts ON actor.id = casts.pid
JOIN movie ON casts.mid = movie.id
WHERE movie.year=2000
GROUP BY actor.id, actor.fname, actor.lname
ORDER BY COUNT(distinct casts.mid) DESC
LIMIT 1;