دسته‌ها
Buiness Analysis Data Science Programming Python علمی

Pandas V.S SQL

If you knew SQL before and want to migrate to Python, you can use this article.

۱) Simple select

—-

SELECT total_bill, tip, smoker, time
FROM tips
LIMIT ۵;

—–

tips[[‘total_bill’‘tip’‘smoker’‘time’]].head(۵)

—————

—————

۲) where 

——

SELECT *
FROM tips
WHERE time = ‘Dinner’
LIMIT ۵;

———

tips[tips[‘time’== ‘Dinner’].head(۵)

————

۳) Multiple conditions

— tips of more than $5.00 at Dinner meals

SELECT *
FROM tips
WHERE time = ‘Dinner’ AND tip > ۵.۰۰;

—–

tips[(tips[‘time’== ‘Dinner’& (tips[‘tip’> ۵.۰۰)]

SELECT *
FROM frame
WHERE col2 IS NULL;

frame[frame[‘col2’].isna()]

————————–

۴) Group By 

SELECT sex, count(*)
FROM tips
GROUP BY sex;

———

tips.groupby(‘sex’).size()

sex

Female 87

Male 157

dtype: int64

Notice that in the pandas code we used size() and not count(). This is because count() applies the function to each column, returning the number of not null records within each.

tips.groupby(‘sex’).count()

        total_bill tip smoker day time size
sex
Female        ۸۷   ۸۷   ۸۷     ۸۷   ۸۷   ۸۷
Male         ۱۵۷   ۱۵۷  ۱۵۷   ۱۵۷  ۱۵۷  ۱۵۷

Alternatively, we could have applied the count() method to an individual column:

tips.groupby(‘sex’)[‘total_bill’].count()

Sex

Female 87

Male 157

Name: total_bill, dtype: int64

————–

SELECT dayAVG(tip), COUNT(*)
FROM tips
GROUP BY day;

—-

tips.groupby(‘day’).agg({‘tip’: np.mean, ‘day’: np.size})

——–

SELECT smoker, dayCOUNT(*), AVG(tip)
FROM tips
GROUP BY smoker, day;

—-

tips.groupby([‘smoker’‘day’]).agg({‘tip’: [np.size, np.mean]})

INNER JOIN

In [22]: df1 = pd.DataFrame({‘key’: [‘A’‘B’‘C’‘D’],
….: ‘value’: np.random.randn(۴)})

….:

In [23]: df2 = pd.DataFrame({‘key’: [‘B’‘D’‘D’‘E’],
….: ‘value’: np.random.randn(۴)})

SELECT *
FROM df1
INNER JOIN df2
ON df1.key = df2.key;

pd.merge(df1, df2, on=‘key’)

merge() also offers parameters for cases when you’d like to join one DataFrame’s column with another DataFrame’s index.

indexed_df2 = df2.set_index(‘key’)

pd.merge(df1, indexed_df2, left_on=‘key’, right_index=True)

LEFT OUTER JOIN

SELECT *
FROM df1
LEFT OUTER JOIN df2
ON df1.key = df2.key;

pd.merge(df1, df2, on=‘key’, how=‘left’)

RIGHT JOIN

SELECT *
FROM df1
RIGHT OUTER JOIN df2
ON df1.key = df2.key;

pd.merge(df1, df2, on=‘key’, how=‘right’)

FULL JOIN

SELECT *
FROM df1
FULL OUTER JOIN df2
ON df1.key = df2.key;

pd.merge(df1, df2, on=‘key’, how=‘outer’)

UNION

SELECT city, rank
FROM df1
UNION ALL
SELECT city, rank
FROM df2;

pd.concat([df1, df2])

SQL’s UNION is similar to UNION ALL, however UNION will remove duplicate rows.

SELECT city, rank
FROM df1
UNION
SELECT city, rank
FROM df2;

pd.concat([df1, df2]).drop_duplicates()

Top N rows with offset

SELECT * FROM tips
ORDER BY tip DESC
LIMIT ۱۰ OFFSET ۵;

tips.nlargest(۱۰ + ۵, columns=‘tip’).tail(۱۰)

Top N rows per group

— Oracle’s ROW_NUMBER() analytic function

SELECT * FROM (
SELECT
t.*,
ROW_NUMBER() OVER(PARTITION BY day ORDER BY total_bill DESCAS rn
FROM tips t
)
WHERE rn < ۳
ORDER BY day, rn;

(tips.assign(rn=tips.sort_values([‘total_bill’], ascending=False).groupby([‘day’]).cumcount() + ۱) .query(‘rn < 3’).sort_values([‘day’‘rn’]))

UPDATE

UPDATE tips
SET tip = tip*۲
WHERE tip < ۲;

tips.loc[tips[‘tip’< ۲‘tip’*= ۲

DELETE

DELETE FROM tips
WHERE tip > ۹;

tips = tips.loc[tips[‘tip’<= ۹]

دیدگاهتان را بنویسید

نشانی ایمیل شما منتشر نخواهد شد. بخش‌های موردنیاز علامت‌گذاری شده‌اند *

Time limit is exhausted. Please reload CAPTCHA.