Python-c06-update data

1
2
3
4
5
people = {
"first": ["Corey", 'Jane', 'John'],
"last": ["Schafer", 'Doe', 'Doe'],
"email": ["CoreyMSchafer@gmail.com", 'JaneDoe@email.com', 'JohnDoe@email.com']
}
1
import pandas as pd
1
df = pd.DataFrame(people)
1
df.columns

Index(['first', 'last', 'email'], dtype='object')

1
2
# change all of the column names
df.columns=['first_name', 'last_name', 'email']
1
df

first_name last_name email
0 Corey Schafer CoreyMSchafer@gmail.com
1 Jane Doe JaneDoe@email.com
2 John Doe JohnDoe@email.com
1
2
# change all column names to upper case
df.columns = [x.upper() for x in df.columns]
1
df

FIRST_NAME LAST_NAME EMAIL
0 Corey Schafer CoreyMSchafer@gmail.com
1 Jane Doe JaneDoe@email.com
2 John Doe JohnDoe@email.com
1
2
3
# replace _ by space in all of the column names and vice versa
df.columns = df.columns.str.replace('_', ' ')
df

FIRST NAME LAST NAME EMAIL
0 Corey Schafer CoreyMSchafer@gmail.com
1 Jane Doe JaneDoe@email.com
2 John Doe JohnDoe@email.com
1
2
3
df.columns = df.columns.str.replace(' ', '_')
df.columns = [x.lower() for x in df.columns]
df

first_name last_name email
0 Corey Schafer CoreyMSchafer@gmail.com
1 Jane Doe JaneDoe@email.com
2 John Doe JohnDoe@email.com
1
2
3
# rename only some columns: pass a dictionary whose keys are the old names and whose values are the new names
df.rename(columns={'first_name': 'first', 'last_name': 'last'}, inplace=True)
df

first last email
0 Corey Schafer CoreyMSchafer@gmail.com
1 Jane Doe JaneDoe@email.com
2 John Doe JohnDoe@email.com
1
# change the data values
1
2
3
# change all column values for a specific row
df.loc[2] = ['John', 'Smith', 'JohnSmith@email.com']
df

first last email
0 Corey Schafer CoreyMSchafer@gmail.com
1 Jane Doe JaneDoe@email.com
2 John Smith JohnSmith@email.com
1
2
3
# change only some columns in the row 2
df.loc[2, ['last', 'email']] = ['Smith', 'JohnSmith@email.com']
df

first last email
0 Corey Schafer CoreyMSchafer@gmail.com
1 Jane Doe JaneDoe@email.com
2 John Smith JohnSmith@email.com
1
2
3
# or we can also use at to change a specific column value
df.at[2, 'last'] = 'Doe'
df

first last email
0 Corey Schafer CoreyMSchafer@gmail.com
1 Jane Doe JaneDoe@email.com
2 John Doe JohnSmith@email.com
1
2
3
4
# change value with filter: we have to use loc
filt = (df['email'] == 'JohnSmith@email.com')
df.loc[filt, 'last'] = 'Smith'
df

first last email
0 Corey Schafer CoreyMSchafer@gmail.com
1 Jane Doe JaneDoe@email.com
2 John Smith JohnSmith@email.com
1
2
3
# change email to lower case
df['email'] = df['email'].str.lower()
df

first last email
0 Corey Schafer coreymschafer@gmail.com
1 Jane Doe janedoe@email.com
2 John Smith johnsmith@email.com
1
2
3
# apply
# return the email length of each row
df['email'].apply(len)

0 23
1 17
2 19
Name: email, dtype: int64

1
2
3
4
5
def update_email(email):
    """Return the given email string converted to upper case.

    Written as a plain one-argument function so it can be handed to
    ``Series.apply``, which calls it once for every value in the column.
    """
    return email.upper()

df['email'] = df['email'].apply(update_email)
df

first last email
0 Corey Schafer COREYMSCHAFER@GMAIL.COM
1 Jane Doe JANEDOE@EMAIL.COM
2 John Smith JOHNSMITH@EMAIL.COM
1
2
3
# use lambda function
df['email'] = df['email'].apply(lambda x: x.lower())
df

first last email
0 Corey Schafer coreymschafer@gmail.com
1 Jane Doe janedoe@email.com
2 John Smith johnsmith@email.com
1
2
3
# run apply on a dataframe
# this gives us the number of values (rows) in each column
df.apply(len)

first 3
last 3
email 3
dtype: int64

1
2
# this gives us the column count of each row
df.apply(len, axis='columns')

0 3
1 3
2 3
dtype: int64

1
2
# get the min of each column
df.apply(pd.Series.min)

first Corey
last Doe
email coreymschafer@gmail.com
dtype: object

1
2
# applymap applies a function to every element of a DataFrame (DataFrame-only; deprecated since pandas 2.1 in favor of DataFrame.map)
df.applymap(len)

first last email
0 5 7 23
1 4 3 17
2 4 5 19
1
2
3
# map works on the Series
# replace Corey with Chris and Jane with Mary; values not in the dictionary (here John) become NaN
df['first'].map({'Corey': 'Chris', 'Jane': 'Mary'})

0 Chris
1 Mary
2 NaN
Name: first, dtype: object

1
2
# replace changes only the values listed in the dictionary and leaves the others (here John) unchanged
df['first'].replace({'Corey': 'Chris', 'Jane': 'Mary'})

0 Chris
1 Mary
2 John
Name: first, dtype: object