2022-10-10

Python-c06-update data

# Column-oriented sample data: each key maps to a list of values, and the
# lists are aligned by position (index 0 is Corey Schafer, and so on).
people = {
    "first": ["Corey", "Jane", "John"],
    "last": ["Schafer", "Doe", "Doe"],
    "email": [
        "CoreyMSchafer@gmail.com",
        "JaneDoe@email.com",
        "JohnDoe@email.com",
    ],
}

1	import pandas as pd

1	df = pd.DataFrame(people)

1	df.columns

Index(['first', 'last', 'email'], dtype='object')

# change all of the column names
df.columns=['first_name', 'last_name', 'email']

df

	first_name	last_name	email
0	Corey	Schafer	CoreyMSchafer@gmail.com
1	Jane	Doe	JaneDoe@email.com
2	John	Doe	JohnDoe@email.com

# change all column names to upper case
df.columns = [x.upper() for x in df.columns]

df

	FIRST_NAME	LAST_NAME	EMAIL
0	Corey	Schafer	CoreyMSchafer@gmail.com
1	Jane	Doe	JaneDoe@email.com
2	John	Doe	JohnDoe@email.com

1
2
3

# Replace every underscore in the column names with a space
# (the following cell performs the reverse replacement, space -> underscore).
df.columns = df.columns.str.replace('_', ' ')
df

	FIRST NAME	LAST NAME	EMAIL
0	Corey	Schafer	CoreyMSchafer@gmail.com
1	Jane	Doe	JaneDoe@email.com
2	John	Doe	JohnDoe@email.com

1
2
3

df.columns = df.columns.str.replace(' ', '_')
df.columns = [x.lower() for x in df.columns]
df

	first_name	last_name	email
0	Corey	Schafer	CoreyMSchafer@gmail.com
1	Jane	Doe	JaneDoe@email.com
2	John	Doe	JohnDoe@email.com

1
2
3

# Change only some column names: pass a dictionary whose keys are the old
# names and whose values are the new names. inplace=True modifies df directly.
df.rename(columns={'first_name': 'first', 'last_name': 'last'}, inplace=True)
df

	first	last	email
0	Corey	Schafer	CoreyMSchafer@gmail.com
1	Jane	Doe	JaneDoe@email.com
2	John	Doe	JohnDoe@email.com

1	# change the data values

1
2
3

# change all column values for a specific row
df.loc[2] = ['John', 'Smith', 'JohnSmith@email.com']
df

	first	last	email
0	Corey	Schafer	CoreyMSchafer@gmail.com
1	Jane	Doe	JaneDoe@email.com
2	John	Smith	JohnSmith@email.com

1
2
3

# change only some columns in the row 2
df.loc[2, ['last', 'email']] = ['Smith', 'JohnSmith@email.com']
df

	first	last	email
0	Corey	Schafer	CoreyMSchafer@gmail.com
1	Jane	Doe	JaneDoe@email.com
2	John	Smith	JohnSmith@email.com

1
2
3

# or we can also use at to change a specific column value
df.at[2, 'last'] = 'Doe'
df

	first	last	email
0	Corey	Schafer	CoreyMSchafer@gmail.com
1	Jane	Doe	JaneDoe@email.com
2	John	Doe	JohnSmith@email.com

# Change a value using a filter: filt is a boolean mask that is True for the
# rows whose email equals 'JohnSmith@email.com'. The assignment has to go
# through loc, which selects the rows by the mask and the column by its label.
filt = (df['email'] == 'JohnSmith@email.com')
df.loc[filt, 'last'] = 'Smith'
df

	first	last	email
0	Corey	Schafer	CoreyMSchafer@gmail.com
1	Jane	Doe	JaneDoe@email.com
2	John	Smith	JohnSmith@email.com

1
2
3

# change email to lower case
df['email'] = df['email'].str.lower()
df

	first	last	email
0	Corey	Schafer	coreymschafer@gmail.com
1	Jane	Doe	janedoe@email.com
2	John	Smith	johnsmith@email.com

1
2
3

# apply
# return the email length of each row
df['email'].apply(len)

0 23
1 17
2 19
Name: email, dtype: int64

def update_email(email):
    """Return the given email address converted to upper case."""
    upper_cased = email.upper()
    return upper_cased

df['email'] = df['email'].apply(update_email)
df

	first	last	email
0	Corey	Schafer	COREYMSCHAFER@GMAIL.COM
1	Jane	Doe	JANEDOE@EMAIL.COM
2	John	Smith	JOHNSMITH@EMAIL.COM

1
2
3

# use lambda function
df['email'] = df['email'].apply(lambda x: x.lower())
df

	first	last	email
0	Corey	Schafer	coreymschafer@gmail.com
1	Jane	Doe	janedoe@email.com
2	John	Smith	johnsmith@email.com

1
2
3

# Run apply on a DataFrame.
# The function is called once per column, so len gives the number of rows
# (entries) in each column, not the length of the individual values.
df.apply(len)

first 3
last 3
email 3
dtype: int64

# this gives us the number of columns in each row
df.apply(len, axis='columns')

0 3
1 3
2 3
dtype: int64

# get the min of each column
df.apply(pd.Series.min)

first Corey
last Doe
email coreymschafer@gmail.com
dtype: object

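# applymap applies the function to every individual element of the DataFrame
# (note: DataFrame.applymap is deprecated since pandas 2.1 in favor of DataFrame.map)
df.applymap(len)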

	first	last	email
0	5	7	23
1	4	3	17
2	4	5	19

1
2
3

# map works on a Series
# Replace Corey with Chris and Jane with Mary; any value that has no entry in
# the dictionary (here John) becomes NaN.
df['first'].map({'Corey': 'Chris', 'Jane': 'Mary'})

0 Chris
1 Mary
2 NaN
Name: first, dtype: object

# replace substitutes only the values found in the dictionary; any other value (here John) is left unchanged instead of becoming NaN
df['first'].replace({'Corey': 'Chris', 'Jane': 'Mary'})

0 Chris
1 Mary
2 John
Name: first, dtype: object

MA Jian's Blog

Enthusiasm in developing

Python-c06-update data