# Simple Python: How to use Pandas

# Load the data set from a CSV file into a DataFrame
# (the extension was previously misspelled as ".scv").
import pandas as pd
df = pd.read_csv("test.csv")

# Preview the first 3 rows. head() reads from the top of the frame;
# tail() would return the last rows instead.
df.head(3)

# Normalise every column label to lowercase
df.columns = [label.lower() for label in df.columns]

# Alternative: rename() accepts a function applied to each label. It returns
# a new frame, so the result has to be assigned back:
# df = df.rename(columns=lambda x: x.lower())

# Rename selected abbreviated columns to descriptive names;
# each keyword is the current label and its value is the new label.
df = df.rename(columns=dict(
    p='points',
    gp='games',
    sot='shots_on_target',
    g='goals',
    ppg='points_per_game',
    a='assists',
))

# Changing values in a column: salaries look like "$14.0m", so strip any
# '$' and 'm' characters from both ends, leaving only the numeric text.
# The vectorised .str accessor keeps missing values as NaN, whereas calling
# x.strip() per element raises AttributeError on a float NaN.
df['salary'] = df['salary'].str.strip('$m')

# Add a new column named `team` with every row set to the empty string
# (a scalar value is broadcast over the frame's existing index).
df['team'] = ''
# A column can also be placed at a chosen position: insert an empty
# `position` column at column index 8.
df.insert(8, 'position', '')

# The raw `player` column packs three fields into one string:
#   Cesc Fàbregas\n Midfield — Chelsea
#   Saido Berahino\n Forward — West Brom
#   Steven Gerrard\n Midfield — Liverpool
#   <-  name    -> <-position-> <-team->
# Split it into three parts so each part can become its own column.
def process_player_col(text):
    """Split a raw player string into its name, team and position.

    Args:
        text: A string of the form "<name>\\n <position> — <team>".

    Returns:
        A pandas Series holding ``[name, team, position]`` (team comes
        before position). Position and team are whitespace-stripped; the
        name is returned exactly as it appears before the newline.

    Raises:
        ValueError: If ``text`` does not split into exactly two parts on
            the newline, or the second part does not split into exactly
            two parts on ' — '.
    """
    name, remainder = text.split('\n')
    position, team = map(str.strip, remainder.split(' — '))
    return pd.Series([name, team, position])

# Replace `player` with the bare name and fill `team` / `position` from the
# parsed parts. process_player_col returns [name, team, position], so the
# target columns are listed in that same order.
df[['player', 'team', 'position']] = df['player'].apply(process_player_col)

# or, row by row. This is an alternative to the vectorised assignment above:
# use one or the other, because once `player` holds only the name it can no
# longer be split again.
for idx, row in df.iterrows():
    # Unpack in the order process_player_col returns: name, team, position
    # (unpacking as name, position, team would swap the two columns).
    name, team, position = process_player_col(row['player'])
    # .loc does label-based assignment; the .ix indexer was removed in pandas 1.0.
    df.loc[idx, 'player'] = name
    df.loc[idx, 'position'] = position
    df.loc[idx, 'team'] = team

# Applying a function to multiple columns: lowercase the text columns.
# Series.str.lower() is applied column by column, which avoids the deprecated
# DataFrame.applymap and keeps missing values as NaN instead of raising on them.
cols = ['player', 'position', 'team']
df[cols] = df[cols].apply(lambda col: col.str.lower())

# Count the rows that contain at least one NaN: flag each row that has any
# missing cell, then total the flags.
nans = int(df.isnull().any(axis=1).sum())
print('%d rows have missing values' % nans)

# Select the rows whose `assists` value is missing
df.loc[df['assists'].isnull()]

# Select the rows whose `assists` value is present (the inverted mask)
df.loc[~df['assists'].isnull()]

# Replace every NaN cell in the frame with the default value 0, in place
df.fillna(0, inplace=True)

# Add one "empty" (all-NaN) row to the end of the DataFrame.
# DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
import numpy as np
df = pd.concat(
    [df, pd.DataFrame([[np.nan] * len(df.columns)],  # one row of NaNs
                      columns=df.columns)],
    ignore_index=True)  # renumber the rows so the new row gets the last label

# Fill individual cells of the new (last) row with data
df.loc[df.index[-1], 'player'] = 'new player'
df.loc[df.index[-1], 'salary'] = 12.3

# Sort the rows by `goals`, from highest to lowest. DataFrame.sort no longer
# exists in current pandas; sort_values is its replacement.
df.sort_values('goals', ascending=False, inplace=True)

# Optional: renumber the rows 1..n after sorting
df.index = range(1, len(df.index) + 1)

# Create a dummy copy whose `salary` differs in the first two rows.
# Taking the first two index labels selects those rows by position, so it
# does not depend on how the rows are numbered (assuming unique labels);
# the label slice 0:2 only matched two rows when the index was exactly 1..n.
df_2 = df.copy()
df_2.loc[df_2.index[:2], 'salary'] = [20.0, 15.0]

# Temporarily use the `player` column as the index of both frames so that
# update() aligns rows by player name
df.set_index('player', inplace=True)
df_2.set_index('player', inplace=True)

# Overwrite df's `salary` with the values from df_2
df.update(other=df_2['salary'], overwrite=True)

# Turn `player` back into a regular column
df.reset_index(inplace=True)

# Players whose team is either Arsenal or Chelsea
df[df['team'].isin(['arsenal', 'chelsea'])]

# Forwards who play for Arsenal (both conditions must hold)
df[df['team'].eq('arsenal') & df['position'].eq('forward')]

# Group the column labels by dtype (mapping of dtype -> column labels)
types = df.columns.to_series().groupby(df.dtypes).groups

# Show the first rows of the string (object-dtype) columns only
df.loc[:, [dtype == np.dtype('O') for dtype in df.dtypes]].head()

# Convert the `salary` column to floating point
df['salary'] = df['salary'].astype('float64')