Tip: Puedes ver este post en GitHub o ejecutarlo en Binder o Google Colab, pulsa el icono.

Fuentes:
Cursos Machine Learning - Kaggle

import pandas as pd

Creación y acceso

DataFrame: es una tabla. Index = lista de nombres de las filas.

# Small demo table: one numeric column per country plus two descriptive
# columns ('color', 'id'); the product names are used as row labels.
tabla = pd.DataFrame(
    data={
        'España': [100, 150, 80],
        'Portugal': [120, 130, 75],
        'Francia': [90, 105, 46],
        'color': ['rojo', 'verde', 'rojo'],
        'id': ['a23', 'b34', 'a12'],
    },
    index=['Producto A', 'Producto B', 'Producto C'],
)
tabla  # bare expression so the notebook cell displays the table

datos = pd.read_csv("../tabla.csv")

---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
/tmp/ipykernel_2827/3781744174.py in <module>
----> 1 datos = pd.read_csv("../tabla.csv")

~/.local/lib/python3.8/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
    309                     stacklevel=stacklevel,
    310                 )
--> 311             return func(*args, **kwargs)
    312 
    313         return wrapper

~/.local/lib/python3.8/site-packages/pandas/io/parsers/readers.py in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)
    584     kwds.update(kwds_defaults)
    585 
--> 586     return _read(filepath_or_buffer, kwds)
    587 
    588 

~/.local/lib/python3.8/site-packages/pandas/io/parsers/readers.py in _read(filepath_or_buffer, kwds)
    480 
    481     # Create the parser.
--> 482     parser = TextFileReader(filepath_or_buffer, **kwds)
    483 
    484     if chunksize or iterator:

~/.local/lib/python3.8/site-packages/pandas/io/parsers/readers.py in __init__(self, f, engine, **kwds)
    809             self.options["has_index_names"] = kwds["has_index_names"]
    810 
--> 811         self._engine = self._make_engine(self.engine)
    812 
    813     def close(self):

~/.local/lib/python3.8/site-packages/pandas/io/parsers/readers.py in _make_engine(self, engine)
   1038             )
   1039         # error: Too many arguments for "ParserBase"
-> 1040         return mapping[engine](self.f, **self.options)  # type: ignore[call-arg]
   1041 
   1042     def _failover_to_python(self):

~/.local/lib/python3.8/site-packages/pandas/io/parsers/c_parser_wrapper.py in __init__(self, src, **kwds)
     49 
     50         # open handles
---> 51         self._open_handles(src, kwds)
     52         assert self.handles is not None
     53 

~/.local/lib/python3.8/site-packages/pandas/io/parsers/base_parser.py in _open_handles(self, src, kwds)
    220         Let the readers open IOHandles after they are done with their potential raises.
    221         """
--> 222         self.handles = get_handle(
    223             src,
    224             "r",

~/.local/lib/python3.8/site-packages/pandas/io/common.py in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
    699         if ioargs.encoding and "b" not in ioargs.mode:
    700             # Encoding
--> 701             handle = open(
    702                 handle,
    703                 ioargs.mode,

FileNotFoundError: [Errno 2] No such file or directory: '../tabla.csv'

Series: es un DataFrame de una sola columna

pd.Series([1, 2, 3, 4, 5]) # el indice de fila empieza por 0

0    1
1    2
2    3
3    4
4    5
dtype: int64

columna= pd.Series([100, 150,80], index=['Producto A', 'Producto B', 'Producto C'], name='España')
columna

Producto A    100
Producto B    150
Producto C     80
Name: España, dtype: int64

tabla.shape,columna.shape

((3, 5), (3,))

tabla.head()

tabla.España # =  tabla['España']

Producto A    100
Producto B    150
Producto C     80
Name: España, dtype: int64

# Positional access must go through .iloc: the Series is indexed by product
# labels, so a plain integer key relies on a deprecated positional fallback.
tabla['España'].iloc[0]

100

tabla.iloc[0,:]

España       100
Portugal     120
Francia       90
color       rojo
id           a23
Name: Producto A, dtype: object

tabla.iloc[[0, 1], [1, 2]] # 0:10 = 0..9

tabla.loc[:,'Portugal']   # 0:10 = 0..10

Producto A    120
Producto B    130
Producto C     75
Name: Portugal, dtype: int64

tabla.set_index('id')  # cambia la columna indice de la tabla

tabla.color == 'rojo'

Producto A     True
Producto B    False
Producto C     True
Name: color, dtype: bool

tabla.loc[(tabla.color == 'rojo') & (tabla.Portugal < 120)]

tabla.loc[tabla.color.isin(['rojo'])]

tabla.loc[tabla.Francia.notnull()]

# Write the value with a single .loc call on the DataFrame itself.
# The chained form tabla['Francia'][1] = 20 assigns into an intermediate
# Series, which raises SettingWithCopyWarning and is not guaranteed to
# modify `tabla`.
tabla.loc[tabla.index[1], 'Francia'] = 20
tabla

/tmp/ipykernel_2827/923708451.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tabla['Francia'][1]=20

tabla['id']= range(len(tabla),0,-1)
tabla

Funciones

tabla.describe()

print(tabla.España.mean())
print(tabla.color.unique())

110.0
['rojo' 'verde']

tabla.color.value_counts()

rojo     2
verde    1
Name: color, dtype: int64

tabla.Portugal.map(lambda p:p - tabla.Portugal.mean())

Producto A    11.666667
Producto B    21.666667
Producto C   -33.333333
Name: Portugal, dtype: float64

tabla.Portugal - tabla.Portugal.mean()

Producto A    11.666667
Producto B    21.666667
Producto C   -33.333333
Name: Portugal, dtype: float64

def remean_Portugal(row, mean=None):
    """Return a copy of *row* with its 'Portugal' value centered on a mean.

    Intended for row-wise use, e.g. ``tabla.apply(remean_Portugal, axis='columns')``.

    Args:
        row: A Series holding one table row; it must contain a 'Portugal' entry.
        mean: Value to subtract. When omitted, the mean of the module-level
            ``tabla.Portugal`` column is computed on each call; pass it
            explicitly to avoid recomputing it for every row.

    Returns:
        A new Series equal to *row* except that 'Portugal' holds the
        original value minus the mean. The input row is left untouched.
    """
    if mean is None:
        mean = tabla.Portugal.mean()
    # Work on a copy: pandas advises against mutating the object that
    # apply() hands to a user-defined function.
    centered = row.copy()
    centered['Portugal'] = centered['Portugal'] - mean
    return centered

tabla.apply(remean_Portugal, axis='columns')

Agrupaciones

tabla.groupby('color').Portugal.min()

color
rojo      75
verde    130
Name: Portugal, dtype: int64

tabla.groupby('color').apply(lambda df: df.Portugal.iloc[0])

color
rojo     120
verde    130
dtype: int64

# Name the reductions as strings: passing the builtins min/max to agg() is
# deprecated in recent pandas versions and emits a FutureWarning. The
# resulting columns keep the same labels ('len', 'min', 'max').
tabla.groupby(['color']).Portugal.agg([len, 'min', 'max'])

# Maximum of 'España' per (color, id) pair; the result has a two-level index
# and a single column named 'max'.
tabla_revisada = tabla.groupby(['color', 'id']).España.agg(['max'])
tabla_revisada

tabla_revisada.reset_index()

Ordenaciones

tabla_revisada.sort_values(by='max', ascending=False)

tabla.sort_index()

tabla.sort_values(by=['España', 'id'])

Valores faltantes

tabla.España.dtype, tabla.color.dtype

(dtype('int64'), dtype('O'))

tabla.dtypes

España       int64
Portugal     int64
Francia      int64
color       object
id           int64
dtype: object

tabla.Francia.astype('float64')   # cambiar el tipo de dato

Producto A    90.0
Producto B    20.0
Producto C    46.0
Name: Francia, dtype: float64

tabla[pd.isnull(tabla.Francia)]

tabla.España.fillna("Unknown")

tabla.color.replace("rojo", "amarillo")

Producto A    amarillo
Producto B       verde
Producto C    amarillo
Name: color, dtype: object

Renombrar

tabla.rename(columns={'Francia': 'Alemania'})

tabla.rename(index={'Producto A': 'Producto D'})

tabla.rename_axis("Productos", axis='rows').rename_axis("Paises", axis='columns')

Combinaciones

canadian_youtube = pd.read_csv("../input/youtube-new/CAvideos.csv")
british_youtube = pd.read_csv("../input/youtube-new/GBvideos.csv")

pd.concat([canadian_youtube, british_youtube])

left = canadian_youtube.set_index(['title', 'trending_date'])
right = british_youtube.set_index(['title', 'trending_date'])

left.join(right, lsuffix='_CAN', rsuffix='_UK')

	España	Portugal	Francia	id
count	3.000000	3.000000	3.000000	3.0
mean	110.000000	108.333333	52.000000	2.0
std	36.055513	29.297326	35.383612	1.0
min	80.000000	75.000000	20.000000	1.0
25%	90.000000	97.500000	33.000000	1.5
50%	100.000000	120.000000	46.000000	2.0
75%	125.000000	125.000000	68.000000	2.5
max	150.000000	130.000000	90.000000	3.0

	España	Portugal	Francia	color	id
Producto A	100	120	90	rojo	a23
Producto B	150	130	105	verde	b34
Producto C	80	75	46	rojo	a12

	España	Portugal	Francia	color	id
Producto A	100	11.666667	90	rojo	3
Producto B	150	21.666667	20	verde	2
Producto C	80	-33.333333	46	rojo	1

	España	Portugal	Francia	color	id
Producto D	100	120	90	rojo	3
Producto B	150	130	20	verde	2
Producto C	80	75	46	rojo	1

Paises	España	Portugal	Francia	color	id
Productos
Producto A	100	120	90	rojo	3
Producto B	150	130	20	verde	2
Producto C	80	75	46	rojo	1