Tip: You can view this post on GitHub or run it on Binder or Google Colab by clicking the icon.

Prerequisites

Vector space models capture the semantic meaning of words and the relationships between them.

Word-by-word design

Matrix: each element counts the number of times one word appears close to another word (within a window of k words). Example: v = [2, 1, 1, 0]
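A minimal sketch of how such a matrix could be built, assuming a toy two-sentence corpus and a window of k = 2 (neither is given in this post); the counts for 'data' against ['simple', 'raw', 'like', 'I'] come out as the vector [2, 1, 1, 0] mentioned above:

from collections import defaultdict

# Hypothetical corpus, chosen only for illustration
sentences = [["I", "like", "simple", "data"],
             ["I", "prefer", "simple", "raw", "data"]]
k = 2  # co-occurrence window size

cooc = defaultdict(int)
for sentence in sentences:
    for i, word in enumerate(sentence):
        # count every word within k positions of the current word
        for j in range(max(0, i - k), min(len(sentence), i + k + 1)):
            if i != j:
                cooc[(word, sentence[j])] += 1

print([cooc[("data", w)] for w in ["simple", "raw", "like", "I"]])  # [2, 1, 1, 0]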

Word-by-document design

Records the number of times each word appears in each document.
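A similar sketch for the word-by-document design, again with invented documents and vocabulary; each row of the resulting matrix is the vector of one word across the documents:

import numpy as np

# Hypothetical documents grouped by topic (illustration only)
docs = {
    "entertainment": "film music film concert".split(),
    "economy": "market data price market".split(),
    "machine_learning": "data model data network".split(),
}
vocab = ["data", "film", "market"]

# rows = words, columns = documents; each entry counts occurrences in that document
counts = np.array([[doc.count(word) for doc in docs.values()] for word in vocab])
print(counts)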

Euclidean distance
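The Euclidean distance between two n-dimensional vectors v and w is the norm of their difference:

d(v, w) = ||v - w|| = sqrt((v1 - w1)^2 + ... + (vn - wn)^2)

In NumPy this is np.linalg.norm(v - w), which is what the euclidean helper defined later in this post computes.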

Cosine similarity
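Cosine similarity measures the cosine of the angle between two vectors, so it depends on their directions rather than on their magnitudes:

cos(θ) = (v · w) / (||v|| ||w||)

Values close to 1 indicate vectors pointing in similar directions (similar words); values near 0 indicate nearly orthogonal, unrelated vectors. This is what the cosine_similarity helper implements further down.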

Manipulating words in vector spaces

import pandas as pd # Library for Dataframes 
import numpy as np # Library for math functions
import pickle # Python object serialization library. Not secure

word_embeddings = pickle.load( open( "./data/word_embeddings_subset.p", "rb" ) )
len(word_embeddings) # there should be 243 words that will be used in this assignment
243
countryVector = word_embeddings['country'] # Get the vector representation for the word 'country'
print(type(countryVector)) # Print the type of the vector. Note it is a numpy array
print(countryVector) # Print the values of the vector.  
<class 'numpy.ndarray'>
[-0.08007812  0.13378906  0.14355469  0.09472656 -0.04736328 -0.02355957
 -0.00854492 -0.18652344  0.04589844 -0.08154297 -0.03442383 -0.11621094
  0.21777344 -0.10351562 -0.06689453  0.15332031 -0.19335938  0.26367188
 -0.13671875 -0.05566406  0.07470703 -0.00070953  0.09375    -0.14453125
  0.04296875 -0.01916504 -0.22558594 -0.12695312 -0.0168457   0.05224609
  0.0625     -0.1484375  -0.01965332  0.17578125  0.10644531 -0.04760742
 -0.10253906 -0.28515625  0.10351562  0.20800781 -0.07617188 -0.04345703
  0.08642578  0.08740234  0.11767578  0.20996094 -0.07275391  0.1640625
 -0.01135254  0.0025177   0.05810547 -0.03222656  0.06884766  0.046875
  0.10107422  0.02148438 -0.16210938  0.07128906 -0.16210938  0.05981445
  0.05102539 -0.05566406  0.06787109 -0.03759766  0.04345703 -0.03173828
 -0.03417969 -0.01116943  0.06201172 -0.08007812 -0.14941406  0.11914062
  0.02575684  0.00302124  0.04711914 -0.17773438  0.04101562  0.05541992
  0.00598145  0.03027344 -0.07666016 -0.109375    0.02832031 -0.10498047
  0.0100708  -0.03149414 -0.22363281 -0.03125    -0.01147461  0.17285156
  0.08056641 -0.10888672 -0.09570312 -0.21777344 -0.07910156 -0.10009766
  0.06396484 -0.11962891  0.18652344 -0.02062988 -0.02172852  0.29296875
 -0.00793457  0.0324707  -0.15136719  0.00227356 -0.03540039 -0.13378906
  0.0546875  -0.03271484 -0.01855469 -0.10302734 -0.13378906  0.11425781
  0.16699219  0.01361084 -0.02722168 -0.2109375   0.07177734  0.08691406
 -0.09960938  0.01422119 -0.18261719  0.00741577  0.01965332  0.00738525
 -0.03271484 -0.15234375 -0.26367188 -0.14746094  0.03320312 -0.03344727
 -0.01000977  0.01855469  0.00183868 -0.10498047  0.09667969  0.07910156
  0.11181641  0.13085938 -0.08740234 -0.1328125   0.05004883  0.19824219
  0.0612793   0.16210938  0.06933594  0.01281738  0.01550293  0.01531982
  0.11474609  0.02758789  0.13769531 -0.08349609  0.01123047 -0.20507812
 -0.12988281 -0.16699219  0.20410156 -0.03588867 -0.10888672  0.0534668
  0.15820312 -0.20410156  0.14648438 -0.11572266  0.01855469 -0.13574219
  0.24121094  0.12304688 -0.14550781  0.17578125  0.11816406 -0.30859375
  0.10888672 -0.22363281  0.19335938 -0.15722656 -0.07666016 -0.09082031
 -0.19628906 -0.23144531 -0.09130859 -0.14160156  0.06347656  0.03344727
 -0.03369141  0.06591797  0.06201172  0.3046875   0.16796875 -0.11035156
 -0.03833008 -0.02563477 -0.09765625  0.04467773 -0.0534668   0.11621094
 -0.15039062 -0.16308594 -0.15527344  0.04638672  0.11572266 -0.06640625
 -0.04516602  0.02331543 -0.08105469 -0.0255127  -0.07714844  0.0016861
  0.15820312  0.00994873 -0.06445312  0.15722656 -0.03112793  0.10644531
 -0.140625    0.23535156 -0.11279297  0.16015625  0.00061798 -0.1484375
  0.02307129 -0.109375    0.05444336 -0.14160156  0.11621094  0.03710938
  0.14746094 -0.04199219 -0.01391602 -0.03881836  0.02783203  0.10205078
  0.07470703  0.20898438 -0.04223633 -0.04150391 -0.00588989 -0.14941406
 -0.04296875 -0.10107422 -0.06176758  0.09472656  0.22265625 -0.02307129
  0.04858398 -0.15527344 -0.02282715 -0.04174805  0.16699219 -0.09423828
  0.14453125  0.11132812  0.04223633 -0.16699219  0.10253906  0.16796875
  0.12597656 -0.11865234 -0.0213623  -0.08056641  0.24316406  0.15527344
  0.16503906  0.00854492 -0.12255859  0.08691406 -0.11914062 -0.02941895
  0.08349609 -0.03100586  0.13964844 -0.05151367  0.00765991 -0.04443359
 -0.04980469 -0.03222656 -0.00952148 -0.10888672 -0.10302734 -0.15722656
  0.19335938  0.04858398  0.015625   -0.08105469 -0.11621094 -0.01989746
  0.05737305  0.06103516 -0.14550781  0.06738281 -0.24414062 -0.07714844
  0.04760742 -0.07519531 -0.14941406 -0.04418945  0.09716797  0.06738281]
def vec(w):
    return word_embeddings[w]

Visualizing the word embeddings of some words

Word embeddings = representations of natural-language words or phrases as vectors of real numbers.

import matplotlib.pyplot as plt # Import matplotlib
%matplotlib inline

words = ['oil', 'gas', 'happy', 'sad', 'city', 'town', 'village', 'country', 'continent', 'petroleum', 'joyful']

bag2d = np.array([vec(word) for word in words]) # Convert each word to its vector representation

fig, ax = plt.subplots(figsize = (10, 10)) # Create custom size image

col1 = 3 # Select the column for the x axis
col2 = 2 # Select the column for the y axis

# Print an arrow for each word
for word in bag2d:
    ax.arrow(0, 0, word[col1], word[col2], head_width=0.005, head_length=0.005, fc='r', ec='r', width = 1e-5)

    
ax.scatter(bag2d[:, col1], bag2d[:, col2]); # Plot a dot for each word

# Add the word label over each dot in the scatter plot
for i in range(0, len(words)):
    ax.annotate(words[i], (bag2d[i, col1], bag2d[i, col2]))


plt.show()

Distance between words

words = ['sad', 'happy', 'town', 'village']

bag2d = np.array([vec(word) for word in words]) # Convert each word to its vector representation

fig, ax = plt.subplots(figsize = (10, 10)) # Create custom size image

col1 = 3 # Select the column for the x axis
col2 = 2 # Select the column for the y axis

# Print an arrow for each word
for word in bag2d:
    ax.arrow(0, 0, word[col1], word[col2], head_width=0.0005, head_length=0.0005, fc='r', ec='r', width = 1e-5)
    
# print the vector difference between village and town
village = vec('village')
town = vec('town')
diff = town - village
ax.arrow(village[col1], village[col2], diff[col1], diff[col2], fc='b', ec='b', width = 1e-5)

# print the vector difference between sad and happy
sad = vec('sad')
happy = vec('happy')
diff = happy - sad
ax.arrow(sad[col1], sad[col2], diff[col1], diff[col2], fc='b', ec='b', width = 1e-5)


ax.scatter(bag2d[:, col1], bag2d[:, col2]); # Plot a dot for each word

# Add the word label over each dot in the scatter plot
for i in range(0, len(words)):
    ax.annotate(words[i], (bag2d[i, col1], bag2d[i, col2]))


plt.show()

Applying vector addition and subtraction

keys = word_embeddings.keys()
data = []
for key in keys:
    data.append(word_embeddings[key])

embedding = pd.DataFrame(data=data, index=keys)
# Define a function to find the closest word to a vector:
def find_closest_word(v, k = 1):
    # Calculate the vector difference from each word to the input vector
    diff = embedding.values - v 
    # Get the squared norm of each difference vector,
    # i.e. the squared Euclidean distance from each word to the input vector
    delta = np.sum(diff * diff, axis=1)
    # Find the index of the minimum distance in the array
    i = np.argmin(delta)
    # Return the row name for this item
    return embedding.iloc[i].name
embedding.head(10)
0 1 2 3 4 5 6 7 8 9 ... 290 291 292 293 294 295 296 297 298 299
country -0.080078 0.133789 0.143555 0.094727 -0.047363 -0.023560 -0.008545 -0.186523 0.045898 -0.081543 ... -0.145508 0.067383 -0.244141 -0.077148 0.047607 -0.075195 -0.149414 -0.044189 0.097168 0.067383
city -0.010071 0.057373 0.183594 -0.040039 -0.029785 -0.079102 0.071777 0.013306 -0.143555 0.011292 ... 0.024292 -0.168945 -0.062988 0.117188 -0.020508 0.030273 -0.247070 -0.122559 0.076172 -0.234375
China -0.073242 0.135742 0.108887 0.083008 -0.127930 -0.227539 0.151367 -0.045654 -0.065430 0.034424 ... 0.140625 0.087402 0.152344 0.079590 0.006348 -0.037842 -0.183594 0.137695 0.093750 -0.079590
Iraq 0.191406 0.125000 -0.065430 0.060059 -0.285156 -0.102539 0.117188 -0.351562 -0.095215 0.200195 ... -0.100586 -0.077148 -0.123047 0.193359 -0.153320 0.089355 -0.173828 -0.054688 0.302734 0.105957
oil -0.139648 0.062256 -0.279297 0.063965 0.044434 -0.154297 -0.184570 -0.498047 0.047363 0.110840 ... -0.195312 -0.345703 0.217773 -0.091797 0.051025 0.061279 0.194336 0.204102 0.235352 -0.051025
town 0.123535 0.159180 0.030029 -0.161133 0.015625 0.111816 0.039795 -0.196289 -0.039307 0.067871 ... -0.007935 -0.091797 -0.265625 0.029297 0.089844 -0.049805 -0.202148 -0.079590 0.068848 -0.164062
Canada -0.136719 -0.154297 0.269531 0.273438 0.086914 -0.076172 -0.018677 0.006256 0.077637 -0.211914 ... 0.105469 0.030762 -0.039307 0.183594 -0.117676 0.191406 0.074219 0.020996 0.285156 -0.257812
London -0.267578 0.092773 -0.238281 0.115234 -0.006836 0.221680 -0.251953 -0.055420 0.020020 0.149414 ... -0.008667 -0.008484 -0.053223 0.197266 -0.296875 0.064453 0.091797 0.058350 0.022583 -0.101074
England -0.198242 0.115234 0.062500 -0.058350 0.226562 0.045898 -0.062256 -0.202148 0.080566 0.021606 ... 0.135742 0.109375 -0.121582 0.008545 -0.171875 0.086914 0.070312 0.003281 0.069336 0.056152
Australia 0.048828 -0.194336 -0.041504 0.084473 -0.114258 -0.208008 -0.164062 -0.269531 0.079102 0.275391 ... 0.021118 0.171875 0.042236 0.221680 -0.239258 -0.106934 0.030884 0.006622 0.051270 -0.135742

10 rows × 300 columns

capital = vec('France') - vec('Paris')
country = vec('Madrid') + capital
find_closest_word(country)
'Spain'

Principal component analysis (PCA)

  • An unsupervised learning algorithm that can be used to reduce the dimensionality of your data.
  • The model collapses the data onto its principal components.

  • It uses orthogonal transformations to map a set of variables into a set of linearly uncorrelated variables called principal components.

  • PCA is based on the singular value decomposition (SVD) of the covariance matrix of the original dataset.

More info on Wikipedia

More info in this blog post

  • Eigenvector: the resulting vectors, also known as the uncorrelated features of your data; together they form the rotation matrix.

  • Eigenvalue: the amount of information retained by each new feature. You can think of it as the variance along the eigenvector.

  • Each eigenvalue has a corresponding eigenvector; the eigenvalue tells you how much variance there is along that eigenvector.

Steps to compute PCA:

  • Mean-normalize your data.
  • Compute the covariance matrix.
  • Compute the SVD of the covariance matrix. This returns [U S V] = svd(Σ), where the columns of U are the eigenvectors and the diagonal of S holds the eigenvalues.

  • You can then use the first n columns of U to obtain the transformed data by computing XU[:, 0:n] (see the sketch below).
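A minimal NumPy sketch of this recipe, for illustration only (the compute_pca function later in this post uses np.linalg.eigh instead of the SVD, and the next cells rely on sklearn's PCA):

import numpy as np

def pca_svd(X, n_components=2):
    # 1. Mean-normalize the data
    X_demeaned = X - X.mean(axis=0)
    # 2. Compute the covariance matrix
    Sigma = np.cov(X_demeaned, rowvar=False)
    # 3. SVD of the covariance matrix: the columns of U are the eigenvectors
    #    and the entries of S are the corresponding eigenvalues
    U, S, Vt = np.linalg.svd(Sigma)
    # 4. Project the data onto the first n_components eigenvectors: X U[:, 0:n]
    return np.dot(X_demeaned, U[:, :n_components])

Up to the sign of each axis, this produces the same projection as the compute_pca function defined further down.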

import matplotlib.pyplot as plt            # library for visualization
from sklearn.decomposition import PCA      # PCA library
import math                                # Library for math functions
import random                              # Library for pseudo random numbers
import matplotlib.lines as mlines
import matplotlib.transforms as mtransforms
np.random.seed(100)

std1 = 1     # The desired standard deviation of our first random variable
std2 = 0.333 # The desired standard deviation of our second random variable

x = np.random.normal(0, std1, 1000) # Get 1000 samples from x ~ N(0, std1)
y = np.random.normal(0, std2, 1000)  # Get 1000 samples from y ~ N(0, std2)
#y = y + np.random.normal(0,1,1000)*noiseLevel * np.sin(0.78)

# PCA works better if the data is centered
x = x - np.mean(x) # Center x 
y = y - np.mean(y) # Center y

#Define a pair of dependent variables with a desired amount of covariance
n = 1 # Magnitude of covariance. 
angle = np.arctan(1 / n) # Convert the covariance to an angle
print('angle: ',  angle * 180 / math.pi)

# Create a rotation matrix using the given angle
rotationMatrix = np.array([[np.cos(angle), np.sin(angle)],
                 [-np.sin(angle), np.cos(angle)]])


print('rotationMatrix')
print(rotationMatrix)

xy = np.concatenate(([x] , [y]), axis=0).T # Create a matrix with columns x and y

# Transform the data using the rotation matrix. It correlates the two variables
data = np.dot(xy, rotationMatrix) # Returns an n x 2 array

# Print the rotated data
plt.scatter(data[:,0], data[:,1])
plt.show()
angle:  45.0
rotationMatrix
[[ 0.70710678  0.70710678]
 [-0.70710678  0.70710678]]
# Plot the result of the PCA in the same figure, alongside the two
# principal component axes (red and green)

plt.scatter(data[:,0], data[:,1]) # Print the original data in blue

# Apply PCA. In theory, the Eigenvector matrix must be the 
# inverse of the original rotationMatrix. 
pca = PCA(n_components=2)  # Instantiate a PCA. Choose to get 2 output variables

# Create the transformation model for this data. Internally it gets the rotation 
# matrix and the explained variance
pcaTr = pca.fit(data)

# Create an array with the transformed data
dataPCA = pcaTr.transform(data)

print('Eigenvectors or principal component: First row must be in the direction of [1, n]')
print(pcaTr.components_)

print()
print('Eigenvalues or explained variance')
print(pcaTr.explained_variance_)

# Print the rotated data
plt.scatter(dataPCA[:,0], dataPCA[:,1])

# Plot the first principal component axis, scaled by three standard deviations
plt.plot([0, rotationMatrix[0][0] * std1 * 3], [0, rotationMatrix[0][1] * std1 * 3], color='red')
# Plot the second principal component axis, scaled by three standard deviations
plt.plot([0, rotationMatrix[1][0] * std2 * 3], [0, rotationMatrix[1][1] * std2 * 3], color='green')

plt.show()
Eigenvectors or principal component: First row must be in the direction of [1, n]
[[ 0.70827652  0.7059351 ]
 [-0.7059351   0.70827652]]

Eigenvalues or explained variance
[1.09488457 0.11243237]
  • The rotation matrix used to create our correlated variables took the original uncorrelated variables x and y and transformed them into the blue points.
  • The PCA transformation recovers the rotation matrix that was used to create our correlated variables (the blue points).
  • Using the PCA model to transform our data returns variables like our original uncorrelated ones.

PCA as a dimensionality-reduction strategy

The first few components retain most of the data's power to explain the patterns that generalize across the data.
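A quick way to check how many components are worth keeping is the cumulative explained variance ratio. The sketch below is illustrative only: the data is random and the 95% threshold is an arbitrary choice, not a value from this post.

from sklearn.decomposition import PCA
import numpy as np

X = np.random.rand(200, 50)                       # hypothetical data: 200 samples, 50 features
pca = PCA().fit(X)                                # keep all components
cumulative = np.cumsum(pca.explained_variance_ratio_)
n_components = int(np.argmax(cumulative >= 0.95)) + 1  # smallest n reaching 95% of the variance
print(n_components)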

PCA as a strategy for plotting complex data

Raw images are made up of hundreds or even thousands of features. PCA, however, lets us reduce all those features to just two. In that reduced space of uncorrelated variables, we could easily separate, say, dogs from cats.

Predicting relationships between words

Example: country capitals

def get_vectors(embeddings, words):
    """
    Input:
        embeddings: a dictionary where the keys are words and the values are their embeddings
        words: a list of words
    Output: 
        X: a matrix where the rows are the embeddings corresponding to the words in the list
        
    """
    m = len(words)
    X = np.zeros((1, 300))
    for word in words:
        english = word
        eng_emb = embeddings[english]
        X = np.row_stack((X, eng_emb))
    X = X[1:,:]
    return X
data = pd.read_csv('./data/capitals.txt', delimiter=' ')
data.columns = ['city1', 'country1', 'city2', 'country2']

# print first five elements in the DataFrame
data.head(5)
city1 country1 city2 country2
0 Athens Greece Bangkok Thailand
1 Athens Greece Beijing China
2 Athens Greece Berlin Germany
3 Athens Greece Bern Switzerland
4 Athens Greece Cairo Egypt
print("dimension: {}".format(word_embeddings['Spain'].shape[0]))
dimension: 300
def cosine_similarity(A, B):
    '''
    Input:
        A: a numpy array which corresponds to a word vector
        B: A numpy array which corresponds to a word vector
    Output:
        cos: numerical number representing the cosine similarity between A and B.
    '''
    dot = np.dot(A,B)  
    norma = np.linalg.norm(A)
    normb = np.linalg.norm(B)    
    cos = dot/(norma*normb)
    return cos
king = word_embeddings['king']
queen = word_embeddings['queen']
cosine_similarity(king, queen)
0.6510957
def euclidean(A, B):
    """
    Input:
        A: a numpy array which corresponds to a word vector
        B: A numpy array which corresponds to a word vector
    Output:
        d: numerical number representing the Euclidean distance between A and B.
    """ 
    d = np.sqrt(np.sum((A-B)**2))
    return d
euclidean(king, queen)
2.4796925

Finding the country from its capital

def get_country(city1, country1, city2, embeddings, cosine_similarity=cosine_similarity):
    """
    Input:
        city1: a string (the capital city of country1)
        country1: a string (the country of capital1)
        city2: a string (the capital city of country2)
        embeddings: a dictionary where the keys are words and the values are their embeddings
    Output:
        countries: a dictionary with the most likely country and its similarity score
    """
    group = {city1,country1,city2}   # store the city1, country 1, and city 2 in a set called group
    city1_emb = embeddings[city1]   # get embeddings of city 1
    country1_emb = embeddings[country1] # get embedding of country 1   
    city2_emb = embeddings[city2]   # get embedding of city 2

    vec = country1_emb - city1_emb + city2_emb

    # Initialize the similarity to -1 (it will be replaced by similarities closer to +1)
    similarity = -1

    # initialize country to an empty string
    country = ''

    # loop through all words in the embeddings dictionary
    for word in embeddings.keys():

        # first check that the word is not already in the 'group'
        if word not in group:

            # get the word embedding
            word_emb = embeddings[word]

            # calculate cosine similarity between the analogy vector (country1 - city1 + city2) and this word's embedding
            cur_similarity = cosine_similarity(vec, word_emb)

            # if this cosine similarity is higher than the best one found so far...
            if cur_similarity > similarity:

                # update the similarity to the new, better similarity
                similarity = cur_similarity

                # store the country as a tuple, which contains the word and the similarity
                country = (word,similarity )

    return country
get_country('Athens', 'Greece', 'Lisbon', word_embeddings)
('Portugal', 0.70290697)

Model accuracy

Accuracy = number of correct predictions / total number of predictions

def get_accuracy(word_embeddings, data, get_country=get_country):
    '''
    Input:
        word_embeddings: a dictionary where the key is a word and the value is its embedding
        data: a pandas dataframe containing all the country and capital city pairs
    Output:
        accuracy: the fraction of rows where the predicted country matches country2
    '''
    # initialize num correct to zero
    num_correct = 0
    # loop through the rows of the dataframe
    for i, row in data.iterrows():    
        city1 = row['city1']       # get city1
        country1 = row['country1'] # get country1
        city2 = row['city2']       # get city2
        country2 = row['country2'] # get country2

        # use get_country to find the predicted country2
        predicted_country2, _ = get_country(city1, country1, city2, word_embeddings, cosine_similarity=cosine_similarity)

        # if the predicted country2 is the same as the actual country2...
        if predicted_country2 == country2:
            # increment the number of correct by 1
            num_correct += 1

    # get the number of rows in the data dataframe (length of dataframe)
    m = len(data)

    # calculate the accuracy by dividing the number correct by m
    accuracy = num_correct / m

    return accuracy
accuracy = get_accuracy(word_embeddings, data)
print(f"Accuracy is {accuracy:.2f}")
Accuracy is 0.92

Visualizing vectors using PCA

def compute_pca(X, n_components=2):
    """
    Input:
        X: of dimension (m,n) where each row corresponds to a word vector
        n_components: Number of components you want to keep.
    Output:
        X_reduced: the data transformed into n_components dimensions/columns
    """
    # mean center the data
    X_demeaned = X - np.mean(X,  axis=0)    
    # calculate the covariance matrix
    covariance_matrix = np.cov(X_demeaned.T)    
    # calculate eigenvectors & eigenvalues of the covariance matrix
    eigen_vals , eigen_vecs  = np.linalg.eigh(covariance_matrix)    
    
    # sort the eigenvalues in increasing order (get the indices from the sort)
    idx_sorted = np.argsort(eigen_vals) 
    
    # reverse the order so that it's from highest to lowest.
    idx_sorted_decreasing = idx_sorted[::-1]
    
    # sort the eigen values by idx_sorted_decreasing
    eigen_vals_sorted = eigen_vals[idx_sorted_decreasing]
    
    # sort eigenvectors using the idx_sorted_decreasing indices    
    eigen_vecs_sorted = eigen_vecs[:,idx_sorted_decreasing]    
        
    # select the first n_components eigenvectors
    eigen_vecs_subset = eigen_vecs_sorted [:,:n_components]
        
    # transform the data by multiplying the transpose of the eigenvectors with the transpose of the de-meaned data
    # Then take the transpose of that product.
    X_reduced = np.dot(eigen_vecs_subset.T,X_demeaned.T).T

    return X_reduced
np.random.seed(1)
X = np.random.rand(3, 10)
X_reduced = compute_pca(X, n_components=2)
print("Your original matrix was " + str(X.shape) + " and it became:")
print(X_reduced)
Your original matrix was (3, 10) and it became:
[[ 0.43437323  0.49820384]
 [ 0.42077249 -0.50351448]
 [-0.85514571  0.00531064]]
words = ['oil', 'gas', 'happy', 'sad', 'city', 'town',
         'village', 'country', 'continent', 'petroleum', 'joyful']

# given a list of words and the embeddings, it returns a matrix with all the embeddings
X = get_vectors(word_embeddings, words)

print('You have 11 words each of 300 dimensions thus X.shape is:', X.shape)
You have 11 words each of 300 dimensions thus X.shape is: (11, 300)
result = compute_pca(X, 2)
plt.scatter(result[:, 0], result[:, 1])
for i, word in enumerate(words):
    plt.annotate(word, xy=(result[i, 0] - 0.05, result[i, 1] + 0.1))

plt.show()