Examples - Person Search API
Code examples and walkthroughs using the Person Search API
Examples
All code examples are provided in Python, cURL, Ruby, Go, and JavaScript.
Basic Usage
"I want to make a query and save the results to a file."
import json
# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY
# Create a client, specifying an API key
client = PDLPY(
api_key="YOUR API KEY",
)
ES_QUERY = {
"query": {
"bool": {
"must": [
{"term": {"location_country": "mexico"}},
{"term": {"job_title_role": "health"}},
{"exists": {"field": "phone_numbers"}}
]
}
}
}
PARAMS = {
'query': ES_QUERY,
'size': 10,
'pretty': True
}
response = client.person.search(**PARAMS).json()
if response["status"] == 200:
data = response['data']
with open("my_pdl_search.jsonl", "w") as out:
for record in data:
out.write(json.dumps(record) + "\n")
print(f"successfully grabbed {len(data)} records from pdl")
print(f"{response['total']} total pdl records exist matching this query")
else:
print("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
print("Error:", response)
import json
# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY
# Create a client, specifying an API key
client = PDLPY(
api_key="YOUR API KEY",
)
SQL_QUERY = \
"""
SELECT * FROM person
WHERE location_country='mexico'
AND job_title_role='health'
AND phone_numbers IS NOT NULL;
"""
PARAMS = {
'sql': SQL_QUERY,
'size': 10,
'pretty': True
}
response = client.person.search(**PARAMS).json()
if response["status"] == 200:
data = response['data']
with open("my_pdl_search.jsonl", "w") as out:
for record in data:
out.write(json.dumps(record) + "\n")
print(f"successfully grabbed {len(data)} records from pdl")
print(f"{response['total']} total pdl records exist matching this query")
else:
print("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
print("Error:", response)
# Elasticsearch
curl -X GET 'https://api.peopledatalabs.com/v5/person/search' \
-H 'X-Api-Key: xxxx' \
--data-raw '{
"size": 10,
"query": {
"bool": {
"must": [
{"term": {"location_country": "mexico"}},
{"term": {"job_title_role": "health"}},
{"exists": {"field": "phone_numbers"}}
]
}
}
}'
# SQL
curl -X GET \
'https://api.peopledatalabs.com/v5/person/search' \
-H 'X-Api-Key: xxxx' \
--data-raw '{
"size": 10,
"sql": "SELECT * FROM person WHERE location_country='\''mexico'\'' AND job_title_role='\''health'\'' AND phone_numbers IS NOT NULL;"
}'
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';
import fs from 'fs';
const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });
const esQuery = {
query: {
bool: {
must:[
{term: {location_country: "mexico"}},
{term: {job_title_role: "health"}},
{exists: {field: "phone_numbers"}}
]
}
}
}
const params = {
searchQuery: esQuery,
size: 10,
pretty: true
}
PDLJSClient.person.search.elastic(params).then((data) => {
fs.writeFile("my_pdl_search.jsonl", Buffer.from(JSON.stringify(data.data)), (err) => {
if (err) throw err;
});
console.log(`successfully grabbed ${data.data.length} records from pdl`);
console.log(`${data["total"]} total pdl records exist matching this query`)
}).catch((error) => {
console.log("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
console.log(error);
});
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';
import fs from 'fs';
const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });
const sqlQuery = `SELECT * FROM person
WHERE location_country='mexico'
AND job_title_role='health'
AND phone_numbers IS NOT NULL;`
const params = {
searchQuery: sqlQuery,
size: 10,
pretty: true
}
PDLJSClient.person.search.sql(params).then((data) => {
fs.writeFile("my_pdl_search.jsonl", Buffer.from(JSON.stringify(data.data)), (err) => {
if (err) throw err;
});
console.log(`successfully grabbed ${data.data.length} records from pdl`);
console.log(`${data["total"]} total pdl records exist matching this query`)
}).catch((error) => {
console.log("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
console.log(error);
});
require 'json'
# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'
Peopledatalabs.api_key = 'YOUR API KEY'
ES_QUERY = {
"query": {
"bool": {
"must": [
{"term": {"location_country": "mexico"}},
{"term": {"job_title_role": "health"}},
{"exists": {"field": "phone_numbers"}}
]
}
}
}
response = Peopledatalabs::Search.people(searchType: 'elastic', query: ES_QUERY, size: 10, pretty: true)
if response['status'] == 200
data = response['data']
File.open("my_pdl_search.jsonl", "w") do |out|
data.each { |record| out.write(JSON.dump(record) + "\n") }
end
puts "successfully grabbed #{data.length()} records from pdl"
puts "#{response['total']} total pdl records exist matching this query"
else
puts "NOTE. The carrier pigeons lost motivation in flight. See error and try again."
puts "Error: #{response}"
end
require 'json'
# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'
Peopledatalabs.api_key = 'YOUR API KEY'
SQL_QUERY = \
"""
SELECT * FROM person
WHERE location_country='mexico'
AND job_title_role='health'
AND phone_numbers IS NOT NULL;
"""
response = Peopledatalabs::Search.people(searchType: 'sql', query: SQL_QUERY, size: 10, pretty: true)
if response['status'] == 200
data = response['data']
File.open("my_pdl_search.jsonl", "w") do |out|
data.each { |record| out.write(JSON.dump(record) + "\n") }
end
puts "successfully grabbed #{data.length()} records from pdl"
puts "#{response['total']} total pdl records exist matching this query"
else
puts "NOTE. The carrier pigeons lost motivation in flight. See error and try again."
puts "Error: #{response}"
end
package main
import (
"fmt"
"os"
"encoding/json"
)
import (
pdl "github.com/peopledatalabs/peopledatalabs-go"
pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)
func main() {
apiKey := "YOUR API KEY"
// Set API KEY as env variable
// apiKey := os.Getenv("API_KEY")
client := pdl.New(apiKey)
elasticSearchQuery := map[string]interface{} {
"query": map[string]interface{} {
"bool": map[string]interface{} {
"must": []map[string]interface{} {
{"term": map[string]interface{}{"location_country": "mexico"}},
{"term": map[string]interface{}{"job_title_role": "health"}},
{"exists": map[string]interface{}{"field": "phone_numbers"}},
},
},
},
}
params := pdlmodel.SearchParams {
BaseParams: pdlmodel.BaseParams {
Size: 10,
Pretty: true,
},
SearchBaseParams: pdlmodel.SearchBaseParams {
Query: elasticSearchQuery,
},
}
response, err := client.Person.Search(params)
if err == nil {
data := response.Data
out, outErr := os.Create("my_pdl_search.jsonl")
defer out.Close()
if (outErr == nil) {
for i := range data {
record, jsonErr := json.Marshal(data[i])
if (jsonErr == nil) {
out.WriteString(string(record) + "\n")
}
}
out.Sync()
}
fmt.Printf("successfully grabbed %d records from pdl\n", len(data))
fmt.Printf("%d total pdl records exist matching this query\n", response.Total)
} else {
fmt.Println("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
fmt.Println("Error:", err)
}
}
package main
import (
"fmt"
"os"
"encoding/json"
)
import (
pdl "github.com/peopledatalabs/peopledatalabs-go"
pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)
func main() {
apiKey := "YOUR API KEY"
// Set API KEY as env variable
// apiKey := os.Getenv("API_KEY")
client := pdl.New(apiKey)
sqlQuery := "SELECT * FROM person" +
" WHERE location_country='mexico'" +
" AND job_title_role='health'" +
" AND phone_numbers IS NOT NULL;"
params := pdlmodel.SearchParams {
BaseParams: pdlmodel.BaseParams {
Size: 10,
Pretty: true,
},
SearchBaseParams: pdlmodel.SearchBaseParams {
SQL: sqlQuery,
},
}
response, err := client.Person.Search(params)
if err == nil {
data := response.Data
out, outErr := os.Create("my_pdl_search.jsonl")
defer out.Close()
if (outErr == nil) {
for i := range data {
record, jsonErr := json.Marshal(data[i])
if (jsonErr == nil) {
out.WriteString(string(record) + "\n")
}
}
out.Sync()
}
fmt.Printf("successfully grabbed %d records from pdl\n", len(data))
fmt.Printf("%d total pdl records exist matching this query\n", response.Total)
} else {
fmt.Println("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
fmt.Println("Error:", err)
}
}
import requests, json
API_KEY = "YOUR API KEY" # Enter your API key
PDL_URL = "https://api.peopledatalabs.com/v5/person/search"
HEADERS = {
'Content-Type': "application/json",
'X-api-key': API_KEY
}
ES_QUERY = {
"query": {
"bool": {
"must": [
{"term": {"location_country": "mexico"}},
{"term": {"job_title_role": "health"}},
{"exists": {"field": "phone_numbers"}}
]
}
}
}
PARAMS = {
'query': json.dumps(ES_QUERY),
'size': 10,
'pretty': True
}
response = requests.get(
PDL_URL,
headers=HEADERS,
params=PARAMS
).json()
if response["status"] == 200:
data = response['data']
with open("my_pdl_search.jsonl", "w") as out:
for record in data:
out.write(json.dumps(record) + "\n")
print(f"successfully grabbed {len(data)} records from pdl")
print(f"{response['total']} total pdl records exist matching this query")
else:
print("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
print("Error:", response)
import requests, json
API_KEY = "YOUR API KEY" # Enter your API key
PDL_URL = "https://api.peopledatalabs.com/v5/person/search"
HEADERS = {
'Content-Type': "application/json",
'X-api-key': API_KEY
}
SQL_QUERY = \
"""
SELECT * FROM person
WHERE location_country='mexico'
AND job_title_role='health'
AND phone_numbers IS NOT NULL;
"""
PARAMS = {
'sql': SQL_QUERY,
'size': 10,
'pretty': True
}
response = requests.get(
PDL_URL,
headers=HEADERS,
params=PARAMS
).json()
if response["status"] == 200:
data = response['data']
with open("my_pdl_search.jsonl", "w") as out:
for record in data:
out.write(json.dumps(record) + "\n")
print(f"successfully grabbed {len(data)} records from pdl")
print(f"{response['total']} total pdl records exist matching this query")
else:
print("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
print("error:", response)
Using POST Requests
"I want to use POST requests instead of GET requests so that I can make queries with a lot of parameters."
Difference between GET and POST requests
See here for a comparison of the differences between GET and POST requests. The biggest difference is that POST requests place no limit on the amount of data you can pass in the request body.
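In practice, the difference shows up in how the query is passed. A minimal sketch in Python (assuming the same PDL_URL, HEADERS, and ES_QUERY definitions used in the full examples below):
import requests, json
# GET: the query object must be serialized to a string and sent as a URL parameter
get_response = requests.get(
PDL_URL,
headers=HEADERS,
params={'query': json.dumps(ES_QUERY), 'size': 10}
).json()
# POST: the query object can be sent as-is in the JSON request body
post_response = requests.post(
PDL_URL,
headers=HEADERS,
json={'query': ES_QUERY, 'size': 10}
).json()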
import requests, json
API_KEY = "YOUR API KEY" # Enter your API key
PDL_URL = "https://api.peopledatalabs.com/v5/person/search"
HEADERS = {
'Content-Type': "application/json",
'X-api-key': API_KEY
}
ES_QUERY = {
"query": {
"bool": {
"must": [
{"term": {"location_country": "mexico"}},
{"term": {"job_title_role": "health"}},
{"exists": {"field": "phone_numbers"}}
]
}
}
}
PARAMS = {
'query': ES_QUERY, # Unlike a GET request, the query object doesn't need to be serialized with json.dumps()
'size': 10,
'pretty': True
}
response = requests.post( # Using POST method
PDL_URL,
headers=HEADERS,
json=PARAMS # Passing the data directly as a JSON object
# data=json.dumps(PARAMS) # Alternatively, pass the data as a serialized string
).json()
if response["status"] == 200:
data = response['data']
with open("my_pdl_search.jsonl", "w") as out:
for record in data:
out.write(json.dumps(record) + "\n")
print(f"successfully grabbed {len(data)} records from pdl")
print(f"{response['total']} total pdl records exist matching this query")
else:
print("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
print("Error:", response)
import requests, json
API_KEY = "YOUR API KEY" # Enter your API key
PDL_URL = "https://api.peopledatalabs.com/v5/person/search"
HEADERS = {
'Content-Type': "application/json",
'X-api-key': API_KEY
}
SQL_QUERY = \
"""
SELECT * FROM person
WHERE location_country='mexico'
AND job_title_role='health'
AND phone_numbers IS NOT NULL;
"""
PARAMS = {
'sql': SQL_QUERY, # Unlike a GET request, this is sent in the JSON request body
'size': 10,
'pretty': True
}
response = requests.post( # Using POST method
PDL_URL,
headers=HEADERS,
json=PARAMS # Passing the data directly as a JSON object
# data=json.dumps(PARAMS) # Alternatively, pass the data as a serialized string
).json()
if response["status"] == 200:
data = response['data']
with open("my_pdl_search.jsonl", "w") as out:
for record in data:
out.write(json.dumps(record) + "\n")
print(f"successfully grabbed {len(data)} records from pdl")
print(f"{response['total']} total pdl records exist matching this query")
else:
print("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
print("Error:", response)
# Elasticsearch
curl -X POST 'https://api.peopledatalabs.com/v5/person/search' \
-H 'X-Api-Key: xxxx' \
--data-raw '{
"size": 10,
"query": {
"bool": {
"must": [
{"term": {"location_country": "mexico"}},
{"term": {"job_title_role": "health"}},
{"exists": {"field": "phone_numbers"}}
]
}
}
}'
# SQL
curl -X POST \
'https://api.peopledatalabs.com/v5/person/search' \
-H 'X-Api-Key: xxxx' \
--data-raw '{
"size": 10,
"sql": "SELECT * FROM person WHERE location_country='\''mexico'\'' AND job_title_role='\''health'\'' AND phone_numbers IS NOT NULL;"
}'
Searching Specific Datasets
"I want to run a simple query against PDL's phone dataset."
Maintaining Backwards Compatibility
The dataset parameter was introduced with the July 2021 release, which also changed the default dataset from all to resume. To maintain the same query behavior as before this change, set the dataset parameter to all, as shown in the example below.
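For instance, a minimal sketch of the parameter change (reusing the ES_QUERY definition from the examples below):
PARAMS = {
'query': ES_QUERY,
'size': 10,
'pretty': True,
'dataset': "all" # Search against all datasets, restoring the pre-July-2021 default
}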
import json
# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY
# Create a client, specifying an API key
client = PDLPY(
api_key="YOUR API KEY",
)
ES_QUERY = {
"query": {
"bool": {
"must": [
{"term": {"location_country": "mexico"}},
{"term": {"job_title_role": "health"}}
]
}
}
}
PARAMS = {
'query': ES_QUERY,
'size': 10,
'pretty': True,
'dataset': "phone" # Use search against all PDL records with a phone number
}
response = client.person.search(**PARAMS).json()
if response["status"] == 200:
data = response['data']
with open("my_pdl_search.jsonl", "w") as out:
for record in data:
out.write(json.dumps(record) + "\n")
print(f"successfully grabbed {len(data)} records from pdl")
print(f"{response['total']} total pdl records exist matching this query")
else:
print("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
print("Error:", response)
import json
# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY
# Create a client, specifying an API key
client = PDLPY(
api_key="YOUR API KEY",
)
SQL_QUERY = \
"""
SELECT * FROM person
WHERE location_country='mexico'
AND job_title_role='health'
AND phone_numbers IS NOT NULL;
"""
PARAMS = {
'sql': SQL_QUERY,
'size': 10,
'pretty': True,
'dataset': "phone" # Use all to search against all PDL records with a phone number
}
response = client.person.search(**PARAMS).json()
if response["status"] == 200:
data = response['data']
with open("my_pdl_search.jsonl", "w") as out:
for record in data:
out.write(json.dumps(record) + "\n")
print(f"successfully grabbed {len(data)} records from pdl")
print(f"{response['total']} total pdl records exist matching this query")
else:
print("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
print("Error:", response)
# Elasticsearch
curl -X GET 'https://api.peopledatalabs.com/v5/person/search' \
-H 'X-Api-Key: xxxx' \
--data-raw '{
"size": 10,
"dataset": "all",
"query": {
"bool": {
"must": [
{"term": {"location_country": "mexico"}},
{"term": {"job_title_role": "health"}},
]
}
}
}'
# SQL
curl -X GET \
'https://api.peopledatalabs.com/v5/person/search' \
-H 'X-Api-Key: xxxx' \
--data-raw '{
"size": 10,
"dataset: "phone",
"sql": "SELECT * FROM person WHERE location_country='\''mexico'\'' AND job_title_role='\''health'\'' AND phone_numbers IS NOT NULL;"
}'
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';
import fs from 'fs';
const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });
const esQuery = {
query: {
bool: {
must:[
{term: {location_country: "mexico"}},
{term: {job_title_role: "health"}},
]
}
}
}
const params = {
searchQuery: esQuery,
size: 10,
pretty: true,
dataset: "phone" // Use search against all PDL records with a phone number
}
PDLJSClient.person.search.elastic(params).then((data) => {
fs.writeFile("my_pdl_search.jsonl", Buffer.from(JSON.stringify(data.data)), (err) => {
if (err) throw err;
});
console.log(`successfully grabbed ${data.data.length} records from pdl`);
console.log(`${data["total"]} total pdl records exist matching this query`)
}).catch((error) => {
console.log("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
console.log(error);
});
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';
import fs from 'fs';
const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });
const sqlQuery = `SELECT * FROM person
WHERE location_country='mexico'
AND job_title_role='health';`
const params = {
searchQuery: sqlQuery,
size: 10,
pretty: true,
dataset: "phone" // Use search against all PDL records with a phone number
}
PDLJSClient.person.search.sql(params).then((data) => {
fs.writeFile("my_pdl_search.jsonl", Buffer.from(JSON.stringify(data.data)), (err) => {
if (err) throw err;
});
console.log(`successfully grabbed ${data.data.length} records from pdl`);
console.log(`${data["total"]} total pdl records exist matching this query`)
}).catch((error) => {
console.log("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
console.log(error);
});
require 'json'
# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'
Peopledatalabs.api_key = 'YOUR API KEY'
ES_QUERY = {
"query": {
"bool": {
"must": [
{"term": {"location_country": "mexico"}},
{"term": {"job_title_role": "health"}}
]
}
}
}
response = Peopledatalabs::Search.people(searchType: 'elastic', query: ES_QUERY, size: 10, pretty: true, 'dataset': 'phone')
if response['status'] == 200
data = response['data']
File.open("my_pdl_search.jsonl", "w") do |out|
data.each { |record| out.write(JSON.dump(record) + "\n") }
end
puts "successfully grabbed #{data.length()} records from pdl"
puts "#{response['total']} total pdl records exist matching this query"
else
puts "NOTE. The carrier pigeons lost motivation in flight. See error and try again."
puts "Error: #{response}"
end
require 'json'
# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'
Peopledatalabs.api_key = 'YOUR API KEY'
SQL_QUERY = \
"""
SELECT * FROM person
WHERE location_country='mexico'
AND job_title_role='health'
AND phone_numbers IS NOT NULL;
"""
response = Peopledatalabs::Search.people(searchType: 'sql', query: SQL_QUERY, size: 10, pretty: true, 'dataset': 'phone')
if response['status'] == 200
data = response['data']
File.open("my_pdl_search.jsonl", "w") do |out|
data.each { |record| out.write(JSON.dump(record) + "\n") }
end
puts "successfully grabbed #{data.length()} records from pdl"
puts "#{response['total']} total pdl records exist matching this query"
else
puts "NOTE. The carrier pigeons lost motivation in flight. See error and try again."
puts "Error: #{response}"
end
package main
import (
"fmt"
"os"
"encoding/json"
)
import (
pdl "github.com/peopledatalabs/peopledatalabs-go"
pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)
func main() {
apiKey := "YOUR API KEY"
// Set API KEY as env variable
// apiKey := os.Getenv("API_KEY")
client := pdl.New(apiKey)
elasticSearchQuery := map[string]interface{} {
"query": map[string]interface{} {
"bool": map[string]interface{} {
"must": []map[string]interface{} {
{"term": map[string]interface{}{"location_country": "mexico"}},
{"term": map[string]interface{}{"job_title_role": "health"}},
},
},
},
}
params := pdlmodel.SearchParams {
BaseParams: pdlmodel.BaseParams {
Size: 10,
Pretty: true,
},
SearchBaseParams: pdlmodel.SearchBaseParams {
Query: elasticSearchQuery,
Dataset: "phone", // Use search against all PDL records with a phone number
},
}
response, err := client.Person.Search(params)
if err == nil {
data := response.Data
out, outErr := os.Create("my_pdl_search.jsonl")
defer out.Close()
if (outErr == nil) {
for i := range data {
record, jsonErr := json.Marshal(data[i])
if (jsonErr == nil) {
out.WriteString(string(record) + "\n")
}
}
out.Sync()
}
fmt.Printf("successfully grabbed %d records from pdl\n", len(data))
fmt.Printf("%d total pdl records exist matching this query\n", response.Total)
} else {
fmt.Println("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
fmt.Println("Error:", err)
}
}
package main
import (
"fmt"
"os"
"encoding/json"
)
import (
pdl "github.com/peopledatalabs/peopledatalabs-go"
pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)
func main() {
apiKey := "YOUR API KEY"
// Set API KEY as env variable
// apiKey := os.Getenv("API_KEY")
client := pdl.New(apiKey)
sqlQuery := "SELECT * FROM person" +
" WHERE location_country='mexico'" +
" AND job_title_role='health';"
params := pdlmodel.SearchParams {
BaseParams: pdlmodel.BaseParams {
Size: 10,
Pretty: true,
},
SearchBaseParams: pdlmodel.SearchBaseParams {
SQL: sqlQuery,
Dataset: "phone", // Use search against all PDL records with a phone number
},
}
response, err := client.Person.Search(params)
if err == nil {
data := response.Data
out, outErr := os.Create("my_pdl_search.jsonl")
defer out.Close()
if (outErr == nil) {
for i := range data {
record, jsonErr := json.Marshal(data[i])
if (jsonErr == nil) {
out.WriteString(string(record) + "\n")
}
}
out.Sync()
}
fmt.Printf("successfully grabbed %d records from pdl\n", len(data))
fmt.Printf("%d total pdl records exist matching this query\n", response.Total)
} else {
fmt.Println("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
fmt.Println("Error:", err)
}
}
import requests, json
API_KEY = "YOUR API KEY" # Enter your API key
PDL_URL = "https://api.peopledatalabs.com/v5/person/search"
HEADERS = {
'Content-Type': "application/json",
'X-api-key': API_KEY
}
ES_QUERY = {
"query": {
"bool": {
"must": [
{"term": {"location_country": "mexico"}},
{"term": {"job_title_role": "health"}}
]
}
}
}
PARAMS = {
'query': json.dumps(ES_QUERY),
'size': 10,
'pretty': True,
'dataset': "phone" # Use search against all PDL records with a phone number
}
response = requests.get(
PDL_URL,
headers=HEADERS,
params=PARAMS
).json()
if response["status"] == 200:
data = response['data']
with open("my_pdl_search.jsonl", "w") as out:
for record in data:
out.write(json.dumps(record) + "\n")
print(f"successfully grabbed {len(data)} records from pdl")
print(f"{response['total']} total pdl records exist matching this query")
else:
print("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
print("Error:", response)
import requests, json
API_KEY = "YOUR API KEY" # Enter your API key
PDL_URL = "https://api.peopledatalabs.com/v5/person/search"
HEADERS = {
'Content-Type': "application/json",
'X-api-key': API_KEY
}
SQL_QUERY = \
"""
SELECT * FROM person
WHERE location_country='mexico'
AND job_title_role='health'
AND phone_numbers IS NOT NULL;
"""
PARAMS = {
'sql': SQL_QUERY,
'size': 10,
'pretty': True,
'dataset': "phone" # Use all to search against all PDL records with a phone number
}
response = requests.get(
PDL_URL,
headers=HEADERS,
params=PARAMS
).json()
if response["status"] == 200:
data = response['data']
with open("my_pdl_search.jsonl", "w") as out:
for record in data:
out.write(json.dumps(record) + "\n")
print(f"successfully grabbed {len(data)} records from pdl")
print(f"{response['total']} total pdl records exist matching this query")
else:
print("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
print("error:", response)
Excluding Datasets
"I want to run a simple query against all PDL datasets except the email and phone datasets."
import json
# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY
# Create a client, specifying an API key
client = PDLPY(
api_key="YOUR API KEY",
)
ES_QUERY = {
"query": {
"bool": {
"must": [
{"term": {"location_country": "mexico"}},
{"term": {"job_title_role": "health"}}
]
}
}
}
PARAMS = {
'query': ES_QUERY,
'size': 10,
'pretty': True,
'dataset': "-email,phone" # Use search against all PDL datasets EXCEPT the email and phone slices
}
response = client.person.search(**PARAMS).json()
if response["status"] == 200:
data = response['data']
with open("my_pdl_search.jsonl", "w") as out:
for record in data:
out.write(json.dumps(record) + "\n")
print(f"successfully grabbed {len(data)} records from pdl")
print(f"{response['total']} total pdl records exist matching this query")
else:
print("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
print("Error:", response)
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';
import fs from 'fs';
const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });
const esQuery = {
query: {
bool: {
must:[
{term: {location_country: "mexico"}},
{term: {job_title_role: "health"}},
]
}
}
}
const params = {
searchQuery: esQuery,
size: 10,
pretty: true,
dataset: "-email,phone" // Use search against all PDL datasets EXCEPT the email and phone slices
}
PDLJSClient.person.search.elastic(params).then((data) => {
fs.writeFile("my_pdl_search.jsonl", Buffer.from(JSON.stringify(data.data)), (err) => {
if (err) throw err;
});
console.log(`successfully grabbed ${data.data.length} records from pdl`);
console.log(`${data["total"]} total pdl records exist matching this query`)
}).catch((error) => {
console.log("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
console.log(error);
});
require 'json'
# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'
Peopledatalabs.api_key = 'YOUR API KEY'
ES_QUERY = {
"query": {
"bool": {
"must": [
{"term": {"location_country": "mexico"}},
{"term": {"job_title_role": "health"}}
]
}
}
}
response = Peopledatalabs::Search.people(searchType: 'elastic', query: ES_QUERY, size: 10, pretty: true, 'dataset': '-email,phone')
if response['status'] == 200
data = response['data']
File.open("my_pdl_search.jsonl", "w") do |out|
data.each { |record| out.write(JSON.dump(record) + "\n") }
end
puts "successfully grabbed #{data.length()} records from pdl"
puts "#{response['total']} total pdl records exist matching this query"
else
puts "NOTE. The carrier pigeons lost motivation in flight. See error and try again."
puts "Error: #{response}"
end
package main
import (
"fmt"
"os"
"encoding/json"
)
import (
pdl "github.com/peopledatalabs/peopledatalabs-go"
pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)
func main() {
apiKey := "YOUR API KEY"
// Set API KEY as env variable
// apiKey := os.Getenv("API_KEY")
client := pdl.New(apiKey)
elasticSearchQuery := map[string]interface{} {
"query": map[string]interface{} {
"bool": map[string]interface{} {
"must": []map[string]interface{} {
{"term": map[string]interface{}{"location_country": "mexico"}},
{"term": map[string]interface{}{"job_title_role": "health"}},
},
},
},
}
params := pdlmodel.SearchParams {
BaseParams: pdlmodel.BaseParams {
Size: 10,
Pretty: true,
},
SearchBaseParams: pdlmodel.SearchBaseParams {
Query: elasticSearchQuery,
Dataset: "-email,phone", // Use search against all PDL records with a phone number
},
}
response, err := client.Person.Search(params)
if err == nil {
data := response.Data
out, outErr := os.Create("my_pdl_search.jsonl")
defer out.Close()
if (outErr == nil) {
for i := range data {
record, jsonErr := json.Marshal(data[i])
if (jsonErr == nil) {
out.WriteString(string(record) + "\n")
}
}
out.Sync()
}
fmt.Printf("successfully grabbed %d records from pdl\n", len(data))
fmt.Printf("%d total pdl records exist matching this query\n", response.Total)
} else {
fmt.Println("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
fmt.Println("Error:", err)
}
}
import requests, json
API_KEY = "YOUR API KEY" # Enter your API key
PDL_URL = "https://api.peopledatalabs.com/v5/person/search"
HEADERS = {
'Content-Type': "application/json",
'X-api-key': API_KEY
}
ES_QUERY = {
"query": {
"bool": {
"must": [
{"term": {"location_country": "mexico"}},
{"term": {"job_title_role": "health"}}
]
}
}
}
PARAMS = {
'query': json.dumps(ES_QUERY),
'size': 10,
'pretty': True,
'dataset': "-email,phone" # Use search against all PDL datasets EXCEPT the email and phone slices
}
response = requests.get(
PDL_URL,
headers=HEADERS,
params=PARAMS
).json()
if response["status"] == 200:
data = response['data']
with open("my_pdl_search.jsonl", "w") as out:
for record in data:
out.write(json.dumps(record) + "\n")
print(f"successfully grabbed {len(data)} records from pdl")
print(f"{response['total']} total pdl records exist matching this query")
else:
print("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
print("Error:", response)
Bulk Retrieval
"I want to pull all current employees at Amazon and save their profiles to a CSV file."
High Credit Usage Code Below
The code example below illustrates pulling all the employee profiles at a large company and is meant primarily to demonstrate the use of the scroll_token parameter when retrieving large numbers of records. This code is mostly illustrative: it can use up a lot of credits and has no error handling. The MAX_NUM_RECORDS_LIMIT parameter in the example below sets the maximum number of profiles (and therefore credits) that will be pulled, so please set it accordingly when testing this example.
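All the examples below follow the same pagination pattern: request a batch, collect the records, and pass the returned scroll_token back on the next request until the API stops returning one. A stripped-down sketch of just that loop, using the Python SDK client and ES_QUERY from the first example below (no record limit or error handling):
all_records = []
scroll_params = {'query': ES_QUERY, 'size': 100}
while True:
    response = client.person.search(**scroll_params).json()
    if response['status'] != 200:
        break # stop on any error (real code should inspect the error details)
    all_records.extend(response['data'])
    if 'scroll_token' not in response:
        break # no token returned means all matching records were retrieved
    scroll_params['scroll_token'] = response['scroll_token']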
import json, time, csv
# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY
# Limit the number of records to pull (to prevent accidentally using up
# more credits than expected when testing out this code).
MAX_NUM_RECORDS_LIMIT = 150 # The maximum number of records to retrieve
USE_MAX_NUM_RECORDS_LIMIT = True # Set to False to pull all available records
# Create a client, specifying an API key
client = PDLPY(
api_key="YOUR API KEY",
)
ES_QUERY = {
"query": {
"bool": {
"must": [
{"term": {"job_company_id": "amazon"}}
]
}
}
}
PARAMS = {
'query': ES_QUERY,
'size': 100,
'pretty': True
}
# Pull all results in multiple batches
batch = 1
all_records = []
start_time = time.time()
found_all_records = False
continue_scrolling = True
while continue_scrolling and not found_all_records:
# Check if we reached the maximum number of records we wanted to pull
if USE_MAX_NUM_RECORDS_LIMIT:
num_records_to_request = MAX_NUM_RECORDS_LIMIT - len(all_records)
PARAMS['size'] = max(0, min(100, num_records_to_request))
if num_records_to_request == 0:
print(f"Stopping - reached maximum number of records to pull "
f"[MAX_NUM_RECORDS_LIMIT = {MAX_NUM_RECORDS_LIMIT}]")
break
# Send request
response = client.person.search(**PARAMS).json()
# Check response status code:
if response['status'] == 200:
all_records.extend(response['data'])
print(f"Retrieved {len(response['data'])} records in batch {batch} "
f"- {response['total'] - len(all_records)} records remaining")
else:
print(f"Error retrieving some records:\n\t"
f"[{response['status']} - {response['error']['type']}] "
f"{response['error']['message']}")
# Get scroll_token from response
if 'scroll_token' in response:
PARAMS['scroll_token'] = response['scroll_token']
else:
continue_scrolling = False
print(f"Unable to continue scrolling")
batch += 1
found_all_records = (len(all_records) == response['total'])
time.sleep(6) # avoid hitting rate limit thresholds
end_time = time.time()
runtime = end_time - start_time
print(f"Successfully recovered {len(all_records)} profiles in "
f"{batch} batches [{round(runtime, 2)} seconds]")
# Save profiles to csv (utility function)
def save_profiles_to_csv(profiles, filename, fields=[], delim=','):
# Define header fields
if fields == [] and len(profiles) > 0:
fields = profiles[0].keys()
# Write csv file
with open(filename, 'w') as csvfile:
writer = csv.writer(csvfile, delimiter=delim)
# Write Header:
writer.writerow(fields)
# Write Body:
count = 0
for profile in profiles:
writer.writerow([ profile[field] for field in fields ])
count += 1
print(f"Wrote {count} lines to: '{filename}'")
# Use utility function to save profiles to csv
csv_header_fields = ['work_email', 'full_name', "linkedin_url",
'job_title', 'job_company_name']
csv_filename = "all_employee_profiles.csv"
save_profiles_to_csv(all_records, csv_filename, csv_header_fields)
import json, time, csv
# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY
# Limit the number of records to pull (to prevent accidentally using up
# more credits than expected when testing out this code).
MAX_NUM_RECORDS_LIMIT = 150 # The maximum number of records to retrieve
USE_MAX_NUM_RECORDS_LIMIT = True # Set to False to pull all available records
# Create a client, specifying an API key
client = PDLPY(
api_key="YOUR API KEY",
)
SQL_QUERY = \
"""
SELECT * FROM person
WHERE job_company_id='amazon';
"""
PARAMS = {
'sql': SQL_QUERY,
'size': 100,
'pretty': True
}
# Pull all results in multiple batches
batch = 1
all_records = []
start_time = time.time()
found_all_records = False
continue_scrolling = True
while continue_scrolling and not found_all_records:
# Check if we reached the maximum number of records we wanted to pull
if USE_MAX_NUM_RECORDS_LIMIT:
num_records_to_request = MAX_NUM_RECORDS_LIMIT - len(all_records)
PARAMS['size'] = max(0, min(100, num_records_to_request))
if num_records_to_request == 0:
print(f"Stopping - reached maximum number of records to pull "
f"[MAX_NUM_RECORDS_LIMIT = {MAX_NUM_RECORDS_LIMIT}]")
break
# Send request
response = client.person.search(**PARAMS).json()
# Check response status code:
if response['status'] == 200:
all_records.extend(response['data'])
print(f"Retrieved {len(response['data'])} records in batch {batch} "
f"- {response['total'] - len(all_records)} records remaining")
else:
print(f"Error retrieving some records:\n\t"
f"[{response['status']} - {response['error']['type']}] "
f"{response['error']['message']}")
# Get scroll_token from response
if 'scroll_token' in response:
PARAMS['scroll_token'] = response['scroll_token']
else:
continue_scrolling = False
print(f"Unable to continue scrolling")
batch += 1
found_all_records = (len(all_records) == response['total'])
time.sleep(6) # avoid hitting rate limit thresholds
end_time = time.time()
runtime = end_time - start_time
print(f"Successfully recovered {len(all_records)} profiles in "
f"{batch} batches [{round(runtime, 2)} seconds]")
# Save profiles to csv (utility function)
def save_profiles_to_csv(profiles, filename, fields=[], delim=','):
# Define header fields
if fields == [] and len(profiles) > 0:
fields = profiles[0].keys()
# Write csv file
with open(filename, 'w') as csvfile:
writer = csv.writer(csvfile, delimiter=delim)
# Write Header:
writer.writerow(fields)
# Write Body:
count = 0
for profile in profiles:
writer.writerow([ profile[field] for field in fields ])
count += 1
print(f"Wrote {count} lines to: '{filename}'")
# Use utility function to save profiles to csv
csv_header_fields = ['work_email', 'full_name', "linkedin_url",
'job_title', 'job_company_name']
csv_filename = "all_employee_profiles.csv"
save_profiles_to_csv(all_records, csv_filename, csv_header_fields)
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';
// See https://www.npmjs.com/package/csv-writer
import * as csvwriter from 'csv-writer';
const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });
// Limit the number of records to pull (to prevent accidentally using up
// more credits than expected when testing out this code).
const maxNumRecordsLimit = 150; // The maximum number of records to retrieve
const useMaxNumRecordsLimit = true; // Set to false to pull all available records
const esQuery = {
query: {
bool: {
must:[
{term: {job_company_id: "amazon"}},
]
}
}
}
var params = {
searchQuery: esQuery,
size: 100,
scroll_token: null,
pretty: true
}
// Pull all results in multiple batches
var batch = 1;
var allRecords = [];
var startTime = Date.now();
var foundAllRecords = false;
var continueScrolling = true;
var numRetrieved = 0;
var paramQueue = [];
var scrollToken = null;
var numRecordsToRequest = 100;
while (numRecordsToRequest > 0) {
// Check if we reached the maximum number of records we wanted to pull
if (useMaxNumRecordsLimit) {
numRecordsToRequest = maxNumRecordsLimit - numRetrieved;
params.size = Math.max(0, Math.min(100, numRecordsToRequest));
numRetrieved += params.size;
// Add batch to the parameter queue
if (params.size > 0) {
paramQueue.push(JSON.parse(JSON.stringify(params)));
}
} else {
break;
}
}
// Run initial batch
runBatch();
function runBatch() {
// Get the parameters for the batch
let currParams = useMaxNumRecordsLimit ? paramQueue[batch-1] : params;
// Set the scroll_token from the previous batch
currParams.scroll_token = scrollToken;
batch++;
PDLJSClient.person.search.elastic(currParams).then((data) => {
Array.prototype.push.apply(allRecords, data.data);
// Get the scroll_token
if (data['scroll_token']) {
scrollToken = data['scroll_token'];
} else {
continueScrolling = false;
console.log("Unable to continue scrolling");
}
foundAllRecords = (allRecords.length == data['total']);
console.log(`Retrieved ${data.data.length} records in batch ${(batch-1)}` +
` - ${(data['total'] - allRecords.length)} records remaining`);
// Run next batch, if any
if (!foundAllRecords && (batch <= paramQueue.length || !useMaxNumRecordsLimit)) {
runBatch();
} else {
console.log(`Stopping - reached maximum number of records to pull [maxNumRecordsLimit = ` +
`${maxNumRecordsLimit}]`);
let endTime = Date.now();
let runTime = endTime - startTime;
console.log (`Successfully recovered ${allRecords.length} profiles in ` +
`${(batch-1)} batches [${Math.round(runTime/1000)} seconds]`);
// Output profiles to CSV
let csvHeaderFields = [
{id: "work_email", title: "work_email"},
{id: "full_name", title: "full_name"},
{id: "linkedin_url", title: "linkedin_url"},
{id: "job_title", title: "job_title"},
{id: "job_company_name", title: "job_company_name"}
];
let csvFilename = "all_employee_profiles.csv";
saveProfilesToCSV(allRecords, csvFilename, csvHeaderFields);
}
}).catch((error) => {
console.log(error);
});
}
// Write out CSV file using csv-writer (https://www.npmjs.com/package/csv-writer)
// $ npm i -s csv-writer
function saveProfilesToCSV(profiles, filename, fields) {
const createCsvWriter = csvwriter.createObjectCsvWriter;
const csvWriter = createCsvWriter({
path: filename,
header: fields
});
let data = [];
for (let i = 0; i < profiles.length; i++) {
let record = profiles[i];
data[i] = {};
for (let field in fields) {
data[i][fields[field].id] = record[fields[field].id];
}
}
csvWriter
.writeRecords(data)
.then(()=> console.log('The CSV file was written successfully'));
}
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';
// See https://www.npmjs.com/package/csv-writer
import * as csvwriter from 'csv-writer';
const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });
// Limit the number of records to pull (to prevent accidentally using up
// more credits than expected when testing out this code).
const maxNumRecordsLimit = 150; // The maximum number of records to retrieve
const useMaxNumRecordsLimit = true; // Set to false to pull all available records
const sqlQuery = `SELECT * FROM person
WHERE job_company_id='amazon';`;
var params = {
searchQuery: sqlQuery,
size: 100,
scroll_token: null,
pretty: true
}
// Pull all results in multiple batches
var batch = 1;
var allRecords = [];
var startTime = Date.now();
var foundAllRecords = false;
var continueScrolling = true;
var numRetrieved = 0;
var paramQueue = [];
var scrollToken = null;
var numRecordsToRequest = 100;
while (numRecordsToRequest > 0) {
// Check if we reached the maximum number of records we wanted to pull
if (useMaxNumRecordsLimit) {
numRecordsToRequest = maxNumRecordsLimit - numRetrieved;
params.size = Math.max(0, Math.min(100, numRecordsToRequest));
numRetrieved += params.size;
// Add batch to the parameter queue
if (params.size > 0) {
paramQueue.push(JSON.parse(JSON.stringify(params)));
}
} else {
break;
}
}
// Run initial batch
runBatch();
function runBatch() {
// Get the parameters for the batch
let currParams = useMaxNumRecordsLimit ? paramQueue[batch-1] : params;
// Set the scroll_token from the previous batch
currParams.scroll_token = scrollToken;
batch++;
PDLJSClient.person.search.sql(currParams).then((data) => {
Array.prototype.push.apply(allRecords, data.data);
// Get the scroll_token
if (data['scroll_token']) {
scrollToken = data['scroll_token'];
} else {
continueScrolling = false;
console.log("Unable to continue scrolling");
}
foundAllRecords = (allRecords.length == data['total']);
console.log(`Retrieved ${data.data.length} records in batch ${(batch-1)}` +
` - ${(data['total'] - allRecords.length)} records remaining`);
// Run next batch, if any
if (!foundAllRecords && (batch <= paramQueue.length || !useMaxNumRecordsLimit)) {
runBatch();
} else {
console.log(`Stopping - reached maximum number of records to pull [maxNumRecordsLimit = ` +
`${maxNumRecordsLimit}]`);
let endTime = Date.now();
let runTime = endTime - startTime;
console.log (`Successfully recovered ${allRecords.length} profiles in ` +
`${(batch-1)} batches [${Math.round(runTime/1000)} seconds]`);
// Output profiles to CSV
let csvHeaderFields = [
{id: "work_email", title: "work_email"},
{id: "full_name", title: "full_name"},
{id: "linkedin_url", title: "linkedin_url"},
{id: "job_title", title: "job_title"},
{id: "job_company_name", title: "job_company_name"}
];
let csvFilename = "all_employee_profiles.csv";
saveProfilesToCSV(allRecords, csvFilename, csvHeaderFields);
}
}).catch((error) => {
console.log(error);
});
}
// Write out CSV file using csv-writer (https://www.npmjs.com/package/csv-writer)
// $ npm i -s csv-writer
function saveProfilesToCSV(profiles, filename, fields) {
const createCsvWriter = csvwriter.createObjectCsvWriter;
const csvWriter = createCsvWriter({
path: filename,
header: fields
});
let data = [];
for (let i = 0; i < profiles.length; i++) {
let record = profiles[i];
data[i] = {};
for (let field in fields) {
data[i][fields[field].id] = record[fields[field].id];
}
}
csvWriter
.writeRecords(data)
.then(()=> console.log('The CSV file was written successfully'));
}
require 'json'
require 'csv'
# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'
Peopledatalabs.api_key = 'YOUR API KEY'
# Limit the number of records to pull (to prevent accidentally using up
# more credits than expected when testing out this code).
MAX_NUM_RECORDS_LIMIT = 150 # The maximum number of records to retrieve
USE_MAX_NUM_RECORDS_LIMIT = true # Set to false to pull all available records
ES_QUERY = {
"query": {
"bool": {
"must": [
{"term": {"job_company_id": "amazon"}}
]
}
}
}
# Pull all results in multiple batches
batch = 1
all_records = []
start_time = Time.now
found_all_records = false
continue_scrolling = true
size = 100 # Default batch size (used if USE_MAX_NUM_RECORDS_LIMIT is false)
scroll_token = {}
while continue_scrolling && !found_all_records do
# Check if we reached the maximum number of records we wanted to pull
if USE_MAX_NUM_RECORDS_LIMIT
num_records_to_request = MAX_NUM_RECORDS_LIMIT - all_records.length()
size = [0, [100, num_records_to_request].min].max
if num_records_to_request == 0
puts "Stopping - reached maximum number of records to pull "
puts "[MAX_NUM_RECORDS_LIMIT = #{MAX_NUM_RECORDS_LIMIT}]"
break
end
end
# Send request
response = Peopledatalabs::Search.people(searchType: 'elastic', query: ES_QUERY, size: size, scroll_token: scroll_token, pretty: true)
# Check response status code:
if response['status'] == 200
all_records += response['data']
puts "Retrieved #{response['data'].length()} records in batch #{batch} "
puts "- #{response['total'] - all_records.length()} records remaining"
else
puts "Error retrieving some records:\n\t"
puts "[#{response['status']} - #{response['error']['type']}] "
puts response['error']['message']
end
# Get scroll_token from response
if response.key?('scroll_token')
scroll_token = response['scroll_token']
else
continue_scrolling = false
puts "Unable to continue scrolling"
end
batch += 1
found_all_records = (all_records.length() == response['total'])
sleep(6) # avoid hitting rate limit thresholds
end
end_time = Time.now
runtime = end_time - start_time
puts "Successfully recovered #{all_records.length()} profiles in "
puts "#{batch} batches [#{runtime.round(2)} seconds]"
# Save profiles to csv (utility function)
def save_profiles_to_csv(profiles, filename, fields=[], delim=',')
# Define header fields
if fields == [] && profiles.length() > 0
fields = profiles[0].keys
end
count = 0
# Write csv file
CSV.open(filename, 'w') do |writer|
# Write Header:
writer << fields
# Write Body:
profiles.each do |profile|
record = []
fields.each do |field|
record << profile[field]
count += 1
end
writer << record
end
end
puts "Wrote #{count} lines to: '#{filename}'"
end
# Use utility function to save profiles to csv
csv_header_fields = ['work_email', 'full_name', "linkedin_url",
'job_title', 'job_company_name']
csv_filename = "all_company_profiles.csv"
save_profiles_to_csv(all_records, csv_filename, csv_header_fields)
require 'json'
require 'csv'
# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'
Peopledatalabs.api_key = 'YOUR API KEY'
# Limit the number of records to pull (to prevent accidentally using up
# more credits than expected when testing out this code).
MAX_NUM_RECORDS_LIMIT = 150 # The maximum number of records to retrieve
USE_MAX_NUM_RECORDS_LIMIT = true # Set to false to pull all available records
SQL_QUERY = \
"""
SELECT * FROM person
WHERE job_company_id='amazon';
"""
# Pull all results in multiple batches
batch = 1
all_records = []
start_time = Time.now
found_all_records = false
continue_scrolling = true
size = 100 # Default batch size (used if USE_MAX_NUM_RECORDS_LIMIT is false)
scroll_token = {}
while continue_scrolling && !found_all_records do
# Check if we reached the maximum number of records we wanted to pull
if USE_MAX_NUM_RECORDS_LIMIT
num_records_to_request = MAX_NUM_RECORDS_LIMIT - all_records.length()
size = [0, [100, num_records_to_request].min].max
if num_records_to_request == 0
puts "Stopping - reached maximum number of records to pull "
puts "[MAX_NUM_RECORDS_LIMIT = #{MAX_NUM_RECORDS_LIMIT}]"
break
end
end
# Send request
response = Peopledatalabs::Search.people(searchType: 'sql', query: SQL_QUERY, size: size, scroll_token: scroll_token, pretty: true)
# Check response status code:
if response['status'] == 200
all_records += response['data']
puts "Retrieved #{response['data'].length()} records in batch #{batch} "
puts "- #{response['total'] - all_records.length()} records remaining"
else
puts "Error retrieving some records:\n\t"
puts "[#{response['status']} - #{response['error']['type']}] "
puts response['error']['message']
end
# Get scroll_token from response
if response.key?('scroll_token')
scroll_token = response['scroll_token']
else
continue_scrolling = false
puts "Unable to continue scrolling"
end
batch += 1
found_all_records = (all_records.length() == response['total'])
sleep(6) # avoid hitting rate limit thresholds
end
end_time = Time.now
runtime = end_time - start_time
puts "Successfully recovered #{all_records.length()} profiles in "
puts "#{batch} batches [#{runtime.round(2)} seconds]"
# Save profiles to csv (utility function)
def save_profiles_to_csv(profiles, filename, fields=[], delim=',')
# Define header fields
if fields == [] && profiles.length() > 0
fields = profiles[0].keys
end
count = 0
# Write csv file
CSV.open(filename, 'w') do |writer|
# Write Header:
writer << fields
# Write Body:
profiles.each do |profile|
record = []
fields.each do |field|
record << profile[field]
count += 1
end
writer << record
end
end
puts "Wrote #{count} lines to: '#{filename}'"
end
# Use utility function to save profiles to csv
csv_header_fields = ['work_email', 'full_name', "linkedin_url",
'job_title', 'job_company_name']
csv_filename = "all_company_profiles.csv"
save_profiles_to_csv(all_records, csv_filename, csv_header_fields)
package main
import (
"fmt"
"time"
"os"
"math"
"reflect"
"encoding/json"
"encoding/csv"
)
import (
pdl "github.com/peopledatalabs/peopledatalabs-go"
pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)
func main() {
apiKey := "YOUR API KEY"
// Set API KEY as env variable
// apiKey := os.Getenv("API_KEY")
client := pdl.New(apiKey)
// Limit the number of records to pull (to prevent accidentally using up
// more credits than expected when testing out this code).
const maxNumRecordsLimit = 150 // The maximum number of records to retrieve
const useMaxNumRecordsLimit = true // Set to False to pull all available records
elasticSearchQuery := map[string]interface{} {
"query": map[string]interface{} {
"bool": map[string]interface{} {
"must": []map[string]interface{} {
{"term": map[string]interface{}{"job_company_id": "amazon"}},
},
},
},
}
p := pdlmodel.SearchParams {
BaseParams: pdlmodel.BaseParams {
Size: 100,
Pretty: true,
},
SearchBaseParams: pdlmodel.SearchBaseParams {
Query: elasticSearchQuery,
},
}
// Pull all results in multiple batches
batch := 1
var allRecords []pdlmodel.Person
startTime := time.Now()
foundAllRecords := false
continueScrolling := true
var numRecordsToRequest int
for continueScrolling && !foundAllRecords {
// Check if we reached the maximum number of records we wanted to pull
if useMaxNumRecordsLimit {
numRecordsToRequest = maxNumRecordsLimit - len(allRecords)
p.BaseParams.Size = (int) (math.Max(0.0, math.Min(100.0, (float64) (numRecordsToRequest))))
if numRecordsToRequest == 0 {
fmt.Printf("Stopping - reached maximum number of records to pull " +
"[MAX_NUM_RECORDS_LIMIT = %d]\n", maxNumRecordsLimit)
break
}
}
// Send request
response, err := client.Person.Search(p)
// Check response status code:
if err == nil {
fmt.Printf("Retrieved %d records in batch %d - %d records remaining\n",
len(response.Data), batch, response.Total - len(allRecords))
} else {
fmt.Println("Error retrieving some records:\n\t",
err)
}
// Get scroll_token from response
var data map[string]interface{}
jsonResponse, jsonErr := json.Marshal(response)
if jsonErr == nil {
json.Unmarshal(jsonResponse, &data)
if scrollToken, ok := data["scroll_token"]; ok {
p.SearchBaseParams.ScrollToken = fmt.Sprintf("%v", scrollToken)
} else {
continueScrolling = false
fmt.Println("Unable to continue scrolling")
}
allRecords = append(allRecords, response.Data...)
}
batch++
foundAllRecords = (len(allRecords) == response.Total)
time.Sleep(6 * time.Second) // avoid hitting rate limit thresholds
}
endTime := time.Now()
runtime := endTime.Sub(startTime).Seconds()
fmt.Printf("Successfully recovered %d profiles in %d batches [%d seconds]\n",
len(allRecords), batch, (int) (math.Round((float64) (runtime))))
// Use utility function to save profiles to csv
csvHeaderFields := []string{"work_email", "full_name", "linkedin_url",
"job_title", "job_company_name"}
csvFilename := "all_employee_profiles.csv"
saveProfilesToCsv(allRecords, csvFilename, csvHeaderFields, ",")
}
// Save profiles to csv (utility function)
func saveProfilesToCsv(profiles []pdlmodel.Person, filename string, fields []string, delim string) {
// Define header fields
if fields == nil && len(profiles) > 0 {
e := reflect.ValueOf(&(profiles[0])).Elem()
for i := 0; i < e.NumField(); i++ {
fields = append(fields, e.Type().Field(i).Name)
}
}
// Write csv file
csvFile, err := os.Create(filename)
if err == nil {
csvwriter := csv.NewWriter(csvFile)
defer csvwriter.Flush()
// Write Header
csvwriter.Write(fields)
// Write Body:
count := 0
for i := range profiles {
var data map[string]interface{}
jsonResponse, jsonErr := json.Marshal(profiles[i])
if jsonErr == nil {
json.Unmarshal(jsonResponse, &data)
var record []string
for j := range fields {
record = append(record, fmt.Sprintf("%v", data[fields[j]]))
}
csvwriter.Write(record)
count++
}
}
fmt.Printf("Wrote %d lines to: %s\n", count, filename)
}
}
package main
import (
"fmt"
"time"
"os"
"math"
"reflect"
"encoding/json"
"encoding/csv"
)
import (
pdl "github.com/peopledatalabs/peopledatalabs-go"
pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)
func main() {
apiKey := "YOUR API KEY"
// Set API KEY as env variable
// apiKey := os.Getenv("API_KEY")
client := pdl.New(apiKey)
// Limit the number of records to pull (to prevent accidentally using up
// more credits than expected when testing out this code).
const maxNumRecordsLimit = 150 // The maximum number of records to retrieve
const useMaxNumRecordsLimit = true // Set to False to pull all available records
sqlQuery := "SELECT * FROM person" +
" WHERE job_company_id='amazon'"
p := pdlmodel.SearchParams {
BaseParams: pdlmodel.BaseParams {
Size: 100,
Pretty: true,
},
SearchBaseParams: pdlmodel.SearchBaseParams {
SQL: sqlQuery,
},
}
// Pull all results in multiple batches
batch := 1
var allRecords []pdlmodel.Person
startTime := time.Now()
foundAllRecords := false
continueScrolling := true
var numRecordsToRequest int
for continueScrolling && !foundAllRecords {
// Check if we reached the maximum number of records we wanted to pull
if useMaxNumRecordsLimit {
numRecordsToRequest = maxNumRecordsLimit - len(allRecords)
p.BaseParams.Size = (int) (math.Max(0.0, math.Min(100.0, (float64) (numRecordsToRequest))))
if numRecordsToRequest == 0 {
fmt.Printf("Stopping - reached maximum number of records to pull " +
"[MAX_NUM_RECORDS_LIMIT = %d]\n", maxNumRecordsLimit)
break
}
}
// Send request
response, err := client.Person.Search(p)
// Check response status code:
if err == nil {
fmt.Printf("Retrieved %d records in batch %d - %d records remaining\n",
len(response.Data), batch, response.Total - len(allRecords))
} else {
fmt.Println("Error retrieving some records:\n\t",
err)
}
// Get scroll_token from response
var data map[string]interface{}
jsonResponse, jsonErr := json.Marshal(response)
if jsonErr == nil {
json.Unmarshal(jsonResponse, &data)
if scrollToken, ok := data["scroll_token"]; ok {
p.SearchBaseParams.ScrollToken = fmt.Sprintf("%v", scrollToken)
} else {
continueScrolling = false
fmt.Println("Unable to continue scrolling")
}
allRecords = append(allRecords, response.Data...)
}
batch++
foundAllRecords = (len(allRecords) == response.Total)
time.Sleep(6 * time.Second) // avoid hitting rate limit thresholds
}
endTime := time.Now()
runtime := endTime.Sub(startTime).Seconds()
fmt.Printf("Successfully recovered %d profiles in %d batches [%d seconds]\n",
len(allRecords), batch, (int) (math.Round((float64) (runtime))))
// Use utility function to save profiles to csv
csvHeaderFields := []string{"work_email", "full_name", "linkedin_url",
"job_title", "job_company_name"}
csvFilename := "all_employee_profiles.csv"
saveProfilesToCsv(allRecords, csvFilename, csvHeaderFields, ",")
}
// Save profiles to csv (utility function)
func saveProfilesToCsv(profiles []pdlmodel.Person, filename string, fields []string, delim string) {
// Define header fields
if fields == nil && len(profiles) > 0 {
e := reflect.ValueOf(&(profiles[0])).Elem()
for i := 0; i < e.NumField(); i++ {
fields = append(fields, e.Type().Field(i).Name)
}
}
// Write csv file
csvFile, err := os.Create(filename)
if err == nil {
csvwriter := csv.NewWriter(csvFile)
defer csvwriter.Flush()
// Write Header
csvwriter.Write(fields)
// Write Body:
count := 0
for i := range profiles {
var data map[string]interface{}
jsonResponse, jsonErr := json.Marshal(profiles[i])
if jsonErr == nil {
json.Unmarshal(jsonResponse, &data)
var record []string
for j := range fields {
record = append(record, fmt.Sprintf("%v", data[fields[j]]))
}
csvwriter.Write(record)
count++
}
}
fmt.Printf("Wrote %d lines to: %s\n", count, filename)
}
}
import requests, json, time, csv
API_KEY = "YOUR API KEY" # Enter your API key
# Limit the number of records to pull (to prevent accidentally using up
# more credits than expected when testing out this code).
MAX_NUM_RECORDS_LIMIT = 150 # The maximum number of records to retrieve
USE_MAX_NUM_RECORDS_LIMIT = True # Set to False to pull all available records
PDL_URL = "https://api.peopledatalabs.com/v5/person/search"
HEADERS = {
'Content-Type': "application/json",
'X-api-key': API_KEY
}
SQL_QUERY = \
"""
SELECT * FROM person
WHERE job_company_id='amazon';
"""
PARAMS = {
'sql': SQL_QUERY,
'size': 100,
'pretty': True
}
# Pull all results in multiple batches
batch = 1
all_records = []
start_time = time.time()
found_all_records = False
continue_scrolling = True
while continue_scrolling and not found_all_records:
# Check if we reached the maximum number of records we wanted to pull
if USE_MAX_NUM_RECORDS_LIMIT:
num_records_to_request = MAX_NUM_RECORDS_LIMIT - len(all_records)
PARAMS['size'] = max(0, min(100, num_records_to_request))
if num_records_to_request == 0:
print(f"Stopping - reached maximum number of records to pull "
f"[MAX_NUM_RECORDS_LIMIT = {MAX_NUM_RECORDS_LIMIT}]")
break
# Send request
response = requests.get(
PDL_URL,
headers=HEADERS,
params=PARAMS
).json()
# Check response status code:
if response['status'] == 200:
all_records.extend(response['data'])
print(f"Retrieved {len(response['data'])} records in batch {batch} "
f"- {response['total'] - len(all_records)} records remaining")
else:
print(f"Error retrieving some records:\n\t"
f"[{response['status']} - {response['error']['type']}] "
f"{response['error']['message']}")
# Get scroll_token from response
if 'scroll_token' in response:
PARAMS['scroll_token'] = response['scroll_token']
else:
continue_scrolling = False
print(f"Unable to continue scrolling")
batch += 1
found_all_records = (len(all_records) == response['total'])
time.sleep(6) # avoid hitting rate limit thresholds
end_time = time.time()
runtime = end_time - start_time
print(f"Successfully recovered {len(all_records)} profiles in "
f"{batch} batches [{round(runtime, 2)} seconds]")
# Save profiles to csv (utility function)
def save_profiles_to_csv(profiles, filename, fields=None, delim=','):
    # Define header fields (a None default avoids a shared mutable argument)
    if not fields and len(profiles) > 0:
        fields = profiles[0].keys()
# Write csv file
with open(filename, 'w') as csvfile:
writer = csv.writer(csvfile, delimiter=delim)
# Write Header:
writer.writerow(fields)
# Write Body:
count = 0
for profile in profiles:
writer.writerow([ profile[field] for field in fields ])
count += 1
print(f"Wrote {count} lines to: '{filename}'")
# Use utility function to save profiles to csv
csv_header_fields = ['work_email', 'full_name', "linkedin_url",
'job_title', 'job_company_name']
csv_filename = "all_employee_profiles.csv"
save_profiles_to_csv(all_records, csv_filename, csv_header_fields)
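If you also want the complete, untrimmed records (the CSV utility keeps only a few columns), the same JSONL approach from the basic examples works on the accumulated list. A minimal sketch; the filename is arbitrary:

import json

def save_profiles_to_jsonl(profiles, filename="all_employee_profiles.jsonl"):
    # Write one full JSON record per line, preserving every field
    with open(filename, "w") as out:
        for record in profiles:
            out.write(json.dumps(record) + "\n")

# e.g. save_profiles_to_jsonl(all_records)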
import requests, json, time, csv
API_KEY = "YOUR API KEY"  # Enter your API key
# Limit the number of records to pull (to prevent accidentally using up
# more credits than expected when testing out this code).
MAX_NUM_RECORDS_LIMIT = 150 # The maximum number of records to retrieve
USE_MAX_NUM_RECORDS_LIMIT = True # Set to False to pull all available records
PDL_URL = "https://api.peopledatalabs.com/v5/person/search"
HEADERS = {
'Content-Type': "application/json",
'X-api-key': API_KEY
}
ES_QUERY = {
"query": {
"bool": {
"must": [
{"term": {"job_company_id": "amazon"}}
]
}
}
}
PARAMS = {
'query': json.dumps(ES_QUERY),
'size': 100,
'pretty': True
}
# Pull all results in multiple batches
batch = 1
all_records = []
start_time = time.time()
found_all_records = False
continue_scrolling = True
while continue_scrolling and not found_all_records:
# Check if we reached the maximum number of records we wanted to pull
if USE_MAX_NUM_RECORDS_LIMIT:
num_records_to_request = MAX_NUM_RECORDS_LIMIT - len(all_records)
PARAMS['size'] = max(0, min(100, num_records_to_request))
if num_records_to_request == 0:
print(f"Stopping - reached maximum number of records to pull "
f"[MAX_NUM_RECORDS_LIMIT = {MAX_NUM_RECORDS_LIMIT}]")
break
# Send request
response = requests.get(
PDL_URL,
headers=HEADERS,
params=PARAMS
).json()
# Check response status code:
if response['status'] == 200:
all_records.extend(response['data'])
print(f"Retrieved {len(response['data'])} records in batch {batch} "
f"- {response['total'] - len(all_records)} records remaining")
else:
print(f"Error retrieving some records:\n\t"
f"[{response['status']} - {response['error']['type']}] "
f"{response['error']['message']}")
# Get scroll_token from response
if 'scroll_token' in response:
PARAMS['scroll_token'] = response['scroll_token']
else:
continue_scrolling = False
print(f"Unable to continue scrolling")
batch += 1
found_all_records = (len(all_records) == response['total'])
time.sleep(6) # avoid hitting rate limit thresholds
end_time = time.time()
runtime = end_time - start_time
print(f"Successfully recovered {len(all_records)} profiles in "
f"{batch} batches [{round(runtime, 2)} seconds]")
# Save profiles to csv (utility function)
def save_profiles_to_csv(profiles, filename, fields=None, delim=','):
    # Define header fields (a None default avoids a shared mutable argument)
    if not fields and len(profiles) > 0:
        fields = profiles[0].keys()
# Write csv file
with open(filename, 'w') as csvfile:
writer = csv.writer(csvfile, delimiter=delim)
# Write Header:
writer.writerow(fields)
# Write Body:
count = 0
for profile in profiles:
writer.writerow([ profile[field] for field in fields ])
count += 1
print(f"Wrote {count} lines to: '{filename}'")
# Use utility function to save profiles to csv
csv_header_fields = ['work_email', 'full_name', "linkedin_url",
'job_title', 'job_company_name']
csv_filename = "all_employee_profiles.csv"
save_profiles_to_csv(all_records, csv_filename, csv_header_fields)
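For long pulls it can help to checkpoint the scroll position so an interrupted run can resume instead of restarting (and re-spending credits). A minimal sketch, assuming the saved scroll_token is still honored by the API when you resume; the checkpoint file and helper functions are illustrative, not part of the API:

import json, os

CHECKPOINT_FILE = "scroll_checkpoint.json"  # hypothetical local file

def save_checkpoint(scroll_token, num_records):
    # Persist the last scroll_token after each successful batch
    with open(CHECKPOINT_FILE, "w") as f:
        json.dump({"scroll_token": scroll_token, "num_records": num_records}, f)

def load_checkpoint():
    # Return the saved scroll_token, or None to start a fresh scroll
    if os.path.exists(CHECKPOINT_FILE):
        with open(CHECKPOINT_FILE) as f:
            return json.load(f).get("scroll_token")
    return None

# In the loop above: seed PARAMS['scroll_token'] = load_checkpoint() before starting,
# and call save_checkpoint(PARAMS['scroll_token'], len(all_records)) after each batch.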
Sales Prospecting
"I want to email engineering leaders at stripe.com, plaid.com, xignite.com and square.com, to reach out to them about my product."
import json
# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY
DESIRED_COMPANY_DOMAINS = [
'stripe.com', 'plaid.com', 'xignite.com', 'square.com'
]
# Create a client, specifying an API key
client = PDLPY(
api_key="YOUR API KEY",
)
ES_QUERY = {
"query": {
"bool": {
"must": [
{"terms": {"job_company_website": DESIRED_COMPANY_DOMAINS}},
{"term": {"job_title_role": "engineering"}},
{"terms": {"job_title_levels": ["vp", "director", "manager"]}},
{"exists": {"field": "work_email"}}
]
}
}
}
PARAMS = {
'query': ES_QUERY,
'size': 100
}
response = client.person.search(**PARAMS).json()
if response["status"] == 200:
for record in response['data']:
# bring in leads and make $$$
print(
record['work_email'],
record['full_name'],
record['job_title'],
record['job_company_name']
)
print(f"successfully grabbed {len(response['data'])} records from pdl")
print(f"{response['total']} total pdl records exist matching this query")
else:
print("NOTE. The eager beaver was not so eager. See error and try again.")
print("error:", response)
import json
# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY
DESIRED_COMPANY_DOMAINS = [
'stripe.com', 'plaid.com', 'xignite.com', 'square.com'
]
COMPANY_DOMAINS_STRING_REP = ", ".join(
(f"'{site}'" for site in DESIRED_COMPANY_DOMAINS)
)
# Create a client, specifying an API key
client = PDLPY(
api_key="YOUR API KEY",
)
SQL_QUERY = \
f"""
SELECT * FROM person
WHERE job_company_website IN ({COMPANY_DOMAINS_STRING_REP})
AND job_title_role='engineering'
AND job_title_levels IN ('vp', 'director', 'manager')
AND work_email IS NOT NULL;
"""
PARAMS = {
'sql': SQL_QUERY,
'size': 100
}
response = client.person.search(**PARAMS).json()
if response["status"] == 200:
for record in response['data']:
# bring in leads and make $$$
print(
record['work_email'],
record['full_name'],
record['job_title'],
record['job_company_name']
)
print(f"successfully grabbed {len(response['data'])} records from pdl")
print(f"{response['total']} total pdl records exist matching this query")
else:
print("NOTE. The eager beaver was not so eager. See error and try again.")
print("error:", response)
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';
const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });
const desiredCompanyDomains = [
"stripe.com", "plaid.com", "xignite.com", "square.com"
];
const esQuery = {
query: {
bool: {
must:[
{terms: {job_company_website: desiredCompanyDomains}},
{term: {job_title_role: "engineering"}},
{terms: {job_title_levels: ["vp", "director", "manager"]}},
{exists: {field: "work_email"}}
]
}
}
}
const params = {
searchQuery: esQuery,
size: 100
}
PDLJSClient.person.search.elastic(params).then((data) => {
for (const record of data.data) {
// bring in leads and make $$$
console.log(
record["work_email"],
record["full_name"],
record["job_title"],
record["job_company_name"],
)
}
console.log(`successfully grabbed ${data.data.length} records from pdl`);
console.log(`${data["total"]} total pdl records exist matching this query`)
}).catch((error) => {
console.log("NOTE. The eager beaver was not so eager. See error and try again.")
console.log(error);
});
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';
const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });
const desiredCompanyDomains = [
"stripe.com", "plaid.com", "xignite.com", "square.com"
];
var companyStringRep = "'" + desiredCompanyDomains.join("', '") + "'";
const sqlQuery = `SELECT * FROM person
WHERE job_company_website IN (${companyStringRep})
AND job_title_role='engineering'
AND job_title_levels IN ('vp', 'director', 'manager')
AND work_email IS NOT NULL;`
const params = {
searchQuery: sqlQuery,
size: 100
}
PDLJSClient.person.search.sql(params).then((data) => {
for (const record of data.data) {
// bring in leads and make $$$
console.log(
record["work_email"],
record["full_name"],
record["job_title"],
record["job_company_name"],
)
}
console.log(`successfully grabbed ${data.data.length} records from pdl`);
console.log(`${data["total"]} total pdl records exist matching this query`)
}).catch((error) => {
console.log("NOTE. The eager beaver was not so eager. See error and try again.")
console.log(error);
});
require 'json'
# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'
Peopledatalabs.api_key = 'YOUR API KEY'
DESIRED_COMPANY_DOMAINS = [
'stripe.com', 'plaid.com', 'xignite.com', 'square.com'
]
ES_QUERY = {
"query": {
"bool": {
"must": [
{"terms": {"job_company_website": DESIRED_COMPANY_DOMAINS}},
{"term": {"job_title_role": "engineering"}},
{"terms": {"job_title_levels": ["vp", "director", "manager"]}},
{"exists": {"field": "work_email"}}
]
}
}
}
response = Peopledatalabs::Search.people(searchType: 'elastic', query: ES_QUERY, size: 100)
if response['status'] == 200
data = response['data']
data.each do |record|
# bring in leads and make $$$
puts "#{record['work_email']} \
#{record['full_name']} \
#{record['job_title']} \
#{record['job_company_name']}"
end
puts "successfully grabbed #{data.length()} records from pdl"
puts "#{response['total']} total pdl records exist matching this query"
else
puts "NOTE. The eager beaver was not so eager. See error and try again."
puts "Error: #{response}"
end
require 'json'
# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'
Peopledatalabs.api_key = 'YOUR API KEY'
COMPANY_DOMAINS_STRING_REP = "'stripe.com', 'plaid.com', 'xignite.com', 'square.com'"
SQL_QUERY = <<~SQL
  SELECT * FROM person
  WHERE job_company_website IN (#{COMPANY_DOMAINS_STRING_REP})
  AND job_title_role='engineering'
  AND job_title_levels IN ('vp', 'director', 'manager')
  AND work_email IS NOT NULL;
SQL
response = Peopledatalabs::Search.people(searchType: 'sql', query: SQL_QUERY, size: 100)
if response['status'] == 200
data = response['data']
data.each do |record|
# bring in leads and make $$$
puts "#{record['work_email']} \
#{record['full_name']} \
#{record['job_title']} \
#{record['job_company_name']}"
end
puts "successfully grabbed #{data.length()} records from pdl"
puts "#{response['total']} total pdl records exist matching this query"
else
puts "NOTE. The eager beaver was not so eager. See error and try again."
puts "Error: #{response}"
end
package main
import (
"fmt"
)
import (
pdl "github.com/peopledatalabs/peopledatalabs-go"
pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)
func main() {
apiKey := "YOUR API KEY"
// Set API KEY as env variable
// apiKey := os.Getenv("API_KEY")
client := pdl.New(apiKey)
desiredCompanyDomains := []string{"stripe.com", "plaid.com", "xignite.com", "square.com"}
elasticSearchQuery := map[string]interface{} {
"query": map[string]interface{} {
"bool": map[string]interface{} {
"must": []map[string]interface{} {
{"terms": map[string]interface{}{"job_company_website": desiredCompanyDomains}},
{"term": map[string]interface{}{"job_title_role": "engineering"}},
{"terms": map[string]interface{}{"job_title_levels": []string{"vp", "director", "manager"}}},
{"exists": map[string]interface{}{"field": "work_email"}},
},
},
},
}
params := pdlmodel.SearchParams {
BaseParams: pdlmodel.BaseParams {
Size: 100,
},
SearchBaseParams: pdlmodel.SearchBaseParams {
Query: elasticSearchQuery,
},
}
response, err := client.Person.Search(params)
if err == nil {
for i := range response.Data {
record := response.Data[i]
fmt.Println(record.WorkEmail, record.FullName, record.JobTitle, record.JobCompanyName)
}
fmt.Printf("successfully grabbed %d records from pdl\n", len(response.Data))
fmt.Printf("%d total pdl records exist matching this query\n", response.Total)
} else {
fmt.Println("NOTE. The eager beaver was not so eager. See error and try again.")
fmt.Println("error:", err)
}
}
package main
import (
"fmt"
)
import (
pdl "github.com/peopledatalabs/peopledatalabs-go"
pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)
func main() {
apiKey := "YOUR API KEY"
// Set API KEY as env variable
// apiKey := os.Getenv("API_KEY")
client := pdl.New(apiKey)
companyDomainsStringRep := "'stripe.com', 'plaid.com', 'xignite.com', 'square.com'"
sqlQuery := "SELECT * FROM person" +
" WHERE job_company_website IN (" + companyDomainsStringRep + ")" +
" AND job_title_role='engineering'" +
" AND job_title_levels IN ('vp', 'director', 'manager')" +
" AND work_email IS NOT NULL;"
params := pdlmodel.SearchParams {
BaseParams: pdlmodel.BaseParams {
Size: 100,
},
SearchBaseParams: pdlmodel.SearchBaseParams {
SQL: sqlQuery,
},
}
response, err := client.Person.Search(params)
if err == nil {
for i := range response.Data {
record := response.Data[i]
fmt.Println(record.WorkEmail, record.FullName, record.JobTitle, record.JobCompanyName)
}
fmt.Printf("successfully grabbed %d records from pdl\n", len(response.Data))
fmt.Printf("%d total pdl records exist matching this query\n", response.Total)
} else {
fmt.Println("NOTE. The eager beaver was not so eager. See error and try again.")
fmt.Println("error:", err)
}
}
import requests, json
API_KEY = "YOUR API KEY"  # Enter your API key
PDL_URL = "https://api.peopledatalabs.com/v5/person/search"
HEADERS = {
'Content-Type': "application/json",
'X-api-key': API_KEY
}
DESIRED_COMPANY_DOMAINS = [
'stripe.com', 'plaid.com', 'xignite.com', 'square.com'
]
# https://pdl-prod-schema.s3-us-west-2.amazonaws.com/15.0/enums/job_title_levels.txt
# for enumerated possible values of job_title_levels
ES_QUERY = {
"query": {
"bool": {
"must": [
{"terms": {"job_company_website": DESIRED_COMPANY_DOMAINS}},
{"term": {"job_title_role": "engineering"}},
{"terms": {"job_title_levels": ["vp", "director", "manager"]}},
{"exists": {"field": "work_email"}}
]
}
}
}
PARAMS = {
'query': json.dumps(ES_QUERY),
'size': 100
}
response = requests.get(
PDL_URL,
headers=HEADERS,
params=PARAMS
).json()
if response["status"] == 200:
for record in response['data']:
# bring in leads and make $$$
print(
record['work_email'],
record['full_name'],
record['job_title'],
record['job_company_name']
)
print(f"successfully grabbed {len(response['data'])} records from pdl")
print(f"{response['total']} total pdl records exist matching this query")
else:
print("NOTE. The eager beaver was not so eager. See error and try again.")
print("error:", response)
import requests, json
API_KEY = "YOUR API KEY"  # Enter your API key
PDL_URL = "https://api.peopledatalabs.com/v5/person/search"
HEADERS = {
'Content-Type': "application/json",
'X-api-key': API_KEY
}
DESIRED_COMPANY_DOMAINS = [
'stripe.com', 'plaid.com', 'xignite.com', 'square.com'
]
COMPANY_DOMAINS_STRING_REP = ", ".join(
(f"'{site}'" for site in DESIRED_COMPANY_DOMAINS)
)
# https://pdl-prod-schema.s3-us-west-2.amazonaws.com/15.0/job_title_levels.txt
# for enumerated possible values of job_title_levels
SQL_QUERY = \
f"""
SELECT * FROM person
WHERE job_company_website IN ({COMPANY_DOMAINS_STRING_REP})
AND job_title_role='engineering'
AND job_title_levels IN ('vp', 'director', 'manager')
AND work_email IS NOT NULL;
"""
PARAMS = {
'sql': SQL_QUERY,
'size': 100
}
response = requests.get(
PDL_URL,
headers=HEADERS,
params=PARAMS
).json()
if response["status"] == 200:
for record in response['data']:
# bring in leads and make $$$
print(
record['work_email'],
record['full_name'],
record['job_title'],
record['job_company_name']
)
print(f"successfully grabbed {len(response['data'])} records from pdl")
print(f"{response['total']} total pdl records exist matching this query")
else:
print("NOTE. The eager beaver was not so eager. See error and try again.")
print("error:", response)
Recruiting
"I have a client looking for marketing managers and dishwashers in Oregon but NOT in portland (don't ask why). They want to reach out to them on LinkedIn, so they asked that each candidate have a Linkedin URL. I want as many people as PDL can give me matching this criteria."
from time import sleep
import json
# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY
# Create a client, specifying an API key
client = PDLPY(
api_key="YOUR API KEY",
)
def get_all_pdl_records_es(query):
#runs search in batches with 6 second intervals
PAGE_SIZE = 100
all_records = []
batch = 1
params = {
'query': query,
'size': PAGE_SIZE,
'dataset': "all"
}
while batch == 1 or params['scroll_token']:
response = client.person.search(**params).json()
if response['status'] == 200:
all_records.extend(response['data'])
params['scroll_token'] = response.get('scroll_token')  # None on the last page ends the loop
print(f"batch {batch} success!")
sleep(6)
batch += 1
else:
print("Unable to continue scrolling")
break
print("done!")
return all_records
if __name__ == '__main__':
ES_QUERY = {
"query": {
"bool": {
"must": [
{"term": {"location_region": "oregon"}},
{"bool": {
"should": [
{"match": {"job_title": "dishwasher"}},
{"bool": {
"must": [
{"term": {"job_title_role": "marketing"}},
{"term": {"job_title_levels": "manager"}}
]
}}
]
}
},
{"exists": {"field": "linkedin_url"}}
],
"must_not":[
{"term": {"location_locality": "portland"}},
]
}
}
}
recruiting_leads = get_all_pdl_records_es(ES_QUERY)
print(f"Got {len(recruiting_leads)} recruiting leads for my wealthy client!")
#GO make_money_with_data(recruiting_leads)!
from time import sleep
import json
# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY
# Create a client, specifying an API key
client = PDLPY(
api_key="YOUR API KEY",
)
def get_all_pdl_records_sql(query):
#runs search in batches with 6 second intervals
PAGE_SIZE = 100
all_records = []
batch = 1
params = {
'sql': query,
'size': PAGE_SIZE,
'dataset': "all"
}
while batch == 1 or params['scroll_token']:
response = client.person.search(**params).json()
if response['status'] == 200:
all_records.extend(response['data'])
params['scroll_token'] = response.get('scroll_token')  # None on the last page ends the loop
print(f"batch {batch} success!")
sleep(6)
batch += 1
else:
print("Unable to continue scrolling")
break
print("done!")
return all_records
SQL_QUERY = \
"""
SELECT * FROM person
WHERE location_region='oregon'
AND NOT location_locality='portland'
AND (
job_title LIKE '%dishwasher%'
OR (
job_title_role='marketing'
AND job_title_levels='manager'
)
)
AND linkedin_url IS NOT NULL;
"""
recruiting_leads = get_all_pdl_records_sql(SQL_QUERY)
print(f"Got {len(recruiting_leads)} recruiting leads for my wealthy client!")
#GO make_money_with_data(recruiting_leads)!
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';
const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });
const esQuery = {
"query": {
"bool": {
"must": [
{"term": {"location_region": "oregon"}},
{"bool": {
"should": [
{"match": {"job_title": "dishwasher"}},
{"bool": {
"must": [
{"term": {"job_title_role": "marketing"}},
{"term": {"job_title_levels": "manager"}}
]
}}
]
}
},
{"exists": {"field": "linkedin_url"}}
],
"must_not":[
{"term": {"location_locality": "portland"}},
]
}
}
}
var allRecords = [];
var scrollToken = null;
var pageSize = 100;
var batch = 1;
var params = {
searchQuery: esQuery,
size: pageSize,
scroll_token: null,
dataset: "all"
}
// Run initial batch
runBatch();
function runBatch() {
params.scroll_token = scrollToken;
PDLJSClient.person.search.elastic(params).then((data) => {
Array.prototype.push.apply(allRecords, data.data);
scrollToken = data['scroll_token'];
console.log(`batch ${batch} success!`);
batch++;
// Runs search in batches with 6 second intervals
if (scrollToken) {
setTimeout(function() {
runBatch();
}, 6000);
}
}).catch((error) => {
console.log("Unable to continue scrolling");
console.log("done");
console.log(`Got ${allRecords.length} recruiting leads for my wealthy client!`);
});
}
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';
const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });
const sqlQuery = `SELECT * FROM person
WHERE location_region='oregon'
AND NOT location_locality='portland'
AND (
job_title LIKE '%dishwasher%'
OR (
job_title_role='marketing'
AND job_title_levels='manager'
)
)
AND linkedin_url IS NOT NULL;`;
var allRecords = [];
var scrollToken = null;
var pageSize = 100;
var batch = 1;
var params = {
searchQuery: sqlQuery,
size: pageSize,
scroll_token: null,
dataset: "all"
}
// Run initial batch
runBatch();
function runBatch() {
params.scroll_token = scrollToken;
PDLJSClient.person.search.sql(params).then((data) => {
Array.prototype.push.apply(allRecords, data.data);
scrollToken = data['scroll_token'];
console.log(`batch ${batch} success!`);
batch++;
// Runs search in batches with 6 second intervals
if (scrollToken) {
setTimeout(function() {
runBatch();
}, 6000);
}
}).catch((error) => {
console.log("Unable to continue scrolling");
console.log("done");
console.log(`Got ${allRecords.length} recruiting leads for my wealthy client!`);
});
}
require 'json'
# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'
Peopledatalabs.api_key = 'YOUR API KEY'
PAGE_SIZE = 100
#runs search in batches with 6 second intervals
def get_all_pdl_records_es(query)
all_records = []
batch = 1
scroll_token = {}
while batch == 1 || !scroll_token.nil?
response = Peopledatalabs::Search.people(searchType: 'elastic', query: query, size: PAGE_SIZE, scroll_token: scroll_token, dataset: "all")
if response['status'] == 200
all_records += response['data']
scroll_token = response['scroll_token']
puts "batch #{batch} success!"
sleep(6)
batch += 1
else
puts "Unable to continue scrolling"
break
end
end
puts "done!"
return all_records
end
ES_QUERY = {
"query": {
"bool": {
"must": [
{"term": {"location_region": "oregon"}},
{"bool": {
"should": [
{"match": {"job_title": "dishwasher"}},
{"bool": {
"must": [
{"term": {"job_title_role": "marketing"}},
{"term": {"job_title_levels": "manager"}}
]
}}
]
}
},
{"exists": {"field": "linkedin_url"}}
],
"must_not":[
{"term": {"location_locality": "portland"}},
]
}
}
}
recruiting_leads = get_all_pdl_records_es(ES_QUERY)
puts "Got #{recruiting_leads.length()} recruiting leads for my wealthy client!"
#GO make_money_with_data(recruiting_leads)!
require 'json'
# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'
Peopledatalabs.api_key = 'YOUR API KEY'
PAGE_SIZE = 100
#runs search in batches with 6 second intervals
def get_all_pdl_records_sql(query)
all_records = []
batch = 1
scroll_token = {}
while batch == 1 || !scroll_token.nil?
response = Peopledatalabs::Search.people(searchType: 'sql', query: query, size: PAGE_SIZE, scroll_token: scroll_token, dataset: "all")
if response['status'] == 200
all_records += response['data']
scroll_token = response['scroll_token']
puts "batch #{batch} success!"
sleep(6)
batch += 1
else
puts "Unable to continue scrolling"
break
end
end
puts "done!"
return all_records
end
SQL_QUERY = <<~SQL
  SELECT * FROM person
  WHERE location_region='oregon'
  AND NOT location_locality='portland'
  AND (
    job_title LIKE '%dishwasher%'
    OR (
      job_title_role='marketing'
      AND job_title_levels='manager'
    )
  )
  AND linkedin_url IS NOT NULL;
SQL
recruiting_leads = get_all_pdl_records_sql(SQL_QUERY)
puts "Got #{recruiting_leads.length()} recruiting leads for my wealthy client!"
#GO make_money_with_data(recruiting_leads)!
package main
import (
"fmt"
"time"
)
import (
pdl "github.com/peopledatalabs/peopledatalabs-go"
pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)
func main() {
elasticSearchQuery := map[string]interface{} {
"query": map[string]interface{} {
"bool": map[string]interface{} {
"must": []map[string]interface{} {
{"term": map[string]interface{} {"location_region": "oregon"}},
{"bool": map[string]interface{} {
"should": []map[string]interface{} {
{"match": map[string]interface{} {"job_title": "dishwasher"}},
{"bool": map[string]interface{} {
"must": []map[string]interface{} {
{"term": map[string]interface{} {"job_title_role": "marketing"}},
{"term": map[string]interface{} {"job_title_levels": "manager"}},
},
},
},
},
},
},
{"exists": map[string]interface{} {"field": "linkedin_url"}},
},
"must_not": []map[string]interface{} {
{"term": map[string]interface{} {"location_locality": "portland"}},
},
},
},
}
recruitingLeads := getAllPdlRecordsEs(elasticSearchQuery)
fmt.Printf("Got %d recruiting leads for my wealthy client!\n", len(recruitingLeads))
//GO make_money_with_data(recruiting_leads)!
}
//runs search in batches with 6 second intervals
func getAllPdlRecordsEs(query interface{}) []pdlmodel.Person {
var allRecords []pdlmodel.Person
batch := 1
var scrollToken string
const pageSize = 100
apiKey := "YOUR API KEY"
// Set API KEY as env variable
// apiKey := os.Getenv("API_KEY")
client := pdl.New(apiKey)
p := pdlmodel.SearchParams {
BaseParams: pdlmodel.BaseParams {
Size: pageSize,
},
SearchBaseParams: pdlmodel.SearchBaseParams {
Query: query,
ScrollToken: scrollToken,
Dataset: "all",
},
}
for batch == 1 || scrollToken != "" {
response, err := client.Person.Search(p)
if err == nil {
allRecords = append(allRecords, response.Data...)
p.SearchBaseParams.ScrollToken, scrollToken = response.ScrollToken, response.ScrollToken
fmt.Printf("batch %d success!\n", batch)
time.Sleep(6 * time.Second)
batch++
} else {
fmt.Println("Unable to continue scrolling")
break
}
}
fmt.Println("done!")
return allRecords
}
package main
import (
"fmt"
"time"
)
import (
pdl "github.com/peopledatalabs/peopledatalabs-go"
pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)
func main() {
sqlQuery := "SELECT * FROM person" +
" WHERE location_region='oregon'" +
" AND NOT location_locality='portland'" +
" AND (" +
" job_title LIKE '%dishwasher%'" +
" OR (" +
" job_title_role='marketing'" +
" AND job_title_levels='manager'" +
" )" +
" )" +
" AND linkedin_url IS NOT NULL;"
recruitingLeads := getAllPdlRecordsEs(sqlQuery)
fmt.Printf("Got %d recruiting leads for my wealthy client!\n", len(recruitingLeads))
//GO make_money_with_data(recruiting_leads)!
}
//runs search in batches with 6 second intervals
func getAllPdlRecordsEs(query string) []pdlmodel.Person {
var allRecords []pdlmodel.Person
batch := 1
var scrollToken string
const pageSize = 100
apiKey := "YOUR API KEY"
// Set API KEY as env variable
// apiKey := os.Getenv("API_KEY")
client := pdl.New(apiKey)
p := pdlmodel.SearchParams {
BaseParams: pdlmodel.BaseParams {
Size: pageSize,
},
SearchBaseParams: pdlmodel.SearchBaseParams {
SQL: query,
ScrollToken: scrollToken,
Dataset: "all",
},
}
for batch == 1 || scrollToken != "" {
response, err := client.Person.Search(p)
if err == nil {
allRecords = append(allRecords, response.Data...)
p.SearchBaseParams.ScrollToken, scrollToken = response.ScrollToken, response.ScrollToken
fmt.Printf("batch %d success!\n", batch)
time.Sleep(6 * time.Second)
batch++
} else {
fmt.Println("Unable to continue scrolling")
break
}
}
fmt.Println("done!")
return allRecords
}
from time import sleep
import requests, json
API_KEY = "YOUR API KEY"  # Enter your API key
PDL_URL = "https://api.peopledatalabs.com/v5/person/search"
HEADERS = {
'Content-Type': "application/json",
'X-api-key': API_KEY
}
def get_all_pdl_records_es(query):
#runs search in batches with 6 second intervals
PAGE_SIZE = 100
all_records = []
batch = 1
params = {
'query': json.dumps(query),
'size': PAGE_SIZE,
'dataset': "all"
}
while batch == 1 or params['scroll_token']:
response = requests.get(
PDL_URL,
headers=HEADERS,
params=params
).json()
if response['status'] == 200:
all_records.extend(response['data'])
params['scroll_token'] = response.get('scroll_token')  # None on the last page ends the loop
print(f"batch {batch} success!")
sleep(6)
batch += 1
else:
print("Unable to continue scrolling")
break
print("done!")
return all_records
if __name__ == '__main__':
ES_QUERY = {
"query": {
"bool": {
"must": [
{"term": {"location_region": "oregon"}},
{"bool": {
"should": [
{"match": {"job_title": "dishwasher"}},
{"bool": {
"must": [
{"term": {"job_title_role": "marketing"}},
{"term": {"job_title_levels": "manager"}}
]
}}
]
}
},
{"exists": {"field": "linkedin_url"}}
],
"must_not":[
{"term": {"location_locality": "portland"}},
]
}
}
}
recruiting_leads = get_all_pdl_records_es(ES_QUERY)
print(f"Got {len(recruiting_leads)} recruiting leads for my wealthy client!")
#GO make_money_with_data(recruiting_leads)!
from time import sleep
import requests
API_KEY = "YOUR API KEY"  # Enter your API key
PDL_URL = "https://api.peopledatalabs.com/v5/person/search"
HEADERS = {
'Content-Type': "application/json",
'X-api-key': API_KEY
}
def get_all_pdl_records_sql(query):
#runs search in batches with 6 second intervals
PAGE_SIZE = 100
all_records = []
batch = 1
params = {
'sql': query,
'size': PAGE_SIZE,
'dataset': "all"
}
while batch == 1 or params['scroll_token']:
response = requests.get(
PDL_URL,
headers=HEADERS,
params=params
).json()
if response['status'] == 200:
all_records.extend(response['data'])
params['scroll_token'] = response.get('scroll_token')  # None on the last page ends the loop
print(f"batch {batch} success!")
sleep(6)
batch += 1
else:
print("Unable to continue scrolling")
break
print("done!")
return all_records
SQL_QUERY = \
"""
SELECT * FROM person
WHERE location_region='oregon'
AND NOT location_locality='portland'
AND (
job_title LIKE '%dishwasher%'
OR (
job_title_role='marketing'
AND job_title_levels='manager'
)
)
AND linkedin_url IS NOT NULL;
"""
recruiting_leads = get_all_pdl_records_sql(SQL_QUERY)
print(f"got {len(recruiting_leads)} recruiting leads for my wealthy client!")
#GO make_money_with_data(recruiting_leads)!
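Because every match was required to have a linkedin_url, building the outreach list is a short post-processing step. A small sketch; the output filename is arbitrary:

def write_linkedin_urls(records, filename="recruiting_lead_urls.txt"):
    # The query's "exists" / IS NOT NULL filter guarantees linkedin_url is present
    with open(filename, "w") as f:
        for record in records:
            f.write(record["linkedin_url"] + "\n")

# e.g. write_linkedin_urls(recruiting_leads)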
Ads
"I want to sell yachts to rich people through ads on Facebook."
from time import sleep
import json
# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY
# Create a client, specifying an API key
client = PDLPY(
api_key="YOUR API KEY",
)
def get_all_pdl_records_es(query):
#runs search in batches with 6 second intervals
PAGE_SIZE = 100
all_records = []
batch = 1
params = {
'query': query,
'size': PAGE_SIZE,
'dataset': "all"
}
while batch == 1 or params['scroll_token']:
response = client.person.search(**params).json()
if response['status'] == 200:
all_records.extend(response['data'])
params['scroll_token'] = response.get('scroll_token')  # None on the last page ends the loop
print(f"batch {batch} success!")
sleep(6)
batch += 1
else:
print("Unable to continue scrolling")
break
print("done!")
return all_records
if __name__ == '__main__':
ES_QUERY = {
"query": {
"bool": {
"must": [
{"exists": {"field": "facebook_id"}},
{"prefix": {"interests": "yacht"}},
{"term": {"inferred_salary": ">250,000"}}
]
}
}
}
rich_yacht_people = get_all_pdl_records_es(ES_QUERY)
print(f"Got {len(rich_yacht_people)} rich yacht people for my wealthy client!")
#GO make_money_with_data(rich_yacht_people)!
from time import sleep
import json
# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY
# Create a client, specifying an API key
client = PDLPY(
api_key="YOUR API KEY",
)
def get_all_pdl_records_sql(query):
#runs search in batches with 6 second intervals
PAGE_SIZE = 100
all_records = []
batch = 1
params = {
'sql': query,
'size': PAGE_SIZE,
'dataset': "all"
}
while batch == 1 or params['scroll_token']:
response = client.person.search(**params).json()
if response['status'] == 200:
all_records.extend(response['data'])
params['scroll_token'] = response.get('scroll_token')  # None on the last page ends the loop
print(f"batch {batch} success!")
sleep(6)
batch += 1
else:
print("Unable to continue scrolling")
break
print("done!")
return all_records
SQL_QUERY = \
"""
SELECT * FROM person
WHERE facebook_id IS NOT NULL
AND interests LIKE 'yacht%'
AND inferred_salary='>250,000';
"""
rich_yacht_people = get_all_pdl_records_sql(SQL_QUERY)
print(f"got {len(rich_yacht_people)} rich yacht people for my wealthy client!")
#GO make_money_with_data(rich_yacht_people)
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';
const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });
const esQuery = {
"query": {
"bool": {
"must": [
{"exists": {"field": "facebook_id"}},
{"prefix": {"interests": "yacht"}},
{"term": {"inferred_salary": ">250,000"}}
]
}
}
}
var allRecords = [];
var scrollToken = null;
var pageSize = 100;
var batch = 1;
var params = {
searchQuery: esQuery,
size: pageSize,
scroll_token: null,
dataset: "all"
}
// Run initial batch
runBatch();
function runBatch() {
params.scroll_token = scrollToken;
PDLJSClient.person.search.elastic(params).then((data) => {
Array.prototype.push.apply(allRecords, data.data);
scrollToken = data['scroll_token'];
console.log(`batch ${batch} success!`);
batch++;
// Runs search in batches with 6 second intervals
if (scrollToken) {
setTimeout(function() {
runBatch();
}, 6000);
}
}).catch((error) => {
console.log("Unable to continue scrolling");
console.log("done");
console.log(`Got ${allRecords.length} rich yacht people for my wealthy client!`);
});
}
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';
const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });
const sqlQuery = `SELECT * FROM person
WHERE facebook_id IS NOT NULL
AND interests LIKE 'yacht%'
AND inferred_salary='>250,000';`;
var allRecords = [];
var scrollToken = null;
var pageSize = 100;
var batch = 1;
var params = {
searchQuery: sqlQuery,
size: pageSize,
scroll_token: null,
dataset: "all"
}
// Run initial batch
runBatch();
function runBatch() {
params.scroll_token = scrollToken;
PDLJSClient.person.search.sql(params).then((data) => {
Array.prototype.push.apply(allRecords, data.data);
scrollToken = data['scroll_token'];
console.log(`batch ${batch} success!`);
batch++;
// Runs search in batches with 6 second intervals
if (scrollToken) {
setTimeout(function() {
runBatch();
}, 6000);
}
}).catch((error) => {
console.log("Unable to continue scrolling");
console.log("done");
console.log(`Got ${allRecords.length} rich yacht people for my wealthy client!`);
});
}
require 'json'
# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'
Peopledatalabs.api_key = 'YOUR API KEY'
PAGE_SIZE = 100
#runs search in batches with 6 second intervals
def get_all_pdl_records_es(query)
all_records = []
batch = 1
scroll_token = {}
while batch == 1 || !scroll_token.nil?
response = Peopledatalabs::Search.people(searchType: 'elastic', query: query, size: PAGE_SIZE, scroll_token: scroll_token, dataset: "all")
if response['status'] == 200
all_records += response['data']
scroll_token = response['scroll_token']
puts "batch #{batch} success!"
sleep(6)
batch += 1
else
puts "Unable to continue scrolling"
break
end
end
puts "done!"
return all_records
end
ES_QUERY = {
"query": {
"bool": {
"must": [
{"exists": {"field": "facebook_id"}},
{"prefix": {"interests": "yacht"}},
{"term": {"inferred_salary": ">250,000"}}
]
}
}
}
rich_yacht_people = get_all_pdl_records_es(ES_QUERY)
puts "Got #{rich_yacht_people.length()} rich yacht people for my wealthy client!"
#GO make_money_with_data(rich_yacht_people)!
require 'json'
# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'
Peopledatalabs.api_key = 'YOUR API KEY'
PAGE_SIZE = 100
#runs search in batches with 6 second intervals
def get_all_pdl_records_sql(query)
all_records = []
batch = 1
scroll_token = {}
while batch == 1 || !scroll_token.nil?
response = Peopledatalabs::Search.people(searchType: 'sql', query: query, size: PAGE_SIZE, scroll_token: scroll_token, dataset: "all")
if response['status'] == 200
all_records += response['data']
scroll_token = response['scroll_token']
puts "batch #{batch} success!"
sleep(6)
batch += 1
else
puts "Unable to continue scrolling"
break
end
end
puts "done!"
return all_records
end
SQL_QUERY = <<~SQL
  SELECT * FROM person
  WHERE facebook_id IS NOT NULL
  AND interests LIKE 'yacht%'
  AND inferred_salary='>250,000';
SQL
rich_yacht_people = get_all_pdl_records_sql(SQL_QUERY)
puts "Got #{rich_yacht_people.length()} rich yacht people for my wealthy client!"
#GO make_money_with_data(rich_yacht_people)!
package main
import (
"fmt"
"time"
)
import (
pdl "github.com/peopledatalabs/peopledatalabs-go"
pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)
func main() {
elasticSearchQuery := map[string]interface{} {
"query": map[string]interface{} {
"bool": map[string]interface{} {
"must": []map[string]interface{} {
{"exists": map[string]interface{}{"field": "facebook_id"}},
{"prefix": map[string]interface{}{"interests": "yacht"}},
{"term": map[string]interface{}{"inferred_salary": ">250,000"}},
},
},
},
}
richYachtPeople := getAllPdlRecordsEs(elasticSearchQuery)
fmt.Printf("Got %d rich yacht people for my wealthy client!\n", len(richYachtPeople))
//GO make_money_with_data(rich_yacht_people)!
}
//runs search in batches with 6 second intervals
func getAllPdlRecordsEs(query interface{}) []pdlmodel.Person {
var allRecords []pdlmodel.Person
batch := 1
var scrollToken string
const pageSize = 100
apiKey := "YOUR API KEY"
// Set API KEY as env variable
// apiKey := os.Getenv("API_KEY")
client := pdl.New(apiKey)
p := pdlmodel.SearchParams {
BaseParams: pdlmodel.BaseParams {
Size: pageSize,
},
SearchBaseParams: pdlmodel.SearchBaseParams {
Query: query,
ScrollToken: scrollToken,
Dataset: "all",
},
}
for batch == 1 || scrollToken != "" {
response, err := client.Person.Search(p)
if err == nil {
allRecords = append(allRecords, response.Data...)
p.SearchBaseParams.ScrollToken, scrollToken = response.ScrollToken, response.ScrollToken
fmt.Printf("batch %d success!\n", batch)
time.Sleep(6 * time.Second)
batch++
} else {
fmt.Println("Unable to continue scrolling")
break
}
}
fmt.Println("done!")
return allRecords
}
package main
import (
"fmt"
"time"
)
import (
pdl "github.com/peopledatalabs/peopledatalabs-go"
pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)
func main() {
sqlQuery := "SELECT * FROM person" +
" WHERE facebook_id IS NOT NULL" +
" AND interests LIKE 'yacht%'" +
" AND inferred_salary='>250,000';"
richYachtPeople := getAllPdlRecordsEs(sqlQuery)
fmt.Printf("Got %d rich yacht people for my wealthy client!\n", len(richYachtPeople))
//GO make_money_with_data(rich_yacht_people)!
}
//runs search in batches with 6 second intervals
func getAllPdlRecordsEs(query string) []pdlmodel.Person {
var allRecords []pdlmodel.Person
batch := 1
var scrollToken string
const pageSize = 100
apiKey := "YOUR API KEY"
// Set API KEY as env variable
// apiKey := os.Getenv("API_KEY")
client := pdl.New(apiKey)
p := pdlmodel.SearchParams {
BaseParams: pdlmodel.BaseParams {
Size: pageSize,
},
SearchBaseParams: pdlmodel.SearchBaseParams {
SQL: query,
ScrollToken: scrollToken,
Dataset: "all",
},
}
for batch == 1 || scrollToken != "" {
response, err := client.Person.Search(p)
if err == nil {
allRecords = append(allRecords, response.Data...)
p.SearchBaseParams.ScrollToken, scrollToken = response.ScrollToken, response.ScrollToken
fmt.Printf("batch %d success!\n", batch)
time.Sleep(6 * time.Second)
batch++
} else {
fmt.Println("Unable to continue scrolling")
break
}
}
fmt.Println("done!")
return allRecords
}
from time import sleep
import requests, json
API_KEY = "YOUR API KEY"  # Enter your API key
PDL_URL = "https://api.peopledatalabs.com/v5/person/search"
HEADERS = {
'Content-Type': "application/json",
'X-api-key': API_KEY
}
def get_all_pdl_records_es(query):
#runs search in batches with 6 second intervals
PAGE_SIZE = 100
all_records = []
batch = 1
params = {
'query': json.dumps(query),
'size': PAGE_SIZE,
'dataset': "all"
}
while batch == 1 or params['scroll_token']:
response = requests.get(
PDL_URL,
headers=HEADERS,
params=params
).json()
if response['status'] == 200:
all_records.extend(response['data'])
params['scroll_token'] = response.get('scroll_token')  # None on the last page ends the loop
print(f"batch {batch} success!")
sleep(6)
batch += 1
else:
print("Unable to continue scrolling")
break
print("done!")
return all_records
if __name__ == '__main__':
ES_QUERY = {
"query": {
"bool": {
"must": [
{"exists": {"field": "facebook_id"}},
{"prefix": {"interests": "yacht"}},
{"term": {"inferred_salary": ">250,000"}}
]
}
}
}
rich_yacht_people = get_all_pdl_records_es(ES_QUERY)
print(f"Got {len(rich_yacht_people)} rich yacht people for my wealthy client!")
#GO make_money_with_data(rich_yacht_people)!
from time import sleep
import requests, json
API_KEY = "YOUR API KEY"  # Enter your API key
PDL_URL = "https://api.peopledatalabs.com/v5/person/search"
HEADERS = {
'Content-Type': "application/json",
'X-api-key': API_KEY
}
def get_all_pdl_records_sql(query):
#runs search in batches with 6 second intervals
PAGE_SIZE = 100
all_records = []
batch = 1
params = {
'sql': query,
'size': PAGE_SIZE,
'dataset': "all"
}
while batch == 1 or params['scroll_token']:
response = requests.get(
PDL_URL,
headers=HEADERS,
params=params
).json()
if response['status'] == 200:
all_records.extend(response['data'])
params['scroll_token'] = response.get('scroll_token')  # None on the last page ends the loop
print(f"batch {batch} success!")
sleep(6)
batch += 1
else:
print("Unable to continue scrolling")
break
print("done!")
return all_records
SQL_QUERY = \
"""
SELECT * FROM person
WHERE facebook_id IS NOT NULL
AND interests LIKE 'yacht%'
AND inferred_salary='>250,000';
"""
rich_yacht_people = get_all_pdl_records_sql(SQL_QUERY)
print(f"got {len(rich_yacht_people)} rich yacht people for my wealthy client!")
#GO make_money_with_data(rich_yacht_people)
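Every match here was required to have a facebook_id, so exporting an audience file for an ad platform is a one-pass loop. A minimal sketch; the single-column layout is illustrative, so check your ad platform's required upload format:

import csv

def write_audience_csv(records, filename="yacht_audience.csv"):
    # One facebook_id per row; adjust the header to what the ad platform expects
    with open(filename, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["facebook_id"])
        for record in records:
            writer.writerow([record["facebook_id"]])

# e.g. write_audience_csv(rich_yacht_people)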
Customer Insights
"I want information about my biggest customer (Zenefits.)"
from time import sleep
import json
# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY
# Create a client, specifying an API key
client = PDLPY(
api_key="YOUR API KEY",
)
def get_all_pdl_records_es(query):
#runs search in batches with 6 second intervals
PAGE_SIZE = 100
all_records = []
batch = 1
params = {
'query': query,
'size': PAGE_SIZE,
'dataset': "all"
}
while batch == 1 or params['scroll_token']:
response = client.person.search(**params).json()
if response['status'] == 200:
all_records.extend(response['data'])
params['scroll_token'] = response.get('scroll_token')  # None on the last page ends the loop
print(f"batch {batch} success!")
sleep(6)
batch += 1
else:
print("Unable to continue scrolling")
break
print("done!")
return all_records
if __name__ == '__main__':
ES_QUERY = {
"query": {
"term": {"job_company_website": "zenefits.com"}
}
}
all_zenefits_employees = get_all_pdl_records_es(ES_QUERY)
skills_agg = {}
titles_agg = {}
schools_agg = {}
other_companies_agg = {}
for record in all_zenefits_employees:
for skill in record['skills']:
skills_agg.setdefault(skill, 0)
skills_agg[skill] += 1
if record['job_title']:
titles_agg.setdefault(record['job_title'], 0)
titles_agg[record['job_title']] += 1
for edu in record['education']:
if edu['school'] and edu['school']['type'] == "post-secondary institution":
schools_agg.setdefault(edu['school']['name'], 0)
schools_agg[edu['school']['name']] += 1
for exp in record['experience']:
if exp['company'] and exp['company']['name'] != 'zenefits':
other_companies_agg.setdefault(exp['company']['name'], 0)
other_companies_agg[exp['company']['name']] += 1
print("top 10 skills for zenefits employees:")
for skill, count in sorted(
skills_agg.items(), key = lambda x: x[1], reverse=True
)[:10]:
print(count, skill)
print("top 10 titles for zenefits employees:")
for title, count in sorted(
titles_agg.items(), key = lambda x: x[1], reverse=True
)[:10]:
print(count, title)
print("top 10 universities for zenefits employees:")
for school, count in sorted(
schools_agg.items(), key = lambda x: x[1], reverse=True
)[:10]:
print(count, school)
print("top 10 former companies for zenefits employees:")
for company, count in sorted(
other_companies_agg.items(), key = lambda x: x[1], reverse=True
)[:10]:
print(count, company)
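The four ranking loops above repeat the same sort-and-slice pattern; a small helper (a refactoring sketch, not part of the original example) condenses them:

def print_top(agg, label, n=10):
    # Sort a {value: count} mapping by count, descending, and print the top n
    print(f"top {n} {label} for zenefits employees:")
    for value, count in sorted(agg.items(), key=lambda x: x[1], reverse=True)[:n]:
        print(count, value)

print_top(skills_agg, "skills")
print_top(titles_agg, "titles")
print_top(schools_agg, "universities")
print_top(other_companies_agg, "former companies")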
from time import sleep
import json
# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY
# Create a client, specifying an API key
client = PDLPY(
api_key="YOUR API KEY",
)
def get_all_pdl_records_sql(query):
#runs search in batches with 6 second intervals
PAGE_SIZE = 100
all_records = []
batch = 1
params = {
'sql': query,
'size': PAGE_SIZE,
'dataset': "all"
}
while batch == 1 or params['scroll_token']:
response = client.person.search(**params).json()
if response['status'] == 200:
all_records.extend(response['data'])
params['scroll_token'] = response.get('scroll_token')  # None on the last page ends the loop
print(f"batch {batch} success!")
sleep(6)
batch += 1
else:
print("Unable to continue scrolling")
break
print("done!")
return all_records
SQL_QUERY = \
"""
SELECT * FROM person
WHERE job_company_website='zenefits.com';
"""
all_zenefits_employees = get_all_pdl_records_sql(SQL_QUERY)
skills_agg = {}
titles_agg = {}
schools_agg = {}
other_companies_agg = {}
for record in all_zenefits_employees:
for skill in record['skills']:
skills_agg.setdefault(skill, 0)
skills_agg[skill] += 1
if record['job_title']:
titles_agg.setdefault(record['job_title'], 0)
titles_agg[record['job_title']] += 1
for edu in record['education']:
if edu['school'] and edu['school']['type'] == "post-secondary institution":
schools_agg.setdefault(edu['school']['name'], 0)
schools_agg[edu['school']['name']] += 1
for exp in record['experience']:
if exp['company'] and exp['company']['name'] != 'zenefits':
other_companies_agg.setdefault(exp['company']['name'], 0)
other_companies_agg[exp['company']['name']] += 1
print("top 10 skills for zenefits employees:")
for skill, count in sorted(
skills_agg.items(), key = lambda x: x[1], reverse=True
)[:10]:
print(count, skill)
print("top 10 titles for zenefits employees:")
for title, count in sorted(
titles_agg.items(), key = lambda x: x[1], reverse=True
)[:10]:
print(count, title)
print("top 10 universities for zenefits employees:")
for school, count in sorted(
schools_agg.items(), key = lambda x: x[1], reverse=True
)[:10]:
print(count, school)
print("top 10 former companies for zenefits employees:")
for company, count in sorted(
other_companies_agg.items(), key = lambda x: x[1], reverse=True
)[:10]:
print(count, company)
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';
const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });
const esQuery = {
"query": {
"term": {"job_company_website": "zenefits.com"}
}
}
var allRecords = [];
var scrollToken = null;
var pageSize = 100;
var batch = 1;
var params = {
searchQuery: esQuery,
size: pageSize,
scroll_token: null,
dataset: "all"
}
// Run initial batch
runBatch();
function runBatch() {
params.scroll_token = scrollToken;
PDLJSClient.person.search.elastic(params).then((data) => {
Array.prototype.push.apply(allRecords, data.data);
scrollToken = data['scroll_token'];
console.log(`batch ${batch} success!`);
batch++;
// Runs search in batches with 6 second intervals
if (scrollToken) {
setTimeout(function() {
runBatch();
}, 6000);
}
}).catch((error) => {
console.log("Unable to continue scrolling");
console.log("done");
printResults();
});
}
function printResults() {
var skillsAgg = {};
var titlesAgg = {};
var schoolsAgg = {};
var otherCompaniesAgg = {};
for (let record in allRecords) {
for (let skill in allRecords[record]["skills"]) {
if (!skillsAgg[allRecords[record]["skills"][skill]]) {
skillsAgg[allRecords[record]["skills"][skill]] = 0;
}
skillsAgg[allRecords[record]["skills"][skill]]++;
}
if (allRecords[record]["job_title"]) {
if (!titlesAgg[allRecords[record]["job_title"]]) {
titlesAgg[allRecords[record]["job_title"]] = 0;
}
titlesAgg[allRecords[record]["job_title"]]++;
for (let edu in allRecords[record]["education"]) {
if (allRecords[record]["education"][edu]["school"] &&
allRecords[record]["education"][edu]["school"]["type"] == "post-secondary institution") {
if (!schoolsAgg[allRecords[record]["education"][edu]["school"]["name"]]) {
schoolsAgg[allRecords[record]["education"][edu]["school"]["name"]] = 0;
}
schoolsAgg[allRecords[record]["education"][edu]["school"]["name"]]++;
}
}
}
for (let exp in allRecords[record]["experience"]) {
if (allRecords[record]["experience"][exp]["company"] &&
allRecords[record]["experience"][exp]["company"]["name"] != "zenefits") {
if (!otherCompaniesAgg[allRecords[record]["experience"][exp]["company"]["name"]]) {
otherCompaniesAgg[allRecords[record]["experience"][exp]["company"]["name"]] = 0;
}
otherCompaniesAgg[allRecords[record]["experience"][exp]["company"]["name"]]++
}
}
}
console.log("top 10 skills for zenefits employees:");
sortAndPrint(skillsAgg);
console.log("top 10 titles for zenefits employees:");
sortAndPrint(titlesAgg);
console.log("top 10 universities for zenefits employees:");
sortAndPrint(schoolsAgg);
console.log("top 10 former companies for zenefits employees:");
sortAndPrint(otherCompaniesAgg);
}
function sortAndPrint(object) {
var sortable = [];
for (let field in object) {
sortable.push([field, object[field]]);
}
sortable.sort(function(a, b) {
return b[1] - a[1];
});
// Guard against aggregations with fewer than 10 entries
for (let i = 0; i < Math.min(10, sortable.length); i++) {
console.log(sortable[i][1], sortable[i][0]);
}
}
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';
const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });
const sqlQuery = `SELECT * FROM person
WHERE job_company_website='zenefits.com';`;
var allRecords = [];
var scrollToken = null;
var pageSize = 100;
var batch = 1;
var params = {
searchQuery: sqlQuery,
size: pageSize,
scroll_token: null,
dataset: "all"
}
// Run initial batch
runBatch();
function runBatch() {
params.scroll_token = scrollToken;
PDLJSClient.person.search.sql(params).then((data) => {
Array.prototype.push.apply(allRecords, data.data);
scrollToken = data['scroll_token'];
console.log(`batch ${batch} success!`);
batch++;
// Runs search in batches with 6 second intervals
if (scrollToken) {
setTimeout(function() {
runBatch();
}, 6000);
}
}).catch((error) => {
console.log("Unable to continue scrolling");
console.log("done");
printResults();
});
}
function printResults() {
var skillsAgg = {};
var titlesAgg = {};
var schoolsAgg = {};
var otherCompaniesAgg = {};
for (let record in allRecords) {
for (let skill in allRecords[record]["skills"]) {
if (!skillsAgg[allRecords[record]["skills"][skill]]) {
skillsAgg[allRecords[record]["skills"][skill]] = 0;
}
skillsAgg[allRecords[record]["skills"][skill]]++;
}
if (allRecords[record]["job_title"]) {
if (!titlesAgg[allRecords[record]["job_title"]]) {
titlesAgg[allRecords[record]["job_title"]] = 0;
}
titlesAgg[allRecords[record]["job_title"]]++;
for (let edu in allRecords[record]["education"]) {
if (allRecords[record]["education"][edu]["school"] &&
allRecords[record]["education"][edu]["school"]["type"] == "post-secondary institution") {
if (!schoolsAgg[allRecords[record]["education"][edu]["school"]["name"]]) {
schoolsAgg[allRecords[record]["education"][edu]["school"]["name"]] = 0;
}
schoolsAgg[allRecords[record]["education"][edu]["school"]["name"]]++;
}
}
}
for (let exp in allRecords[record]["experience"]) {
if (allRecords[record]["experience"][exp]["company"] &&
allRecords[record]["experience"][exp]["company"]["name"] != "zenefits") {
if (!otherCompaniesAgg[allRecords[record]["experience"][exp]["company"]["name"]]) {
otherCompaniesAgg[allRecords[record]["experience"][exp]["company"]["name"]] = 0;
}
otherCompaniesAgg[allRecords[record]["experience"][exp]["company"]["name"]]++
}
}
}
console.log("top 10 skills for zenefits employees:");
sortAndPrint(skillsAgg);
console.log("top 10 titles for zenefits employees:");
sortAndPrint(titlesAgg);
console.log("top 10 universities for zenefits employees:");
sortAndPrint(schoolsAgg);
console.log("top 10 former companies for zenefits employees:");
sortAndPrint(otherCompaniesAgg);
}
function sortAndPrint(object) {
var sortable = [];
for (let field in object) {
sortable.push([field, object[field]]);
}
sortable.sort(function(a, b) {
return b[1] - a[1];
});
// Guard against aggregations with fewer than 10 entries
for (let i = 0; i < Math.min(10, sortable.length); i++) {
console.log(sortable[i][1], sortable[i][0]);
}
}
# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'
Peopledatalabs.api_key = 'YOUR API KEY'
PAGE_SIZE = 100
#runs search in batches with 6 second intervals
def get_all_pdl_records_es(query)
all_records = []
batch = 1
scroll_token = {}
while batch == 1 || !scroll_token.nil?
response = Peopledatalabs::Search.people(searchType: 'elastic', query: query, size: PAGE_SIZE, scroll_token: scroll_token, dataset: "all")
if response['status'] == 200
all_records += response['data']
scroll_token = response['scroll_token']
puts "batch #{batch} success!"
sleep(6)
batch += 1
else
puts "Unable to continue scrolling"
break
end
end
puts "done!"
return all_records
end
ES_QUERY = {
"query": {
"term": {"job_company_website": "zenefits.com"}
}
}
all_zenefits_employees = get_all_pdl_records_es(ES_QUERY)
skills_agg = {}
titles_agg = {}
schools_agg = {}
other_companies_agg = {}
all_zenefits_employees.each do |record|
record['skills'].each do |skill|
skills_agg[skill] = skills_agg.fetch(skill, 0)
skills_agg[skill] += 1
end
if record.key?('job_title')
titles_agg[record['job_title']] = titles_agg.fetch(record['job_title'], 0)
titles_agg[record['job_title']] += 1
record['education'].each do |edu|
if edu.key?('school') && !edu['school'].nil? && edu['school']['type'] == "post-secondary institution"
schools_agg[edu['school']['name']] = schools_agg.fetch(edu['school']['name'], 0)
schools_agg[edu['school']['name']] += 1
end
end
end
record['experience'].each do |exp|
if exp.key?('company') && !exp['company'].nil? && exp['company']['name'] != 'zenefits'
other_companies_agg[exp['company']['name']] = other_companies_agg.fetch(exp['company']['name'], 0)
other_companies_agg[exp['company']['name']] += 1
end
end
end
puts "top 10 skills for zenefits employees:"
skills_agg.sort_by(&:last).reverse.first(10).each { |key, value| puts "#{key} #{value}" }
puts "top 10 titles for zenefits employees:"
titles_agg.sort_by(&:last).reverse.first(10).each { |key, value| puts "#{key} #{value}" }
puts "top 10 universities for zenefits employees:"
schools_agg.sort_by(&:last).reverse.first(10).each { |key, value| puts "#{key} #{value}" }
puts "top 10 former companies for zenefits employees:"
other_companies_agg.sort_by(&:last).reverse.first(10).each { |key, value| puts "#{key} #{value}" }
# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'
Peopledatalabs.api_key = 'YOUR API KEY'
PAGE_SIZE = 100
#runs search in batches with 6 second intervals
def get_all_pdl_records_sql(query)
all_records = []
batch = 1
scroll_token = {}
while batch == 1 || !scroll_token.nil?
response = Peopledatalabs::Search.people(searchType: 'sql', query: query, size: PAGE_SIZE, scroll_token: scroll_token, dataset: "all")
if response['status'] == 200
all_records += response['data']
scroll_token = response['scroll_token']
puts "batch #{batch} success!"
sleep(6)
batch += 1
else
puts "Unable to continue scrolling"
break
end
end
puts "done!"
return all_records
end
SQL_QUERY = <<~SQL
  SELECT * FROM person
  WHERE job_company_website='zenefits.com';
SQL
all_zenefits_employees = get_all_pdl_records_sql(SQL_QUERY)
skills_agg = {}
titles_agg = {}
schools_agg = {}
other_companies_agg = {}
all_zenefits_employees.each do |record|
record['skills'].each do |skill|
skills_agg[skill] = skills_agg.fetch(skill, 0)
skills_agg[skill] += 1
end
if record.key?('job_title')
titles_agg[record['job_title']] = titles_agg.fetch(record['job_title'], 0)
titles_agg[record['job_title']] += 1
record['education'].each do |edu|
if edu.key?('school') && !edu['school'].nil? && edu['school']['type'] == "post-secondary institution"
schools_agg[edu['school']['name']] = schools_agg.fetch(edu['school']['name'], 0)
schools_agg[edu['school']['name']] += 1
end
end
end
record['experience'].each do |exp|
if exp.key?('company') && !exp['company'].nil? && exp['company']['name'] != 'zenefits'
other_companies_agg[exp['company']['name']] = other_companies_agg.fetch(exp['company']['name'], 0)
other_companies_agg[exp['company']['name']] += 1
end
end
end
puts "top 10 skills for zenefits employees:"
skills_agg.sort_by(&:last).reverse.first(10).each { |key, value| puts "#{key} #{value}" }
puts "top 10 titles for zenefits employees:"
titles_agg.sort_by(&:last).reverse.first(10).each { |key, value| puts "#{key} #{value}" }
puts "top 10 universities for zenefits employees:"
schools_agg.sort_by(&:last).reverse.first(10).each { |key, value| puts "#{key} #{value}" }
puts "top 10 former companies for zenefits employees:"
other_companies_agg.sort_by(&:last).reverse.first(10).each { |key, value| puts "#{key} #{value}" }
package main
import (
"fmt"
"time"
"sort"
)
import (
pdl "github.com/peopledatalabs/peopledatalabs-go"
pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)
func main() {
elasticSearchQuery := map[string]interface{} {
"query": map[string]interface{} {
"bool": map[string]interface{} {
"must": []map[string]interface{} {
{"term": map[string]interface{}{"job_company_website": "zenefits.com"}},
},
},
},
}
allZenefitsEmployees := getAllPdlRecordsEs(elasticSearchQuery)
skillsAgg := make(map[string]int)
titlesAgg := make(map[string]int)
schoolsAgg := make(map[string]int)
otherCompaniesAgg := make(map[string]int)
for _, record := range allZenefitsEmployees {
for _, skill := range record.Skills {
skillsAgg[skill]++
}
if record.JobTitle != "" {
titlesAgg[record.JobTitle]++
for _, edu := range record.Education {
if edu.School.Name != "" && edu.School.Type == "post-secondary institution" {
schoolsAgg[edu.School.Name]++
}
}
}
for _, exp := range record.Experience {
if exp.Company.Name != "zenefits" {
otherCompaniesAgg[exp.Company.Name]++
}
}
}
fmt.Println("top 10 skills for zenefits employees:")
for _, skill := range rank(skillsAgg) {
fmt.Println(skill, skillsAgg[skill])
}
fmt.Println("top 10 titles for zenefits employees:")
for _, title := range rank(titlesAgg) {
fmt.Println(title, titlesAgg[title])
}
fmt.Println("top 10 universities for zenefits employees:")
for _, school := range rank(schoolsAgg) {
fmt.Println(school, schoolsAgg[school])
}
fmt.Println("top 10 former companies for zenefits employees:")
for _, company := range rank(otherCompaniesAgg) {
fmt.Println(company, otherCompaniesAgg[company])
}
}
// Runs the search in batches with a 6-second pause between requests
func getAllPdlRecordsEs(query interface{}) []pdlmodel.Person {
var allRecords []pdlmodel.Person
batch := 1
var scrollToken string
const pageSize = 100
apiKey := "YOUR API KEY"
// Set API KEY as env variable
// apiKey := os.Getenv("API_KEY")
client := pdl.New(apiKey)
p := pdlmodel.SearchParams {
BaseParams: pdlmodel.BaseParams {
Size: pageSize,
},
SearchBaseParams: pdlmodel.SearchBaseParams {
Query: query,
ScrollToken: scrollToken,
Dataset: "all",
},
}
for batch == 1 || scrollToken != "" {
response, err := client.Person.Search(p)
if err == nil {
allRecords = append(allRecords, response.Data...)
scrollToken = response.ScrollToken
p.SearchBaseParams.ScrollToken = scrollToken
fmt.Printf("batch %d success!\n", batch)
time.Sleep(6 * time.Second)
batch++
} else {
fmt.Println("Unable to continue scrolling")
break
}
}
fmt.Println("done!")
return allRecords
}
// rank returns the map's keys sorted by descending value, capped at the top 10
func rank(values map[string]int) []string {
type kv struct {
Key string
Value int
}
ss := make([]kv, 0, len(values))
for k, v := range values {
ss = append(ss, kv{k, v})
}
sort.Slice(ss, func(i, j int) bool {
return ss[i].Value > ss[j].Value
})
if len(ss) > 10 {
ss = ss[:10]
}
ranked := make([]string, len(ss))
for i, kv := range ss {
ranked[i] = kv.Key
}
return ranked
}
package main
import (
"fmt"
"time"
"sort"
)
import (
pdl "github.com/peopledatalabs/peopledatalabs-go"
pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)
func main() {
sqlQuery := "SELECT * FROM person" +
" WHERE job_company_website='zenefits.com'"
allZenefitsEmployees := getAllPdlRecordsSql(sqlQuery)
skillsAgg := make(map[string]int)
titlesAgg := make(map[string]int)
schoolsAgg := make(map[string]int)
otherCompaniesAgg := make(map[string]int)
for _, record := range allZenefitsEmployees {
for _, skill := range record.Skills {
skillsAgg[skill]++
}
if record.JobTitle != "" {
titlesAgg[record.JobTitle]++
for _, edu := range record.Education {
if edu.School.Name != "" && edu.School.Type == "post-secondary institution" {
schoolsAgg[edu.School.Name]++
}
}
}
for _, exp := range record.Experience {
if exp.Company.Name != "zenefits" {
otherCompaniesAgg[exp.Company.Name]++
}
}
}
fmt.Println("top 10 skills for zenefits employees:")
for _, skill := range rank(skillsAgg) {
fmt.Println(skill, skillsAgg[skill])
}
fmt.Println("top 10 titles for zenefits employees:")
for _, title := range rank(titlesAgg) {
fmt.Println(title, titlesAgg[title])
}
fmt.Println("top 10 universities for zenefits employees:")
for _, school := range rank(schoolsAgg) {
fmt.Println(school, schoolsAgg[school])
}
fmt.Println("top 10 former companies for zenefits employees:")
for _, company := range rank(otherCompaniesAgg) {
fmt.Println(company, otherCompaniesAgg[company])
}
}
// Runs the search in batches with a 6-second pause between requests
func getAllPdlRecordsSql(query string) []pdlmodel.Person {
var allRecords []pdlmodel.Person
batch := 1
var scrollToken string
const pageSize = 100
apiKey := "YOUR API KEY"
// Set API KEY as env variable
// apiKey := os.Getenv("API_KEY")
client := pdl.New(apiKey)
p := pdlmodel.SearchParams {
BaseParams: pdlmodel.BaseParams {
Size: pageSize,
},
SearchBaseParams: pdlmodel.SearchBaseParams {
SQL: query,
ScrollToken: scrollToken,
Dataset: "all",
},
}
for batch == 1 || scrollToken != "" {
response, err := client.Person.Search(p)
if err == nil {
allRecords = append(allRecords, response.Data...)
scrollToken = response.ScrollToken
p.SearchBaseParams.ScrollToken = scrollToken
fmt.Printf("batch %d success!\n", batch)
time.Sleep(6 * time.Second)
batch++
} else {
fmt.Println("Unable to continue scrolling")
break
}
}
fmt.Println("done!")
return allRecords
}
// rank returns the map's keys sorted by descending value, capped at the top 10
func rank(values map[string]int) []string {
type kv struct {
Key string
Value int
}
ss := make([]kv, 0, len(values))
for k, v := range values {
ss = append(ss, kv{k, v})
}
sort.Slice(ss, func(i, j int) bool {
return ss[i].Value > ss[j].Value
})
if len(ss) > 10 {
ss = ss[:10]
}
ranked := make([]string, len(ss))
for i, kv := range ss {
ranked[i] = kv.Key
}
return ranked
}
from time import sleep
import requests, json
API_KEY = "YOUR API KEY"
PDL_URL = "https://api.peopledatalabs.com/v5/person/search"
HEADERS = {
'Content-Type': "application/json",
'X-api-key': API_KEY
}
def get_all_pdl_records_es(query):
# Runs the search in batches with a 6-second pause between requests
PAGE_SIZE = 100
all_records = []
batch = 1
params = {
'query': json.dumps(query),
'size': PAGE_SIZE,
'dataset': "all"
}
while batch == 1 or params['scroll_token']:
response = requests.get(
PDL_URL,
headers=HEADERS,
params=params
).json()
if response['status'] == 200:
all_records.extend(response['data'])
params['scroll_token'] = response.get('scroll_token')
print(f"batch {batch} success!")
sleep(6)
batch += 1
else:
print("Unable to continue scrolling")
break
print("done!")
return all_records
if __name__ == '__main__':
ES_QUERY = {
"query": {
"term": {"job_company_website": "zenefits.com"}
}
}
all_zenefits_employees = get_all_pdl_records_es(ES_QUERY)
skills_agg = {}
titles_agg = {}
schools_agg = {}
other_companies_agg = {}
for record in all_zenefits_employees:
for skill in record['skills']:
skills_agg.setdefault(skill, 0)
skills_agg[skill] += 1
if record['job_title']:
titles_agg.setdefault(record['job_title'], 0)
titles_agg[record['job_title']] += 1
for edu in record['education']:
if edu['school'] and edu['school']['type'] == "post-secondary institution":
schools_agg.setdefault(edu['school']['name'], 0)
schools_agg[edu['school']['name']] += 1
for exp in record['experience']:
if exp['company'] and exp['company']['name'] != 'zenefits':
other_companies_agg.setdefault(exp['company']['name'], 0)
other_companies_agg[exp['company']['name']] += 1
print("top 10 skills for zenefits employees:")
for skill, count in sorted(
skills_agg.items(), key=lambda x: x[1], reverse=True
)[:10]:
print(count, skill)
print("top 10 titles for zenefits employees:")
for title, count in sorted(
titles_agg.items(), key=lambda x: x[1], reverse=True
)[:10]:
print(count, title)
print("top 10 universities for zenefits employees:")
for school, count in sorted(
schools_agg.items(), key=lambda x: x[1], reverse=True
)[:10]:
print(count, school)
print("top 10 former companies for zenefits employees:")
for company, count in sorted(
other_companies_agg.items(), key=lambda x: x[1], reverse=True
)[:10]:
print(count, company)
from time import sleep
import requests, json
API_KEY = "YOUR API KEY"
PDL_URL = "https://api.peopledatalabs.com/v5/person/search"
HEADERS = {
'Content-Type': "application/json",
'X-api-key': API_KEY
}
def get_all_pdl_records_sql(query):
# Runs the search in batches with a 6-second pause between requests
PAGE_SIZE = 100
all_records = []
batch = 1
params = {
'sql': query,
'size': PAGE_SIZE,
'dataset': "all"
}
while batch == 1 or params['scroll_token']:
response = requests.get(
PDL_URL,
headers=HEADERS,
params=params
).json()
if response['status'] == 200:
all_records.extend(response['data'])
params['scroll_token'] = response.get('scroll_token')
print(f"batch {batch} success!")
sleep(6)
batch += 1
else:
print("Unable to continue scrolling")
break
print("done!")
return all_records
SQL_QUERY = \
"""
SELECT * FROM person
WHERE job_company_website='zenefits.com';
"""
all_zenefits_employees = get_all_pdl_records_sql(SQL_QUERY)
skills_agg = {}
titles_agg = {}
schools_agg = {}
other_companies_agg = {}
for record in all_zenefits_employees:
for skill in record['skills']:
skills_agg.setdefault(skill, 0)
skills_agg[skill] += 1
if record['job_title']:
titles_agg.setdefault(record['job_title'], 0)
titles_agg[record['job_title']] += 1
for edu in record['education']:
if edu['school'] and edu['school']['type'] == "post-secondary institution":
schools_agg.setdefault(edu['school']['name'], 0)
schools_agg[edu['school']['name']] += 1
for exp in record['experience']:
if exp['company'] and exp['company']['name'] != 'zenefits':
other_companies_agg.setdefault(exp['company']['name'], 0)
other_companies_agg[exp['company']['name']] += 1
print("top 10 skills for zenefits employees:")
for skill, count in sorted(
skills_agg.items(), key=lambda x: x[1], reverse=True
)[:10]:
print(count, skill)
print("top 10 titles for zenefits employees:")
for title, count in sorted(
titles_agg.items(), key=lambda x: x[1], reverse=True
)[:10]:
print(count, title)
print("top 10 universities for zenefits employees:")
for school, count in sorted(
schools_agg.items(), key=lambda x: x[1], reverse=True
)[:10]:
print(count, school)
print("top 10 former companies for zenefits employees:")
for company, count in sorted(
other_companies_agg.items(), key=lambda x: x[1], reverse=True
)[:10]:
print(count, company)
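The dict-plus-setdefault tallying in the examples above can be written more compactly with collections.Counter from the Python standard library, which also provides most_common() for the top-10 listing. A minimal sketch of the same aggregation, assuming the all_zenefits_employees list returned by one of the helpers above:
from collections import Counter

skills_agg = Counter()
titles_agg = Counter()

for record in all_zenefits_employees:
    # Counter creates missing keys automatically, so no setdefault() is needed
    skills_agg.update(record['skills'])
    if record['job_title']:
        titles_agg[record['job_title']] += 1

print("top 10 skills for zenefits employees:")
for skill, count in skills_agg.most_common(10):
    print(count, skill)
print("top 10 titles for zenefits employees:")
for title, count in titles_agg.most_common(10):
    print(count, title)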
Advanced Examples
Company Enrichment and Person Search
"I want to find X number of people at each company in my list."
import json
# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY
# Create a client, specifying an API key
client = PDLPY(
api_key="YOUR API KEY",
)
company_websites = [
"facebook.com",
"amazon.com",
"apple.com",
"netflix.com",
"google.com"
]
max_num_people = 100
# Enrich company then find people at that company:
for company_website in company_websites:
# Company Enrichment
query_string = { "website": company_website }
response = client.company.enrichment(**query_string).json()
if response['status'] == 200:
enriched_company = response
else:
enriched_company = {}
print(f"Company Enrichment Error for [{company_website}]: {response.text}")
# Person Search
company_employee_matches = {}
if enriched_company:
ES_QUERY = {
"query": {
"bool": {
"must": [
{"term": {"job_company_id": enriched_company["id"]}},
]
}
}
}
params = {
'query': ES_QUERY,
'size': max_num_people
}
response = client.person.search(**params).json()
if response['status'] == 200:
company_employee_matches = response['data']
else:
company_employee_matches = {}
print(f"Person Search Error for [{company_website}]: {response.text}")
print(f"Found {len(company_employee_matches)} employee profiles at {company_website}")
import json
# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY
# Create a client, specifying an API key
client = PDLPY(
api_key="YOUR API KEY",
)
company_websites = [
"facebook.com",
"amazon.com",
"apple.com",
"netflix.com",
"google.com"
]
max_num_people = 100
# Enrich company then find people at that company:
for company_website in company_websites:
# Company Enrichment
query_string = { "website": company_website }
response = client.company.enrichment(**query_string).json()
if response['status'] == 200:
enriched_company = response
else:
enriched_company = {}
print(f"Company Enrichment Error for [{company_website}]: {response.text}")
# Person Search
company_employee_matches = {}
if enriched_company:
SQL_QUERY = f"""
SELECT * FROM person
WHERE job_company_id = '{enriched_company['id']}'
"""
params = {
'sql': SQL_QUERY,
'size': max_num_people
}
response = client.person.search(**params).json()
if response['status'] == 200:
company_employee_matches = response['data']
else:
company_employee_matches = {}
print(f"Person Search Error for [{company_website}]: {response.text}")
print(f"Found {len(company_employee_matches)} employee profiles at {company_website}")
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';
const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });
const companyWebsites = [
"facebook.com",
"amazon.com",
"apple.com",
"netflix.com",
"google.com"
];
const maxNumPeople = 100;
// Enrich company then find people at that company:
for (let companyWebsite = 0; companyWebsite < companyWebsites.length; companyWebsite++) {
// Company Enrichment
let queryString = { "website": companyWebsites[companyWebsite] };
let companyEmployeeMatches = [];
PDLJSClient.company.enrichment(queryString).then((enrichedCompany) => {
// Person Search
let esQuery = {
query: {
bool: {
must:[
{term: {job_company_id: enrichedCompany.id}},
]
}
}
}
let params = {
searchQuery: esQuery,
size: maxNumPeople
}
PDLJSClient.person.search.elastic(params).then((data) => {
companyEmployeeMatches = data.data;
console.log(`Found ${companyEmployeeMatches.length}` +
` employee profiles at ${companyWebsites[companyWebsite]}`);
}).catch((error) => {
console.log(`Person Search Error for ${companyWebsites[companyWebsite]}` +
`: ${error}`);
});
}).catch((error) => {
console.log(`Company Enrichment Error for ${companyWebsites[companyWebsite]}` +
`: ${error}`);
});
}
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';
const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });
const companyWebsites = [
"facebook.com",
"amazon.com",
"apple.com",
"netflix.com",
"google.com"
];
const maxNumPeople = 100;
// Enrich company then find people at that company:
for (let companyWebsite = 0; companyWebsite < companyWebsites.length; companyWebsite++) {
// Company Enrichment
let queryString = { "website": companyWebsites[companyWebsite] };
let companyEmployeeMatches = [];
PDLJSClient.company.enrichment(queryString).then((enrichedCompany) => {
// Person Search
let sqlQuery = `SELECT * FROM person
WHERE job_company_id = '${enrichedCompany.id}';`;
let params = {
searchQuery: sqlQuery,
size: maxNumPeople
}
PDLJSClient.person.search.sql(params).then((data) => {
companyEmployeeMatches = data.data;
console.log(`Found ${companyEmployeeMatches.length}` +
` employee profiles at ${companyWebsites[companyWebsite]}`);
}).catch((error) => {
console.log(`Person Search Error for ${companyWebsites[companyWebsite]}` +
`: ${error}`);
});
}).catch((error) => {
console.log(`Company Enrichment Error for ${companyWebsites[companyWebsite]}` +
`: ${error}`);
});
}
# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'
Peopledatalabs.api_key = 'YOUR API KEY'
company_websites = [
"facebook.com",
"amazon.com",
"apple.com",
"netflix.com",
"google.com"
]
max_num_people = 100
# Enrich company then find people at that company:
company_websites.each do |company_website|
# Company Enrichment
query_string = { "website": company_website }
response = Peopledatalabs::Enrichment.company(params: query_string)
if response['status'] == 200
enriched_company = response
else
enriched_company = {}
puts "Company Enrichment Error for [#{company_website}]: #{response}"
end
# Person Search
company_employee_matches = {}
if !enriched_company.empty?
es_query = {
"query": {
"bool": {
"must": [
{"term": {"job_company_id": enriched_company["id"]}},
]
}
}
}
response = Peopledatalabs::Search.people(searchType: 'elastic', query: es_query, size: max_num_people)
if response['status'] == 200
company_employee_matches = response['data']
else
company_employee_matches = {}
puts "Person Search Error for [#{company_website}]: #{response}"
end
end
puts "Found #{company_employee_matches.length()} employee profiles at #{company_website}"
end
# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'
Peopledatalabs.api_key = 'YOUR API KEY'
company_websites = [
"facebook.com",
"amazon.com",
"apple.com",
"netflix.com",
"google.com"
]
max_num_people = 100
# Enrich company then find people at that company:
company_websites.each do |company_website|
# Company Enrichment
query_string = { "website": company_website }
response = Peopledatalabs::Enrichment.company(params: query_string)
if response['status'] == 200
enriched_company = response
else
enriched_company = {}
puts "Company Enrichment Error for [#{company_website}]: #{response}"
end
# Person Search
company_employee_matches = {}
if !enriched_company.empty?
sql_query = """
SELECT * FROM person
WHERE job_company_id = '#{enriched_company['id']}'
"""
response = Peopledatalabs::Search.people(searchType: 'sql', query: sql_query, size: max_num_people)
if response['status'] == 200
company_employee_matches = response['data']
else
company_employee_matches = {}
puts "Person Search Error for [#{company_website}]: #{response}"
end
end
puts "Found #{company_employee_matches.length()} employee profiles at #{company_website}"
end
package main
import (
"fmt"
"reflect"
)
import (
pdl "github.com/peopledatalabs/peopledatalabs-go"
pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)
func main() {
apiKey := "YOUR API KEY"
// Set API KEY as env variable
// apiKey := os.Getenv("API_KEY")
client := pdl.New(apiKey)
companyWebsites :=
[]string{"facebook.com","amazon.com","apple.com","netflix.com","google.com"}
const maxNumPeople = 100
// Enrich company then find people at that company:
for _, companyWebsite := range companyWebsites {
var enrichedCompany pdlmodel.EnrichCompanyResponse
// Company Enrichment
queryString := pdlmodel.CompanyParams{Website: companyWebsite}
paramsCompany := pdlmodel.EnrichCompanyParams {
CompanyParams: queryString,
}
responseCompany, errCompany := client.Company.Enrich(paramsCompany)
if errCompany == nil {
enrichedCompany = responseCompany
} else {
fmt.Printf("Company Enrichment Error for [%s]: %s\n", companyWebsite, errCompany)
}
// Person Search
var companyEmployeeMatches []pdlmodel.Person
if !reflect.DeepEqual(enrichedCompany, pdlmodel.EnrichCompanyResponse{}) {
elasticSearchQuery := map[string]interface{} {
"query": map[string]interface{} {
"bool": map[string]interface{} {
"must": []map[string]interface{} {
{"term": map[string]interface{}{"job_company_id": enrichedCompany.Id}},
},
},
},
}
paramsPerson := pdlmodel.SearchParams {
BaseParams: pdlmodel.BaseParams {
Size: maxNumPeople,
},
SearchBaseParams: pdlmodel.SearchBaseParams {
Query: elasticSearchQuery,
},
}
responsePerson, errPerson := client.Person.Search(paramsPerson)
if errPerson == nil {
companyEmployeeMatches = responsePerson.Data
} else {
fmt.Printf("Person Search Error for [%s]: %s\n", companyWebsite, errPerson)
}
}
fmt.Printf("Found %d employee profiles at %s\n", len(companyEmployeeMatches), companyWebsite)
}
}
package main
import (
"fmt"
"reflect"
)
import (
pdl "github.com/peopledatalabs/peopledatalabs-go"
pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)
func main() {
apiKey := "YOUR API KEY"
// Set API KEY as env variable
// apiKey := os.Getenv("API_KEY")
client := pdl.New(apiKey)
companyWebsites :=
[]string{"facebook.com","amazon.com","apple.com","netflix.com","google.com"}
const maxNumPeople = 100
// Enrich company then find people at that company:
for _, companyWebsite := range companyWebsites {
var enrichedCompany pdlmodel.EnrichCompanyResponse
// Company Enrichment
queryString := pdlmodel.CompanyParams{Website: companyWebsite}
paramsCompany := pdlmodel.EnrichCompanyParams {
CompanyParams: queryString,
}
responseCompany, errCompany := client.Company.Enrich(paramsCompany)
if errCompany == nil {
enrichedCompany = responseCompany
} else {
fmt.Printf("Company Enrichment Error for [%s]: %s\n", companyWebsite, errCompany)
}
// Person Search
var companyEmployeeMatches []pdlmodel.Person
if !reflect.DeepEqual(enrichedCompany, pdlmodel.EnrichCompanyResponse{}) {
sqlQuery := "SELECT * FROM person" +
" WHERE job_company_id = '" + enrichedCompany.Id + "'"
paramsPerson := pdlmodel.SearchParams {
BaseParams: pdlmodel.BaseParams {
Size: maxNumPeople,
},
SearchBaseParams: pdlmodel.SearchBaseParams {
SQL: sqlQuery,
},
}
responsePerson, errPerson := client.Person.Search(paramsPerson)
if errPerson == nil {
companyEmployeeMatches = responsePerson.Data
} else {
fmt.Printf("Person Search Error for [%s]: %s\n", companyWebsite, errPerson)
}
}
fmt.Printf("Found %d employee profiles at %s\n", len(companyEmployeeMatches), companyWebsite)
}
}
import json
import requests
PDL_COMPANY_ENRICH_URL = "https://api.peopledatalabs.com/v5/company/enrich"
PDL_PERSON_SEARCH_URL = "https://api.peopledatalabs.com/v5/person/search"
API_KEY = "####" # Enter your api key here
company_websites = [
"facebook.com",
"amazon.com",
"apple.com",
"netflix.com",
"google.com"
]
max_num_people = 100
# Enrich company then find people at that company:
for company_website in company_websites:
# Company Enrichment
query_string = { "website": company_website }
headers = {
'accept': "application/json",
'content-type': "application/json",
'x-api-key': API_KEY
}
response = requests.request("GET", PDL_COMPANY_ENRICH_URL, headers=headers, params=query_string)
if response.status_code == 200:
enriched_company = response.json()
else:
enriched_company = {}
print(f"Company Enrichment Error for [{company_website}]: {response.text}")
# Person Search
company_employee_matches = {}
if enriched_company:
headers = {
'Content-Type': "application/json",
'X-api-key': API_KEY
}
ES_QUERY = {
"query": {
"bool": {
"must": [
{"term": {"job_company_id": enriched_company["id"]}},
]
}
}
}
params = {
'query': json.dumps(ES_QUERY),
'size': max_num_people
}
response = requests.get(PDL_PERSON_SEARCH_URL, headers=headers, params=params)
if response.status_code == 200:
company_employee_matches = response.json()['data']
else:
company_employee_matches = {}
print(f"Person Search Error for [{company_website}]: {response.text}")
print(f"Found {len(company_employee_matches)} employee profiles at {company_website}")
import json
import requests
PDL_COMPANY_ENRICH_URL = "https://api.peopledatalabs.com/v5/company/enrich"
PDL_PERSON_SEARCH_URL = "https://api.peopledatalabs.com/v5/person/search"
API_KEY = "####" # Enter your api key here
company_websites = [
"facebook.com",
"amazon.com",
"apple.com",
"netflix.com",
"google.com"
]
max_num_people = 100
# Enrich company then find people at that company:
for company_website in company_websites:
# Company Enrichment
query_string = { "website": company_website }
headers = {
'accept': "application/json",
'content-type': "application/json",
'x-api-key': API_KEY
}
response = requests.request("GET", PDL_COMPANY_ENRICH_URL, headers=headers, params=query_string)
if response.status_code == 200:
enriched_company = response.json()
else:
enriched_company = {}
print(f"Company Enrichment Error for [{company_website}]: {response.text}")
# Person Search
company_employee_matches = {}
if enriched_company:
headers = {
'Content-Type': "application/json",
'X-api-key': API_KEY
}
SQL_QUERY = f"""
SELECT * FROM person
WHERE job_company_id = '{enriched_company['id']}'
"""
params = {
'sql': SQL_QUERY,
'size': max_num_people
}
response = requests.get(PDL_PERSON_SEARCH_URL, headers=headers, params=params)
if response.status_code == 200:
company_employee_matches = response.json()['data']
else:
company_employee_matches = {}
print(f"Person Search Error for [{company_website}]: {response.text}")
print(f"Found {len(company_employee_matches)} employee profiles at {company_website}")