Examples - Person Search API
Examples
We've provided code samples in Python, cURL, Ruby, Go and JavaScript. If you aren't comfortable working in any of these languages, feel free to use this handy tool to convert code from cURL to the language of your choice.
Heads Up! Credit Usage
Person Search API calls cost the number of total search results returned.
If you are making a search that could have a large number of results, make sure to use the `size` parameter to set the maximum number of results and cap your credit usage.
We want your feedback!
Do you see a bug? Is there an example you'd like to see that's not listed here?
Head over to the public roadmap and submit a bug ticket or a feature request and receive automatic notifications as your bug is resolved or your request is implemented.
Basic Usage
"I want to make a query and save the results to a file."
import json
# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY
# Build the SDK client with your API key
CLIENT = PDLPY(api_key="YOUR API KEY")

# Elasticsearch query: every clause under "must" has to match
ES_QUERY = {
    "query": {
        "bool": {
            "must": [
                {"term": {"location_country": "mexico"}},
                {"term": {"job_title_role": "health"}},
                {"exists": {"field": "phone_numbers"}},
            ]
        }
    }
}

# Request parameters; `size` limits how many records come back
PARAMS = dict(query=ES_QUERY, size=10, pretty=True)

# Call the Person Search API and decode the JSON body
response = CLIENT.person.search(**PARAMS).json()

if response["status"] != 200:
    # Anything other than 200 is reported together with the raw response
    print("NOTE: The carrier pigeons lost motivation in flight. See error and try again.")
    print("Error:", response)
else:
    data = response["data"]
    # Save the profiles as JSON Lines: one serialized record per line
    with open("my_pdl_search.jsonl", "w") as out:
        out.writelines(json.dumps(record) + "\n" for record in data)
    print(f"Successfully grabbed {len(data)} records from PDL.")
    print(f"{response['total']} total PDL records exist matching this query.")
import json
# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY
# Build the SDK client with your API key
CLIENT = PDLPY(api_key="YOUR API KEY")

# SQL form of the search
SQL_QUERY = """
SELECT * FROM person
WHERE location_country='mexico'
AND job_title_role='health'
AND phone_numbers IS NOT NULL;
"""

# Request parameters; `size` limits how many records come back
PARAMS = dict(sql=SQL_QUERY, size=10, pretty=True)

# Call the Person Search API and decode the JSON body
response = CLIENT.person.search(**PARAMS).json()

if response["status"] != 200:
    # Anything other than 200 is reported together with the raw response
    print("NOTE: The carrier pigeons lost motivation in flight. See error and try again.")
    print("Error:", response)
else:
    data = response["data"]
    # Save the profiles as JSON Lines: one serialized record per line
    with open("my_pdl_search.jsonl", "w") as out:
        out.writelines(json.dumps(record) + "\n" for record in data)
    print(f"Successfully grabbed {len(data)} records from PDL.")
    print(f"{response['total']} total PDL records exist matching this query.")
# Elasticsearch
# The body is JSON, so declare it explicitly; without the header curl labels
# --data-raw payloads as application/x-www-form-urlencoded.
curl -X GET 'https://api.peopledatalabs.com/v5/person/search' \
  -H 'X-Api-Key: xxxx' \
  -H 'Content-Type: application/json' \
  --data-raw '{
    "size": 10,
    "query": {
      "bool": {
        "must": [
          {"term": {"location_country": "mexico"}},
          {"term": {"job_title_role": "health"}},
          {"exists": {"field": "phone_numbers"}}
        ]
      }
    }
  }'

# SQL
# The '\'' sequences close the shell quote, emit a literal single quote, and reopen it.
curl -X GET \
  'https://api.peopledatalabs.com/v5/person/search' \
  -H 'X-Api-Key: xxxx' \
  -H 'Content-Type: application/json' \
  --data-raw '{
    "size": 10,
    "sql": "SELECT * FROM person WHERE location_country='\''mexico'\'' AND job_title_role='\''health'\'' AND phone_numbers IS NOT NULL;"
  }'
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';
import fs from 'fs';
// Create a client, specifying your API key
const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });

// Create an Elasticsearch query
const esQuery = {
  query: {
    bool: {
      must: [
        { term: { location_country: "mexico" } },
        { term: { job_title_role: "health" } },
        { exists: { field: "phone_numbers" } }
      ]
    }
  }
};

// Create a parameters JSON object
const params = {
  searchQuery: esQuery,
  size: 10,
  pretty: true
};

// Pass the parameters object to the Person Search API
PDLJSClient.person.search.elastic(params).then((data) => {
  // Serialize one profile per line so the output really is JSON Lines
  // (a single JSON.stringify of the array would produce one JSON document instead)
  const jsonl = data.data.map((record) => JSON.stringify(record) + "\n").join("");

  // Write out all profiles found to file
  fs.writeFile("my_pdl_search.jsonl", jsonl, (err) => {
    if (err) throw err;
  });
  console.log(`Successfully grabbed ${data.data.length} records from PDL.`);
  console.log(`${data["total"]} total PDL records exist matching this query.`);
}).catch((error) => {
  console.log("NOTE: The carrier pigeons lost motivation in flight. See error and try again.");
  console.log(error);
});
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';
import fs from 'fs';
// Create a client, specifying your API key
const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });

// Create an SQL query
const sqlQuery = `SELECT * FROM person
WHERE location_country='mexico'
AND job_title_role='health'
AND phone_numbers IS NOT NULL;`;

// Create a parameters JSON object
const params = {
  searchQuery: sqlQuery,
  size: 10,
  pretty: true
};

// Pass the parameters object to the Person Search API
PDLJSClient.person.search.sql(params).then((data) => {
  // Serialize one profile per line so the output really is JSON Lines
  // (a single JSON.stringify of the array would produce one JSON document instead)
  const jsonl = data.data.map((record) => JSON.stringify(record) + "\n").join("");

  // Write out all profiles found to file
  fs.writeFile("my_pdl_search.jsonl", jsonl, (err) => {
    if (err) throw err;
  });
  console.log(`Successfully grabbed ${data.data.length} records from PDL.`);
  console.log(`${data["total"]} total PDL records exist matching this query.`);
}).catch((error) => {
  console.log("NOTE: The carrier pigeons lost motivation in flight. See error and try again.");
  console.log(error);
});
require 'json'
# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'
# Configure the client with your API key
Peopledatalabs.api_key = 'YOUR API KEY'

# Elasticsearch query: every clause under "must" has to match
ES_QUERY = {
  query: {
    bool: {
      must: [
        { term: { location_country: "mexico" } },
        { term: { job_title_role: "health" } },
        { exists: { field: "phone_numbers" } }
      ]
    }
  }
}

# Call the Person Search API
response = Peopledatalabs::Search.person(
  searchType: 'elastic',
  query: ES_QUERY,
  size: 10,
  pretty: true
)

if response['status'] != 200
  # Anything other than 200 is reported together with the raw response
  puts "NOTE: The carrier pigeons lost motivation in flight. See error and try again."
  puts "Error: #{response}"
else
  data = response['data']
  # Save the profiles as JSON Lines: one serialized record per line
  File.open("my_pdl_search.jsonl", "w") do |out|
    data.each do |record|
      out.write(JSON.dump(record) + "\n")
    end
  end
  puts "Successfully grabbed #{data.length} records from PDL."
  puts "#{response['total']} total PDL records exist matching this query."
end
require 'json'
# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'
# Configure the client with your API key
Peopledatalabs.api_key = 'YOUR API KEY'

# SQL form of the search
SQL_QUERY = "
SELECT * FROM person
WHERE location_country='mexico'
AND job_title_role='health'
AND phone_numbers IS NOT NULL;
"

# Call the Person Search API
response = Peopledatalabs::Search.person(
  searchType: 'sql',
  query: SQL_QUERY,
  size: 10,
  pretty: true
)

if response['status'] != 200
  # Anything other than 200 is reported together with the raw response
  puts "NOTE: The carrier pigeons lost motivation in flight. See error and try again."
  puts "Error: #{response}"
else
  data = response['data']
  # Save the profiles as JSON Lines: one serialized record per line
  File.open("my_pdl_search.jsonl", "w") do |out|
    data.each do |record|
      out.write(JSON.dump(record) + "\n")
    end
  end
  puts "Successfully grabbed #{data.length} records from PDL."
  puts "#{response['total']} total PDL records exist matching this query."
end
package main
import (
"fmt"
"os"
"encoding/json"
"context"
)
// See https://github.com/peopledatalabs/peopledatalabs-go
import (
pdl "github.com/peopledatalabs/peopledatalabs-go"
pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)
// main runs an Elasticsearch-style Person Search and saves the returned
// profiles to my_pdl_search.jsonl, one JSON document per line.
func main() {
	// Set your API key
	apiKey := "YOUR API KEY"
	// Set API key as environmental variable
	// apiKey := os.Getenv("API_KEY")

	// Create a client, specifying your API key
	client := pdl.New(apiKey)

	// Create an Elasticsearch query
	elasticSearchQuery := map[string]interface{}{
		"query": map[string]interface{}{
			"bool": map[string]interface{}{
				"must": []map[string]interface{}{
					{"term": map[string]interface{}{"location_country": "mexico"}},
					{"term": map[string]interface{}{"job_title_role": "health"}},
					{"exists": map[string]interface{}{"field": "phone_numbers"}},
				},
			},
		},
	}

	// Create a parameters JSON object
	params := pdlmodel.SearchParams{
		BaseParams: pdlmodel.BaseParams{
			Size:   10,
			Pretty: true,
		},
		SearchBaseParams: pdlmodel.SearchBaseParams{
			Query: elasticSearchQuery,
		},
	}

	// Pass the parameters object to the Person Search API
	response, err := client.Person.Search(context.Background(), params)
	// Check for successful response
	if err != nil {
		fmt.Println("NOTE: The carrier pigeons lost motivation in flight. See error and try again.")
		fmt.Println("Error:", err)
		return
	}
	data := response.Data

	// Create file
	out, outErr := os.Create("my_pdl_search.jsonl")
	if outErr != nil {
		// Report the failure instead of claiming success without an output file
		fmt.Println("Error:", outErr)
		return
	}
	// Register the Close only once the handle is known to be valid
	defer out.Close()

	for i := range data {
		// Convert each profile found to JSON
		record, jsonErr := json.Marshal(data[i])
		if jsonErr != nil {
			// Skip the record that cannot be serialized, but say so
			fmt.Println("Error:", jsonErr)
			continue
		}
		// Write out each profile to file
		if _, writeErr := out.WriteString(string(record) + "\n"); writeErr != nil {
			fmt.Println("Error:", writeErr)
			return
		}
	}
	out.Sync()

	fmt.Printf("Successfully grabbed %d records from PDL.\n", len(data))
	fmt.Printf("%d total PDL records exist matching this query.\n", response.Total)
}
package main
import (
"fmt"
"os"
"encoding/json"
"context"
)
// See https://github.com/peopledatalabs/peopledatalabs-go
import (
pdl "github.com/peopledatalabs/peopledatalabs-go"
pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)
// main runs an SQL-style Person Search and saves the returned profiles to
// my_pdl_search.jsonl, one JSON document per line.
func main() {
	// Set your API key
	apiKey := "YOUR API KEY"
	// Set API key as environmental variable
	// apiKey := os.Getenv("API_KEY")

	// Create a client, specifying your API key
	client := pdl.New(apiKey)

	// Create an SQL query
	sqlQuery := "SELECT * FROM person" +
		" WHERE location_country='mexico'" +
		" AND job_title_role='health'" +
		" AND phone_numbers IS NOT NULL;"

	// Create a parameters JSON object
	params := pdlmodel.SearchParams{
		BaseParams: pdlmodel.BaseParams{
			Size:   10,
			Pretty: true,
		},
		SearchBaseParams: pdlmodel.SearchBaseParams{
			SQL: sqlQuery,
		},
	}

	// Pass the parameters object to the Person Search API
	response, err := client.Person.Search(context.Background(), params)
	// Check for successful response
	if err != nil {
		fmt.Println("NOTE: The carrier pigeons lost motivation in flight. See error and try again.")
		fmt.Println("Error:", err)
		return
	}
	data := response.Data

	// Create file
	out, outErr := os.Create("my_pdl_search.jsonl")
	if outErr != nil {
		// Report the failure instead of claiming success without an output file
		fmt.Println("Error:", outErr)
		return
	}
	// Register the Close only once the handle is known to be valid
	defer out.Close()

	for i := range data {
		// Convert each profile found to JSON
		record, jsonErr := json.Marshal(data[i])
		if jsonErr != nil {
			// Skip the record that cannot be serialized, but say so
			fmt.Println("Error:", jsonErr)
			continue
		}
		// Write out each profile to file
		if _, writeErr := out.WriteString(string(record) + "\n"); writeErr != nil {
			fmt.Println("Error:", writeErr)
			return
		}
	}
	out.Sync()

	fmt.Printf("Successfully grabbed %d records from PDL.\n", len(data))
	fmt.Printf("%d total PDL records exist matching this query.\n", response.Total)
}
import requests, json
# Your API key
API_KEY = "YOUR API KEY"

# Person Search API endpoint
PDL_URL = "https://api.peopledatalabs.com/v5/person/search"

# Request headers, including the API key
HEADERS = {"Content-Type": "application/json", "X-api-key": API_KEY}

# Elasticsearch query: every clause under "must" has to match
ES_QUERY = {
    "query": {
        "bool": {
            "must": [
                {"term": {"location_country": "mexico"}},
                {"term": {"job_title_role": "health"}},
                {"exists": {"field": "phone_numbers"}},
            ]
        }
    }
}

# These are passed via `params=`, so the nested query is serialized to a JSON string first
PARAMS = dict(query=json.dumps(ES_QUERY), size=10, pretty=True)

# Call the Person Search API and decode the JSON body
response = requests.get(PDL_URL, headers=HEADERS, params=PARAMS).json()

if response["status"] != 200:
    # Anything other than 200 is reported together with the raw response
    print("NOTE: The carrier pigeons lost motivation in flight. See error and try again.")
    print("Error:", response)
else:
    data = response["data"]
    # Save the profiles as JSON Lines: one serialized record per line
    with open("my_pdl_search.jsonl", "w") as out:
        out.writelines(json.dumps(record) + "\n" for record in data)
    print(f"Successfully grabbed {len(data)} records from PDL.")
    print(f"{response['total']} total PDL records exist matching this query.")
import requests, json
# Set your API key
API_KEY = "YOUR API KEY"

# Set the Person Search API URL
PDL_URL = "https://api.peopledatalabs.com/v5/person/search"

# Set headers
HEADERS = {
    "Content-Type": "application/json",
    "X-api-key": API_KEY,
}

# Create an SQL query
SQL_QUERY = """
SELECT * FROM person
WHERE location_country='mexico'
AND job_title_role='health'
AND phone_numbers IS NOT NULL;
"""

# Create a parameters JSON object
PARAMS = {
    "sql": SQL_QUERY,
    "size": 10,
    "pretty": True,
}

# Pass the parameters object to the Person Search API
response = requests.get(
    PDL_URL,
    headers=HEADERS,
    params=PARAMS,
).json()

# Check for successful response
if response["status"] == 200:
    data = response["data"]
    # Write out each profile found to file, one JSON document per line
    with open("my_pdl_search.jsonl", "w") as out:
        for record in data:
            out.write(json.dumps(record) + "\n")
    print(f"Successfully grabbed {len(data)} records from PDL.")
    print(f"{response['total']} total PDL records exist matching this query.")
else:
    print("NOTE: The carrier pigeons lost motivation in flight. See error and try again.")
    # Same "Error:" label as the other examples, so the failure output is uniform
    print("Error:", response)
Using POST Requests
"I want to use POST requests instead of GET requests so that I can make queries with a lot of parameters."
Difference Between GET and POST Requests
See this article for a comparison of the differences between GET and POST requests. The biggest difference is that POST requests don't have any limits on the amount of data that you can pass in the request.
import requests, json
# Your API key
API_KEY = "YOUR API KEY"

# Person Search API endpoint
PDL_URL = "https://api.peopledatalabs.com/v5/person/search"

# Request headers, including the API key
HEADERS = {"Content-Type": "application/json", "X-api-key": API_KEY}

# Elasticsearch query: every clause under "must" has to match
ES_QUERY = {
    "query": {
        "bool": {
            "must": [
                {"term": {"location_country": "mexico"}},
                {"term": {"job_title_role": "health"}},
                {"exists": {"field": "phone_numbers"}},
            ]
        }
    }
}

# With POST the query stays a nested object (this differs from the GET syntax)
PARAMS = dict(query=ES_QUERY, size=10, pretty=True)

# Send the parameters as the JSON request body.
# Alternative: data=json.dumps(PARAMS) passes the same data as a string.
response = requests.post(PDL_URL, headers=HEADERS, json=PARAMS).json()

if response["status"] != 200:
    # Anything other than 200 is reported together with the raw response
    print("NOTE: The carrier pigeons lost motivation in flight. See error and try again.")
    print("Error:", response)
else:
    data = response["data"]
    # Save the profiles as JSON Lines: one serialized record per line
    with open("my_pdl_search.jsonl", "w") as out:
        out.writelines(json.dumps(record) + "\n" for record in data)
    print(f"Successfully grabbed {len(data)} records from PDL.")
    print(f"{response['total']} total PDL records exist matching this query.")
import requests, json
# Your API key
API_KEY = "YOUR API KEY"

# Person Search API endpoint
PDL_URL = "https://api.peopledatalabs.com/v5/person/search"

# Request headers, including the API key
HEADERS = {"Content-Type": "application/json", "X-api-key": API_KEY}

# SQL form of the search
SQL_QUERY = """
SELECT * FROM person
WHERE location_country='mexico'
AND job_title_role='health'
AND phone_numbers IS NOT NULL;
"""

# Request parameters; `size` limits how many records come back
PARAMS = dict(sql=SQL_QUERY, size=10, pretty=True)

# Send the parameters as the JSON request body.
# Alternative: data=json.dumps(PARAMS) passes the same data as a string.
response = requests.post(PDL_URL, headers=HEADERS, json=PARAMS).json()

if response["status"] != 200:
    # Anything other than 200 is reported together with the raw response
    print("NOTE: The carrier pigeons lost motivation in flight. See error and try again.")
    print("Error:", response)
else:
    data = response["data"]
    # Save the profiles as JSON Lines: one serialized record per line
    with open("my_pdl_search.jsonl", "w") as out:
        out.writelines(json.dumps(record) + "\n" for record in data)
    print(f"Successfully grabbed {len(data)} records from PDL.")
    print(f"{response['total']} total PDL records exist matching this query.")
# Elasticsearch query, sent as a JSON request body via POST
curl --request POST \
  --header 'X-Api-Key: your-api-key' \
  --header 'Content-Type: application/json' \
  --data '{
"size": 10,
"query": {
"bool": {
"must": [
{"term": {"location_country": "mexico"}},
{"term": {"job_title_role": "health"}},
{"exists": {"field": "phone_numbers"}}
]
}
}
}' \
  'https://api.peopledatalabs.com/v5/person/search'

# SQL query, sent as a JSON request body via POST
# The '\'' sequences close the shell quote, emit a literal single quote, and reopen it.
curl --request POST \
  --header 'X-Api-Key: your-api-key' \
  --header 'Content-Type: application/json' \
  --data '{
"size": 10,
"sql": "SELECT * FROM person WHERE location_country='\''mexico'\'' AND job_title_role='\''health'\'' AND phone_numbers IS NOT NULL;"
}' \
  'https://api.peopledatalabs.com/v5/person/search'
Searching Specific Datasets
"I want to run a simple query against PDL's Phone Dataset."
Maintaining Backwards Compatibility
We introduced the `dataset` parameter with the July 2021 release, in which we also changed the default dataset from `all` to `resume`. For users that want to maintain the same performance in their queries as prior to this change, set the `dataset` parameter to `all` in the example below.
import json
# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY
# Build the SDK client with your API key
CLIENT = PDLPY(api_key="YOUR API KEY")

# Elasticsearch query: every clause under "must" has to match
ES_QUERY = {
    "query": {
        "bool": {
            "must": [
                {"term": {"location_country": "mexico"}},
                {"term": {"job_title_role": "health"}},
            ]
        }
    }
}

# Request parameters
PARAMS = dict(
    query=ES_QUERY,
    size=10,
    pretty=True,
    dataset="phone",  # Search for records with a phone number
)

# Call the Person Search API and decode the JSON body
response = CLIENT.person.search(**PARAMS).json()

if response["status"] != 200:
    # Anything other than 200 is reported together with the raw response
    print("NOTE: The carrier pigeons lost motivation in flight. See error and try again.")
    print("Error:", response)
else:
    data = response["data"]
    # Save the profiles as JSON Lines: one serialized record per line
    with open("my_pdl_search.jsonl", "w") as out:
        out.writelines(json.dumps(record) + "\n" for record in data)
    print(f"Successfully grabbed {len(data)} records from PDL.")
    print(f"{response['total']} total PDL records exist matching this query.")
import json
# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY
# Build the SDK client with your API key
CLIENT = PDLPY(api_key="YOUR API KEY")

# SQL form of the search
SQL_QUERY = """
SELECT * FROM person
WHERE location_country='mexico'
AND job_title_role='health'
AND phone_numbers IS NOT NULL;
"""

# Request parameters
PARAMS = dict(
    sql=SQL_QUERY,
    size=10,
    pretty=True,
    dataset="phone",  # Search for records with a phone number
)

# Call the Person Search API and decode the JSON body
response = CLIENT.person.search(**PARAMS).json()

if response["status"] != 200:
    # Anything other than 200 is reported together with the raw response
    print("NOTE: The carrier pigeons lost motivation in flight. See error and try again.")
    print("Error:", response)
else:
    data = response["data"]
    # Save the profiles as JSON Lines: one serialized record per line
    with open("my_pdl_search.jsonl", "w") as out:
        out.writelines(json.dumps(record) + "\n" for record in data)
    print(f"Successfully grabbed {len(data)} records from PDL.")
    print(f"{response['total']} total PDL records exist matching this query.")
# Elasticsearch
# Searches the phone dataset. Set "dataset" to "all" instead to keep the
# behavior from before the July 2021 release.
# The body is JSON, so declare it explicitly; without the header curl labels
# --data-raw payloads as application/x-www-form-urlencoded.
curl -X GET 'https://api.peopledatalabs.com/v5/person/search' \
  -H 'X-Api-Key: xxxx' \
  -H 'Content-Type: application/json' \
  --data-raw '{
    "size": 10,
    "dataset": "phone",
    "query": {
      "bool": {
        "must": [
          {"term": {"location_country": "mexico"}},
          {"term": {"job_title_role": "health"}}
        ]
      }
    }
  }'

# SQL
# The '\'' sequences close the shell quote, emit a literal single quote, and reopen it.
curl -X GET \
  'https://api.peopledatalabs.com/v5/person/search' \
  -H 'X-Api-Key: xxxx' \
  -H 'Content-Type: application/json' \
  --data-raw '{
    "size": 10,
    "dataset": "phone",
    "sql": "SELECT * FROM person WHERE location_country='\''mexico'\'' AND job_title_role='\''health'\'' AND phone_numbers IS NOT NULL;"
  }'
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';
import fs from 'fs';
// Create a client, specifying your API key
const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });

// Create an Elasticsearch query
const esQuery = {
  query: {
    bool: {
      must: [
        { term: { location_country: "mexico" } },
        { term: { job_title_role: "health" } }
      ]
    }
  }
};

// Create a parameters JSON object
const params = {
  searchQuery: esQuery,
  size: 10,
  pretty: true,
  dataset: "phone" // Search for records with a phone number
};

// Pass the parameters object to the Person Search API
PDLJSClient.person.search.elastic(params).then((data) => {
  // Serialize one profile per line so the output really is JSON Lines
  // (a single JSON.stringify of the array would produce one JSON document instead)
  const jsonl = data.data.map((record) => JSON.stringify(record) + "\n").join("");

  // Write out all profiles found to file
  fs.writeFile("my_pdl_search.jsonl", jsonl, (err) => {
    if (err) throw err;
  });
  console.log(`Successfully grabbed ${data.data.length} records from PDL.`);
  console.log(`${data["total"]} total PDL records exist matching this query.`);
}).catch((error) => {
  console.log("NOTE: The carrier pigeons lost motivation in flight. See error and try again.");
  console.log(error);
});
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';
import fs from 'fs';
// Create a client, specifying your API key
const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });

// Create an SQL query
const sqlQuery = `SELECT * FROM person
WHERE location_country='mexico'
AND job_title_role='health';`;

// Create a parameters JSON object
const params = {
  searchQuery: sqlQuery,
  size: 10,
  pretty: true,
  dataset: "phone" // Search for records with a phone number
};

// Pass the parameters object to the Person Search API
PDLJSClient.person.search.sql(params).then((data) => {
  // Serialize one profile per line so the output really is JSON Lines
  // (a single JSON.stringify of the array would produce one JSON document instead)
  const jsonl = data.data.map((record) => JSON.stringify(record) + "\n").join("");

  // Write out all profiles found to file
  fs.writeFile("my_pdl_search.jsonl", jsonl, (err) => {
    if (err) throw err;
  });
  console.log(`Successfully grabbed ${data.data.length} records from PDL.`);
  console.log(`${data["total"]} total PDL records exist matching this query.`);
}).catch((error) => {
  console.log("NOTE: The carrier pigeons lost motivation in flight. See error and try again.");
  console.log(error);
});
require 'json'
# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'
# Set your API key
Peopledatalabs.api_key = 'YOUR API KEY'

# Create an Elasticsearch query
ES_QUERY = {
  "query": {
    "bool": {
      "must": [
        {"term": {"location_country": "mexico"}},
        {"term": {"job_title_role": "health"}}
      ]
    }
  }
}

# Pass parameters to the Person Search API
# (Search.person is the same entry point the basic usage examples call)
response = Peopledatalabs::Search.person(
  searchType: 'elastic',
  query: ES_QUERY,
  size: 10,
  pretty: true,
  dataset: 'phone' # Search for records with a phone number
)

# Check for successful response
if response['status'] == 200
  data = response['data']
  # Write out each profile found to file, one JSON document per line
  File.open("my_pdl_search.jsonl", "w") do |out|
    data.each { |record| out.write(JSON.dump(record) + "\n") }
  end
  puts "Successfully grabbed #{data.length()} records from PDL."
  puts "#{response['total']} total PDL records exist matching this query."
else
  puts "NOTE: The carrier pigeons lost motivation in flight. See error and try again."
  puts "Error: #{response}"
end
require 'json'
# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'
# Set your API key
Peopledatalabs.api_key = 'YOUR API KEY'

# Create an SQL query
SQL_QUERY = \
"""
SELECT * FROM person
WHERE location_country='mexico'
AND job_title_role='health'
AND phone_numbers IS NOT NULL;
"""

# Pass parameters to the Person Search API
# (Search.person is the same entry point the basic usage examples call)
response = Peopledatalabs::Search.person(
  searchType: 'sql',
  query: SQL_QUERY,
  size: 10,
  pretty: true,
  dataset: 'phone' # Search for records with a phone number
)

# Check for successful response
if response['status'] == 200
  data = response['data']
  # Write out each profile found to file, one JSON document per line
  File.open("my_pdl_search.jsonl", "w") do |out|
    data.each { |record| out.write(JSON.dump(record) + "\n") }
  end
  puts "Successfully grabbed #{data.length()} records from PDL."
  puts "#{response['total']} total PDL records exist matching this query."
else
  puts "NOTE: The carrier pigeons lost motivation in flight. See error and try again."
  puts "Error: #{response}"
end
package main
import (
"fmt"
"os"
"encoding/json"
"context"
)
// See https://github.com/peopledatalabs/peopledatalabs-go
import (
pdl "github.com/peopledatalabs/peopledatalabs-go"
pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)
// main runs an Elasticsearch-style Person Search against the phone dataset
// and saves the returned profiles to my_pdl_search.jsonl, one JSON document
// per line.
func main() {
	// Set your API key
	apiKey := "YOUR API KEY"
	// Set API key as environmental variable
	// apiKey := os.Getenv("API_KEY")

	// Create a client, specifying your API key
	client := pdl.New(apiKey)

	// Create an Elasticsearch query
	elasticSearchQuery := map[string]interface{}{
		"query": map[string]interface{}{
			"bool": map[string]interface{}{
				"must": []map[string]interface{}{
					{"term": map[string]interface{}{"location_country": "mexico"}},
					{"term": map[string]interface{}{"job_title_role": "health"}},
				},
			},
		},
	}

	// Create a parameters JSON object
	params := pdlmodel.SearchParams{
		BaseParams: pdlmodel.BaseParams{
			Size:   10,
			Pretty: true,
		},
		SearchBaseParams: pdlmodel.SearchBaseParams{
			Query:   elasticSearchQuery,
			Dataset: "phone", // Search for records with a phone number
		},
	}

	// Pass the parameters object to the Person Search API
	response, err := client.Person.Search(context.Background(), params)
	// Check for successful response
	if err != nil {
		fmt.Println("NOTE: The carrier pigeons lost motivation in flight. See error and try again.")
		fmt.Println("Error:", err)
		return
	}
	data := response.Data

	// Create file
	out, outErr := os.Create("my_pdl_search.jsonl")
	if outErr != nil {
		// Report the failure instead of claiming success without an output file
		fmt.Println("Error:", outErr)
		return
	}
	// Register the Close only once the handle is known to be valid
	defer out.Close()

	for i := range data {
		// Convert each profile found to JSON
		record, jsonErr := json.Marshal(data[i])
		if jsonErr != nil {
			// Skip the record that cannot be serialized, but say so
			fmt.Println("Error:", jsonErr)
			continue
		}
		// Write out each profile to file
		if _, writeErr := out.WriteString(string(record) + "\n"); writeErr != nil {
			fmt.Println("Error:", writeErr)
			return
		}
	}
	out.Sync()

	fmt.Printf("Successfully grabbed %d records from PDL.\n", len(data))
	fmt.Printf("%d total PDL records exist matching this query.\n", response.Total)
}
package main
import (
"fmt"
"os"
"encoding/json"
"context"
)
// See https://github.com/peopledatalabs/peopledatalabs-go
import (
pdl "github.com/peopledatalabs/peopledatalabs-go"
pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)
// main runs an SQL-style Person Search against the phone dataset and saves
// the returned profiles to my_pdl_search.jsonl, one JSON document per line.
func main() {
	// Set your API key
	apiKey := "YOUR API KEY"
	// Set API key as environmental variable
	// apiKey := os.Getenv("API_KEY")

	// Create a client, specifying your API key
	client := pdl.New(apiKey)

	// Create an SQL query
	sqlQuery := "SELECT * FROM person" +
		" WHERE location_country='mexico'" +
		" AND job_title_role='health';"

	// Create a parameters JSON object
	params := pdlmodel.SearchParams{
		BaseParams: pdlmodel.BaseParams{
			Size:   10,
			Pretty: true,
		},
		SearchBaseParams: pdlmodel.SearchBaseParams{
			SQL:     sqlQuery,
			Dataset: "phone", // Search for records with a phone number
		},
	}

	// Pass the parameters object to the Person Search API
	response, err := client.Person.Search(context.Background(), params)
	// Check for successful response
	if err != nil {
		fmt.Println("NOTE: The carrier pigeons lost motivation in flight. See error and try again.")
		fmt.Println("Error:", err)
		return
	}
	data := response.Data

	// Create file
	out, outErr := os.Create("my_pdl_search.jsonl")
	if outErr != nil {
		// Report the failure instead of claiming success without an output file
		fmt.Println("Error:", outErr)
		return
	}
	// Register the Close only once the handle is known to be valid
	defer out.Close()

	for i := range data {
		// Convert each profile found to JSON
		record, jsonErr := json.Marshal(data[i])
		if jsonErr != nil {
			// Skip the record that cannot be serialized, but say so
			fmt.Println("Error:", jsonErr)
			continue
		}
		// Write out each profile to file
		if _, writeErr := out.WriteString(string(record) + "\n"); writeErr != nil {
			fmt.Println("Error:", writeErr)
			return
		}
	}
	out.Sync()

	fmt.Printf("Successfully grabbed %d records from PDL.\n", len(data))
	fmt.Printf("%d total PDL records exist matching this query.\n", response.Total)
}
import requests, json
# Your API key
API_KEY = "YOUR API KEY"

# Person Search API endpoint
PDL_URL = "https://api.peopledatalabs.com/v5/person/search"

# Request headers, including the API key
HEADERS = {"Content-Type": "application/json", "X-api-key": API_KEY}

# Elasticsearch query: every clause under "must" has to match
ES_QUERY = {
    "query": {
        "bool": {
            "must": [
                {"term": {"location_country": "mexico"}},
                {"term": {"job_title_role": "health"}},
            ]
        }
    }
}

# These are passed via `params=`, so the nested query is serialized to a JSON string first
PARAMS = dict(
    query=json.dumps(ES_QUERY),
    size=10,
    pretty=True,
    dataset="phone",  # Search for records with a phone number
)

# Call the Person Search API and decode the JSON body
response = requests.get(PDL_URL, headers=HEADERS, params=PARAMS).json()

if response["status"] != 200:
    # Anything other than 200 is reported together with the raw response
    print("NOTE: The carrier pigeons lost motivation in flight. See error and try again.")
    print("Error:", response)
else:
    data = response["data"]
    # Save the profiles as JSON Lines: one serialized record per line
    with open("my_pdl_search.jsonl", "w") as out:
        out.writelines(json.dumps(record) + "\n" for record in data)
    print(f"Successfully grabbed {len(data)} records from PDL.")
    print(f"{response['total']} total PDL records exist matching this query.")
import requests, json
# Set your API key
API_KEY = "YOUR API KEY"

# Set the Person Search API URL
PDL_URL = "https://api.peopledatalabs.com/v5/person/search"

# Set headers
HEADERS = {
    "Content-Type": "application/json",
    "X-api-key": API_KEY,
}

# Create an SQL query
SQL_QUERY = """
SELECT * FROM person
WHERE location_country='mexico'
AND job_title_role='health'
AND phone_numbers IS NOT NULL;
"""

# Create a parameters JSON object
PARAMS = {
    "sql": SQL_QUERY,
    "size": 10,
    "pretty": True,
    "dataset": "phone",  # Search for records with a phone number
}

# Pass the parameters object to the Person Search API
response = requests.get(
    PDL_URL,
    headers=HEADERS,
    params=PARAMS,
).json()

# Check for successful response
if response["status"] == 200:
    data = response["data"]
    # Write out each profile found to file, one JSON document per line
    with open("my_pdl_search.jsonl", "w") as out:
        for record in data:
            out.write(json.dumps(record) + "\n")
    print(f"Successfully grabbed {len(data)} records from PDL.")
    print(f"{response['total']} total PDL records exist matching this query.")
else:
    print("NOTE: The carrier pigeons lost motivation in flight. See error and try again.")
    # Same "Error:" label as the other examples, so the failure output is uniform
    print("Error:", response)
Excluding Datasets
"I want to run a simple query against all PDL datasets except the email and phone datasets."
import json
# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY
# Build the SDK client with your API key
CLIENT = PDLPY(api_key="YOUR API KEY")

# Elasticsearch query: every clause under "must" has to match
ES_QUERY = {
    "query": {
        "bool": {
            "must": [
                {"term": {"location_country": "mexico"}},
                {"term": {"job_title_role": "health"}},
            ]
        }
    }
}

# Request parameters
PARAMS = dict(
    query=ES_QUERY,
    size=10,
    pretty=True,
    dataset="-email,phone",  # Search all datasets EXCEPT the email and phone slices
)

# Call the Person Search API and decode the JSON body
response = CLIENT.person.search(**PARAMS).json()

if response["status"] != 200:
    # Anything other than 200 is reported together with the raw response
    print("NOTE: The carrier pigeons lost motivation in flight. See error and try again.")
    print("Error:", response)
else:
    data = response["data"]
    # Save the profiles as JSON Lines: one serialized record per line
    with open("my_pdl_search.jsonl", "w") as out:
        out.writelines(json.dumps(record) + "\n" for record in data)
    print(f"Successfully grabbed {len(data)} records from PDL.")
    print(f"{response['total']} total PDL records exist matching this query.")
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';
import fs from 'fs';
// Create a client, specifying your API key
const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });

// Create an Elasticsearch query
const esQuery = {
  query: {
    bool: {
      must: [
        { term: { location_country: "mexico" } },
        { term: { job_title_role: "health" } }
      ]
    }
  }
};

// Create a parameters JSON object
const params = {
  searchQuery: esQuery,
  size: 10,
  pretty: true,
  dataset: "-email,phone" // Search all datasets EXCEPT the email and phone slices
};

// Pass the parameters object to the Person Search API
PDLJSClient.person.search.elastic(params).then((data) => {
  // Serialize one profile per line so the output really is JSON Lines
  // (a single JSON.stringify of the array would produce one JSON document instead)
  const jsonl = data.data.map((record) => JSON.stringify(record) + "\n").join("");

  // Write out all profiles found to file
  fs.writeFile("my_pdl_search.jsonl", jsonl, (err) => {
    if (err) throw err;
  });
  console.log(`Successfully grabbed ${data.data.length} records from PDL.`);
  console.log(`${data["total"]} total PDL records exist matching this query.`);
}).catch((error) => {
  console.log("NOTE: The carrier pigeons lost motivation in flight. See error and try again.");
  console.log(error);
});
require 'json'
# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'
# Set your API key
Peopledatalabs.api_key = 'YOUR API KEY'

# Create an Elasticsearch query
ES_QUERY = {
  "query": {
    "bool": {
      "must": [
        {"term": {"location_country": "mexico"}},
        {"term": {"job_title_role": "health"}}
      ]
    }
  }
}

# Pass parameters to the Person Search API.
# The person search entry point is Search.person (not Search.people).
response = Peopledatalabs::Search.person(
  searchType: 'elastic',
  query: ES_QUERY,
  size: 10,
  pretty: true,
  dataset: '-email,phone' # Search all datasets EXCEPT the email and phone slices
)

# Check for successful response
if response['status'] == 200
  data = response['data']

  # Write out each profile found to file, one JSON document per line
  File.open("my_pdl_search.jsonl", "w") do |out|
    data.each { |record| out.write(JSON.dump(record) + "\n") }
  end

  puts "Successfully grabbed #{data.length()} records from PDL."
  puts "#{response['total']} total PDL records exist matching this query."
else
  puts "NOTE: The carrier pigeons lost motivation in flight. See error and try again."
  puts "Error: #{response}"
end
package main
import (
"fmt"
"os"
"encoding/json"
"context"
)
// See https://github.com/peopledatalabs/peopledatalabs-go
import (
pdl "github.com/peopledatalabs/peopledatalabs-go"
pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)
// main runs a Person Search query that excludes the email and phone datasets
// and writes each returned profile to my_pdl_search.jsonl as one JSON document
// per line.
func main() {
	// Set your API key
	apiKey := "YOUR API KEY"
	// Set API key as environmental variable
	// apiKey := os.Getenv("API_KEY")

	// Create a client, specifying your API key
	client := pdl.New(apiKey)

	// Create an Elasticsearch query
	elasticSearchQuery := map[string]interface{}{
		"query": map[string]interface{}{
			"bool": map[string]interface{}{
				"must": []map[string]interface{}{
					{"term": map[string]interface{}{"location_country": "mexico"}},
					{"term": map[string]interface{}{"job_title_role": "health"}},
				},
			},
		},
	}

	// Create a parameters JSON object
	params := pdlmodel.SearchParams{
		BaseParams: pdlmodel.BaseParams{
			Size:   10,
			Pretty: true,
		},
		SearchBaseParams: pdlmodel.SearchBaseParams{
			Query:   elasticSearchQuery,
			Dataset: "-email,phone", // Search all datasets EXCEPT the email and phone slices
		},
	}

	// Pass the parameters object to the Person Search API
	response, err := client.Person.Search(context.Background(), params)
	if err != nil {
		fmt.Println("NOTE: The carrier pigeons lost motivation in flight. See error and try again.")
		fmt.Println("Error:", err)
		return
	}
	data := response.Data

	// Create file; only schedule Close once we know the handle is valid
	out, outErr := os.Create("my_pdl_search.jsonl")
	if outErr != nil {
		fmt.Println("Error:", outErr)
		return
	}
	defer out.Close()

	for i := range data {
		// Convert each profile found to JSON
		record, jsonErr := json.Marshal(data[i])
		if jsonErr != nil {
			fmt.Println("Error:", jsonErr)
			continue
		}
		// Write out each profile to file
		if _, writeErr := out.WriteString(string(record) + "\n"); writeErr != nil {
			fmt.Println("Error:", writeErr)
			return
		}
	}
	if syncErr := out.Sync(); syncErr != nil {
		fmt.Println("Error:", syncErr)
		return
	}

	fmt.Printf("Successfully grabbed %d records from PDL.\n", len(data))
	fmt.Printf("%d total PDL records exist matching this query.\n", response.Total)
}
import requests, json
# Credentials and endpoint for the Person Search API
API_KEY = "YOUR API KEY"
PDL_URL = "https://api.peopledatalabs.com/v5/person/search"

# Headers sent with the request; the API key travels in X-api-key
HEADERS = {
    'Content-Type': "application/json",
    'X-api-key': API_KEY,
}

# Elasticsearch query: both term clauses must match
ES_QUERY = {
    "query": {
        "bool": {
            "must": [
                {"term": {"location_country": "mexico"}},
                {"term": {"job_title_role": "health"}},
            ]
        }
    }
}

# Query-string parameters; the query itself is sent as a JSON-encoded string
PARAMS = dict(
    query=json.dumps(ES_QUERY),
    size=10,
    pretty=True,
    dataset="-email,phone",  # Search all datasets EXCEPT the email and phone slices
)

# Issue the GET request and decode the JSON body of the reply
http_reply = requests.get(PDL_URL, headers=HEADERS, params=PARAMS)
response = http_reply.json()

# Handle the failure case first, then the successful one
if response["status"] != 200:
    print("NOTE: The carrier pigeons lost motivation in flight. See error and try again.")
    print("Error:", response)
else:
    profiles = response['data']
    # One JSON document per line (JSON Lines)
    with open("my_pdl_search.jsonl", "w") as outfile:
        outfile.writelines(json.dumps(profile) + "\n" for profile in profiles)
    print(f"Successfully grabbed {len(profiles)} records from PDL.")
    print(f"{response['total']} total PDL records exist matching this query.")
Bulk Retrieval
"I want to pull all current employees at Amazon and save their profiles to a CSV file."
High Credit Usage Code Below
The code example below illustrates retrieving all the employee profiles in a large company and is meant primarily for demonstrating the use of the
scroll_token
parameter when requesting large amounts of records. As a result, this code is mostly illustrative in purpose. It can further expend a lot of credits and doesn't have any error handling. The MAX_NUM_RECORDS_LIMIT
parameter in the example sets the maximum number of profiles that we will retrieve (and the maximum number of credits that you will expend), so please set it accordingly when testing this example.
import json, time, csv
# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY
# Limit the number of records to pull (to prevent accidentally using
# more credits than expected when testing this code)
MAX_NUM_RECORDS_LIMIT = 150  # The maximum number of records to retrieve
USE_MAX_NUM_RECORDS_LIMIT = True  # Set to False to pull all available records

# Create a client, specifying your API key
CLIENT = PDLPY(
    api_key="YOUR API KEY",
)

# Create an Elasticsearch query
ES_QUERY = {
    "query": {
        "bool": {
            "must": [
                {"term": {"job_company_name": "amazon"}}
            ]
        }
    }
}

# Create a parameters JSON object
PARAMS = {
    'query': ES_QUERY,
    'size': 100,
    'pretty': True
}

# Pull all results in multiple batches.
# `batch` is the 1-based number of the batch about to be requested.
batch = 1

# Store all records retrieved in an array
all_records = []

# Time the process
start_time = time.time()
found_all_records = False
continue_scrolling = True

# While still scrolling through data and still records to be found
while continue_scrolling and not found_all_records:

    # Check if we reached the maximum number of records we want
    if USE_MAX_NUM_RECORDS_LIMIT:
        num_records_to_request = MAX_NUM_RECORDS_LIMIT - len(all_records)

        # Adjust size parameter (never more than 100 per request)
        PARAMS['size'] = max(0, min(100, num_records_to_request))

        # Check if MAX_NUM_RECORDS_LIMIT reached
        if num_records_to_request <= 0:
            print(f"Stopping - reached maximum number of records to pull "
                  f"[MAX_NUM_RECORDS_LIMIT = {MAX_NUM_RECORDS_LIMIT}].")
            break

    # Pass the parameters object to the Person Search API
    response = CLIENT.person.search(**PARAMS).json()

    # Stop on an unsuccessful response. The code below reads 'data' and
    # 'total', which an error payload is not expected to contain.
    if response['status'] != 200:
        print(f"Error retrieving some records:\n\t"
              f"[{response['status']} - {response['error']['type']}] "
              f"{response['error']['message']}")
        break

    # Add records retrieved to the records array
    all_records.extend(response['data'])
    print(f"Retrieved {len(response['data'])} records in batch {batch} "
          f"- {response['total'] - len(all_records)} records remaining.")

    # Get scroll_token from response if exists and store it in parameters object
    if 'scroll_token' in response:
        PARAMS['scroll_token'] = response['scroll_token']
    else:
        continue_scrolling = False
        print("Unable to continue scrolling.")

    batch += 1
    found_all_records = (len(all_records) == response['total'])
    time.sleep(6)  # Avoid hitting rate limit thresholds

# Calculate time required to process batches
end_time = time.time()
runtime = end_time - start_time

# `batch` points one past the last completed batch, hence the -1
print(f"Successfully recovered {len(all_records)} profiles in "
      f"{batch - 1} batches [{round(runtime, 2)} seconds].")
# Save profiles to CSV (utility function)
def save_profiles_to_csv(profiles, filename, fields=None, delim=','):
    """Write selected fields of each profile to a CSV file.

    Args:
        profiles: Sequence of dict profile records.
        filename: Path of the CSV file to create (overwritten if it exists).
        fields: Column names to export, in order. When empty or omitted,
            the keys of the first profile are used as the columns.
        delim: Field delimiter handed to ``csv.writer``.

    A profile that lacks one of the requested fields gets an empty cell
    for that column instead of aborting the export.
    """
    # Define header fields (None and [] both mean "infer from the data")
    if not fields and len(profiles) > 0:
        fields = profiles[0].keys()
    fields = list(fields or [])

    # newline='' lets the csv module control line endings itself; an
    # explicit encoding keeps non-ASCII values portable across platforms
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile, delimiter=delim)
        # Write header
        writer.writerow(fields)
        # Write body
        count = 0
        for profile in profiles:
            writer.writerow([profile.get(field, "") for field in fields])
            count += 1
    print(f"Wrote {count} lines to: '{filename}'.")
# Export a fixed set of columns for every retrieved record to a CSV file
csv_filename = "all_employee_profiles.csv"
csv_header_fields = [
    'work_email',
    'full_name',
    "linkedin_url",
    'job_title',
    'job_company_name',
]
save_profiles_to_csv(all_records, csv_filename, csv_header_fields)
import json, time, csv
# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY
# Limit the number of records to pull (to prevent accidentally using
# more credits than expected when testing this code)
MAX_NUM_RECORDS_LIMIT = 150  # The maximum number of records to retrieve
USE_MAX_NUM_RECORDS_LIMIT = True  # Set to False to pull all available records

# Create a client, specifying your API key
CLIENT = PDLPY(
    api_key="YOUR API KEY",
)

# Create an SQL query
SQL_QUERY = \
"""
SELECT * FROM person
WHERE job_company_name='amazon';
"""

# Create a parameters JSON object
PARAMS = {
    'sql': SQL_QUERY,
    'size': 100,
    'pretty': True
}

# Pull all results in multiple batches.
# `batch` is the 1-based number of the batch about to be requested.
batch = 1

# Store all records retrieved in an array
all_records = []

# Time the process
start_time = time.time()
found_all_records = False
continue_scrolling = True

# While still scrolling through data and still records to be found
while continue_scrolling and not found_all_records:

    # Check if we reached the maximum number of records we want
    if USE_MAX_NUM_RECORDS_LIMIT:
        num_records_to_request = MAX_NUM_RECORDS_LIMIT - len(all_records)

        # Adjust size parameter (never more than 100 per request)
        PARAMS['size'] = max(0, min(100, num_records_to_request))

        # Check if MAX_NUM_RECORDS_LIMIT reached
        if num_records_to_request <= 0:
            print(f"Stopping - reached maximum number of records to pull "
                  f"[MAX_NUM_RECORDS_LIMIT = {MAX_NUM_RECORDS_LIMIT}].")
            break

    # Pass the parameters object to the Person Search API
    response = CLIENT.person.search(**PARAMS).json()

    # Stop on an unsuccessful response. The code below reads 'data' and
    # 'total', which an error payload is not expected to contain.
    if response['status'] != 200:
        print(f"Error retrieving some records:\n\t"
              f"[{response['status']} - {response['error']['type']}] "
              f"{response['error']['message']}")
        break

    # Add records retrieved to the records array
    all_records.extend(response['data'])
    print(f"Retrieved {len(response['data'])} records in batch {batch} "
          f"- {response['total'] - len(all_records)} records remaining.")

    # Get scroll_token from response if exists and store it in parameters object
    if 'scroll_token' in response:
        PARAMS['scroll_token'] = response['scroll_token']
    else:
        continue_scrolling = False
        print("Unable to continue scrolling.")

    batch += 1
    found_all_records = (len(all_records) == response['total'])
    time.sleep(6)  # Avoid hitting rate limit thresholds

# Calculate time required to process batches
end_time = time.time()
runtime = end_time - start_time

# `batch` points one past the last completed batch, hence the -1
print(f"Successfully recovered {len(all_records)} profiles in "
      f"{batch - 1} batches [{round(runtime, 2)} seconds].")
# Save profiles to CSV (utility function)
def save_profiles_to_csv(profiles, filename, fields=None, delim=','):
    """Write selected fields of each profile to a CSV file.

    Args:
        profiles: Sequence of dict profile records.
        filename: Path of the CSV file to create (overwritten if it exists).
        fields: Column names to export, in order. When empty or omitted,
            the keys of the first profile are used as the columns.
        delim: Field delimiter handed to ``csv.writer``.

    A profile that lacks one of the requested fields gets an empty cell
    for that column instead of aborting the export.
    """
    # Define header fields (None and [] both mean "infer from the data")
    if not fields and len(profiles) > 0:
        fields = profiles[0].keys()
    fields = list(fields or [])

    # newline='' lets the csv module control line endings itself; an
    # explicit encoding keeps non-ASCII values portable across platforms
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile, delimiter=delim)
        # Write header
        writer.writerow(fields)
        # Write body
        count = 0
        for profile in profiles:
            writer.writerow([profile.get(field, "") for field in fields])
            count += 1
    print(f"Wrote {count} lines to: '{filename}'.")
# Export a fixed set of columns for every retrieved record to a CSV file
csv_filename = "all_employee_profiles.csv"
csv_header_fields = [
    'work_email',
    'full_name',
    "linkedin_url",
    'job_title',
    'job_company_name',
]
save_profiles_to_csv(all_records, csv_filename, csv_header_fields)
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';
// See https://www.npmjs.com/package/csv-writer
import * as csvwriter from 'csv-writer';
// Build the SDK client; replace the placeholder with your own API key
const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });

// Credit safeguard: cap how many records this example may pull
const maxNumRecordsLimit = 150;     // The maximum number of records to retrieve
const useMaxNumRecordsLimit = true; // Set to false to pull all available records

// Elasticsearch query: profiles whose current company is "amazon"
const esQuery = {
  query: {
    bool: {
      must: [
        { term: { job_company_name: "amazon" } },
      ]
    }
  }
};

// Base request parameters; size and scroll_token are adjusted per batch
let params = {
  searchQuery: esQuery,
  size: 100,
  scroll_token: null,
  pretty: true
};

// State shared with runBatch()
let batch = 1;                 // 1-based number of the next batch
let allRecords = [];           // Every record retrieved so far
let startTime = Date.now();    // For timing the whole run
let foundAllRecords = false;
let continueScrolling = true;
let numRetrieved = 0;          // Records already planned for while queueing
let paramQueue = [];           // One parameters object per planned batch
let scrollToken = null;        // The current scroll_token
let numRecordsToRequest = 100;

// When the limit is active, pre-compute one parameters object per batch so
// that the batch sizes add up to exactly maxNumRecordsLimit
if (useMaxNumRecordsLimit) {
  do {
    numRecordsToRequest = maxNumRecordsLimit - numRetrieved;
    // At most 100 records per request, never negative
    params.size = Math.max(0, Math.min(100, numRecordsToRequest));
    numRetrieved += params.size;
    if (params.size > 0) {
      // Queue an independent copy of the parameters for this batch
      paramQueue.push(JSON.parse(JSON.stringify(params)));
    }
  } while (numRecordsToRequest > 0);
}

// Kick off the first batch
runBatch();
// Retrieve records associated with a batch
// Request one batch of records, then either recurse for the next batch or
// finish by reporting timings and writing everything collected to CSV.
function runBatch() {
  // Get the parameters for the current batch
  let currParams = useMaxNumRecordsLimit ? paramQueue[batch-1] : params;
  // Set the scroll_token from the previous batch
  currParams.scroll_token = scrollToken;
  batch++;

  // Pass the current parameters object to the Person Search API
  PDLJSClient.person.search.elastic(currParams).then((data) => {
    // Add records retrieved to the records array
    allRecords.push(...data.data);

    // Store the scroll_token if exists
    if (data['scroll_token']) {
      scrollToken = data['scroll_token'];
    } else {
      continueScrolling = false;
      console.log("Unable to continue scrolling.");
    }

    foundAllRecords = (allRecords.length == data['total']);
    console.log(`Retrieved ${data.data.length} records in batch ${(batch-1)}` +
                ` - ${(data['total'] - allRecords.length)} records remaining.`);

    // Another batch is available if the limit is off or the queue is not exhausted
    const hasQueuedBatch = !useMaxNumRecordsLimit || batch <= paramQueue.length;

    // Run next batch recursively, if any. Without a fresh scroll_token there
    // is nothing new to fetch, so stop instead of repeating the same request.
    if (continueScrolling && !foundAllRecords && hasQueuedBatch) {
      runBatch();
    } else {
      // Only claim the limit was reached when that is why we are stopping
      if (useMaxNumRecordsLimit && !hasQueuedBatch) {
        console.log(`Stopping - reached maximum number of records to pull [maxNumRecordsLimit = ` +
                    `${maxNumRecordsLimit}].`);
      }

      // Calculate time required to process batches
      let endTime = Date.now();
      let runTime = endTime - startTime;
      console.log (`Successfully recovered ${allRecords.length} profiles in ` +
                   `${(batch-1)} batches [${Math.round(runTime/1000)} seconds].`);

      // Set CSV fields
      let csvHeaderFields = [
        {id: "work_email", title: "work_email"},
        {id: "full_name", title: "full_name"},
        {id: "linkedin_url", title: "linkedin_url"},
        {id: "job_title", title: "job_title"},
        {id: "job_company_name", title: "job_company_name"}
      ];
      let csvFilename = "all_employee_profiles.csv";

      // Write records array to CSV file
      saveProfilesToCSV(allRecords, csvFilename, csvHeaderFields);
    }
  }).catch((error) => {
    console.log(error);
  });
}
// Write CSV file using csv-writer (https://www.npmjs.com/package/csv-writer)
// $ npm i -s csv-writer
// Write the requested fields of every profile to a CSV file.
//   profiles - array of profile objects
//   filename - path of the CSV file to write
//   fields   - array of {id, title} column descriptors for csv-writer
function saveProfilesToCSV(profiles, filename, fields) {
  // Configure the CSV writer with the target path and column headers
  const { createObjectCsvWriter } = csvwriter;
  const csvWriter = createObjectCsvWriter({ path: filename, header: fields });

  // Reduce each profile to just the requested columns
  const rows = profiles.map((profile) => {
    const row = {};
    fields.forEach((column) => {
      row[column.id] = profile[column.id];
    });
    return row;
  });

  // Write data to CSV file
  const writing = csvWriter.writeRecords(rows);
  writing.then(() => console.log('The CSV file was written successfully.'));
}
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';
// See https://www.npmjs.com/package/csv-writer
import * as csvwriter from 'csv-writer';
// Build the SDK client; replace the placeholder with your own API key
const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });

// Credit safeguard: cap how many records this example may pull
const maxNumRecordsLimit = 150;     // The maximum number of records to retrieve
const useMaxNumRecordsLimit = true; // Set to false to pull all available records

// SQL query: profiles whose current company is "amazon"
const sqlQuery = `SELECT * FROM person
WHERE job_company_name='amazon';`;

// Base request parameters; size and scroll_token are adjusted per batch
let params = {
  searchQuery: sqlQuery,
  size: 100,
  scroll_token: null,
  pretty: true
};

// State shared with runBatch()
let batch = 1;                 // 1-based number of the next batch
let allRecords = [];           // Every record retrieved so far
let startTime = Date.now();    // For timing the whole run
let foundAllRecords = false;
let continueScrolling = true;
let numRetrieved = 0;          // Records already planned for while queueing
let paramQueue = [];           // One parameters object per planned batch
let scrollToken = null;        // The current scroll_token
let numRecordsToRequest = 100;

// When the limit is active, pre-compute one parameters object per batch so
// that the batch sizes add up to exactly maxNumRecordsLimit
if (useMaxNumRecordsLimit) {
  do {
    numRecordsToRequest = maxNumRecordsLimit - numRetrieved;
    // At most 100 records per request, never negative
    params.size = Math.max(0, Math.min(100, numRecordsToRequest));
    numRetrieved += params.size;
    if (params.size > 0) {
      // Queue an independent copy of the parameters for this batch
      paramQueue.push(JSON.parse(JSON.stringify(params)));
    }
  } while (numRecordsToRequest > 0);
}

// Kick off the first batch
runBatch();
// Retrieve records associated with a batch
// Request one batch of records, then either recurse for the next batch or
// finish by reporting timings and writing everything collected to CSV.
function runBatch() {
  // Get the parameters for the current batch
  let currParams = useMaxNumRecordsLimit ? paramQueue[batch-1] : params;
  // Set the scroll_token from the previous batch
  currParams.scroll_token = scrollToken;
  batch++;

  // Pass the current parameters object to the Person Search API
  PDLJSClient.person.search.sql(currParams).then((data) => {
    // Add records retrieved to the records array
    allRecords.push(...data.data);

    // Store the scroll_token if exists
    if (data['scroll_token']) {
      scrollToken = data['scroll_token'];
    } else {
      continueScrolling = false;
      console.log("Unable to continue scrolling.");
    }

    foundAllRecords = (allRecords.length == data['total']);
    console.log(`Retrieved ${data.data.length} records in batch ${(batch-1)}` +
                ` - ${(data['total'] - allRecords.length)} records remaining.`);

    // Another batch is available if the limit is off or the queue is not exhausted
    const hasQueuedBatch = !useMaxNumRecordsLimit || batch <= paramQueue.length;

    // Run next batch recursively, if any. Without a fresh scroll_token there
    // is nothing new to fetch, so stop instead of repeating the same request.
    if (continueScrolling && !foundAllRecords && hasQueuedBatch) {
      runBatch();
    } else {
      // Only claim the limit was reached when that is why we are stopping
      if (useMaxNumRecordsLimit && !hasQueuedBatch) {
        console.log(`Stopping - reached maximum number of records to pull [maxNumRecordsLimit = ` +
                    `${maxNumRecordsLimit}].`);
      }

      // Calculate time required to process batches
      let endTime = Date.now();
      let runTime = endTime - startTime;
      console.log (`Successfully recovered ${allRecords.length} profiles in ` +
                   `${(batch-1)} batches [${Math.round(runTime/1000)} seconds].`);

      // Set CSV fields
      let csvHeaderFields = [
        {id: "work_email", title: "work_email"},
        {id: "full_name", title: "full_name"},
        {id: "linkedin_url", title: "linkedin_url"},
        {id: "job_title", title: "job_title"},
        {id: "job_company_name", title: "job_company_name"}
      ];
      let csvFilename = "all_employee_profiles.csv";

      // Write records array to CSV file
      saveProfilesToCSV(allRecords, csvFilename, csvHeaderFields);
    }
  }).catch((error) => {
    console.log(error);
  });
}
// Write CSV file using csv-writer (https://www.npmjs.com/package/csv-writer)
// $ npm i -s csv-writer
// Write the requested fields of every profile to a CSV file.
//   profiles - array of profile objects
//   filename - path of the CSV file to write
//   fields   - array of {id, title} column descriptors for csv-writer
function saveProfilesToCSV(profiles, filename, fields) {
  // Configure the CSV writer with the target path and column headers
  const { createObjectCsvWriter } = csvwriter;
  const csvWriter = createObjectCsvWriter({ path: filename, header: fields });

  // Reduce each profile to just the requested columns
  const rows = profiles.map((profile) => {
    const row = {};
    fields.forEach((column) => {
      row[column.id] = profile[column.id];
    });
    return row;
  });

  // Write data to CSV file
  const writing = csvWriter.writeRecords(rows);
  writing.then(() => console.log('The CSV file was written successfully.'));
}
require 'json'
require 'csv'
# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'
# Set your API key
Peopledatalabs.api_key = 'YOUR API KEY'

# Limit the number of records to pull (to prevent accidentally using
# more credits than expected when testing out this code).
MAX_NUM_RECORDS_LIMIT = 150 # The maximum number of records to retrieve
USE_MAX_NUM_RECORDS_LIMIT = true # Set to false to pull all available records

# Create an Elasticsearch query
ES_QUERY = {
  "query": {
    "bool": {
      "must": [
        {"term": {"job_company_name": "amazon"}}
      ]
    }
  }
}

# Pull all results in multiple batches.
# `batch` is the 1-based number of the batch about to be requested.
batch = 1

# Store all records retrieved in an array
all_records = []

# Time the process
start_time = Time.now
found_all_records = false
continue_scrolling = true
scroll_token = {}

# Records per request; stays at 100 when the limit is disabled so that a
# real size (not nil) is always sent
size = 100

# While still scrolling through data and still records to be found
while continue_scrolling && !found_all_records do

  # Check if we reached the maximum number of records we want
  if USE_MAX_NUM_RECORDS_LIMIT
    num_records_to_request = MAX_NUM_RECORDS_LIMIT - all_records.length()

    # Adjust size parameter (never more than 100 per request)
    size = [0, [100, num_records_to_request].min].max

    # Check if MAX_NUM_RECORDS_LIMIT reached
    if num_records_to_request <= 0
      puts "Stopping - reached maximum number of records to pull " \
           "[MAX_NUM_RECORDS_LIMIT = #{MAX_NUM_RECORDS_LIMIT}]."
      break
    end
  end

  # Pass parameters to the Person Search API
  response = Peopledatalabs::Search.person(searchType: 'elastic', query: ES_QUERY, size: size, scroll_token: scroll_token, pretty: true)

  # Stop on an unsuccessful response; there is nothing to scroll from
  if response['status'] != 200
    puts "Error retrieving some records:\n\t" \
         "[#{response['status']} - #{response['error']['type']}] " \
         "#{response['error']['message']}"
    break
  end

  # Add records retrieved to the records array
  all_records += response['data']
  puts "Retrieved #{response['data'].length()} records in batch #{batch} " \
       "- #{response['total'] - all_records.length()} records remaining."

  # Get scroll_token from response if exists and store it
  if response.key?('scroll_token')
    scroll_token = response['scroll_token']
  else
    continue_scrolling = false
    puts "Unable to continue scrolling."
  end

  batch += 1
  found_all_records = (all_records.length() == response['total'])
  sleep(6) # Avoid hitting rate limit thresholds
end

# Calculate time required to process batches
end_time = Time.now
runtime = end_time - start_time

# `batch` points one past the last completed batch, hence the -1
puts "Successfully recovered #{all_records.length()} profiles in " \
     "#{batch - 1} batches [#{runtime.round(2)} seconds]."
# Save profiles to CSV (utility function)
# Write selected fields of each profile to a CSV file.
#
# profiles - Array of Hash profile records.
# filename - Path of the CSV file to create.
# fields   - Column names to export, in order; when empty, the keys of the
#            first profile are used.
# delim    - Column separator used in the output file.
def save_profiles_to_csv(profiles, filename, fields=[], delim=',')
  # Define header fields
  if fields == [] && profiles.length() > 0
    fields = profiles[0].keys
  end

  count = 0

  # Write CSV file, honouring the requested delimiter
  CSV.open(filename, 'w', col_sep: delim) do |writer|
    # Write header
    writer << fields
    # Write body
    profiles.each do |profile|
      writer << fields.map { |field| profile[field] }
      # Count once per profile row, not once per field
      count += 1
    end
  end

  puts "Wrote #{count} lines to: '#{filename}'."
end
# Export a fixed set of columns for every retrieved record to a CSV file
csv_filename = "all_company_profiles.csv"
csv_header_fields = %w[work_email full_name linkedin_url job_title job_company_name]
save_profiles_to_csv(all_records, csv_filename, csv_header_fields)
require 'json'
require 'csv'
# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'
# Set your API key
Peopledatalabs.api_key = 'YOUR API KEY'

# Limit the number of records to pull (to prevent accidentally using
# more credits than expected when testing out this code).
MAX_NUM_RECORDS_LIMIT = 150 # The maximum number of records to retrieve
USE_MAX_NUM_RECORDS_LIMIT = true # Set to false to pull all available records

# Create an SQL query
SQL_QUERY = \
"""
SELECT * FROM person
WHERE job_company_name='amazon';
"""

# Pull all results in multiple batches.
# `batch` is the 1-based number of the batch about to be requested.
batch = 1

# Store all records retrieved in an array
all_records = []

# Time the process
start_time = Time.now
found_all_records = false
continue_scrolling = true
scroll_token = {}

# Records per request; stays at 100 when the limit is disabled so that a
# real size (not nil) is always sent
size = 100

# While still scrolling through data and still records to be found
while continue_scrolling && !found_all_records do

  # Check if we reached the maximum number of records we want
  if USE_MAX_NUM_RECORDS_LIMIT
    num_records_to_request = MAX_NUM_RECORDS_LIMIT - all_records.length()

    # Adjust size parameter (never more than 100 per request)
    size = [0, [100, num_records_to_request].min].max

    # Check if MAX_NUM_RECORDS_LIMIT reached
    if num_records_to_request <= 0
      puts "Stopping - reached maximum number of records to pull " \
           "[MAX_NUM_RECORDS_LIMIT = #{MAX_NUM_RECORDS_LIMIT}]."
      break
    end
  end

  # Pass parameters to the Person Search API
  response = Peopledatalabs::Search.person(searchType: 'sql', query: SQL_QUERY, size: size, scroll_token: scroll_token, pretty: true)

  # Stop on an unsuccessful response; there is nothing to scroll from
  if response['status'] != 200
    puts "Error retrieving some records:\n\t" \
         "[#{response['status']} - #{response['error']['type']}] " \
         "#{response['error']['message']}"
    break
  end

  # Add records retrieved to the records array
  all_records += response['data']
  puts "Retrieved #{response['data'].length()} records in batch #{batch} " \
       "- #{response['total'] - all_records.length()} records remaining."

  # Get scroll_token from response if exists and store it
  if response.key?('scroll_token')
    scroll_token = response['scroll_token']
  else
    continue_scrolling = false
    puts "Unable to continue scrolling."
  end

  batch += 1
  found_all_records = (all_records.length() == response['total'])
  sleep(6) # Avoid hitting rate limit thresholds
end

# Calculate time required to process batches
end_time = Time.now
runtime = end_time - start_time

# `batch` points one past the last completed batch, hence the -1
puts "Successfully recovered #{all_records.length()} profiles in " \
     "#{batch - 1} batches [#{runtime.round(2)} seconds]."
# Save profiles to CSV (utility function)
# Write selected fields of each profile to a CSV file.
#
# profiles - Array of Hash profile records.
# filename - Path of the CSV file to create.
# fields   - Column names to export, in order; when empty, the keys of the
#            first profile are used.
# delim    - Column separator used in the output file.
def save_profiles_to_csv(profiles, filename, fields=[], delim=',')
  # Define header fields
  if fields == [] && profiles.length() > 0
    fields = profiles[0].keys
  end

  count = 0

  # Write CSV file, honouring the requested delimiter
  CSV.open(filename, 'w', col_sep: delim) do |writer|
    # Write header
    writer << fields
    # Write body
    profiles.each do |profile|
      writer << fields.map { |field| profile[field] }
      # Count once per profile row, not once per field
      count += 1
    end
  end

  puts "Wrote #{count} lines to: '#{filename}'."
end
# Export a fixed set of columns for every retrieved record to a CSV file
csv_filename = "all_company_profiles.csv"
csv_header_fields = %w[work_email full_name linkedin_url job_title job_company_name]
save_profiles_to_csv(all_records, csv_filename, csv_header_fields)
package main
import (
"fmt"
"time"
"os"
"math"
"reflect"
"encoding/json"
"encoding/csv"
"context"
)
// See https://github.com/peopledatalabs/peopledatalabs-go
import (
pdl "github.com/peopledatalabs/peopledatalabs-go"
pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)
// main pulls current "amazon" employee profiles from the Person Search API in
// batches of up to 100 using scroll tokens, then saves selected fields of
// every retrieved profile to a CSV file.
func main() {
	// Set your API key
	apiKey := "YOUR API KEY"
	// Set API key as environmental variable
	// apiKey := os.Getenv("API_KEY")

	// Create a client, specifying your API key
	client := pdl.New(apiKey)

	// Limit the number of records to pull (to prevent accidentally using
	// more credits than expected when testing out this code).
	const maxNumRecordsLimit = 150     // The maximum number of records to retrieve
	const useMaxNumRecordsLimit = true // Set to false to pull all available records

	// Create an Elasticsearch query
	elasticSearchQuery := map[string]interface{}{
		"query": map[string]interface{}{
			"bool": map[string]interface{}{
				"must": []map[string]interface{}{
					{"term": map[string]interface{}{"job_company_name": "amazon"}},
				},
			},
		},
	}

	// Create a parameters JSON object
	p := pdlmodel.SearchParams{
		BaseParams: pdlmodel.BaseParams{
			Size:   100,
			Pretty: true,
		},
		SearchBaseParams: pdlmodel.SearchBaseParams{
			Query: elasticSearchQuery,
		},
	}

	// Pull all results in multiple batches.
	// batch is the 1-based number of the batch about to be requested.
	batch := 1

	// Store all records retrieved in an array
	var allRecords []pdlmodel.Person

	// Time the process
	startTime := time.Now()
	foundAllRecords := false
	continueScrolling := true
	var numRecordsToRequest int

	// While still scrolling through data and still records to be found
	for continueScrolling && !foundAllRecords {

		// Check if we reached the maximum number of records we want
		if useMaxNumRecordsLimit {
			numRecordsToRequest = maxNumRecordsLimit - len(allRecords)

			// Adjust size parameter (never more than 100 per request)
			p.BaseParams.Size = (int)(math.Max(0.0, math.Min(100.0, (float64)(numRecordsToRequest))))

			// Check if MAX_NUM_RECORDS_LIMIT reached
			if numRecordsToRequest <= 0 {
				fmt.Printf("Stopping - reached maximum number of records to pull "+
					"[MAX_NUM_RECORDS_LIMIT = %d].\n", maxNumRecordsLimit)
				break
			}
		}

		// Pass the parameters object to the Person Search API
		response, err := client.Person.Search(context.Background(), p)

		// Stop on failure; there is no usable response to scroll from
		if err != nil {
			fmt.Println("Error retrieving some records:\n\t",
				err)
			break
		}

		// Add records retrieved to the records array first, so that the
		// "remaining" figure below already accounts for this batch
		allRecords = append(allRecords, response.Data...)
		fmt.Printf("Retrieved %d records in batch %d - %d records remaining.\n",
			len(response.Data), batch, response.Total-len(allRecords))

		// Convert response to JSON to look up the scroll_token key
		var data map[string]interface{}
		jsonResponse, jsonErr := json.Marshal(response)
		if jsonErr == nil {
			jsonErr = json.Unmarshal(jsonResponse, &data)
		}

		// Only accept a non-empty string token; a missing or null value
		// must not be stringified and sent back to the API
		scrollToken, _ := data["scroll_token"].(string)
		if jsonErr == nil && scrollToken != "" {
			p.SearchBaseParams.ScrollToken = scrollToken
		} else {
			continueScrolling = false
			fmt.Println("Unable to continue scrolling.")
		}

		batch++
		foundAllRecords = (len(allRecords) == response.Total)
		time.Sleep(6 * time.Second) // Avoid hitting rate limit thresholds
	}

	// Calculate time required to process batches
	endTime := time.Now()
	runtime := endTime.Sub(startTime).Seconds()

	// batch points one past the last completed batch, hence the -1
	fmt.Printf("Successfully recovered %d profiles in %d batches [%d seconds].\n",
		len(allRecords), batch-1, (int)(math.Round((float64)(runtime))))

	// Use utility function to save profiles to CSV
	csvHeaderFields := []string{"work_email", "full_name", "linkedin_url",
		"job_title", "job_company_name"}
	csvFilename := "all_employee_profiles.csv"
	saveProfilesToCsv(allRecords, csvFilename, csvHeaderFields, ",")
}
// Save profiles to CSV (utility function)
// saveProfilesToCsv writes the given fields of each profile to a CSV file.
//
// profiles are converted to generic maps through a JSON round-trip, so the
// entries in fields are JSON key names. When fields is nil, the column list
// is derived from the `json` struct tags of the profile type (falling back
// to the Go field name for untagged fields). delim supplies the column
// separator; only its first character is used.
func saveProfilesToCsv(profiles []pdlmodel.Person, filename string, fields []string, delim string) {
	// Define header fields
	if fields == nil && len(profiles) > 0 {
		t := reflect.TypeOf(profiles[0])
		for i := 0; i < t.NumField(); i++ {
			// Use the JSON key, since values are looked up by JSON key below
			name := t.Field(i).Tag.Get("json")
			// Drop tag options such as ",omitempty"
			for k := 0; k < len(name); k++ {
				if name[k] == ',' {
					name = name[:k]
					break
				}
			}
			if name == "-" {
				continue
			}
			if name == "" {
				name = t.Field(i).Name
			}
			fields = append(fields, name)
		}
	}

	// Write CSV file
	csvFile, err := os.Create(filename)
	if err != nil {
		fmt.Println("Error:", err)
		return
	}
	defer csvFile.Close()

	csvwriter := csv.NewWriter(csvFile)
	if sep := []rune(delim); len(sep) > 0 {
		csvwriter.Comma = sep[0]
	}

	// Write header
	csvwriter.Write(fields)

	// Write body
	count := 0
	for i := range profiles {
		var data map[string]interface{}
		jsonResponse, jsonErr := json.Marshal(profiles[i])
		if jsonErr != nil {
			continue
		}
		if json.Unmarshal(jsonResponse, &data) != nil {
			continue
		}
		record := make([]string, 0, len(fields))
		for j := range fields {
			// Missing or null values become empty cells rather than "<nil>"
			if value := data[fields[j]]; value != nil {
				record = append(record, fmt.Sprintf("%v", value))
			} else {
				record = append(record, "")
			}
		}
		csvwriter.Write(record)
		count++
	}

	// Flush buffered rows and surface any write error before reporting success
	csvwriter.Flush()
	if flushErr := csvwriter.Error(); flushErr != nil {
		fmt.Println("Error:", flushErr)
		return
	}
	fmt.Printf("Wrote %d lines to: %s.\n", count, filename)
}
package main
import (
"fmt"
"time"
"os"
"math"
"reflect"
"encoding/json"
"encoding/csv"
"context"
)
// See https://github.com/peopledatalabs/peopledatalabs-go
import (
pdl "github.com/peopledatalabs/peopledatalabs-go"
pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)
// main pulls Person Search results for an SQL query in batches of up to 100
// records, following the scroll token between batches, and saves selected
// fields of the collected profiles to a CSV file.
func main() {
	// Set your API key
	apiKey := "YOUR API KEY"
	// Set API key as environmental variable
	// apiKey := os.Getenv("API_KEY")

	// Create a client, specifying your API key
	client := pdl.New(apiKey)

	// Limit the number of records to pull (to prevent accidentally using
	// more credits than expected when testing out this code).
	const maxNumRecordsLimit = 150     // The maximum number of records to retrieve
	const useMaxNumRecordsLimit = true // Set to false to pull all available records

	// Create an SQL query
	sqlQuery := "SELECT * FROM person" +
		" WHERE job_company_name='amazon'"

	// Create a parameters JSON object
	p := pdlmodel.SearchParams{
		BaseParams: pdlmodel.BaseParams{
			Size:   100,
			Pretty: true,
		},
		SearchBaseParams: pdlmodel.SearchBaseParams{
			SQL: sqlQuery,
		},
	}

	// Pull all results in multiple batches; batch is the number of the
	// batch about to be requested.
	batch := 1
	// Store all records retrieved in an array
	var allRecords []pdlmodel.Person
	// Time the process
	startTime := time.Now()

	for {
		// Check if we reached the maximum number of records we want
		if useMaxNumRecordsLimit {
			numRecordsToRequest := maxNumRecordsLimit - len(allRecords)
			// Check if MAX_NUM_RECORDS_LIMIT reached
			if numRecordsToRequest <= 0 {
				fmt.Printf("Stopping - reached maximum number of records to pull "+
					"[MAX_NUM_RECORDS_LIMIT = %d].\n", maxNumRecordsLimit)
				break
			}
			// Adjust size parameter
			p.BaseParams.Size = (int)(math.Min(100.0, (float64)(numRecordsToRequest)))
		}

		// Pass the parameters object to the Person Search API
		response, err := client.Person.Search(context.Background(), p)

		// Stop on failure: the response carries no usable records or scroll
		// token, and continuing could overwrite the stored token.
		if err != nil {
			fmt.Println("Error retrieving some records:\n\t",
				err)
			break
		}

		// Add records retrieved to the records array before reporting, so
		// that the "remaining" figure accounts for this batch.
		allRecords = append(allRecords, response.Data...)
		fmt.Printf("Retrieved %d records in batch %d - %d records remaining.\n",
			len(response.Data), batch, response.Total-len(allRecords))
		batch++

		if len(allRecords) >= response.Total {
			break
		}

		// Convert response to JSON to read the scroll_token
		var data map[string]interface{}
		jsonResponse, jsonErr := json.Marshal(response)
		if jsonErr == nil {
			jsonErr = json.Unmarshal(jsonResponse, &data)
		}
		// Only a non-empty string is a usable token (a JSON null would
		// otherwise be formatted as the literal text "<nil>").
		scrollToken, ok := data["scroll_token"].(string)
		if jsonErr != nil || !ok || scrollToken == "" {
			fmt.Println("Unable to continue scrolling.")
			break
		}
		p.SearchBaseParams.ScrollToken = scrollToken

		time.Sleep(6 * time.Second) // Avoid hitting rate limit thresholds
	}

	// Calculate time required to process batches
	endTime := time.Now()
	runtime := endTime.Sub(startTime).Seconds()
	// batch was incremented after every completed batch, so batch-1 completed
	fmt.Printf("Successfully recovered %d profiles in %d batches [%d seconds].\n",
		len(allRecords), batch-1, (int)(math.Round((float64)(runtime))))

	// Use utility function to save profiles to CSV
	csvHeaderFields := []string{"work_email", "full_name", "linkedin_url",
		"job_title", "job_company_name"}
	csvFilename := "all_employee_profiles.csv"
	saveProfilesToCsv(allRecords, csvFilename, csvHeaderFields, ",")
}
// saveProfilesToCsv writes one CSV row per profile to filename.
//
// Parameters:
//   profiles - records to export; each one is round-tripped through JSON so
//              that columns can be looked up by JSON key.
//   filename - path of the file to create (an existing file is truncated).
//   fields   - JSON keys to export, in column order. When nil, the columns
//              are derived from the struct fields of the first profile,
//              using each field's `json` tag name when it has one.
//   delim    - column delimiter; its first character is used. An empty
//              string keeps the encoding/csv default of ",".
//
// Keys that are missing or null in a profile are written as empty cells.
// Failures are reported on stdout; nothing is returned.
func saveProfilesToCsv(profiles []pdlmodel.Person, filename string, fields []string, delim string) {
	// Define header fields
	if fields == nil && len(profiles) > 0 {
		t := reflect.TypeOf(profiles[0])
		for i := 0; i < t.NumField(); i++ {
			name := t.Field(i).Name
			// Rows are looked up by JSON key below, so prefer the JSON name
			// over the Go field name whenever a tag provides one.
			if tag, ok := t.Field(i).Tag.Lookup("json"); ok {
				for k := 0; k < len(tag); k++ {
					if tag[k] == ',' {
						tag = tag[:k]
						break
					}
				}
				if tag == "-" {
					continue
				}
				if tag != "" {
					name = tag
				}
			}
			fields = append(fields, name)
		}
	}

	// Write CSV file
	csvFile, err := os.Create(filename)
	if err != nil {
		fmt.Println("Unable to create CSV file:", err)
		return
	}
	defer csvFile.Close()

	csvwriter := csv.NewWriter(csvFile)
	// Honor the requested delimiter (csv.Writer takes a single rune)
	if d := []rune(delim); len(d) > 0 {
		csvwriter.Comma = d[0]
	}

	// Write header
	if err := csvwriter.Write(fields); err != nil {
		fmt.Println("Unable to write CSV header:", err)
		return
	}

	// Write body
	count := 0
	for i := range profiles {
		jsonResponse, jsonErr := json.Marshal(profiles[i])
		if jsonErr != nil {
			fmt.Printf("Skipping profile %d: %v\n", i, jsonErr)
			continue
		}
		var data map[string]interface{}
		if jsonErr = json.Unmarshal(jsonResponse, &data); jsonErr != nil {
			fmt.Printf("Skipping profile %d: %v\n", i, jsonErr)
			continue
		}
		record := make([]string, 0, len(fields))
		for _, field := range fields {
			value := ""
			if v, ok := data[field]; ok && v != nil {
				value = fmt.Sprintf("%v", v)
			}
			record = append(record, value)
		}
		if err := csvwriter.Write(record); err != nil {
			fmt.Println("Unable to write CSV record:", err)
			return
		}
		count++
	}

	// Flush explicitly so that buffered write errors can be reported
	csvwriter.Flush()
	if err := csvwriter.Error(); err != nil {
		fmt.Println("Unable to flush CSV file:", err)
		return
	}
	fmt.Printf("Wrote %d lines to: %s.\n", count, filename)
}
import requests, json, time, csv
# Set your API key
API_KEY = "YOUR API KEY"

# Limit the number of records to pull (to prevent accidentally using
# more credits than expected when testing out this code).
MAX_NUM_RECORDS_LIMIT = 150  # The maximum number of records to retrieve
USE_MAX_NUM_RECORDS_LIMIT = True  # Set to False to pull all available records

# Set the Person Search API URL
PDL_URL = "https://api.peopledatalabs.com/v5/person/search"

# Set headers
HEADERS = {
    'Content-Type': "application/json",
    'X-api-key': API_KEY
}

# Create an Elasticsearch query
ES_QUERY = {
    "query": {
        "bool": {
            "must": [
                {"term": {"job_company_name": "amazon"}}
            ]
        }
    }
}

# Create a parameters JSON object
PARAMS = {
    'query': json.dumps(ES_QUERY),
    'size': 100,
    'pretty': True
}

# Pull all results in multiple batches; `batch` is the number of the batch
# about to be requested.
batch = 1

# Store all records retrieved in an array
all_records = []

# Time the process
start_time = time.time()
found_all_records = False
continue_scrolling = True

# While still scrolling through data and still records to be found
while continue_scrolling and not found_all_records:

    # Check if we reached the maximum number of records we want
    if USE_MAX_NUM_RECORDS_LIMIT:
        num_records_to_request = MAX_NUM_RECORDS_LIMIT - len(all_records)

        # Adjust size parameter
        PARAMS['size'] = max(0, min(100, num_records_to_request))

        # Check if MAX_NUM_RECORDS_LIMIT reached
        if num_records_to_request <= 0:
            print(f"Stopping - reached maximum number of records to pull "
                  f"[MAX_NUM_RECORDS_LIMIT = {MAX_NUM_RECORDS_LIMIT}].")
            break

    # Pass the parameters object to the Person Search API
    response = requests.get(
        PDL_URL,
        headers=HEADERS,
        params=PARAMS
    ).json()

    # Stop on an error response: the bookkeeping below reads 'data' and
    # 'total', which this script only relies on for a successful response.
    if response['status'] != 200:
        error = response.get('error', {})
        print(f"Error retrieving some records:\n\t"
              f"[{response['status']} - {error.get('type')}] "
              f"{error.get('message')}")
        break

    # Add records retrieved to the records array
    all_records.extend(response['data'])
    print(f"Retrieved {len(response['data'])} records in batch {batch} "
          f"- {response['total'] - len(all_records)} records remaining.")

    batch += 1
    found_all_records = len(all_records) >= response['total']

    # Get scroll_token from response if exists and store it in parameters object
    scroll_token = response.get('scroll_token')
    if scroll_token:
        PARAMS['scroll_token'] = scroll_token
    else:
        continue_scrolling = False
        if not found_all_records:
            print("Unable to continue scrolling.")

    # Only pause when another request will follow
    if continue_scrolling and not found_all_records:
        time.sleep(6)  # avoid hitting rate limit thresholds

# Calculate time required to process batches
end_time = time.time()
runtime = end_time - start_time

# `batch` was incremented after every completed batch
print(f"Successfully recovered {len(all_records)} profiles in "
      f"{batch - 1} batches [{round(runtime, 2)} seconds].")
# Save profiles to CSV (utility function)
def save_profiles_to_csv(profiles, filename, fields=None, delim=','):
    """Write the selected fields of each profile to a CSV file.

    Args:
        profiles: Sequence of dict records to export.
        filename: Path of the CSV file to create or overwrite.
        fields: Keys to export, in column order. When omitted or empty,
            the keys of the first profile are used.
        delim: Column delimiter passed to ``csv.writer``.

    A key that is missing from a profile is written as an empty cell
    instead of raising ``KeyError``.
    """
    # Define header fields (copied so the caller's sequence is never shared)
    fields = list(fields) if fields else []
    if not fields and len(profiles) > 0:
        fields = list(profiles[0].keys())

    # Write CSV file; newline='' lets the csv module control line endings
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile, delimiter=delim)
        # Write header
        writer.writerow(fields)
        # Write body
        count = 0
        for profile in profiles:
            writer.writerow([profile.get(field) for field in fields])
            count += 1
    print(f"Wrote {count} lines to: '{filename}'.")
# Use utility function to save profiles to CSV
csv_header_fields = ['work_email', 'full_name', "linkedin_url",
'job_title', 'job_company_name']
csv_filename = "all_employee_profiles.csv"
save_profiles_to_csv(all_records, csv_filename, csv_header_fields)
import requests, json, time, csv
# Set your API key
API_KEY = "YOUR API KEY"

# Limit the number of records to pull (to prevent accidentally using
# more credits than expected when testing out this code).
MAX_NUM_RECORDS_LIMIT = 150  # The maximum number of records to retrieve
USE_MAX_NUM_RECORDS_LIMIT = True  # Set to False to pull all available records

# Set the Person Search API URL
PDL_URL = "https://api.peopledatalabs.com/v5/person/search"

# Set headers
HEADERS = {
    'Content-Type': "application/json",
    'X-api-key': API_KEY
}

# Create an SQL query
SQL_QUERY = \
"""
SELECT * FROM person
WHERE job_company_name='amazon';
"""

# Create a parameters JSON object
PARAMS = {
    'sql': SQL_QUERY,
    'size': 100,
    'pretty': True
}

# Pull all results in multiple batches; `batch` is the number of the batch
# about to be requested.
batch = 1

# Store all records retrieved in an array
all_records = []

# Time the process
start_time = time.time()
found_all_records = False
continue_scrolling = True

# While still scrolling through data and still records to be found
while continue_scrolling and not found_all_records:

    # Check if we reached the maximum number of records we want
    if USE_MAX_NUM_RECORDS_LIMIT:
        num_records_to_request = MAX_NUM_RECORDS_LIMIT - len(all_records)

        # Adjust size parameter
        PARAMS['size'] = max(0, min(100, num_records_to_request))

        # Check if MAX_NUM_RECORDS_LIMIT reached
        if num_records_to_request <= 0:
            print(f"Stopping - reached maximum number of records to pull "
                  f"[MAX_NUM_RECORDS_LIMIT = {MAX_NUM_RECORDS_LIMIT}].")
            break

    # Pass the parameters object to the Person Search API
    response = requests.get(
        PDL_URL,
        headers=HEADERS,
        params=PARAMS
    ).json()

    # Stop on an error response: the bookkeeping below reads 'data' and
    # 'total', which this script only relies on for a successful response.
    if response['status'] != 200:
        error = response.get('error', {})
        print(f"Error retrieving some records:\n\t"
              f"[{response['status']} - {error.get('type')}] "
              f"{error.get('message')}")
        break

    # Add records retrieved to the records array
    all_records.extend(response['data'])
    print(f"Retrieved {len(response['data'])} records in batch {batch} "
          f"- {response['total'] - len(all_records)} records remaining.")

    batch += 1
    found_all_records = len(all_records) >= response['total']

    # Get scroll_token from response if exists and store it in parameters object
    scroll_token = response.get('scroll_token')
    if scroll_token:
        PARAMS['scroll_token'] = scroll_token
    else:
        continue_scrolling = False
        if not found_all_records:
            print("Unable to continue scrolling.")

    # Only pause when another request will follow
    if continue_scrolling and not found_all_records:
        time.sleep(6)  # avoid hitting rate limit thresholds

# Calculate time required to process batches
end_time = time.time()
runtime = end_time - start_time

# `batch` was incremented after every completed batch
print(f"Successfully recovered {len(all_records)} profiles in "
      f"{batch - 1} batches [{round(runtime, 2)} seconds].")
# Save profiles to CSV (utility function)
def save_profiles_to_csv(profiles, filename, fields=None, delim=','):
    """Write the selected fields of each profile to a CSV file.

    Args:
        profiles: Sequence of dict records to export.
        filename: Path of the CSV file to create or overwrite.
        fields: Keys to export, in column order. When omitted or empty,
            the keys of the first profile are used.
        delim: Column delimiter passed to ``csv.writer``.

    A key that is missing from a profile is written as an empty cell
    instead of raising ``KeyError``.
    """
    # Define header fields (copied so the caller's sequence is never shared)
    fields = list(fields) if fields else []
    if not fields and len(profiles) > 0:
        fields = list(profiles[0].keys())

    # Write CSV file; newline='' lets the csv module control line endings
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile, delimiter=delim)
        # Write header
        writer.writerow(fields)
        # Write body
        count = 0
        for profile in profiles:
            writer.writerow([profile.get(field) for field in fields])
            count += 1
    print(f"Wrote {count} lines to: '{filename}'.")
# Use utility function to save profiles to CSV
csv_header_fields = ['work_email', 'full_name', "linkedin_url",
'job_title', 'job_company_name']
csv_filename = "all_employee_profiles.csv"
save_profiles_to_csv(all_records, csv_filename, csv_header_fields)
Sales Prospecting
"I want to email engineering leaders at stripe.com, plaid.com, xignite.com and square.com, so that I can reach out to them about my product."
import json
# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY
# Create a list of company domains to search against
DESIRED_COMPANY_DOMAINS = [
'stripe.com', 'plaid.com', 'xignite.com', 'square.com'
]
# Create a client, specifying your API key
CLIENT = PDLPY(
api_key="YOUR API KEY",
)
# Create an Elasticsearch query
ES_QUERY = {
"query": {
"bool": {
"must": [
{"terms": {"job_company_website": DESIRED_COMPANY_DOMAINS}},
{"term": {"job_title_role": "engineering"}},
{"terms": {"job_title_levels": ["vp", "director", "manager"]}},
{"exists": {"field": "work_email"}}
]
}
}
}
# Create a parameters JSON object
PARAMS = {
'query': ES_QUERY,
'size': 100
}
# Pass the parameters object to the Person Search API
response = CLIENT.person.search(**PARAMS).json()
# Check for successful response
if response["status"] == 200:
for record in response['data']:
# Print selected fields
print(
record['work_email'],
record['full_name'],
record['job_title'],
record['job_company_name']
)
print(f"Successfully grabbed {len(response['data'])} records from PDL.")
print(f"{response['total']} total PDL records exist matching this query.")
else:
print("NOTE: The eager beaver was not so eager. See error and try again.")
print("error:", response)
import json
# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY
# Create a list of company domains to search against
DESIRED_COMPANY_DOMAINS = [
'stripe.com', 'plaid.com', 'xignite.com', 'square.com'
]
# Create a string representation of the list
COMPANY_DOMAINS_STRING_REP = ", ".join(
(f"'{site}'" for site in DESIRED_COMPANY_DOMAINS)
)
# Create a client, specifying your API key
CLIENT = PDLPY(
api_key="YOUR API KEY",
)
# Create an SQL query
SQL_QUERY = \
f"""
SELECT * FROM person
WHERE job_company_website IN ({COMPANY_DOMAINS_STRING_REP})
AND job_title_role='engineering'
AND job_title_levels IN ('vp', 'director', 'manager')
AND work_email IS NOT NULL;
"""
# Create a parameters JSON object
PARAMS = {
'sql': SQL_QUERY,
'size': 100
}
# Pass the parameters object to the Person Search API
response = CLIENT.person.search(**PARAMS).json()
# Check for successful response
if response["status"] == 200:
for record in response['data']:
# Print selected fields
print(
record['work_email'],
record['full_name'],
record['job_title'],
record['job_company_name']
)
print(f"Successfully grabbed {len(response['data'])} records from PDL.")
print(f"{response['total']} total PDL records exist matching this query.")
else:
print("NOTE: The eager beaver was not so eager. See error and try again.")
print("error:", response)
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';
// Create a client, specifying your API key
const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });
// Create a list of company domains to search against
const desiredCompanyDomains = [
"stripe.com", "plaid.com", "xignite.com", "square.com"
];
// Create an Elasticsearch query
const esQuery = {
query: {
bool: {
must:[
{terms: {job_company_website: desiredCompanyDomains}},
{term: {job_title_role: "engineering"}},
{terms: {job_title_levels: ["vp", "director", "manager"]}},
{exists: {field: "work_email"}}
]
}
}
}
// Create a parameters JSON object
const params = {
searchQuery: esQuery,
size: 100
}
// Pass the current parameters object to the Person Search API
PDLJSClient.person.search.elastic(params).then((data) => {
    // Iterate over the records themselves; for...in would walk the array's
    // enumerable keys (as strings) rather than its elements.
    for (const record of data.data) {
        // Print selected fields
        console.log(
            record["work_email"],
            record["full_name"],
            record["job_title"],
            record["job_company_name"],
        )
    }
    console.log(`Successfully grabbed ${data.data.length} records from PDL.`);
    console.log(`${data["total"]} total PDL records exist matching this query.`)
}).catch((error) => {
    console.log("NOTE: The eager beaver was not so eager. See error and try again.")
    console.log(error);
});
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';
// Create a client, specifying your API key
const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });
// Create a list of company domains to search against
const desiredCompanyDomains = [
"stripe.com", "plaid.com", "xignite.com", "square.com"
];
// Create a string representation of the list
var companyStringRep = "'" + desiredCompanyDomains.join("', '") + "'";
// Create an SQL query
const sqlQuery = `SELECT * FROM person
WHERE job_company_website IN (${companyStringRep})
AND job_title_role='engineering'
AND job_title_levels IN ('vp', 'director', 'manager')
AND work_email IS NOT NULL;`
// Create a parameters JSON object
const params = {
searchQuery: sqlQuery,
size: 100
}
// Pass the current parameters object to the Person Search API
PDLJSClient.person.search.sql(params).then((data) => {
    // Iterate over the records themselves; for...in would walk the array's
    // enumerable keys (as strings) rather than its elements.
    for (const record of data.data) {
        // Print selected fields
        console.log(
            record["work_email"],
            record["full_name"],
            record["job_title"],
            record["job_company_name"],
        )
    }
    console.log(`Successfully grabbed ${data.data.length} records from PDL.`);
    console.log(`${data["total"]} total PDL records exist matching this query.`)
}).catch((error) => {
    console.log("NOTE: The eager beaver was not so eager. See error and try again.")
    console.log(error);
});
require 'json'
# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'
# Set your API key
Peopledatalabs.api_key = 'YOUR API KEY'
# Create a list of company domains to search against
DESIRED_COMPANY_DOMAINS = [
'stripe.com', 'plaid.com', 'xignite.com', 'square.com'
]
# Create an Elasticsearch query
ES_QUERY = {
"query": {
"bool": {
"must": [
{"terms": {"job_company_website": DESIRED_COMPANY_DOMAINS}},
{"term": {"job_title_role": "engineering"}},
{"terms": {"job_title_levels": ["vp", "director", "manager"]}},
{"exists": {"field": "work_email"}}
]
}
}
}
# Pass parameters to the Person Search API
response = Peopledatalabs::Search.person(searchType: 'elastic', query: ES_QUERY, size: 100)
# Check for successful response
if response['status'] == 200
data = response['data']
data.each do |record|
# Print selected fields
puts "#{record['work_email']} \
#{record['full_name']} \
#{record['job_title']} \
#{record['job_company_name']}"
end
puts "Successfully grabbed #{data.length()} records from PDL."
puts "#{response['total']} total PDL records exist matching this query."
else
puts "NOTE: The eager beaver was not so eager. See error and try again."
puts "Error: #{response}"
end
require 'json'
# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'
# Set your API key
Peopledatalabs.api_key = 'YOUR API KEY'
# Create a string of company domains to search against
COMPANY_DOMAINS_STRING_REP = "'stripe.com', 'plaid.com', 'xignite.com', 'square.com'"
# Create an SQL query
SQL_QUERY = \
"""
SELECT * FROM person
WHERE job_company_website IN (#{COMPANY_DOMAINS_STRING_REP})
AND job_title_role='engineering'
AND job_title_levels IN ('vp', 'director', 'manager')
AND work_email IS NOT NULL;
"""
# Pass parameters to the Person Search API
response = Peopledatalabs::Search.person(searchType: 'sql', query: SQL_QUERY, size: 100)
# Check for successful response
if response['status'] == 200
data = response['data']
data.each do |record|
# Print selected fields
puts "#{record['work_email']} \
#{record['full_name']} \
#{record['job_title']} \
#{record['job_company_name']}"
end
puts "Successfully grabbed #{data.length()} records from PDL."
puts "#{response['total']} total PDL records exist matching this query."
else
puts "NOTE: The eager beaver was not so eager. See error and try again."
puts "Error: #{response}"
end
package main
import (
"fmt"
"context"
)
// See https://github.com/peopledatalabs/peopledatalabs-go
import (
pdl "github.com/peopledatalabs/peopledatalabs-go"
pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)
func main() {
// Set your API key
apiKey := "YOUR API KEY"
// Set API key as environmental variable
// apiKey := os.Getenv("API_KEY")
// Create a client, specifying your API key
client := pdl.New(apiKey)
// Create a list of company domains to search against
desiredCompanyDomains := []string{"stripe.com", "plaid.com", "xignite.com", "square.com"}
// Create an Elasticsearch query
elasticSearchQuery := map[string]interface{} {
"query": map[string]interface{} {
"bool": map[string]interface{} {
"must": []map[string]interface{} {
{"terms": map[string]interface{}{"job_company_website": desiredCompanyDomains}},
{"term": map[string]interface{}{"job_title_role": "engineering"}},
{"terms": map[string]interface{}{"job_title_levels": []string{"vp", "director", "manager"}}},
{"exists": map[string]interface{}{"field": "work_email"}},
},
},
},
}
// Create a parameters JSON object
params := pdlmodel.SearchParams {
BaseParams: pdlmodel.BaseParams {
Size: 100,
},
SearchBaseParams: pdlmodel.SearchBaseParams {
Query: elasticSearchQuery,
},
}
// Pass the parameters object to the Person Search API
response, err := client.Person.Search(context.Background(), params)
// Check for successful response
if err == nil {
for i := range response.Data {
record := response.Data[i]
// Print selected fields
fmt.Println(record.WorkEmail, record.FullName, record.JobTitle, record.JobCompanyName)
}
fmt.Printf("Successfully grabbed %d records from PDL.\n", len(response.Data))
fmt.Printf("%d total PDL records exist matching this query.\n", response.Total)
} else {
fmt.Println("NOTE: The eager beaver was not so eager. See error and try again.")
fmt.Println("error:", err)
}
}
package main
import (
"fmt"
"context"
)
// See https://github.com/peopledatalabs/peopledatalabs-go
import (
pdl "github.com/peopledatalabs/peopledatalabs-go"
pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)
func main() {
// Set your API key
apiKey := "YOUR API KEY"
// Set API key as environmental variable
// apiKey := os.Getenv("API_KEY")
// Create a client, specifying your API key
client := pdl.New(apiKey)
// Create a string of company domains to search against
companyDomainsStringRep := "'stripe.com', 'plaid.com', 'xignite.com', 'square.com'"
// Create an SQL query
sqlQuery := "SELECT * FROM person" +
" WHERE job_company_website IN (" + companyDomainsStringRep + ")" +
" AND job_title_role='engineering'" +
" AND job_title_levels IN ('vp', 'director', 'manager')" +
" AND work_email IS NOT NULL;"
// Create a parameters JSON object
params := pdlmodel.SearchParams {
BaseParams: pdlmodel.BaseParams {
Size: 100,
},
SearchBaseParams: pdlmodel.SearchBaseParams {
SQL: sqlQuery,
},
}
// Pass the parameters object to the Person Search API
response, err := client.Person.Search(context.Background(), params)
// Check for successful response
if err == nil {
for i := range response.Data {
record := response.Data[i]
// Print selected fields
fmt.Println(record.WorkEmail, record.FullName, record.JobTitle, record.JobCompanyName)
}
fmt.Printf("Successfully grabbed %d records from PDL.\n", len(response.Data))
fmt.Printf("%d total PDL records exist matching this query.\n", response.Total)
} else {
fmt.Println("NOTE: The eager beaver was not so eager. See error and try again.")
fmt.Println("error:", err)
}
}
import requests, json
# Set your API key
API_KEY = "YOUR API KEY"
# Set the Person Search API URL
PDL_URL = "https://api.peopledatalabs.com/v5/person/search"
# Set headers
HEADERS = {
'Content-Type': "application/json",
'X-api-key': API_KEY
}
# Create a list of company domains to search against
DESIRED_COMPANY_DOMAINS = [
'stripe.com', 'plaid.com', 'xignite.com', 'square.com'
]
# https://pdl-prod-schema.s3-us-west-2.amazonaws.com/15.0/enums/job_title_levels.txt
# for enumerated possible values of job_title_levels
# Create an Elasticsearch query
ES_QUERY = {
"query": {
"bool": {
"must": [
{"terms": {"job_company_website": DESIRED_COMPANY_DOMAINS}},
{"term": {"job_title_role": "engineering"}},
{"terms": {"job_title_levels": ["vp", "director", "manager"]}},
{"exists": {"field": "work_email"}}
]
}
}
}
# Create a parameters JSON object
PARAMS = {
'query': json.dumps(ES_QUERY),
'size': 100
}
# Pass the parameters object to the Person Search API
response = requests.get(
PDL_URL,
headers=HEADERS,
params=PARAMS
).json()
# Check for successful response
if response["status"] == 200:
for record in response['data']:
# Print selected fields
print(
record['work_email'],
record['full_name'],
record['job_title'],
record['job_company_name']
)
print(f"Successfully grabbed {len(response['data'])} records from PDL.")
print(f"{response['total']} total PDL records exist matching this query.")
else:
print("NOTE: The eager beaver was not so eager. See error and try again.")
print("error:", response)
import requests, json
# Set your API key
API_KEY = "YOUR API KEY"
# Set the Person Search API URL
PDL_URL = "https://api.peopledatalabs.com/v5/person/search"
# Set headers
HEADERS = {
'Content-Type': "application/json",
'X-api-key': API_KEY
}
# Create a list of company domains to search against
DESIRED_COMPANY_DOMAINS = [
'stripe.com', 'plaid.com', 'xignite.com', 'square.com'
]
# Create a string representation of the list
COMPANY_DOMAINS_STRING_REP = ", ".join(
(f"'{site}'" for site in DESIRED_COMPANY_DOMAINS)
)
# See https://pdl-prod-schema.s3-us-west-2.amazonaws.com/15.0/enums/job_title_levels.txt
# for the enumerated possible values of job_title_levels
# Create an SQL query
SQL_QUERY = \
f"""
SELECT * FROM person
WHERE job_company_website IN ({COMPANY_DOMAINS_STRING_REP})
AND job_title_role='engineering'
AND job_title_levels IN ('vp', 'director', 'manager')
AND work_email IS NOT NULL;
"""
# Create a parameters JSON object
PARAMS = {
'sql': SQL_QUERY,
'size': 100
}
# Pass the parameters object to the Person Search API
response = requests.get(
PDL_URL,
headers=HEADERS,
params=PARAMS
).json()
# Check for successful response
if response["status"] == 200:
for record in response['data']:
# Print selected fields
print(
record['work_email'],
record['full_name'],
record['job_title'],
record['job_company_name']
)
print(f"Successfully grabbed {len(response['data'])} records from PDL.")
print(f"{response['total']} total PDL records exist matching this query.")
else:
print("NOTE: The eager beaver was not so eager. See error and try again.")
print("error:", response)
Recruiting
"I have a client looking for marketing managers and dishwashers in Oregon but NOT in Portland (don't ask why). They want to reach out to them on LinkedIn, so they asked that each candidate have a LinkedIn URL. I want as many people as PDL can give me matching these criteria."
from time import sleep
import json
# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY
# Create a client, specifying your API key
CLIENT = PDLPY(
api_key="YOUR API KEY",
)
# Run Person searches in batches with 6-second intervals
def get_all_pdl_records_es(query):
    """Collect every record matching an Elasticsearch query by scrolling.

    Requests pages of up to 100 records from the Person Search API through
    the module-level ``CLIENT``, following the ``scroll_token`` returned
    with each page, until a response is unsuccessful or carries no token.

    Args:
        query: Elasticsearch query dict passed as the ``query`` parameter.

    Returns:
        A list of all records retrieved.
    """
    PAGE_SIZE = 100

    # Store all records retrieved in an array
    all_records = []
    batch = 1

    # Create a parameters JSON object
    params = {
        'query': query,
        'size': PAGE_SIZE,
        'dataset': "all"
    }

    # Keep retrieving records until unable to continue scrolling
    while True:
        # Pass the parameters object to the Person Search API
        response = CLIENT.person.search(**params).json()

        # Check for successful response
        if response['status'] != 200:
            print("Unable to continue scrolling.")
            break

        # Add records retrieved to the records array
        all_records.extend(response['data'])
        print(f"Batch {batch} success!")
        batch += 1

        # Store the scroll_token for next batch; a missing or empty token
        # ends the scroll instead of raising KeyError.
        scroll_token = response.get('scroll_token')
        if not scroll_token:
            break
        params['scroll_token'] = scroll_token

        # Pause only when another request will follow
        sleep(6)

    print("Done!")
    return all_records
if __name__ == '__main__':
# Create an Elasticsearch query
ES_QUERY = {
"query": {
"bool": {
"must": [
{"term": {"location_region": "oregon"}},
{"bool": {
"should": [
{"match": {"job_title": "dishwasher"}},
{"bool": {
"must": [
{"term": {"job_title_role": "marketing"}},
{"term": {"job_title_levels": "manager"}}
]
}}
]
}
},
{"exists": {"field": "linkedin_url"}}
],
"must_not":[
{"term": {"location_locality": "portland"}},
]
}
}
}
# Find all records matching query
recruiting_leads = get_all_pdl_records_es(ES_QUERY)
print(f"Got {len(recruiting_leads)} recruiting leads for my wealthy client!")
#GO make_money_with_data(recruiting_leads)!
from time import sleep
import json
# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY
# Create a client, specifying your API key
CLIENT = PDLPY(
api_key="YOUR API KEY",
)
# Run Person searches in batches with 6-second intervals
def get_all_pdl_records_sql(query):
    """Collect every record matching an SQL query by scrolling.

    Requests pages of up to 100 records from the Person Search API through
    the module-level ``CLIENT``, following the ``scroll_token`` returned
    with each page, until a response is unsuccessful or carries no token.

    Args:
        query: SQL query string passed as the ``sql`` parameter.

    Returns:
        A list of all records retrieved.
    """
    PAGE_SIZE = 100

    # Store all records retrieved in an array
    all_records = []
    batch = 1

    # Create a parameters JSON object
    params = {
        'sql': query,
        'size': PAGE_SIZE,
        'dataset': "all"
    }

    # Keep retrieving records until unable to continue scrolling
    while True:
        # Pass the parameters object to the Person Search API
        response = CLIENT.person.search(**params).json()

        # Check for successful response
        if response['status'] != 200:
            print("Unable to continue scrolling.")
            break

        # Add records retrieved to the records array
        all_records.extend(response['data'])
        print(f"Batch {batch} success!")
        batch += 1

        # Store the scroll_token for next batch; a missing or empty token
        # ends the scroll instead of raising KeyError.
        scroll_token = response.get('scroll_token')
        if not scroll_token:
            break
        params['scroll_token'] = scroll_token

        # Pause only when another request will follow
        sleep(6)

    print("Done!")
    return all_records
# Create an SQL query
SQL_QUERY = \
"""
SELECT * FROM person
WHERE location_region='oregon'
AND NOT location_locality='portland'
AND (
job_title LIKE '%dishwasher%'
OR (
job_title_role='marketing'
AND job_title_levels='manager'
)
)
AND linkedin_url IS NOT NULL;
"""
# Find all records matching query
recruiting_leads = get_all_pdl_records_sql(SQL_QUERY)
print(f"Got {len(recruiting_leads)} recruiting leads for my wealthy client!")
#GO make_money_with_data(recruiting_leads)!
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';
// Create a client, specifying your API key
const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });
// Create an Elasticsearch query
const esQuery = {
"query": {
"bool": {
"must": [
{"term": {"location_region": "oregon"}},
{"bool": {
"should": [
{"match": {"job_title": "dishwasher"}},
{"bool": {
"must": [
{"term": {"job_title_role": "marketing"}},
{"term": {"job_title_levels": "manager"}}
]
}}
]
}
},
{"exists": {"field": "linkedin_url"}}
],
"must_not":[
{"term": {"location_locality": "portland"}},
]
}
}
}
// Store all records retrieved in an array
var allRecords = [];
// The current scroll_token
var scrollToken = null;
var pageSize = 100;
var batch = 1;
// Create a parameters JSON object
var params = {
searchQuery: esQuery,
size: pageSize,
scroll_token: null,
dataset: "all"
}
// Run batches recursively
runBatch();
// Run Person searches in batches.
// Each call requests one page using the current scrollToken, appends the
// page to allRecords, and schedules the next call 6 seconds later while the
// API keeps returning a scroll_token. The final summary is printed both when
// scrolling ends normally and when a request fails.
function runBatch() {
    // Print the closing summary (shared by the success and failure paths)
    const reportResults = () => {
        console.log("Done!");
        console.log(`Got ${allRecords.length} recruiting leads for my wealthy client!`);
    };

    // Store current scroll_token in parameters object
    params.scroll_token = scrollToken;

    // Pass the current parameters object to the Person Search API
    PDLJSClient.person.search.elastic(params).then((data) => {
        // Add records retrieved to the records array
        Array.prototype.push.apply(allRecords, data.data);

        // Store scroll_token from response
        scrollToken = data['scroll_token'];

        console.log(`Batch ${batch} success!`);
        batch++;

        // Run searches in batches with 6-second intervals
        if (scrollToken) {
            setTimeout(runBatch, 6000);
        } else {
            // No token means there is nothing left to scroll through
            reportResults();
        }
    }).catch((error) => {
        console.log("Unable to continue scrolling.");
        reportResults();
    });
}
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';
// Create a client, specifying your API key
const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });
// Create an SQL query
const sqlQuery = `SELECT * FROM person
WHERE location_region='oregon'
AND NOT location_locality='portland'
AND (
job_title LIKE '%dishwasher%'
OR (
job_title_role='marketing'
AND job_title_levels='manager'
)
)
AND linkedin_url IS NOT NULL;`;
// Store all records retrieved in an array
var allRecords = [];
// The current scroll_token
var scrollToken = null;
var pageSize = 100;
var batch = 1;
// Create a parameters JSON object
var params = {
searchQuery: sqlQuery,
size: pageSize,
scroll_token: null,
dataset: "all"
}
// Run batches recursively
runBatch();
// Run Person searches in batches.
// Each call requests one page using the current scrollToken, appends the
// page to allRecords, and schedules the next call 6 seconds later while the
// API keeps returning a scroll_token. The final summary is printed both when
// scrolling ends normally and when a request fails.
function runBatch() {
    // Print the closing summary (shared by the success and failure paths)
    const reportResults = () => {
        console.log("Done!");
        console.log(`Got ${allRecords.length} recruiting leads for my wealthy client!`);
    };

    // Store current scroll_token in parameters object
    params.scroll_token = scrollToken;

    // Pass the current parameters object to the Person Search API
    PDLJSClient.person.search.sql(params).then((data) => {
        // Add records retrieved to the records array
        Array.prototype.push.apply(allRecords, data.data);

        // Store scroll_token from response
        scrollToken = data['scroll_token'];

        console.log(`Batch ${batch} success!`);
        batch++;

        // Run searches in batches with 6-second intervals
        if (scrollToken) {
            setTimeout(runBatch, 6000);
        } else {
            // No token means there is nothing left to scroll through
            reportResults();
        }
    }).catch((error) => {
        console.log("Unable to continue scrolling.");
        reportResults();
    });
}
require 'json'
# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'
# Set your API key
Peopledatalabs.api_key = 'YOUR API KEY'
PAGE_SIZE = 100
# Run Person searches in batches with 6-second intervals.
#
# Requests pages of PAGE_SIZE records for the given Elasticsearch query,
# passing along the scroll_token from the previous response, until a
# response is unsuccessful or returns a nil scroll_token.
# Returns an array of all records retrieved.
def get_all_pdl_records_es(query)
  # Store all records retrieved in an array
  records = []
  batch_number = 1

  # The current scroll_token
  token = {}

  # Keep retrieving records until unable to continue scrolling
  loop do
    # Pass parameters to the Person Search API
    response = Peopledatalabs::Search.person(
      searchType: 'elastic',
      query: query,
      size: PAGE_SIZE,
      scroll_token: token,
      dataset: "all"
    )

    # Check for successful response
    unless response['status'] == 200
      puts "Unable to continue scrolling."
      break
    end

    # Add records retrieved to the records array
    records += response['data']

    # Store the scroll_token for next batch
    token = response['scroll_token']

    puts "Batch #{batch_number} success!"
    sleep(6)
    batch_number += 1

    # A nil token means there is nothing further to request
    break if token.nil?
  end

  puts "Done!"
  records
end
# Create an Elasticsearch query
ES_QUERY = {
"query": {
"bool": {
"must": [
{"term": {"location_region": "oregon"}},
{"bool": {
"should": [
{"match": {"job_title": "dishwasher"}},
{"bool": {
"must": [
{"term": {"job_title_role": "marketing"}},
{"term": {"job_title_levels": "manager"}}
]
}}
]
}
},
{"exists": {"field": "linkedin_url"}}
],
"must_not":[
{"term": {"location_locality": "portland"}},
]
}
}
}
# Find all records matching query
recruiting_leads = get_all_pdl_records_es(ES_QUERY)
puts "Got #{recruiting_leads.length()} recruiting leads for my wealthy client!"
#GO make_money_with_data(recruiting_leads)!
require 'json'
# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'
# Set your API key
Peopledatalabs.api_key = 'YOUR API KEY'
PAGE_SIZE = 100
# Run Person searches in batches with 6-second intervals.
#
# Pages through every result for the given SQL query, following the
# scroll_token returned by each response, and returns all records collected.
# Stops when a response has no scroll_token or a non-200 status.
def get_all_pdl_records_sql(query)
  # Store all records retrieved in an array
  all_records = []
  batch = 1
  # No scroll_token exists before the first request, so start with nil
  # rather than sending an empty Hash as the token
  scroll_token = nil

  # Keep retrieving records until unable to continue scrolling
  loop do
    # Pass parameters to the Person Search API
    response = Peopledatalabs::Search.person(searchType: 'sql', query: query, size: PAGE_SIZE, scroll_token: scroll_token, dataset: "all")

    # Check for successful response
    unless response['status'] == 200
      puts "Unable to continue scrolling."
      break
    end

    # Add records retrieved to the records array
    all_records.concat(response['data'])
    puts "Batch #{batch} success!"
    batch += 1

    # Store the scroll_token for next batch; none means this was the last page
    scroll_token = response['scroll_token']
    break if scroll_token.nil?

    # Pause only when another request will actually follow
    sleep(6)
  end

  puts "Done!"
  all_records
end
# Create an SQL query
SQL_QUERY = \
"""
SELECT * FROM person
WHERE location_region='oregon'
AND NOT location_locality='portland'
AND (
job_title LIKE '%dishwasher%'
OR (
job_title_role='marketing'
AND job_title_levels='manager'
)
)
AND linkedin_url IS NOT NULL;
"""
# Find all records matching query
recruiting_leads = get_all_pdl_records_sql(SQL_QUERY)
puts "Got #{recruiting_leads.length()} recruiting leads for my wealthy client!"
#GO make_money_with_data(recruiting_leads)!
package main
import (
"fmt"
"time"
"context"
)
// See https://github.com/peopledatalabs/peopledatalabs-go
import (
pdl "github.com/peopledatalabs/peopledatalabs-go"
pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)
func main() {
// Create an Elasticsearch query
elasticSearchQuery := map[string]interface{} {
"query": map[string]interface{} {
"bool": map[string]interface{} {
"must": []map[string]interface{} {
{"term": map[string]interface{} {"location_region": "oregon"}},
{"bool": map[string]interface{} {
"should": []map[string]interface{} {
{"match": map[string]interface{} {"job_title": "dishwasher"}},
{"bool": map[string]interface{} {
"must": []map[string]interface{} {
{"term": map[string]interface{} {"job_title_role": "marketing"}},
{"term": map[string]interface{} {"job_title_levels": "manager"}},
},
},
},
},
},
},
{"exists": map[string]interface{} {"field": "linkedin_url"}},
},
"must_not": []map[string]interface{} {
{"term": map[string]interface{} {"location_locality": "portland"}},
},
},
},
}
// Find all records matching query
recruitingLeads := getAllPdlRecordsEs(elasticSearchQuery)
fmt.Printf("Got %d recruiting leads for my wealthy client!\n", len(recruitingLeads))
//GO make_money_with_data(recruiting_leads)!
}
// getAllPdlRecordsEs runs Person searches in batches with 6-second intervals.
// It pages through the results of the given Elasticsearch query, following
// the scroll token returned by each response, and returns every record
// collected before the token runs out or a request fails.
func getAllPdlRecordsEs(query interface{}) []pdlmodel.Person {
	const pageSize = 100

	// Set your API key
	apiKey := "YOUR API KEY"
	// Set API key as environmental variable
	// apiKey := os.Getenv("API_KEY")

	// Create a client, specifying your API key
	client := pdl.New(apiKey)

	// Create a parameters JSON object; ScrollToken starts out empty
	p := pdlmodel.SearchParams{
		BaseParams: pdlmodel.BaseParams{
			Size: pageSize,
		},
		SearchBaseParams: pdlmodel.SearchBaseParams{
			Query:   query,
			Dataset: "all",
		},
	}

	// Store all records retrieved in an array
	var allRecords []pdlmodel.Person

	// Keep retrieving records until unable to continue scrolling
	for batch := 1; ; batch++ {
		// Pass the parameters object to the Person Search API
		response, err := client.Person.Search(context.Background(), p)
		if err != nil {
			fmt.Println("Unable to continue scrolling.")
			// Surface the cause instead of silently discarding it
			fmt.Println("Error:", err)
			break
		}

		// Add records retrieved to the records array
		allRecords = append(allRecords, response.Data...)
		fmt.Printf("Batch %d success!\n", batch)

		// An empty scroll token means this was the last page
		if response.ScrollToken == "" {
			break
		}
		// Store scroll_token from response for the next request
		p.SearchBaseParams.ScrollToken = response.ScrollToken

		// Pause only when another request will actually follow
		time.Sleep(6 * time.Second)
	}

	fmt.Println("Done!")
	return allRecords
}
package main
import (
"fmt"
"time"
"context"
)
// See https://github.com/peopledatalabs/peopledatalabs-go
import (
pdl "github.com/peopledatalabs/peopledatalabs-go"
pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)
func main() {
// Create an SQL query
sqlQuery := "SELECT * FROM person" +
" WHERE location_region='oregon'" +
" AND NOT location_locality='portland'" +
" AND (" +
" job_title LIKE '%dishwasher%'" +
" OR (" +
" job_title_role='marketing'" +
" AND job_title_levels='manager'" +
" )" +
" )" +
" AND linkedin_url IS NOT NULL;"
// Find all records matching query
recruitingLeads := getAllPdlRecordsEs(sqlQuery)
fmt.Printf("Got %d recruiting leads for my wealthy client!\n", len(recruitingLeads))
//GO make_money_with_data(recruiting_leads)!
}
// getAllPdlRecordsEs runs Person searches in batches with 6-second intervals.
// Despite the "Es" suffix, this variant sends the query as SQL. It follows the
// scroll token returned by each response and returns every record collected
// before the token runs out or a request fails.
func getAllPdlRecordsEs(query string) []pdlmodel.Person {
	const pageSize = 100

	// Set your API key
	apiKey := "YOUR API KEY"
	// Set API key as environmental variable
	// apiKey := os.Getenv("API_KEY")

	// Create a client, specifying your API key
	client := pdl.New(apiKey)

	// Create a parameters JSON object; ScrollToken starts out empty
	p := pdlmodel.SearchParams{
		BaseParams: pdlmodel.BaseParams{
			Size: pageSize,
		},
		SearchBaseParams: pdlmodel.SearchBaseParams{
			SQL:     query,
			Dataset: "all",
		},
	}

	// Store all records retrieved in an array
	var allRecords []pdlmodel.Person

	// Keep retrieving records until unable to continue scrolling
	for batch := 1; ; batch++ {
		// Pass the parameters object to the Person Search API
		response, err := client.Person.Search(context.Background(), p)
		if err != nil {
			fmt.Println("Unable to continue scrolling.")
			// Surface the cause instead of silently discarding it
			fmt.Println("Error:", err)
			break
		}

		// Add records retrieved to the records array
		allRecords = append(allRecords, response.Data...)
		fmt.Printf("Batch %d success!\n", batch)

		// An empty scroll token means this was the last page
		if response.ScrollToken == "" {
			break
		}
		// Store scroll_token from response for the next request
		p.SearchBaseParams.ScrollToken = response.ScrollToken

		// Pause only when another request will actually follow
		time.Sleep(6 * time.Second)
	}

	fmt.Println("Done!")
	return allRecords
}
from time import sleep
import requests, json
# Set your API key
API_KEY = "YOUR API KEY"
# Set the Person Search API URL
PDL_URL = "https://api.peopledatalabs.com/v5/person/search"
# Set headers
HEADERS = {
'Content-Type': "application/json",
'X-api-key': API_KEY
}
# Run Person searches in batches with 6-second intervals
def get_all_pdl_records_es(query):
    """Fetch every Person Search result for an Elasticsearch query.

    Requests pages of PAGE_SIZE records with a 6-second pause between
    requests, following the scroll_token of each response until the API
    stops returning one or a request does not succeed.

    Args:
        query: Elasticsearch query dict; it is sent JSON-encoded.

    Returns:
        List of all records gathered from the successful batches.
    """
    PAGE_SIZE = 100

    # Store all records retrieved in an array
    all_records = []
    batch = 1

    # Create a parameters JSON object
    params = {
        'query': json.dumps(query),
        'size': PAGE_SIZE,
        'dataset': "all"
    }

    # Keep retrieving records until unable to continue scrolling
    while True:
        # Pass the parameters object to the Person Search API
        response = requests.get(
            PDL_URL,
            headers=HEADERS,
            params=params
        ).json()

        # Check for successful response
        if response.get('status') != 200:
            print("Unable to continue scrolling.")
            break

        # Add records retrieved to the records array
        all_records.extend(response['data'])
        print(f"Batch {batch} success!")
        batch += 1

        # .get() because the key may be absent once there are no more pages
        scroll_token = response.get('scroll_token')
        if not scroll_token:
            break

        # Store the scroll_token for next batch
        params['scroll_token'] = scroll_token
        # Pause only when another request will actually follow
        sleep(6)

    print("Done!")
    return all_records
if __name__ == '__main__':
# Create an Elasticsearch query
ES_QUERY = {
"query": {
"bool": {
"must": [
{"term": {"location_region": "oregon"}},
{"bool": {
"should": [
{"match": {"job_title": "dishwasher"}},
{"bool": {
"must": [
{"term": {"job_title_role": "marketing"}},
{"term": {"job_title_levels": "manager"}}
]
}}
]
}
},
{"exists": {"field": "linkedin_url"}}
],
"must_not":[
{"term": {"location_locality": "portland"}},
]
}
}
}
# Find all records matching query
recruiting_leads = get_all_pdl_records_es(ES_QUERY)
print(f"Got {len(recruiting_leads)} recruiting leads for my wealthy client!")
#GO make_money_with_data(recruiting_leads)!
from time import sleep
import requests
# Set your API key
API_KEY = "YOUR API KEY"
# Set the Person Search API URL
PDL_URL = "https://api.peopledatalabs.com/v5/person/search"
# Set headers
HEADERS = {
'Content-Type': "application/json",
'X-api-key': API_KEY
}
# Run Person searches in batches with 6-second intervals
def get_all_pdl_records_sql(query):
    """Fetch every Person Search result for an SQL query.

    Requests pages of PAGE_SIZE records with a 6-second pause between
    requests, following the scroll_token of each response until the API
    stops returning one or a request does not succeed.

    Args:
        query: SQL query string.

    Returns:
        List of all records gathered from the successful batches.
    """
    PAGE_SIZE = 100

    # Store all records retrieved in an array
    all_records = []
    batch = 1

    # Create a parameters JSON object
    params = {
        'sql': query,
        'size': PAGE_SIZE,
        'dataset': "all"
    }

    # Keep retrieving records until unable to continue scrolling
    while True:
        # Pass the parameters object to the Person Search API
        response = requests.get(
            PDL_URL,
            headers=HEADERS,
            params=params
        ).json()

        # Check for successful response
        if response.get('status') != 200:
            print("Unable to continue scrolling.")
            break

        # Add records retrieved to the records array
        all_records.extend(response['data'])
        print(f"Batch {batch} success!")
        # Incremented exactly once per batch (a stray second increment
        # used to follow the loop's else branch)
        batch += 1

        # .get() because the key may be absent once there are no more pages
        scroll_token = response.get('scroll_token')
        if not scroll_token:
            break

        # Store the scroll_token for next batch
        params['scroll_token'] = scroll_token
        # Pause only when another request will actually follow
        sleep(6)

    print("Done!")
    return all_records
# Create an SQL query
SQL_QUERY = \
"""
SELECT * FROM person
WHERE location_region='oregon'
AND NOT location_locality='portland'
AND (
job_title LIKE '%dishwasher%'
OR (
job_title_role='marketing'
AND job_title_levels='manager'
)
)
AND linkedin_url IS NOT NULL;
"""
# Find all records matching query
recruiting_leads = get_all_pdl_records_sql(SQL_QUERY)
print(f"got {len(recruiting_leads)} recruiting leads for my wealthy client!")
#GO make_money_with_data(recruiting_leads)!
Ads
"I want to sell yachts to rich people through ads on Facebook."
from time import sleep
import json
# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY
# Create a client, specifying your API key
CLIENT = PDLPY(
api_key="YOUR API KEY",
)
# Run Person searches in batches with 6-second intervals
def get_all_pdl_records_es(query):
    """Fetch every Person Search result for an Elasticsearch query.

    Requests pages of PAGE_SIZE records through CLIENT with a 6-second
    pause between requests, following the scroll_token of each response
    until the API stops returning one or a request does not succeed.

    Args:
        query: Elasticsearch query dict.

    Returns:
        List of all records gathered from the successful batches.
    """
    PAGE_SIZE = 100

    # Store all records retrieved in an array
    all_records = []
    batch = 1

    # Create a parameters JSON object
    params = {
        'query': query,
        'size': PAGE_SIZE,
        'dataset': "all"
    }

    # Keep retrieving records until unable to continue scrolling
    while True:
        # Pass the parameters object to the Person Search API
        response = CLIENT.person.search(**params).json()

        # Check for successful response
        if response.get('status') != 200:
            print("Unable to continue scrolling.")
            break

        # Add records retrieved to the records array
        all_records.extend(response['data'])
        print(f"Batch {batch} success!")
        batch += 1

        # .get() because the key may be absent once there are no more pages
        scroll_token = response.get('scroll_token')
        if not scroll_token:
            break

        # Store the scroll_token for next batch
        params['scroll_token'] = scroll_token
        # Pause only when another request will actually follow
        sleep(6)

    print("Done!")
    return all_records
if __name__ == '__main__':
# Create an Elasticsearch query
ES_QUERY = {
"query": {
"bool": {
"must": [
{"exists": {"field": "facebook_id"}},
{"prefix": {"interests": "yacht"}},
{"term": {"inferred_salary": ">250,000"}}
]
}
}
}
# Find all records matching query
rich_yacht_people = get_all_pdl_records_es(ES_QUERY)
print(f"Got {len(rich_yacht_people)} rich yacht people for my wealthy client!")
#GO make_money_with_data(rich_yacht_people)!
from time import sleep
import json
# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY
# Create a client, specifying your API key
CLIENT = PDLPY(
api_key="YOUR API KEY",
)
# Run Person searches in batches with 6-second intervals
def get_all_pdl_records_sql(query):
    """Fetch every Person Search result for an SQL query.

    Requests pages of PAGE_SIZE records through CLIENT with a 6-second
    pause between requests, following the scroll_token of each response
    until the API stops returning one or a request does not succeed.

    Args:
        query: SQL query string.

    Returns:
        List of all records gathered from the successful batches.
    """
    PAGE_SIZE = 100

    # Store all records retrieved in an array
    all_records = []
    batch = 1

    # Create a parameters JSON object
    params = {
        'sql': query,
        'size': PAGE_SIZE,
        'dataset': "all"
    }

    # Keep retrieving records until unable to continue scrolling
    while True:
        # Pass the parameters object to the Person Search API
        response = CLIENT.person.search(**params).json()

        # Check for successful response
        if response.get('status') != 200:
            print("Unable to continue scrolling.")
            break

        # Add records retrieved to the records array
        all_records.extend(response['data'])
        print(f"Batch {batch} success!")
        batch += 1

        # .get() because the key may be absent once there are no more pages
        scroll_token = response.get('scroll_token')
        if not scroll_token:
            break

        # Store the scroll_token for next batch
        params['scroll_token'] = scroll_token
        # Pause only when another request will actually follow
        sleep(6)

    print("Done!")
    return all_records
# Create an SQL query
SQL_QUERY = \
"""
SELECT * FROM person
WHERE facebook_id IS NOT NULL
AND interests LIKE 'yacht%'
AND inferred_salary='>250,000';
"""
# Find all records matching query
rich_yacht_people = get_all_pdl_records_sql(SQL_QUERY)
print(f"got {len(rich_yacht_people)} rich yacht people for my wealthy client!")
#GO make_money_with_data(rich_yacht_people)
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';
// Create a client, specifying your API key
const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });
// Create an Elasticsearch query
const esQuery = {
"query": {
"bool": {
"must": [
{"exists": {"field": "facebook_id"}},
{"prefix": {"interests": "yacht"}},
{"term": {"inferred_salary": ">250,000"}}
]
}
}
}
// Store all records retreived in an array
var allRecords = [];
// The current scroll_token
var scrollToken = null;
var pageSize = 100;
var batch = 1;
// Create a parameters JSON object
var params = {
searchQuery: esQuery,
size: pageSize,
scroll_token: null,
dataset: "all"
}
// Run batches recursively
runBatch();
// Run Person searches in batches: fetch one page, then request the next one
// 6 seconds later for as long as the response carries a scroll_token
function runBatch() {
    // Store current scroll_token in parameters object
    params.scroll_token = scrollToken;

    // Pass the current parameters object to the Person Search API
    PDLJSClient.person.search.elastic(params).then((data) => {
        // Add records retrieved to the records array
        allRecords.push(...data.data);
        // Store scroll_token from response
        scrollToken = data['scroll_token'];
        console.log(`Batch ${batch} success!`);
        batch++;
        // Tell the final step whether another page is available
        return Boolean(scrollToken);
    }).catch((error) => {
        console.log("Unable to continue scrolling.");
        console.log(error);
        return false;
    }).then((hasMore) => {
        if (hasMore) {
            // Run searches in batches with 6-second intervals
            setTimeout(runBatch, 6000);
            return;
        }
        // Reached on the last page as well as on failure; previously the
        // summary was printed only when a request failed
        console.log("Done!");
        console.log(`Got ${allRecords.length} rich yacht people for my wealthy client!`);
    });
}
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';
// Create a client, specifying your API key
const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });
// Create an SQL query
const sqlQuery = `SELECT * FROM person
WHERE facebook_id IS NOT NULL
AND interests LIKE 'yacht%'
AND inferred_salary='>250,000';`;
// Store all records retreived in an array
var allRecords = [];
// The current scroll_token
var scrollToken = null;
var pageSize = 100;
var batch = 1;
// Create a parameters JSON object
var params = {
searchQuery: sqlQuery,
size: pageSize,
scroll_token: null,
dataset: "all"
}
// Run batches recursively
runBatch();
// Run Person searches in batches: fetch one page, then request the next one
// 6 seconds later for as long as the response carries a scroll_token
function runBatch() {
    // Store current scroll_token in parameters object
    params.scroll_token = scrollToken;

    // Pass the current parameters object to the Person Search API
    PDLJSClient.person.search.sql(params).then((data) => {
        // Add records retrieved to the records array
        allRecords.push(...data.data);
        // Store scroll_token from response
        scrollToken = data['scroll_token'];
        console.log(`Batch ${batch} success!`);
        batch++;
        // Tell the final step whether another page is available
        return Boolean(scrollToken);
    }).catch((error) => {
        console.log("Unable to continue scrolling.");
        console.log(error);
        return false;
    }).then((hasMore) => {
        if (hasMore) {
            // Run searches in batches with 6-second intervals
            setTimeout(runBatch, 6000);
            return;
        }
        // Reached on the last page as well as on failure; previously the
        // summary was printed only when a request failed
        console.log("Done!");
        console.log(`Got ${allRecords.length} rich yacht people for my wealthy client!`);
    });
}
require 'json'
# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'
# Set your API key
Peopledatalabs.api_key = 'YOUR API KEY'
PAGE_SIZE = 100
# Run Person searches in batches with 6-second intervals.
#
# Pages through every result for the given Elasticsearch query, following the
# scroll_token returned by each response, and returns all records collected.
# Stops when a response has no scroll_token or a non-200 status.
def get_all_pdl_records_es(query)
  # Store all records retrieved in an array
  all_records = []
  batch = 1
  # No scroll_token exists before the first request, so start with nil
  # rather than sending an empty Hash as the token
  scroll_token = nil

  # Keep retrieving records until unable to continue scrolling
  loop do
    # Pass parameters to the Person Search API
    response = Peopledatalabs::Search.person(searchType: 'elastic', query: query, size: PAGE_SIZE, scroll_token: scroll_token, dataset: "all")

    # Check for successful response
    unless response['status'] == 200
      puts "Unable to continue scrolling."
      break
    end

    # Add records retrieved to the records array
    all_records.concat(response['data'])
    puts "Batch #{batch} success!"
    batch += 1

    # Store the scroll_token for next batch; none means this was the last page
    scroll_token = response['scroll_token']
    break if scroll_token.nil?

    # Pause only when another request will actually follow
    sleep(6)
  end

  puts "Done!"
  all_records
end
# Create an Elasticsearch query
ES_QUERY = {
"query": {
"bool": {
"must": [
{"exists": {"field": "facebook_id"}},
{"prefix": {"interests": "yacht"}},
{"term": {"inferred_salary": ">250,000"}}
]
}
}
}
# Find all records matching query
rich_yacht_people = get_all_pdl_records_es(ES_QUERY)
puts "Got #{rich_yacht_people.length()} rich yacht people for my wealthy client!"
#GO make_money_with_data(rich_yacht_people)!
require 'json'
# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'
# Set your API key
Peopledatalabs.api_key = 'YOUR API KEY'
PAGE_SIZE = 100
# Run Person searches in batches with 6-second intervals.
#
# Pages through every result for the given SQL query, following the
# scroll_token returned by each response, and returns all records collected.
# Stops when a response has no scroll_token or a non-200 status.
def get_all_pdl_records_sql(query)
  # Store all records retrieved in an array
  all_records = []
  batch = 1
  # No scroll_token exists before the first request, so start with nil
  # rather than sending an empty Hash as the token
  scroll_token = nil

  # Keep retrieving records until unable to continue scrolling
  loop do
    # Pass parameters to the Person Search API
    response = Peopledatalabs::Search.person(searchType: 'sql', query: query, size: PAGE_SIZE, scroll_token: scroll_token, dataset: "all")

    # Check for successful response
    unless response['status'] == 200
      puts "Unable to continue scrolling."
      break
    end

    # Add records retrieved to the records array
    all_records.concat(response['data'])
    puts "Batch #{batch} success!"
    batch += 1

    # Store the scroll_token for next batch; none means this was the last page
    scroll_token = response['scroll_token']
    break if scroll_token.nil?

    # Pause only when another request will actually follow
    sleep(6)
  end

  puts "Done!"
  all_records
end
# Create an SQL query
SQL_QUERY = \
"""
SELECT * FROM person
WHERE facebook_id IS NOT NULL
AND interests LIKE 'yacht%'
AND inferred_salary='>250,000';
"""
# Find all records matching query
rich_yacht_people = get_all_pdl_records_sql(SQL_QUERY)
puts "Got #{rich_yacht_people.length()} rich yacht people for my wealthy client!"
#GO make_money_with_data(rich_yacht_people)!
package main
import (
"fmt"
"time"
"context"
)
// See https://github.com/peopledatalabs/peopledatalabs-go
import (
pdl "github.com/peopledatalabs/peopledatalabs-go"
pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)
func main() {
// Create an Elasticsearch query
elasticSearchQuery := map[string]interface{} {
"query": map[string]interface{} {
"bool": map[string]interface{} {
"must": []map[string]interface{} {
{"exists": map[string]interface{}{"field": "facebook_id"}},
{"prefix": map[string]interface{}{"interests": "yacht"}},
{"term": map[string]interface{}{"inferred_salary": ">250,000"}},
},
},
},
}
// Find all records matching query
richYachtPeople := getAllPdlRecordsEs(elasticSearchQuery)
fmt.Printf("Got %d rich yacht people for my wealthy client!\n", len(richYachtPeople))
//GO make_money_with_data(rich_yacht_people)!
}
// getAllPdlRecordsEs runs Person searches in batches with 6-second intervals.
// It pages through the results of the given Elasticsearch query, following
// the scroll token returned by each response, and returns every record
// collected before the token runs out or a request fails.
func getAllPdlRecordsEs(query interface{}) []pdlmodel.Person {
	const pageSize = 100

	// Set your API key
	apiKey := "YOUR API KEY"
	// Set API key as environmental variable
	// apiKey := os.Getenv("API_KEY")

	// Create a client, specifying your API key
	client := pdl.New(apiKey)

	// Create a parameters JSON object; ScrollToken starts out empty
	p := pdlmodel.SearchParams{
		BaseParams: pdlmodel.BaseParams{
			Size: pageSize,
		},
		SearchBaseParams: pdlmodel.SearchBaseParams{
			Query:   query,
			Dataset: "all",
		},
	}

	// Store all records retrieved in an array
	var allRecords []pdlmodel.Person

	// Keep retrieving records until unable to continue scrolling
	for batch := 1; ; batch++ {
		// Pass the parameters object to the Person Search API
		response, err := client.Person.Search(context.Background(), p)
		if err != nil {
			fmt.Println("Unable to continue scrolling.")
			// Surface the cause instead of silently discarding it
			fmt.Println("Error:", err)
			break
		}

		// Add records retrieved to the records array
		allRecords = append(allRecords, response.Data...)
		fmt.Printf("Batch %d success!\n", batch)

		// An empty scroll token means this was the last page
		if response.ScrollToken == "" {
			break
		}
		// Store scroll_token from response for the next request
		p.SearchBaseParams.ScrollToken = response.ScrollToken

		// Pause only when another request will actually follow
		time.Sleep(6 * time.Second)
	}

	fmt.Println("Done!")
	return allRecords
}
package main
import (
"fmt"
"time"
"context"
)
// See https://github.com/peopledatalabs/peopledatalabs-go
import (
pdl "github.com/peopledatalabs/peopledatalabs-go"
pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)
func main() {
// Create an SQL query
sqlQuery := "SELECT * FROM person" +
" WHERE facebook_id IS NOT NULL" +
" AND interests LIKE 'yacht%'" +
" AND inferred_salary='>250,000';"
// Find all records matching query
richYachtPeople := getAllPdlRecordsEs(sqlQuery)
fmt.Printf("Got %d rich yacht people for my wealthy client!\n", len(richYachtPeople))
//GO make_money_with_data(rich_yacht_people)!
}
// getAllPdlRecordsEs runs Person searches in batches with 6-second intervals.
// Despite the "Es" suffix, this variant sends the query as SQL. It follows the
// scroll token returned by each response and returns every record collected
// before the token runs out or a request fails.
func getAllPdlRecordsEs(query string) []pdlmodel.Person {
	const pageSize = 100

	// Set your API key
	apiKey := "YOUR API KEY"
	// Set API key as environmental variable
	// apiKey := os.Getenv("API_KEY")

	// Create a client, specifying your API key
	client := pdl.New(apiKey)

	// Create a parameters JSON object; ScrollToken starts out empty
	p := pdlmodel.SearchParams{
		BaseParams: pdlmodel.BaseParams{
			Size: pageSize,
		},
		SearchBaseParams: pdlmodel.SearchBaseParams{
			SQL:     query,
			Dataset: "all",
		},
	}

	// Store all records retrieved in an array
	var allRecords []pdlmodel.Person

	// Keep retrieving records until unable to continue scrolling
	for batch := 1; ; batch++ {
		// Pass the parameters object to the Person Search API
		response, err := client.Person.Search(context.Background(), p)
		if err != nil {
			fmt.Println("Unable to continue scrolling.")
			// Surface the cause instead of silently discarding it
			fmt.Println("Error:", err)
			break
		}

		// Add records retrieved to the records array
		allRecords = append(allRecords, response.Data...)
		fmt.Printf("Batch %d success!\n", batch)

		// An empty scroll token means this was the last page
		if response.ScrollToken == "" {
			break
		}
		// Store scroll_token from response for the next request
		p.SearchBaseParams.ScrollToken = response.ScrollToken

		// Pause only when another request will actually follow
		time.Sleep(6 * time.Second)
	}

	fmt.Println("Done!")
	return allRecords
}
from time import sleep
import requests, json
# Set your API key
API_KEY = "YOUR API KEY"
# Set the Person Search API URL
PDL_URL = "https://api.peopledatalabs.com/v5/person/search"
# Set headers
HEADERS = {
'Content-Type': "application/json",
'X-api-key': API_KEY
}
# Run Person searches in batches with 6-second intervals
def get_all_pdl_records_es(query):
    """Fetch every Person Search result for an Elasticsearch query.

    Requests pages of PAGE_SIZE records with a 6-second pause between
    requests, following the scroll_token of each response until the API
    stops returning one or a request does not succeed.

    Args:
        query: Elasticsearch query dict; it is sent JSON-encoded.

    Returns:
        List of all records gathered from the successful batches.
    """
    PAGE_SIZE = 100

    # Store all records retrieved in an array
    all_records = []
    batch = 1

    # Create a parameters JSON object
    params = {
        'query': json.dumps(query),
        'size': PAGE_SIZE,
        'dataset': "all"
    }

    # Keep retrieving records until unable to continue scrolling
    while True:
        # Pass the parameters object to the Person Search API
        response = requests.get(
            PDL_URL,
            headers=HEADERS,
            params=params
        ).json()

        # Check for successful response
        if response.get('status') != 200:
            print("Unable to continue scrolling.")
            break

        # Add records retrieved to the records array
        all_records.extend(response['data'])
        print(f"Batch {batch} success!")
        batch += 1

        # .get() because the key may be absent once there are no more pages
        scroll_token = response.get('scroll_token')
        if not scroll_token:
            break

        # Store the scroll_token for next batch
        params['scroll_token'] = scroll_token
        # Pause only when another request will actually follow
        sleep(6)

    print("Done!")
    return all_records
if __name__ == '__main__':
# Create an Elasticsearch query
ES_QUERY = {
"query": {
"bool": {
"must": [
{"exists": {"field": "facebook_id"}},
{"prefix": {"interests": "yacht"}},
{"term": {"inferred_salary": ">250,000"}}
]
}
}
}
# Find all records matching query
rich_yacht_people = get_all_pdl_records_es(ES_QUERY)
print(f"Got {len(rich_yacht_people)} rich yacht people for my wealthy client!")
#GO make_money_with_data(rich_yacht_people)!
from time import sleep
import requests, json
# Set your API key
API_KEY = "YOUR API KEY"
# Set the Person Search API URL
PDL_URL = "https://api.peopledatalabs.com/v5/person/search"
# Set headers
HEADERS = {
'Content-Type': "application/json",
'X-api-key': API_KEY
}
# Run Person searches in batches with 6-second intervals
def get_all_pdl_records_sql(query):
    """Fetch every Person Search result for an SQL query.

    Requests pages of PAGE_SIZE records with a 6-second pause between
    requests, following the scroll_token of each response until the API
    stops returning one or a request does not succeed.

    Args:
        query: SQL query string.

    Returns:
        List of all records gathered from the successful batches.
    """
    PAGE_SIZE = 100

    # Store all records retrieved in an array
    all_records = []
    batch = 1

    # Create a parameters JSON object
    params = {
        'sql': query,
        'size': PAGE_SIZE,
        'dataset': "all"
    }

    # Keep retrieving records until unable to continue scrolling
    while True:
        # Pass the parameters object to the Person Search API
        response = requests.get(
            PDL_URL,
            headers=HEADERS,
            params=params
        ).json()

        # Check for successful response
        if response.get('status') != 200:
            print("Unable to continue scrolling.")
            break

        # Add records retrieved to the records array
        all_records.extend(response['data'])
        print(f"Batch {batch} success!")
        batch += 1

        # .get() because the key may be absent once there are no more pages
        scroll_token = response.get('scroll_token')
        if not scroll_token:
            break

        # Store the scroll_token for next batch
        params['scroll_token'] = scroll_token
        # Pause only when another request will actually follow
        sleep(6)

    print("Done!")
    return all_records
# Create an SQL query
SQL_QUERY = \
"""
SELECT * FROM person
WHERE facebook_id IS NOT NULL
AND interests LIKE 'yacht%'
AND inferred_salary='>250,000';
"""
# Find all records matching query
rich_yacht_people = get_all_pdl_records_sql(SQL_QUERY)
print(f"got {len(rich_yacht_people)} rich yacht people for my wealthy client!")
#GO make_money_with_data(rich_yacht_people)
Customer Insights
"I want information about my biggest customer (Zenefits)."
from time import sleep
import json
# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY
# Create a client, specifying your API key
CLIENT = PDLPY(
api_key="YOUR API KEY",
)
# Run Person searches in batches with 6-second intervals
def get_all_pdl_records_es(query):
    """Fetch every Person Search result for an Elasticsearch query.

    Requests pages of PAGE_SIZE records through CLIENT with a 6-second
    pause between requests, following the scroll_token of each response
    until the API stops returning one or a request does not succeed.

    Args:
        query: Elasticsearch query dict.

    Returns:
        List of all records gathered from the successful batches.
    """
    PAGE_SIZE = 100

    # Store all records retrieved in an array
    all_records = []
    batch = 1

    # Create a parameters JSON object
    params = {
        'query': query,
        'size': PAGE_SIZE,
        'dataset': "all"
    }

    # Keep retrieving records until unable to continue scrolling
    while True:
        # Pass the parameters object to the Person Search API
        response = CLIENT.person.search(**params).json()

        # Check for successful response
        if response.get('status') != 200:
            print("Unable to continue scrolling.")
            break

        # Add records retrieved to the records array
        all_records.extend(response['data'])
        print(f"Batch {batch} success!")
        batch += 1

        # .get() because the key may be absent once there are no more pages
        scroll_token = response.get('scroll_token')
        if not scroll_token:
            break

        # Store the scroll_token for next batch
        params['scroll_token'] = scroll_token
        # Pause only when another request will actually follow
        sleep(6)

    print("Done!")
    return all_records
if __name__ == '__main__':
    from collections import Counter

    # Create an Elasticsearch query
    ES_QUERY = {
        "query": {
            "term": {"job_company_website": "zenefits.com"}
        }
    }

    # Find all records matching query
    all_zenefits_employees = get_all_pdl_records_es(ES_QUERY)

    # Create aggregate objects; Counter starts every key at zero, so no
    # setdefault() bookkeeping is needed
    skills_agg = Counter()
    titles_agg = Counter()
    schools_agg = Counter()
    other_companies_agg = Counter()

    # Iterate through records array for aggregation.
    # "or []" keeps a null list field from aborting the whole run.
    for record in all_zenefits_employees:
        # Aggregate skills
        skills_agg.update(record['skills'] or [])

        # Aggregate job titles
        if record['job_title']:
            titles_agg[record['job_title']] += 1

        # Aggregate schools
        for edu in record['education'] or []:
            school = edu['school']
            if school and school['type'] == "post-secondary institution":
                schools_agg[school['name']] += 1

        # Aggregate other companies
        for exp in record['experience'] or []:
            company = exp['company']
            if company and company['name'] != 'zenefits':
                other_companies_agg[company['name']] += 1

    # Sort aggregate objects by count and print top 10 for each
    for heading, agg in (
        ("Top 10 skills for zenefits employees:", skills_agg),
        ("Top 10 titles for zenefits employees:", titles_agg),
        ("Top 10 universities for zenefits employees:", schools_agg),
        ("Top 10 former companies for zenefits employees:", other_companies_agg),
    ):
        print(heading)
        for name, count in agg.most_common(10):
            print(count, name)
from time import sleep
import json
# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY
# Create a client, specifying your API key
CLIENT = PDLPY(
api_key="YOUR API KEY",
)
# Run Person searches in batches with 6-second intervals
def get_all_pdl_records_sql(query):
    """Fetch every Person Search result for an SQL query.

    Requests pages of PAGE_SIZE records through CLIENT with a 6-second
    pause between requests, following the scroll_token of each response
    until the API stops returning one or a request does not succeed.

    Args:
        query: SQL query string.

    Returns:
        List of all records gathered from the successful batches.
    """
    PAGE_SIZE = 100

    # Store all records retrieved in an array
    all_records = []
    batch = 1

    # Create a parameters JSON object
    params = {
        'sql': query,
        'size': PAGE_SIZE,
        'dataset': "all"
    }

    # Keep retrieving records until unable to continue scrolling
    while True:
        # Pass the parameters object to the Person Search API
        response = CLIENT.person.search(**params).json()

        # Check for successful response
        if response.get('status') != 200:
            print("Unable to continue scrolling.")
            break

        # Add records retrieved to the records array
        all_records.extend(response['data'])
        print(f"Batch {batch} success!")
        batch += 1

        # .get() because the key may be absent once there are no more pages
        scroll_token = response.get('scroll_token')
        if not scroll_token:
            break

        # Store the scroll_token for next batch
        params['scroll_token'] = scroll_token
        # Pause only when another request will actually follow
        sleep(6)

    print("Done!")
    return all_records
# Create an SQL query
SQL_QUERY = \
"""
SELECT * FROM person
WHERE job_company_website='zenefits.com';
"""
# Find all records matching query
all_zenefits_employees = get_all_pdl_records_sql(SQL_QUERY)
# Create aggregate objects; Counter starts every key at zero, so no
# setdefault() bookkeeping is needed
from collections import Counter

skills_agg = Counter()
titles_agg = Counter()
schools_agg = Counter()
other_companies_agg = Counter()

# Iterate through records array for aggregation.
# "or []" keeps a null list field from aborting the whole run.
for record in all_zenefits_employees:
    # Aggregate skills
    skills_agg.update(record['skills'] or [])

    # Aggregate job titles
    if record['job_title']:
        titles_agg[record['job_title']] += 1

    # Aggregate schools
    for edu in record['education'] or []:
        school = edu['school']
        if school and school['type'] == "post-secondary institution":
            schools_agg[school['name']] += 1

    # Aggregate other companies
    for exp in record['experience'] or []:
        company = exp['company']
        if company and company['name'] != 'zenefits':
            other_companies_agg[company['name']] += 1

# Sort aggregate objects by count and print top 10 for each
for heading, agg in (
    ("Top 10 skills for zenefits employees:", skills_agg),
    ("Top 10 titles for zenefits employees:", titles_agg),
    ("Top 10 universities for zenefits employees:", schools_agg),
    ("Top 10 former companies for zenefits employees:", other_companies_agg),
):
    print(heading)
    for name, count in agg.most_common(10):
        print(count, name)
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';
// Client for the PDL API, authenticated with your API key
const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });

// Elasticsearch query: everyone whose job_company_website is zenefits.com
const esQuery = {
  query: {
    term: { job_company_website: "zenefits.com" },
  },
};

// Accumulates the records from every page retrieved
let allRecords = [];
// scroll_token from the most recent response (null before the first request)
let scrollToken = null;
// Number of records requested per page
let pageSize = 100;
// 1-based batch counter, used in the progress log
let batch = 1;

// Parameters object sent with every Person Search request
let params = {
  searchQuery: esQuery,
  size: pageSize,
  scroll_token: null,
  dataset: "all",
};

// Start the first batch
runBatch();
// Run Person searches in batches
//
// Fetches one page with the module-level `params` / `scrollToken` state and
// appends it to `allRecords`. While the response carries a scroll_token, the
// next page is scheduled 6 seconds later. Once scrolling stops -- because no
// token came back or because the request failed -- "Done!" is logged and
// printResults() runs exactly once.
function runBatch() {
  // Store current scroll_token in parameters object
  params.scroll_token = scrollToken;
  // Pass the current parameters object to the Person Search API
  PDLJSClient.person.search.elastic(params).then((data) => {
    // Add records retrieved to the records array
    Array.prototype.push.apply(allRecords, data.data);
    // Store scroll_token from response
    scrollToken = data['scroll_token'];
    console.log(`Batch ${batch} success!`);
    batch++;
    if (scrollToken) {
      // Run searches in batches with 6-second intervals
      setTimeout(runBatch, 6000);
      return false;
    }
    // No token: the last page has been read, so scrolling is finished
    return true;
  }).catch(() => {
    console.log("Unable to continue scrolling.");
    return true;
  }).then((finished) => {
    // Reporting lives in its own step so it also runs after a clean finish,
    // and so an error thrown while printing is not mistaken for a failed request
    if (finished) {
      console.log("Done!");
      printResults();
    }
  });
}
// Aggregate, sort and print data
//
// Tallies skills, job titles, post-secondary schools and non-zenefits
// employers across the module-level `allRecords`, then prints each tally
// through sortAndPrint.
function printResults() {
  const skillsAgg = {};
  const titlesAgg = {};
  const schoolsAgg = {};
  const otherCompaniesAgg = {};

  // Add one to the tally stored under `key`, creating it at zero if needed
  const bump = (tally, key) => {
    if (!tally[key]) {
      tally[key] = 0;
    }
    tally[key]++;
  };

  // Visit every element of `collection`; for...in makes a null or undefined
  // collection a no-op instead of an error
  const forEachEntry = (collection, visit) => {
    for (const index in collection) {
      visit(collection[index]);
    }
  };

  forEachEntry(allRecords, (person) => {
    // Aggregate skills
    forEachEntry(person["skills"], (skill) => bump(skillsAgg, skill));

    // Aggregate job titles
    const title = person["job_title"];
    if (title) {
      bump(titlesAgg, title);
    }

    // Aggregate schools
    forEachEntry(person["education"], (edu) => {
      const school = edu["school"];
      if (school && school["type"] == "post-secondary institution") {
        bump(schoolsAgg, school["name"]);
      }
    });

    // Aggregate other companies
    forEachEntry(person["experience"], (exp) => {
      const company = exp["company"];
      if (company && company["name"] != "zenefits") {
        bump(otherCompaniesAgg, company["name"]);
      }
    });
  });

  const sections = [
    ["Top 10 skills for zenefits employees:", skillsAgg],
    ["Top 10 titles for zenefits employees:", titlesAgg],
    ["Top 10 universities for zenefits employees:", schoolsAgg],
    ["Top 10 former companies for zenefits employees:", otherCompaniesAgg],
  ];
  for (const [heading, tally] of sections) {
    console.log(heading);
    sortAndPrint(tally);
  }
}
// Sort object and print top 10
//
// `object` maps a name to its count. Logs the names with the highest counts,
// one per line, highest first. At most 10 names are printed; fewer when the
// object has fewer entries, and nothing when it is empty.
function sortAndPrint(object) {
  const sortable = Object.entries(object);
  // Descending by count
  sortable.sort((a, b) => b[1] - a[1]);
  // slice() stops at the end of the array, so short tallies cannot index
  // past the last entry
  for (const [field] of sortable.slice(0, 10)) {
    console.log(field);
  }
}
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';
// Client for the PDL API, authenticated with your API key
const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });

// SQL query: everyone whose job_company_website is zenefits.com
const sqlQuery = `SELECT * FROM person
WHERE job_company_website='zenefits.com';`;

// Accumulates the records from every page retrieved
let allRecords = [];
// scroll_token from the most recent response (null before the first request)
let scrollToken = null;
// Number of records requested per page
let pageSize = 100;
// 1-based batch counter, used in the progress log
let batch = 1;

// Parameters object sent with every Person Search request
let params = {
  searchQuery: sqlQuery,
  size: pageSize,
  scroll_token: null,
  dataset: "all",
};

// Start the first batch
runBatch();
// Run Person searches in batches
//
// Fetches one page with the module-level `params` / `scrollToken` state and
// appends it to `allRecords`. While the response carries a scroll_token, the
// next page is scheduled 6 seconds later. Once scrolling stops -- because no
// token came back or because the request failed -- "Done!" is logged and
// printResults() runs exactly once.
function runBatch() {
  // Store current scroll_token in parameters object
  params.scroll_token = scrollToken;
  // Pass the current parameters object to the Person Search API
  PDLJSClient.person.search.sql(params).then((data) => {
    // Add records retrieved to the records array
    Array.prototype.push.apply(allRecords, data.data);
    // Store scroll_token from response
    scrollToken = data['scroll_token'];
    console.log(`Batch ${batch} success!`);
    batch++;
    if (scrollToken) {
      // Run searches in batches with 6-second intervals
      setTimeout(runBatch, 6000);
      return false;
    }
    // No token: the last page has been read, so scrolling is finished
    return true;
  }).catch(() => {
    console.log("Unable to continue scrolling.");
    return true;
  }).then((finished) => {
    // Reporting lives in its own step so it also runs after a clean finish,
    // and so an error thrown while printing is not mistaken for a failed request
    if (finished) {
      console.log("Done!");
      printResults();
    }
  });
}
// Aggregate, sort and print data
//
// Tallies skills, job titles, post-secondary schools and non-zenefits
// employers across the module-level `allRecords`, then prints each tally
// through sortAndPrint.
function printResults() {
  const skillsAgg = {};
  const titlesAgg = {};
  const schoolsAgg = {};
  const otherCompaniesAgg = {};

  // Add one to the tally stored under `key`, creating it at zero if needed
  const bump = (tally, key) => {
    if (!tally[key]) {
      tally[key] = 0;
    }
    tally[key]++;
  };

  // Visit every element of `collection`; for...in makes a null or undefined
  // collection a no-op instead of an error
  const forEachEntry = (collection, visit) => {
    for (const index in collection) {
      visit(collection[index]);
    }
  };

  forEachEntry(allRecords, (person) => {
    // Aggregate skills
    forEachEntry(person["skills"], (skill) => bump(skillsAgg, skill));

    // Aggregate job titles
    const title = person["job_title"];
    if (title) {
      bump(titlesAgg, title);
    }

    // Aggregate schools
    forEachEntry(person["education"], (edu) => {
      const school = edu["school"];
      if (school && school["type"] == "post-secondary institution") {
        bump(schoolsAgg, school["name"]);
      }
    });

    // Aggregate other companies
    forEachEntry(person["experience"], (exp) => {
      const company = exp["company"];
      if (company && company["name"] != "zenefits") {
        bump(otherCompaniesAgg, company["name"]);
      }
    });
  });

  const sections = [
    ["Top 10 skills for zenefits employees:", skillsAgg],
    ["Top 10 titles for zenefits employees:", titlesAgg],
    ["Top 10 universities for zenefits employees:", schoolsAgg],
    ["Top 10 former companies for zenefits employees:", otherCompaniesAgg],
  ];
  for (const [heading, tally] of sections) {
    console.log(heading);
    sortAndPrint(tally);
  }
}
// Sort object and print top 10
//
// `object` maps a name to its count. Logs the names with the highest counts,
// one per line, highest first. At most 10 names are printed; fewer when the
// object has fewer entries, and nothing when it is empty.
function sortAndPrint(object) {
  const sortable = Object.entries(object);
  // Descending by count
  sortable.sort((a, b) => b[1] - a[1]);
  // slice() stops at the end of the array, so short tallies cannot index
  // past the last entry
  for (const [field] of sortable.slice(0, 10)) {
    console.log(field);
  }
}
# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'
# Set your API key
Peopledatalabs.api_key = 'YOUR API KEY'
PAGE_SIZE = 100
# Run Person searches in batches with 6-second intervals
#
# query - the Elasticsearch query Hash sent with every request.
#
# Requests pages of PAGE_SIZE records until a response has a non-200 status
# or carries no scroll_token, printing progress along the way.
#
# Returns an Array holding the records of every page retrieved.
def get_all_pdl_records_es(query)
  # Store all records retrieved in an array
  all_records = []
  batch = 1
  # The current scroll_token; nil so the first request carries no token
  scroll_token = nil

  loop do
    # Pass parameters to the Person Search API
    response = Peopledatalabs::Search.person(searchType: 'elastic', query: query, size: PAGE_SIZE, scroll_token: scroll_token, dataset: "all")

    # Check for successful response
    unless response['status'] == 200
      puts "Unable to continue scrolling."
      break
    end

    # Add records retrieved to the records array
    all_records.concat(response['data'] || [])
    # Store the scroll_token for next batch
    scroll_token = response['scroll_token']
    puts "Batch #{batch} success!"
    batch += 1

    # Nothing left to fetch, so skip the pause before returning
    break if scroll_token.nil?

    sleep(6)
  end

  puts "Done!"
  all_records
end
# Create an Elasticsearch query
ES_QUERY = {
  "query": {
    "term": {"job_company_website": "zenefits.com"}
  }
}

# Find all records matching query
all_zenefits_employees = get_all_pdl_records_es(ES_QUERY)

# Create aggregate objects; Hash.new(0) makes unseen keys start at zero
skills_agg = Hash.new(0)
titles_agg = Hash.new(0)
schools_agg = Hash.new(0)
other_companies_agg = Hash.new(0)

# Iterate through records array for aggregation
all_zenefits_employees.each do |record|
  # Aggregate skills (a nil field is treated as an empty list)
  (record['skills'] || []).each { |skill| skills_agg[skill] += 1 }

  # Aggregate job titles; key? alone is true for a nil title, which would
  # then be tallied under nil
  job_title = record['job_title']
  titles_agg[job_title] += 1 unless job_title.nil? || job_title.empty?

  # Aggregate schools
  (record['education'] || []).each do |edu|
    school = edu['school']
    schools_agg[school['name']] += 1 if school && school['type'] == "post-secondary institution"
  end

  # Aggregate other companies
  (record['experience'] || []).each do |exp|
    company = exp['company']
    other_companies_agg[company['name']] += 1 if company && company['name'] != 'zenefits'
  end
end

# Sort aggregate objects by count and print top 10 for each
{
  "Top 10 skills for zenefits employees:" => skills_agg,
  "Top 10 titles for zenefits employees:" => titles_agg,
  "Top 10 universities for zenefits employees:" => schools_agg,
  "Top 10 former companies for zenefits employees:" => other_companies_agg
}.each do |heading, agg|
  puts heading
  agg.sort_by { |_, count| -count }.first(10).each { |key, count| puts "#{key} #{count}" }
end
# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'
# Set your API key
Peopledatalabs.api_key = 'YOUR API KEY'
PAGE_SIZE = 100
# Run Person searches in batches with 6-second intervals
#
# query - the SQL query String sent with every request.
#
# Requests pages of PAGE_SIZE records until a response has a non-200 status
# or carries no scroll_token, printing progress along the way.
#
# Returns an Array holding the records of every page retrieved.
def get_all_pdl_records_sql(query)
  # Store all records retrieved in an array
  all_records = []
  batch = 1
  # The current scroll_token; nil so the first request carries no token
  scroll_token = nil

  loop do
    # Pass parameters to the Person Search API
    response = Peopledatalabs::Search.person(searchType: 'sql', query: query, size: PAGE_SIZE, scroll_token: scroll_token, dataset: "all")

    # Check for successful response
    unless response['status'] == 200
      puts "Unable to continue scrolling."
      break
    end

    # Add records retrieved to the records array
    all_records.concat(response['data'] || [])
    # Store the scroll_token for next batch
    scroll_token = response['scroll_token']
    puts "Batch #{batch} success!"
    batch += 1

    # Nothing left to fetch, so skip the pause before returning
    break if scroll_token.nil?

    sleep(6)
  end

  puts "Done!"
  all_records
end
# Create an SQL query (a heredoc; Ruby has no triple-quoted string literal)
SQL_QUERY = <<~SQL
  SELECT * FROM person
  WHERE job_company_website='zenefits.com';
SQL

# Find all records matching query
all_zenefits_employees = get_all_pdl_records_sql(SQL_QUERY)

# Create aggregate objects; Hash.new(0) makes unseen keys start at zero
skills_agg = Hash.new(0)
titles_agg = Hash.new(0)
schools_agg = Hash.new(0)
other_companies_agg = Hash.new(0)

# Iterate through records array for aggregation
all_zenefits_employees.each do |record|
  # Aggregate skills (a nil field is treated as an empty list)
  (record['skills'] || []).each { |skill| skills_agg[skill] += 1 }

  # Aggregate job titles; key? alone is true for a nil title, which would
  # then be tallied under nil
  job_title = record['job_title']
  titles_agg[job_title] += 1 unless job_title.nil? || job_title.empty?

  # Aggregate schools
  (record['education'] || []).each do |edu|
    school = edu['school']
    schools_agg[school['name']] += 1 if school && school['type'] == "post-secondary institution"
  end

  # Aggregate other companies
  (record['experience'] || []).each do |exp|
    company = exp['company']
    other_companies_agg[company['name']] += 1 if company && company['name'] != 'zenefits'
  end
end

# Sort aggregate objects by count and print top 10 for each
{
  "Top 10 skills for zenefits employees:" => skills_agg,
  "Top 10 titles for zenefits employees:" => titles_agg,
  "Top 10 universities for zenefits employees:" => schools_agg,
  "Top 10 former companies for zenefits employees:" => other_companies_agg
}.each do |heading, agg|
  puts heading
  agg.sort_by { |_, count| -count }.first(10).each { |key, count| puts "#{key} #{count}" }
end
package main
import (
"fmt"
"time"
"sort"
"context"
)
// See https://github.com/peopledatalabs/peopledatalabs-go
import (
pdl "github.com/peopledatalabs/peopledatalabs-go"
pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)
// main retrieves every profile whose job_company_website is zenefits.com,
// tallies skills, job titles, post-secondary schools and other employers,
// and prints the ten most frequent entries of each tally.
func main() {
	// Create an Elasticsearch query
	elasticSearchQuery := map[string]interface{}{
		"query": map[string]interface{}{
			"bool": map[string]interface{}{
				"must": []map[string]interface{}{
					{"term": map[string]interface{}{"job_company_website": "zenefits.com"}},
				},
			},
		},
	}

	// Find all records matching query
	allZenefitsEmployees := getAllPdlRecordsEs(elasticSearchQuery)

	// Create aggregate objects
	skillsAgg := make(map[string]int)
	titlesAgg := make(map[string]int)
	schoolsAgg := make(map[string]int)
	otherCompaniesAgg := make(map[string]int)

	// Iterate through records array for aggregation
	for _, record := range allZenefitsEmployees {
		// Aggregate skills
		for _, skill := range record.Skills {
			skillsAgg[skill]++
		}
		// Aggregate job titles
		if record.JobTitle != "" {
			titlesAgg[record.JobTitle]++
		}
		// Aggregate schools
		for _, edu := range record.Education {
			if edu.School.Name != "" && edu.School.Type == "post-secondary institution" {
				schoolsAgg[edu.School.Name]++
			}
		}
		// Aggregate other companies; entries with an empty company name are
		// skipped so they do not show up as a blank "company"
		for _, exp := range record.Experience {
			if name := exp.Company.Name; name != "" && name != "zenefits" {
				otherCompaniesAgg[name]++
			}
		}
	}

	// printTop prints heading followed by at most the ten highest-count keys
	// of agg; rank returns every key, so the result is truncated here
	printTop := func(heading string, agg map[string]int) {
		fmt.Println(heading)
		ranked := rank(agg)
		if len(ranked) > 10 {
			ranked = ranked[:10]
		}
		for _, key := range ranked {
			fmt.Println(key, agg[key])
		}
	}

	// Sort aggregate objects by count and print top 10 for each
	printTop("Top 10 skills for zenefits employees:", skillsAgg)
	printTop("Top 10 titles for zenefits employees:", titlesAgg)
	printTop("Top 10 universities for zenefits employees:", schoolsAgg)
	printTop("Top 10 former companies for zenefits employees:", otherCompaniesAgg)
}
// getAllPdlRecordsEs runs Person searches in batches with 6-second intervals.
//
// It sends query as the Elasticsearch query, requesting pageSize records per
// call, and keeps going until a call returns an error or a response carries
// an empty scroll token. It returns every record collected up to that point.
func getAllPdlRecordsEs(query interface{}) []pdlmodel.Person {
	const pageSize = 100

	var (
		// Records retrieved across all batches
		allRecords []pdlmodel.Person
		// The current scroll_token (empty before the first request)
		scrollToken string
	)

	// Set your API key
	apiKey := "YOUR API KEY"
	// Set API key as environmental variable
	// apiKey := os.Getenv("API_KEY")

	// Create a client, specifying your API key
	client := pdl.New(apiKey)

	// Parameters sent with every request; only ScrollToken changes between batches
	p := pdlmodel.SearchParams{
		BaseParams: pdlmodel.BaseParams{
			Size: pageSize,
		},
		SearchBaseParams: pdlmodel.SearchBaseParams{
			Query:       query,
			ScrollToken: scrollToken,
			Dataset:     "all",
		},
	}

	// The first batch always runs; later ones need a non-empty scroll token
	for batch := 1; batch == 1 || scrollToken != ""; batch++ {
		response, err := client.Person.Search(context.Background(), p)
		if err != nil {
			fmt.Println("Unable to continue scrolling.")
			break
		}

		allRecords = append(allRecords, response.Data...)

		// Carry the token from this response into the next request
		scrollToken = response.ScrollToken
		p.SearchBaseParams.ScrollToken = scrollToken

		fmt.Printf("Batch %d success!\n", batch)
		time.Sleep(6 * time.Second)
	}

	fmt.Println("Done!")
	return allRecords
}
// rank returns the keys of values ordered by their counts, highest first.
// The relative order of keys with equal counts is unspecified.
func rank(values map[string]int) []string {
	keys := make([]string, 0, len(values))
	for key := range values {
		keys = append(keys, key)
	}
	// Compare through the map so the keys can be sorted in place
	sort.Slice(keys, func(a, b int) bool {
		return values[keys[a]] > values[keys[b]]
	})
	return keys
}
package main
import (
"fmt"
"time"
"sort"
"context"
)
// See https://github.com/peopledatalabs/peopledatalabs-go
import (
pdl "github.com/peopledatalabs/peopledatalabs-go"
pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)
// main retrieves every profile whose job_company_website is zenefits.com,
// tallies skills, job titles, post-secondary schools and other employers,
// and prints the ten most frequent entries of each tally.
func main() {
	// Create an SQL query
	sqlQuery := "SELECT * FROM person" +
		" WHERE job_company_website='zenefits.com'"

	// Find all records matching query
	allZenefitsEmployees := getAllPdlRecordsEs(sqlQuery)

	// Create aggregate objects
	skillsAgg := make(map[string]int)
	titlesAgg := make(map[string]int)
	schoolsAgg := make(map[string]int)
	otherCompaniesAgg := make(map[string]int)

	// Iterate through records array for aggregation
	for _, record := range allZenefitsEmployees {
		// Aggregate skills
		for _, skill := range record.Skills {
			skillsAgg[skill]++
		}
		// Aggregate job titles
		if record.JobTitle != "" {
			titlesAgg[record.JobTitle]++
		}
		// Aggregate schools
		for _, edu := range record.Education {
			if edu.School.Name != "" && edu.School.Type == "post-secondary institution" {
				schoolsAgg[edu.School.Name]++
			}
		}
		// Aggregate other companies; entries with an empty company name are
		// skipped so they do not show up as a blank "company"
		for _, exp := range record.Experience {
			if name := exp.Company.Name; name != "" && name != "zenefits" {
				otherCompaniesAgg[name]++
			}
		}
	}

	// printTop prints heading followed by at most the ten highest-count keys
	// of agg; rank returns every key, so the result is truncated here
	printTop := func(heading string, agg map[string]int) {
		fmt.Println(heading)
		ranked := rank(agg)
		if len(ranked) > 10 {
			ranked = ranked[:10]
		}
		for _, key := range ranked {
			fmt.Println(key, agg[key])
		}
	}

	// Sort aggregate objects by count and print top 10 for each
	printTop("Top 10 skills for zenefits employees:", skillsAgg)
	printTop("Top 10 titles for zenefits employees:", titlesAgg)
	printTop("Top 10 universities for zenefits employees:", schoolsAgg)
	printTop("Top 10 former companies for zenefits employees:", otherCompaniesAgg)
}
// getAllPdlRecordsEs runs Person searches in batches with 6-second intervals.
//
// Despite the "Es" suffix, this variant sends query through the SQL field of
// the search parameters. It requests pageSize records per call and keeps
// going until a call returns an error or a response carries an empty scroll
// token. It returns every record collected up to that point.
func getAllPdlRecordsEs(query string) []pdlmodel.Person {
	const pageSize = 100

	var (
		// Records retrieved across all batches
		allRecords []pdlmodel.Person
		// The current scroll_token (empty before the first request)
		scrollToken string
	)

	// Set your API key
	apiKey := "YOUR API KEY"
	// Set API key as environmental variable
	// apiKey := os.Getenv("API_KEY")

	// Create a client, specifying your API key
	client := pdl.New(apiKey)

	// Parameters sent with every request; only ScrollToken changes between batches
	p := pdlmodel.SearchParams{
		BaseParams: pdlmodel.BaseParams{
			Size: pageSize,
		},
		SearchBaseParams: pdlmodel.SearchBaseParams{
			SQL:         query,
			ScrollToken: scrollToken,
			Dataset:     "all",
		},
	}

	// The first batch always runs; later ones need a non-empty scroll token
	for batch := 1; batch == 1 || scrollToken != ""; batch++ {
		response, err := client.Person.Search(context.Background(), p)
		if err != nil {
			fmt.Println("Unable to continue scrolling.")
			break
		}

		allRecords = append(allRecords, response.Data...)

		// Carry the token from this response into the next request
		scrollToken = response.ScrollToken
		p.SearchBaseParams.ScrollToken = scrollToken

		fmt.Printf("Batch %d success!\n", batch)
		time.Sleep(6 * time.Second)
	}

	fmt.Println("Done!")
	return allRecords
}
// rank returns the keys of values ordered by their counts, highest first.
// The relative order of keys with equal counts is unspecified.
func rank(values map[string]int) []string {
	keys := make([]string, 0, len(values))
	for key := range values {
		keys = append(keys, key)
	}
	// Compare through the map so the keys can be sorted in place
	sort.Slice(keys, func(a, b int) bool {
		return values[keys[a]] > values[keys[b]]
	})
	return keys
}
from time import sleep
import requests, json
# Set your API key
API_KEY = "YOUR API KEY"
# Set the Person Search API URL
PDL_URL = "https://api.peopledatalabs.com/v5/person/search"
# Set headers
HEADERS = {
'Content-Type': "application/json",
'X-api-key': API_KEY
}
# Run Person searches in batches with 6-second intervals
def get_all_pdl_records_es(query):
    """Collect every Person Search record matching an Elasticsearch query.

    Pages of 100 records are requested from PDL_URL until a response has a
    non-200 status or carries no scroll_token. Progress is printed per batch.

    Args:
        query: Elasticsearch query; serialized with ``json.dumps`` before
            being sent as the ``query`` parameter.

    Returns:
        A list with the records of every page retrieved (possibly empty).
    """
    PAGE_SIZE = 100
    # Store all records retrieved in an array
    all_records = []
    batch = 1
    # Create a parameters JSON object
    params = {
        'query': json.dumps(query),
        'size': PAGE_SIZE,
        'dataset': "all"
    }

    # Keep retrieving records until unable to continue scrolling
    while True:
        # Pass the parameters object to the Person Search API
        response = requests.get(
            PDL_URL,
            headers=HEADERS,
            params=params
        ).json()

        # Check for successful response
        if response.get('status') != 200:
            print("Unable to continue scrolling.")
            break

        # Add records retrieved to the records array
        all_records.extend(response['data'])
        print(f"Batch {batch} success!")
        batch += 1

        # A response without a token is the last page; .get avoids a KeyError
        # when the key is absent altogether
        scroll_token = response.get('scroll_token')
        if not scroll_token:
            break

        # Store the scroll_token for next batch, then pause before requesting it
        params['scroll_token'] = scroll_token
        sleep(6)

    print("Done!")
    return all_records
if __name__ == '__main__':
    # Elasticsearch query: everyone whose job_company_website is zenefits.com
    ES_QUERY = {
        "query": {
            "term": {"job_company_website": "zenefits.com"}
        }
    }

    # Pull every matching profile through the scrolling helper
    all_zenefits_employees = get_all_pdl_records_es(ES_QUERY)

    # One tally (value -> number of occurrences) per profile facet
    skills_agg, titles_agg, schools_agg, other_companies_agg = {}, {}, {}, {}

    for record in all_zenefits_employees:
        # Every listed skill counts once per profile
        for skill in record['skills']:
            skills_agg[skill] = skills_agg.get(skill, 0) + 1

        # Current job title; falsy titles are ignored
        job_title = record['job_title']
        if job_title:
            titles_agg[job_title] = titles_agg.get(job_title, 0) + 1

        # Schools, restricted to post-secondary institutions
        for edu in record['education']:
            school = edu['school']
            if school and school['type'] == "post-secondary institution":
                schools_agg[school['name']] = schools_agg.get(school['name'], 0) + 1

        # Employers other than zenefits itself
        for exp in record['experience']:
            company = exp['company']
            if company and company['name'] != 'zenefits':
                other_companies_agg[company['name']] = (
                    other_companies_agg.get(company['name'], 0) + 1
                )

    # Print the ten most frequent entries of each tally, highest count first
    for heading, tally in (
        ("Top 10 skills for zenefits employees:", skills_agg),
        ("Top 10 titles for zenefits employees:", titles_agg),
        ("Top 10 universities for zenefits employees:", schools_agg),
        ("Top 10 former companies for zenefits employees:", other_companies_agg),
    ):
        print(heading)
        ranked = sorted(tally.items(), key=lambda item: item[1], reverse=True)
        for name, count in ranked[:10]:
            print(count, name)
from time import sleep
import requests, json
# Set your API key
API_KEY = "YOUR API KEY"
# Set the Person Search API URL
PDL_URL = "https://api.peopledatalabs.com/v5/person/search"
# Set headers
HEADERS = {
'Content-Type': "application/json",
'X-api-key': API_KEY
}
# Run Person searches in batches with 6-second intervals
def get_all_pdl_records_sql(query):
    """Collect every Person Search record matching an SQL query.

    Pages of 100 records are requested from PDL_URL until a response has a
    non-200 status or carries no scroll_token. Progress is printed per batch.

    Args:
        query: SQL query string, sent as the ``sql`` parameter.

    Returns:
        A list with the records of every page retrieved (possibly empty).
    """
    PAGE_SIZE = 100
    # Store all records retrieved in an array
    all_records = []
    batch = 1
    # Create a parameters JSON object
    params = {
        'sql': query,
        'size': PAGE_SIZE,
        'dataset': "all"
    }

    # Keep retrieving records until unable to continue scrolling
    while True:
        # Pass the parameters object to the Person Search API
        response = requests.get(
            PDL_URL,
            headers=HEADERS,
            params=params
        ).json()

        # Check for successful response
        if response.get('status') != 200:
            print("Unable to continue scrolling.")
            break

        # Add records retrieved to the records array
        all_records.extend(response['data'])
        print(f"Batch {batch} success!")
        batch += 1

        # A response without a token is the last page; .get avoids a KeyError
        # when the key is absent altogether
        scroll_token = response.get('scroll_token')
        if not scroll_token:
            break

        # Store the scroll_token for next batch, then pause before requesting it
        params['scroll_token'] = scroll_token
        sleep(6)

    print("Done!")
    return all_records
# SQL query selecting every person whose current employer's website is zenefits.com
SQL_QUERY = """
SELECT * FROM person
WHERE job_company_website='zenefits.com';
"""

# Pull every matching profile through the scrolling helper
all_zenefits_employees = get_all_pdl_records_sql(SQL_QUERY)

# One tally (value -> number of occurrences) per profile facet
skills_agg, titles_agg, schools_agg, other_companies_agg = {}, {}, {}, {}

for record in all_zenefits_employees:
    # Every listed skill counts once per profile
    for skill in record['skills']:
        skills_agg[skill] = skills_agg.get(skill, 0) + 1

    # Current job title; falsy titles are ignored
    job_title = record['job_title']
    if job_title:
        titles_agg[job_title] = titles_agg.get(job_title, 0) + 1

    # Schools, restricted to post-secondary institutions
    for edu in record['education']:
        school = edu['school']
        if school and school['type'] == "post-secondary institution":
            schools_agg[school['name']] = schools_agg.get(school['name'], 0) + 1

    # Employers other than zenefits itself
    for exp in record['experience']:
        company = exp['company']
        if company and company['name'] != 'zenefits':
            other_companies_agg[company['name']] = (
                other_companies_agg.get(company['name'], 0) + 1
            )

# Print the ten most frequent entries of each tally, highest count first
for heading, tally in (
    ("Top 10 skills for zenefits employees:", skills_agg),
    ("Top 10 titles for zenefits employees:", titles_agg),
    ("Top 10 universities for zenefits employees:", schools_agg),
    ("Top 10 former companies for zenefits employees:", other_companies_agg),
):
    print(heading)
    ranked = sorted(tally.items(), key=lambda item: item[1], reverse=True)
    for name, count in ranked[:10]:
        print(count, name)
Advanced Examples
Company Enrichment and Person Search
"I want to find X number of people at each company in my list."
import json
# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY
# Create a client, specifying your API key
CLIENT = PDLPY(
api_key="YOUR API KEY",
)
# Create a list of companies to search
COMPANY_WEBSITES = [
"facebook.com",
"amazon.com",
"apple.com",
"netflix.com",
"google.com"
]
# Set the maximum people to search at each company
MAX_NUM_PEOPLE = 100
# Enrich each company then find people at that company
for company_website in COMPANY_WEBSITES:
    # Create a parameters JSON object for the Company Enrichment API
    query_string = {"website": company_website}

    # Pass the parameters object to the Company Enrichment API
    response = CLIENT.company.enrichment(**query_string).json()

    # Check for successful response
    if response.get('status') == 200:
        # Store enriched company
        enriched_company = response
    else:
        enriched_company = {}
        # `response` is the decoded JSON body (a dict), which has no `.text`,
        # so the body itself is printed
        print(f"Company Enrichment Error for [{company_website}]: {response}")

    # Store employees at each company; stays empty when either call fails
    company_employee_matches = []

    # Check for an enriched company
    if enriched_company:
        # Create an Elasticsearch query
        es_query = {
            "query": {
                "bool": {
                    "must": [
                        {"term": {"job_company_id": enriched_company["id"]}},
                    ]
                }
            }
        }

        # Create a parameters JSON object for the Person Search API
        params = {
            'query': es_query,
            'size': MAX_NUM_PEOPLE
        }

        # Pass the parameters object to the Person Search API
        response = CLIENT.person.search(**params).json()

        # Check for successful response
        if response.get('status') == 200:
            # Get employees from response
            company_employee_matches = response['data']
        else:
            print(f"Person Search Error for [{company_website}]: {response}")

    print(f"Found {len(company_employee_matches)} employee profiles at {company_website}.")
import json
# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY
# Create a client, specifying your API key
CLIENT = PDLPY(
api_key="YOUR API KEY",
)
# Create a list of companies to search
COMPANY_WEBSITES = [
"facebook.com",
"amazon.com",
"apple.com",
"netflix.com",
"google.com"
]
# Set the maximum people to search at each company
MAX_NUM_PEOPLE = 100
# Enrich each company then find people at that company
for company_website in COMPANY_WEBSITES:
    # Create a parameters JSON object for the Company Enrichment API
    query_string = {"website": company_website}

    # Pass the parameters object to the Company Enrichment API
    response = CLIENT.company.enrichment(**query_string).json()

    # Check for successful response
    if response.get('status') == 200:
        # Store enriched company
        enriched_company = response
    else:
        enriched_company = {}
        # `response` is the decoded JSON body (a dict), which has no `.text`,
        # so the body itself is printed
        print(f"Company Enrichment Error for [{company_website}]: {response}")

    # Store employees at each company; stays empty when either call fails
    company_employee_matches = []

    # Check for an enriched company
    if enriched_company:
        # Create an SQL query keyed on the enriched company's id
        sql_query = f"""
SELECT * FROM person
WHERE job_company_id = '{enriched_company['id']}'
"""

        # Create a parameters JSON object for the Person Search API
        params = {
            'sql': sql_query,
            'size': MAX_NUM_PEOPLE
        }

        # Pass the parameters object to the Person Search API
        response = CLIENT.person.search(**params).json()

        # Check for successful response
        if response.get('status') == 200:
            # Get employees from response
            company_employee_matches = response['data']
        else:
            print(f"Person Search Error for [{company_website}]: {response}")

    print(f"Found {len(company_employee_matches)} employee profiles at {company_website}.")
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';
// Create a client, specifying your API key
const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });
// Create a list of companies to search
const companyWebsites = [
"facebook.com",
"amazon.com",
"apple.com",
"netflix.com",
"google.com"
];
// Set the maximum people to search at each company
const maxMumPeople = 100;
// Enrich each company then find people at that company
//
// For every website, the company is enriched first and its id is then used
// to search for people. Each request chain runs asynchronously and logs its
// own result or error.
for (const companyWebsite of companyWebsites) {
  // Create a parameters JSON object for the Company Enrichment API
  const queryString = { website: companyWebsite };

  // Pass the parameters object to the Company Enrichment API
  PDLJSClient.company.enrichment(queryString).then((enrichedCompany) => {
    // Create an Elasticsearch query
    const esQuery = {
      query: {
        bool: {
          must: [
            { term: { job_company_id: enrichedCompany.id } },
          ],
        },
      },
    };

    // Create a parameters JSON object for the Person Search API
    const params = {
      searchQuery: esQuery,
      size: maxMumPeople,
    };

    // Pass the parameters object to the Person Search API
    PDLJSClient.person.search.elastic(params).then((data) => {
      // Get employees from response
      const companyEmployeeMatches = data.data;
      console.log(`Found ${companyEmployeeMatches.length}` +
        ` employee profiles at ${companyWebsite}.`);
    }).catch((error) => {
      console.log(`Person Search Error for ${companyWebsite}` +
        `: ${error}`);
    });
  }).catch((error) => {
    console.log(`Company Enrichment Error for ${companyWebsite}` +
      `: ${error}`);
  });
}
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';
// Create a client, specifying your API key
const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });
// Create a list of companies to search
const companyWebsites = [
"facebook.com",
"amazon.com",
"apple.com",
"netflix.com",
"google.com"
];
// Set the maximum people to search at each company
const maxMumPeople = 100;
// Enrich each company then find people at that company
//
// For every website, the company is enriched first and its id is then used
// to search for people. Each request chain runs asynchronously and logs its
// own result or error.
for (const companyWebsite of companyWebsites) {
  // Create a parameters JSON object for the Company Enrichment API
  const queryString = { website: companyWebsite };

  // Pass the parameters object to the Company Enrichment API
  PDLJSClient.company.enrichment(queryString).then((enrichedCompany) => {
    // Create an SQL query
    const sqlQuery = `SELECT * FROM person
WHERE job_company_id = '${enrichedCompany.id}';`;

    // Create a parameters JSON object for the Person Search API
    const params = {
      searchQuery: sqlQuery,
      size: maxMumPeople,
    };

    // Pass the parameters object to the Person Search API
    PDLJSClient.person.search.sql(params).then((data) => {
      // Get employees from response
      const companyEmployeeMatches = data.data;
      console.log(`Found ${companyEmployeeMatches.length}` +
        ` employee profiles at ${companyWebsite}.`);
    }).catch((error) => {
      console.log(`Person Search Error for ${companyWebsite}` +
        `: ${error}`);
    });
  }).catch((error) => {
    console.log(`Company Enrichment Error for ${companyWebsite}` +
      `: ${error}`);
  });
}
# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'

# Set your API key
Peopledatalabs.api_key = 'YOUR API KEY'

# Create a list of companies to search
COMPANY_WEBSITES = [
  "facebook.com",
  "amazon.com",
  "apple.com",
  "netflix.com",
  "google.com"
]

# Set the maximum people to search at each company
MAX_NUM_PEOPLE = 100

# Enrich each company then find people at that company
COMPANY_WEBSITES.each do |company_website|
  # Create a parameters JSON object for the Company Enrichment API
  query_string = { "website": company_website }

  # Pass the parameters object to the Company Enrichment API
  response = Peopledatalabs::Enrichment.company(params: query_string)

  # Check for successful response
  if response['status'] == 200
    # Store enriched company
    enriched_company = response
  else
    # Use nil (not an empty hash) so the guard below really skips the
    # Person Search when enrichment failed; `{}.nil?` is false.
    enriched_company = nil
    puts "Company Enrichment Error for [#{company_website}]: #{response}"
  end

  # Store employees at each company
  company_employee_matches = []

  # Only search for people when the company was enriched
  unless enriched_company.nil?
    # Create an Elasticsearch query matching on the enriched company's ID
    es_query = {
      "query": {
        "bool": {
          "must": [
            {"term": {"job_company_id": enriched_company["id"]}},
          ]
        }
      }
    }

    # Pass parameters to the Person Search API
    response = Peopledatalabs::Search.person(searchType: 'elastic', query: es_query, size: MAX_NUM_PEOPLE)

    # Check for successful response
    if response['status'] == 200
      # Get employees from response
      company_employee_matches = response['data']
    else
      puts "Person Search Error for [#{company_website}]: #{response}"
    end
  end

  puts "Found #{company_employee_matches.length} employee profiles at #{company_website}."
end
# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'

# Set your API key
Peopledatalabs.api_key = 'YOUR API KEY'

# Create a list of companies to search
COMPANY_WEBSITES = [
  "facebook.com",
  "amazon.com",
  "apple.com",
  "netflix.com",
  "google.com"
]

# Set the maximum people to search at each company
MAX_NUM_PEOPLE = 100

# Enrich each company then find people at that company
COMPANY_WEBSITES.each do |company_website|
  # Create a parameters JSON object for the Company Enrichment API
  query_string = { "website": company_website }

  # Pass the parameters object to the Company Enrichment API
  response = Peopledatalabs::Enrichment.company(params: query_string)

  # Check for successful response
  if response['status'] == 200
    # Store enriched company
    enriched_company = response
  else
    # Use nil (not an empty hash) so the guard below really skips the
    # Person Search when enrichment failed; `{}.nil?` is false.
    enriched_company = nil
    puts "Company Enrichment Error for [#{company_website}]: #{response}"
  end

  # Store employees at each company
  company_employee_matches = []

  # Only search for people when the company was enriched
  unless enriched_company.nil?
    # Create an SQL query matching on the enriched company's ID
    sql_query = """
SELECT * FROM person
WHERE job_company_id = '#{enriched_company['id']}'
"""

    # Pass parameters to the Person Search API
    response = Peopledatalabs::Search.person(searchType: 'sql', query: sql_query, size: MAX_NUM_PEOPLE)

    # Check for successful response
    if response['status'] == 200
      # Get employees from response
      company_employee_matches = response['data']
    else
      puts "Person Search Error for [#{company_website}]: #{response}"
    end
  end

  puts "Found #{company_employee_matches.length} employee profiles at #{company_website}."
end
package main
import (
"fmt"
"reflect"
"context"
)
// See https://github.com/peopledatalabs/peopledatalabs-go
import (
pdl "github.com/peopledatalabs/peopledatalabs-go"
pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)
func main() {
// Set your API key
apiKey := "YOUR API KEY"
// Set API key as environmental variable
// apiKey := os.Getenv("API_KEY")
// Create a client, specifying your API key
client := pdl.New(apiKey)
// Create a list a companies to search
companyWebsites :=
[]string{"facebook.com","amazon.com","apple.com","netflix.com","google.com"}
// Set the maximum people to search at each company
const maxNumPeople = 100
// Enrich each company then find people at that company
for _, companyWebsite := range companyWebsites {
var enrichedCompany pdlmodel.EnrichCompanyResponse
// Create a parameters JSON object for the Company Enrichment API
queryString := pdlmodel.CompanyParams{Website: companyWebsite}
paramsCompany := pdlmodel.EnrichCompanyParams {
CompanyParams: queryString,
}
// Pass the parameters object to the Company Enrichment API
responseCompany, errCompany := client.Company.Enrich(context.Background(), paramsCompany)
// Check for successful response
if errCompany == nil {
// Store enriched company
enrichedCompany = responseCompany
} else {
fmt.Printf("Company Enrichment Error for [%s]: %s\n", companyWebsite, errCompany)
}
// Store employees at each company
var companyEmployeeMatches []pdlmodel.Person
// Check for an enriched company
if !reflect.DeepEqual(enrichedCompany, pdlmodel.EnrichCompanyResponse{}) {
// Create an Elasticsearch query
elasticSearchQuery := map[string]interface{} {
"query": map[string]interface{} {
"bool": map[string]interface{} {
"must": []map[string]interface{} {
{"term": map[string]interface{}{"job_company_id": enrichedCompany.Id}},
},
},
},
}
// Create a parameters JSON object for the Person Search API
paramsPerson := pdlmodel.SearchParams {
BaseParams: pdlmodel.BaseParams {
Size: maxNumPeople,
},
SearchBaseParams: pdlmodel.SearchBaseParams {
Query: elasticSearchQuery,
},
}
// Pass the parameters object to the Person Search API
responsePerson, errPerson := client.Person.Search(context.Background(), paramsPerson)
// Check for successful response
if errPerson == nil {
// Get employees from response
companyEmployeeMatches = responsePerson.Data
} else {
fmt.Printf("Person Search Error for [%s]: %s\n", companyWebsite, errPerson)
}
}
fmt.Printf("Found %d employee profiles at %s.\n", len(companyEmployeeMatches), companyWebsite)
}
}
package main
import (
"fmt"
"reflect"
"context"
)
// See https://github.com/peopledatalabs/peopledatalabs-go
import (
pdl "github.com/peopledatalabs/peopledatalabs-go"
pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)
func main() {
// Set your API key
apiKey := "YOUR API KEY"
// Set API key as environmental variable
// apiKey := os.Getenv("API_KEY")
// Create a client, specifying your API key
client := pdl.New(apiKey)
// Create a list a companies to search
companyWebsites :=
[]string{"facebook.com","amazon.com","apple.com","netflix.com","google.com"}
// Set the maximum people to search at each company
const maxNumPeople = 100
// Enrich each company then find people at that company
for _, companyWebsite := range companyWebsites {
var enrichedCompany pdlmodel.EnrichCompanyResponse
// Create a parameters JSON object for the Company Enrichment API
queryString := pdlmodel.CompanyParams{Website: companyWebsite}
paramsCompany := pdlmodel.EnrichCompanyParams {
CompanyParams: queryString,
}
// Pass the parameters object to the Company Enrichment API
responseCompany, errCompany := client.Company.Enrich(context.Background(), paramsCompany)
// Check for successful response
if errCompany == nil {
// Store enriched company
enrichedCompany = responseCompany
} else {
fmt.Printf("Company Enrichment Error for [%s]: %s\n", companyWebsite, errCompany)
}
// Store employees at each company
var companyEmployeeMatches []pdlmodel.Person
// Check for an enriched company
if !reflect.DeepEqual(enrichedCompany, pdlmodel.EnrichCompanyResponse{}) {
// Create an SQL query
sqlQuery := "SELECT * FROM person" +
" WHERE job_company_id = '" + enrichedCompany.Id + "'"
// Create a parameters JSON object for the Person Search API
paramsPerson := pdlmodel.SearchParams {
BaseParams: pdlmodel.BaseParams {
Size: maxNumPeople,
},
SearchBaseParams: pdlmodel.SearchBaseParams {
SQL: sqlQuery,
},
}
// Pass the parameters object to the Person Search API
responsePerson, errPerson := client.Person.Search(context.Background(), paramsPerson)
// Check for successful response
if errPerson == nil {
// Get employees from response
companyEmployeeMatches = responsePerson.Data
} else {
fmt.Printf("Person Search Error for [%s]: %s\n", companyWebsite, errPerson)
}
}
fmt.Printf("Found %d employee profiles at %s.\n", len(companyEmployeeMatches), companyWebsite)
}
}
import json

import requests

# Set the Company Enrichment API URL
PDL_COMPANY_ENRICH_URL = "https://api.peopledatalabs.com/v5/company/enrich"

# Set the Person Search API URL
PDL_PERSON_SEARCH_URL = "https://api.peopledatalabs.com/v5/person/search"

# Set your API key
API_KEY = "YOUR API KEY"

# Create a list of companies to search
COMPANY_WEBSITES = [
    "facebook.com",
    "amazon.com",
    "apple.com",
    "netflix.com",
    "google.com"
]

# Set the maximum people to search at each company
MAX_NUM_PEOPLE = 100

# Set headers once; they are the same for every company in the loop
ENRICH_HEADERS = {
    'accept': "application/json",
    'content-type': "application/json",
    'x-api-key': API_KEY
}
SEARCH_HEADERS = {
    'Content-Type': "application/json",
    'X-api-key': API_KEY
}

# Enrich each company then find people at that company
for company_website in COMPANY_WEBSITES:
    # Create a parameters JSON object for the Company Enrichment API
    query_string = {"website": company_website}

    # Pass the parameters object to the Company Enrichment API
    response = requests.get(PDL_COMPANY_ENRICH_URL, headers=ENRICH_HEADERS, params=query_string)

    # Check for successful response
    if response.status_code == 200:
        # Store enriched company
        enriched_company = response.json()
    else:
        # An empty dict is falsy, so the Person Search below is skipped
        enriched_company = {}
        print(f"Company Enrichment Error for [{company_website}]: {response.text}")

    # Store employees at each company (the API returns them as a list)
    company_employee_matches = []

    # Check for an enriched company
    if enriched_company:
        # Create an Elasticsearch query matching on the enriched company's ID
        es_query = {
            "query": {
                "bool": {
                    "must": [
                        {"term": {"job_company_id": enriched_company["id"]}},
                    ]
                }
            }
        }

        # Create a parameters JSON object for the Person Search API.
        # The query is serialized because it travels as a URL parameter.
        params = {
            'query': json.dumps(es_query),
            'size': MAX_NUM_PEOPLE
        }

        # Pass the parameters object to the Person Search API
        response = requests.get(PDL_PERSON_SEARCH_URL, headers=SEARCH_HEADERS, params=params)

        # Check for successful response
        if response.status_code == 200:
            # Get employees from response
            company_employee_matches = response.json()['data']
        else:
            print(f"Person Search Error for [{company_website}]: {response.text}")

    print(f"Found {len(company_employee_matches)} employee profiles at {company_website}.")
import json

import requests

# Set the Company Enrichment API URL
PDL_COMPANY_ENRICH_URL = "https://api.peopledatalabs.com/v5/company/enrich"

# Set the Person Search API URL
PDL_PERSON_SEARCH_URL = "https://api.peopledatalabs.com/v5/person/search"

# Set your API key
API_KEY = "YOUR API KEY"

# Create a list of companies to search
COMPANY_WEBSITES = [
    "facebook.com",
    "amazon.com",
    "apple.com",
    "netflix.com",
    "google.com"
]

# Set the maximum people to search at each company
MAX_NUM_PEOPLE = 100

# Set headers once; they are the same for every company in the loop
ENRICH_HEADERS = {
    'accept': "application/json",
    'content-type': "application/json",
    'x-api-key': API_KEY
}
SEARCH_HEADERS = {
    'Content-Type': "application/json",
    'X-api-key': API_KEY
}

# Enrich each company then find people at that company
for company_website in COMPANY_WEBSITES:
    # Create a parameters JSON object for the Company Enrichment API
    query_string = {"website": company_website}

    # Pass the parameters object to the Company Enrichment API
    response = requests.get(PDL_COMPANY_ENRICH_URL, headers=ENRICH_HEADERS, params=query_string)

    # Check for successful response
    if response.status_code == 200:
        # Store enriched company
        enriched_company = response.json()
    else:
        # An empty dict is falsy, so the Person Search below is skipped
        enriched_company = {}
        print(f"Company Enrichment Error for [{company_website}]: {response.text}")

    # Store employees at each company (the API returns them as a list)
    company_employee_matches = []

    # Check for an enriched company
    if enriched_company:
        # Create an SQL query matching on the enriched company's ID
        sql_query = f"""
SELECT * FROM person
WHERE job_company_id = '{enriched_company['id']}'
"""

        # Create a parameters JSON object for the Person Search API
        params = {
            'sql': sql_query,
            'size': MAX_NUM_PEOPLE
        }

        # Pass the parameters object to the Person Search API
        response = requests.get(PDL_PERSON_SEARCH_URL, headers=SEARCH_HEADERS, params=params)

        # Check for successful response
        if response.status_code == 200:
            # Get employees from response
            company_employee_matches = response.json()['data']
        else:
            print(f"Person Search Error for [{company_website}]: {response.text}")

    print(f"Found {len(company_employee_matches)} employee profiles at {company_website}.")
Updated about 2 months ago