Examples - Company Search API

The code examples below are provided in Python, cURL, JavaScript, Ruby, and Go.

Basic Usage

"I want to make a query and save the results to a file."

import json

# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY

# Create a client, specifying an API key
client = PDLPY(
    api_key="YOUR API KEY",
)

ES_QUERY = {
  "query": {
    "bool": {
      "must": [
        {"term": {"website": "google.com"}}
      ]
    }
  }
}

P = {
  'query': ES_QUERY,
  'size': 10,
  'pretty': True
}

response = client.company.search(**P).json()

if response["status"] == 200:
  data = response['data']
  with open("my_pdl_search.jsonl", "w") as out:
    for record in data:
      out.write(json.dumps(record) + "\n")
  print(f"successfully grabbed {len(data)} records from pdl")
  print(f"{response['total']} total pdl records exist matching this query")
else:
  print("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
  print("Error:", response)
import json

# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY

# Create a client, specifying an API key
client = PDLPY(
    api_key="YOUR API KEY",
)

SQL_QUERY = \
"""
  SELECT * FROM company
  WHERE website='google.com';
 """

P = {
  'sql': SQL_QUERY,
  'size': 10,
  'pretty': True
}

response = client.company.search(**P).json()

if response["status"] == 200:
  data = response['data']
  with open("my_pdl_search.jsonl", "w") as out:
    for record in data:
      out.write(json.dumps(record) + "\n")
  print(f"successfully grabbed {len(data)} records from pdl")
  print(f"{response['total']} total pdl records exist matching this query")
else:
  print("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
  print("Error:", response)
# Elasticsearch
curl -X GET 'https://api.peopledatalabs.com/v5/company/search' \
-H 'X-Api-Key: xxxx' \
--data-raw '{
  "size": 10,
  "query": {
    "bool": {
      "must": [
        {"term": {"website": "google.com"}}
      ]
    }
  }
}'

# SQL
curl -X GET \
  'https://api.peopledatalabs.com/v5/company/search' \
  -H 'X-Api-Key: xxxx' \
  --data-raw '{
    "size": 10,
    "sql": "SELECT * FROM company WHERE website='\''google.com'\'';"
}'
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';

import fs from 'fs';

const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });

const esQuery = {
  query: {
    bool: {
      must:[
        {"term": {"website": "google.com"}} 
      ]
    }
  }
}

const params = {
  searchQuery: esQuery, 
  size: 10,
  pretty: true
}

PDLJSClient.company.search.elastic(params).then((data) => {
    fs.writeFile("my_pdl_search.jsonl", Buffer.from(JSON.stringify(data.data)), (err) => {
        if (err) throw err;
    });
    console.log(`successfully grabbed ${data.data.length} records from pdl`);
    console.log(`${data["total"]} total pdl records exist matching this query`)
}).catch((error) => {
    console.log("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
    console.log(error);
});
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';

import fs from 'fs';

const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });

const sqlQuery = `SELECT * FROM company
                    WHERE website='google.com';`;

var params = {
    searchQuery: sqlQuery,
    size: 10,
    pretty: true
}

PDLJSClient.company.search.sql(params).then((data) => {
    fs.writeFile("my_pdl_search.jsonl", Buffer.from(JSON.stringify(data.data)), (err) => {
        if (err) throw err;
    });
    console.log(`successfully grabbed ${data.data.length} records from pdl`);
    console.log(`${data["total"]} total pdl records exist matching this query`)
}).catch((error) => {
    console.log("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
    console.log(error);
});
require 'json'

# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'

Peopledatalabs.api_key = 'YOUR API KEY'

ES_QUERY = {
  "query": {
    "bool": {
      "must": [
        {"term": {"website": "google.com"}},
      ]
    }
  }
}

response = Peopledatalabs::Search.company(searchType: 'elastic', query: ES_QUERY, size: 10, pretty: true)

if response['status'] == 200
    data = response['data']
    File.open("my_pdl_search.jsonl", "w") do |out|
        data.each { |record| out.write(JSON.dump(record) + "\n") }
    end
    puts "successfully grabbed #{data.length()} records from pdl"
    puts "#{response['total']} total pdl records exist matching this query"
else
    puts "NOTE. The carrier pigeons lost motivation in flight. See error and try again."
    puts "Error: #{response}"
end
require 'json'

# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'

Peopledatalabs.api_key = 'YOUR API KEY'

SQL_QUERY = <<~SQL
  SELECT * FROM company
  WHERE website='google.com';
SQL

response = Peopledatalabs::Search.company(searchType: 'sql', query: SQL_QUERY, size: 10, pretty: true)

if response['status'] == 200
    data = response['data']
    File.open("my_pdl_search.jsonl", "w") do |out|
        data.each { |record| out.write(JSON.dump(record) + "\n") }
    end
    puts "successfully grabbed #{data.length()} records from pdl"
    puts "#{response['total']} total pdl records exist matching this query"
else
    puts "NOTE. The carrier pigeons lost motivation in flight. See error and try again."
    puts "Error: #{response}"
end
package main

import(
    "fmt"
    "os"
    "encoding/json"
)

import (
    pdl "github.com/peopledatalabs/peopledatalabs-go"
    pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)

func main() {
    apiKey := "YOUR API KEY"
    // Set API KEY as env variable
    // apiKey := os.Getenv("API_KEY")

    client := pdl.New(apiKey)

    elasticSearchQuery := map[string]interface{} {
        "query": map[string]interface{} {
            "bool": map[string]interface{} {
                "must": []map[string]interface{} {
                    {"term": map[string]interface{}{"website": "google.com"}},
                },
            },
        },
    }

    params := pdlmodel.SearchParams {
        BaseParams: pdlmodel.BaseParams {
            Size: 10,
            Pretty: true,
        },
        SearchBaseParams: pdlmodel.SearchBaseParams {
            Query: elasticSearchQuery,
        },
    }
    
    response, err := client.Company.Search(params)
    if err == nil {
        data := response.Data
        out, outErr := os.Create("my_pdl_search.jsonl")
        defer out.Close()
        if (outErr == nil) {
            for i := range data {
                record, jsonErr := json.Marshal(data[i])
                if (jsonErr == nil) {
                    out.WriteString(string(record) + "\n")
                }
            }
            out.Sync()
        }
        fmt.Printf("successfully grabbed %d records from pdl\n", len(data))
        fmt.Printf("%d total pdl records exist matching this query\n", response.Total)
    } else {
        fmt.Println("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
        fmt.Println("Error:", err)
    } 
}
package main

import(
    "fmt"
    "os"
    "encoding/json"
)

import (
    pdl "github.com/peopledatalabs/peopledatalabs-go"
    pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)

func main() {
    apiKey := "YOUR API KEY"
    // Set API KEY as env variable
    // apiKey := os.Getenv("API_KEY")

    client := pdl.New(apiKey)

    sqlQuery := "SELECT * FROM company" +
        " WHERE website='google.com';"

    params := pdlmodel.SearchParams {
        BaseParams: pdlmodel.BaseParams {
            Size: 10,
            Pretty: true,
        },
        SearchBaseParams: pdlmodel.SearchBaseParams {
            SQL: sqlQuery,
        },
    }
    
    response, err := client.Company.Search(params)
    if err == nil {
        data := response.Data
        out, outErr := os.Create("my_pdl_search.jsonl")
        defer out.Close()
        if (outErr == nil) {
            for i := range data {
                record, jsonErr := json.Marshal(data[i])
                if (jsonErr == nil) {
                    out.WriteString(string(record) + "\n")
                }
            }
            out.Sync()
        }
        fmt.Printf("successfully grabbed %d records from pdl\n", len(data))
        fmt.Printf("%d total pdl records exist matching this query\n", response.Total)
    } else {
        fmt.Println("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
        fmt.Println("Error:", err)
    } 
}
import requests, json
API_KEY = "YOUR API KEY"

PDL_URL = "https://api.peopledatalabs.com/v5/company/search"

H = {
  'Content-Type': "application/json",
  'X-api-key': API_KEY
}

ES_QUERY = {
  "query": {
    "bool": {
      "must": [
        {"term": {"website": "google.com"}}
      ]
    }
  }
}

P = {
  'query': json.dumps(ES_QUERY),
  'size': 10,
  'pretty': True
}

response = requests.get(
  PDL_URL,
  headers=H,
  params=P
).json()

if response["status"] == 200:
  data = response['data']
  with open("my_pdl_search.jsonl", "w") as out:
    for record in data:
      out.write(json.dumps(record) + "\n")
  print(f"successfully grabbed {len(data)} records from pdl")
  print(f"{response['total']} total pdl records exist matching this query")
else:
  print("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
  print("Error:", response)
import requests, json

API_KEY = "YOUR API KEY"

PDL_URL = "https://api.peopledatalabs.com/v5/company/search"

H = {
  'Content-Type': "application/json",
  'X-api-key': API_KEY
}

SQL_QUERY = \
"""
  SELECT * FROM company
  WHERE website='google.com';
 """

P = {
  'sql': SQL_QUERY,
  'size': 10,
  'pretty': True
}

response = requests.get(
  PDL_URL,
  headers=H,
  params=P
).json()

if response["status"] == 200:
  data = response['data']
  with open("my_pdl_search.jsonl", "w") as out:
    for record in data:
      out.write(json.dumps(record) + "\n")
  print(f"successfully grabbed {len(data)} records from pdl")
  print(f"{response['total']} total pdl records exist matching this query")
else:
  print("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
  print("error:", response)

Using POST Requests

"I would like to use POST requests to query instead of GET requests so that I can make queries with a lot of parameters".

📘

Difference between GET and POST requests

The biggest practical difference is that a POST request carries the query in its JSON body, so it is not subject to the URL length limits that constrain GET requests, which pass the query as a URL parameter.
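As a minimal sketch (reusing the same endpoint, headers, and query as the full examples below), the only part that changes between the two request styles is where the query goes: a GET request sends it as a JSON-encoded URL parameter, while a POST request sends it in the JSON request body.

import requests, json

API_KEY = "YOUR API KEY"
PDL_URL = "https://api.peopledatalabs.com/v5/company/search"
H = {'Content-Type': "application/json", 'X-api-key': API_KEY}
ES_QUERY = {"query": {"bool": {"must": [{"term": {"website": "google.com"}}]}}}

# GET: the query must be serialized with json.dumps() and passed as a URL parameter
get_response = requests.get(PDL_URL, headers=H, params={'query': json.dumps(ES_QUERY), 'size': 10}).json()

# POST: the query dict goes directly into the JSON body, so its size is not limited by the URL
post_response = requests.post(PDL_URL, headers=H, json={'query': ES_QUERY, 'size': 10}).json()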

import requests, json
API_KEY = "YOUR API KEY"

PDL_URL = "https://api.peopledatalabs.com/v5/company/search"

H = {
  'Content-Type': "application/json",
  'X-api-key': API_KEY
}

ES_QUERY = {
  "query": {
    "bool": {
      "must": [
        {"term": {"website": "google.com"}},
      ]
    }
  }
}

P = {
  'query': ES_QUERY, # This is different from using GET requests
  'size': 10,
  'pretty': True
}

response = requests.post( # Using POST method
  PDL_URL,
  headers=H,
  json=P # Passing the data directly as a JSON object
#  data=json.dumps(P) # This is an alternative way of passing data using a string
).json()

if response["status"] == 200:
  data = response['data']
  with open("my_pdl_search.jsonl", "w") as out:
    for record in data:
      out.write(json.dumps(record) + "\n")
  print(f"successfully grabbed {len(data)} records from pdl")
  print(f"{response['total']} total pdl records exist matching this query")
else:
  print("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
  print("Error:", response)
import requests, json
API_KEY = "YOUR API KEY"

PDL_URL = "https://api.peopledatalabs.com/v5/company/search"

H = {
  'Content-Type': "application/json",
  'X-api-key': API_KEY
}

SQL_QUERY = \
"""
  SELECT * FROM company
  WHERE website='google.com';
 """

P = {
  'sql': SQL_QUERY,
  'size': 10,
  'pretty': True
}

response = requests.post( # Using POST method
  PDL_URL,
  headers=H,
  json=P # Passing the data directly as a JSON object
#  data=json.dumps(P) # This is an alternative way of passing data using a string
).json()

if response["status"] == 200:
  data = response['data']
  with open("my_pdl_search.jsonl", "w") as out:
    for record in data:
      out.write(json.dumps(record) + "\n")
  print(f"successfully grabbed {len(data)} records from pdl")
  print(f"{response['total']} total pdl records exist matching this query")
else:
  print("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
  print("Error:", response)
# Elasticsearch
curl -X POST 'https://api.peopledatalabs.com/v5/company/search' \
-H 'X-Api-Key: xxxx' \
--data-raw '{
  "size": 10,
  "query": {
    "bool": {
      "must": [
        {"term": {"website": "google.com"}}
      ]
    }
  }
}'

# SQL
curl -X POST \
  'https://api.peopledatalabs.com/v5/company/search' \
  -H 'X-Api-Key: xxxx' \
  --data-raw '{
    "size": 10,
    "sql": "SELECT * FROM company WHERE website='\''google.com'\'';"
}'

Company Search by Tags

"I want to find US-based companies tagged as "big data" in the financial services industry."

import json

# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY

# Create a client, specifying an API key
client = PDLPY(
    api_key="YOUR API KEY",
)

ES_QUERY = {
  "query": {
    "bool": {
      "must": [
        {"term": {"tags": "big data"}},
        {"term": {"industry": "financial services"}},
        {"term": {"location.country": "united states"}}
      ]
    }
  }
}

P = {
  'query': ES_QUERY,
  'size': 10,
  'pretty': True
}

response = client.company.search(**P).json()

if response["status"] == 200:
  data = response['data']
  with open("my_pdl_search.jsonl", "w") as out:
    for record in data:
      out.write(json.dumps(record) + "\n")
  print(f"successfully grabbed {len(data)} records from pdl")
  print(f"{response['total']} total pdl records exist matching this query")
else:
  print("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
  print("Error:", response)
import json

# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY

# Create a client, specifying an API key
client = PDLPY(
    api_key="YOUR API KEY",
)

SQL_QUERY = \
"""
  SELECT * FROM company
  WHERE tags='big data'
  AND industry='financial services'
  AND location.country='united states';
 """

P = {
  'sql': SQL_QUERY,
  'size': 10,
  'pretty': True
}

response = client.company.search(**P).json()

if response["status"] == 200:
  data = response['data']
  with open("my_pdl_search.jsonl", "w") as out:
    for record in data:
      out.write(json.dumps(record) + "\n")
  print(f"successfully grabbed {len(data)} records from pdl")
  print(f"{response['total']} total pdl records exist matching this query")
else:
  print("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
  print("Error:", response)
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';

import fs from 'fs';

const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });

const esQuery = {
  query: {
    bool: {
      must:[
        {"term": {"tags": "big data"}},
        {"term": {"industry": "financial services"}},
        {"term": {"location.country": "united states"}}
      ]
    }
  }
}

const params = {
  searchQuery: esQuery, 
  size: 10,
  pretty: true
}

PDLJSClient.company.search.elastic(params).then((data) => {
    fs.writeFile("my_pdl_search.jsonl", Buffer.from(JSON.stringify(data.data)), (err) => {
        if (err) throw err;
    });
    console.log(`successfully grabbed ${data.data.length} records from pdl`);
    console.log(`${data["total"]} total pdl records exist matching this query`)
}).catch((error) => {
    console.log("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
    console.log(error);
});
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';

import fs from 'fs';

const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });

const sqlQuery = `SELECT * FROM company
                    WHERE tags='big data'
                    AND industry='financial services'
                    AND location.country='united states';`;

var params = {
    searchQuery: sqlQuery, 
    size: 10,
    pretty: true
}

PDLJSClient.company.search.sql(params).then((data) => {
    fs.writeFile("my_pdl_search.jsonl", Buffer.from(JSON.stringify(data.data)), (err) => {
        if (err) throw err;
    });
    console.log(`successfully grabbed ${data.data.length} records from pdl`);
    console.log(`${data["total"]} total pdl records exist matching this query`)
}).catch((error) => {
    console.log("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
    console.log(error);
});
require 'json'

# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'

Peopledatalabs.api_key = 'YOUR API KEY'

ES_QUERY = {
  "query": {
    "bool": {
      "must": [
        {"term": {"tags": "big data"}},
        {"term": {"industry": "financial services"}},
        {"term": {"location.country": "united states"}}
      ]
    }
  }
}

response = Peopledatalabs::Search.company(searchType: 'elastic', query: ES_QUERY, size: 10, pretty: true)

if response['status'] == 200
    data = response['data']
    File.open("my_pdl_search.jsonl", "w") do |out|
        data.each { |record| out.write(JSON.dump(record) + "\n") }
    end
    puts "successfully grabbed #{data.length()} records from pdl"
    puts "#{response['total']} total pdl records exist matching this query"
else
    puts "NOTE. The carrier pigeons lost motivation in flight. See error and try again."
    puts "Error: #{response}"
end
require 'json'

# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'

Peopledatalabs.api_key = 'YOUR API KEY'

SQL_QUERY = <<~SQL
  SELECT * FROM company
  WHERE tags='big data'
  AND industry='financial services'
  AND location.country='united states';
SQL

response = Peopledatalabs::Search.company(searchType: 'sql', query: SQL_QUERY, size: 10, pretty: true)

if response['status'] == 200
    data = response['data']
    File.open("my_pdl_search.jsonl", "w") do |out|
        data.each { |record| out.write(JSON.dump(record) + "\n") }
    end
    puts "successfully grabbed #{data.length()} records from pdl"
    puts "#{response['total']} total pdl records exist matching this query"
else
    puts "NOTE. The carrier pigeons lost motivation in flight. See error and try again."
    puts "Error: #{response}"
end
package main

import(
    "fmt"
    "os"
    "encoding/json"
)

import (
    pdl "github.com/peopledatalabs/peopledatalabs-go"
    pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)

func main() {
    apiKey := "YOUR API KEY"
    // Set API KEY as env variable
    // apiKey := os.Getenv("API_KEY")

    client := pdl.New(apiKey)

    elasticSearchQuery := map[string]interface{} {
        "query": map[string]interface{} {
            "bool": map[string]interface{} {
                "must": []map[string]interface{} {
                    {"term": map[string]interface{}{"tags": "big data"}},
                    {"term": map[string]interface{}{"industry": "financial services"}},
                    {"term": map[string]interface{}{"location.country": "united states"}},
                },
            },
        },
    }

    params := pdlmodel.SearchParams {
        BaseParams: pdlmodel.BaseParams {
            Size: 10,
            Pretty: true,
        },
        SearchBaseParams: pdlmodel.SearchBaseParams {
            Query: elasticSearchQuery,
        },
    }
    
    response, err := client.Company.Search(params)
    if err == nil {
        data := response.Data
        out, outErr := os.Create("my_pdl_search.jsonl")
        defer out.Close()
        if (outErr == nil) {
            for i := range data {
                record, jsonErr := json.Marshal(data[i])
                if (jsonErr == nil) {
                    out.WriteString(string(record) + "\n")
                }
            }
            out.Sync()
        }
        fmt.Printf("successfully grabbed %d records from pdl\n", len(data))
        fmt.Printf("%d total pdl records exist matching this query\n", response.Total)
    } else {
        fmt.Println("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
        fmt.Println("Error:", err)
    } 
}
package main

import(
    "fmt"
    "os"
    "encoding/json"
)

import (
    pdl "github.com/peopledatalabs/peopledatalabs-go"
    pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)

func main() {
    apiKey := "YOUR API KEY"
    // Set API KEY as env variable
    // apiKey := os.Getenv("API_KEY")

    client := pdl.New(apiKey)

    sqlQuery := "SELECT * FROM company" +
        " WHERE tags='big data'" +
        " AND industry='financial services'" +
        " AND location.country='united states';"

    params := pdlmodel.SearchParams {
        BaseParams: pdlmodel.BaseParams {
            Size: 10,
            Pretty: true,
        },
        SearchBaseParams: pdlmodel.SearchBaseParams {
            SQL: sqlQuery,
        },
    }
    
    response, err := client.Company.Search(params)
    if err == nil {
        data := response.Data
        out, outErr := os.Create("my_pdl_search.jsonl")
        defer out.Close()
        if (outErr == nil) {
            for i := range data {
                record, jsonErr := json.Marshal(data[i])
                if (jsonErr == nil) {
                    out.WriteString(string(record) + "\n")
                }
            }
            out.Sync()
        }
        fmt.Printf("successfully grabbed %d records from pdl\n", len(data))
        fmt.Printf("%d total pdl records exist matching this query\n", response.Total)
    } else {
        fmt.Println("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
        fmt.Println("Error:", err)
    } 
}
import requests, json
API_KEY = "YOUR API KEY"

PDL_URL = "https://api.peopledatalabs.com/v5/company/search"

H = {
  'Content-Type': "application/json",
  'X-api-key': API_KEY
}

ES_QUERY = {
  "query": {
    "bool": {
      "must": [
        {"term": {"tags": "big data"}},
        {"term": {"industry": "financial services"}},
        {"term": {"location.country": "united states"}}
      ]
    }
  }
}

P = {
  'query': json.dumps(ES_QUERY),
  'size': 10,
  'pretty': True
}

response = requests.get(
  PDL_URL,
  headers=H,
  params=P
).json()

if response["status"] == 200:
  data = response['data']
  with open("my_pdl_search.jsonl", "w") as out:
    for record in data:
      out.write(json.dumps(record) + "\n")
  print(f"successfully grabbed {len(data)} records from pdl")
  print(f"{response['total']} total pdl records exist matching this query")
else:
  print("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
  print("Error:", response)
import requests, json
API_KEY = "YOUR API KEY"

PDL_URL = "https://api.peopledatalabs.com/v5/company/search"

H = {
  'Content-Type': "application/json",
  'X-api-key': API_KEY
}

SQL_QUERY = \
"""
  SELECT * FROM company
  WHERE tags='big data'
  AND industry='financial services'
  AND location.country='united states';
 """

P = {
  'sql': SQL_QUERY,
  'size': 10,
  'pretty': True
}

response = requests.get(
  PDL_URL,
  headers=H,
  params=P
).json()

if response["status"] == 200:
  data = response['data']
  with open("my_pdl_search.jsonl", "w") as out:
    for record in data:
      out.write(json.dumps(record) + "\n")
  print(f"successfully grabbed {len(data)} records from pdl")
  print(f"{response['total']} total pdl records exist matching this query")
else:
  print("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
  print("Error:", response)

Sales and Marketing

"I want to find companies offering account-based marketing services in the United States."

import json

# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY

# Create a client, specifying an API key
client = PDLPY(
    api_key="YOUR API KEY",
)

ES_QUERY = {
  "query": {
    "bool": {
      "must": [
        {"match": {"summary": "account based marketing"}},
        {"term": {"location.country" : "united states"}}
      ]
    }
  }
}

P = {
  'query': ES_QUERY,
  'size': 100
}

response = client.company.search(**P).json()

if response["status"] == 200:
  
  data = response['data']
  
  with open("my_pdl_search.jsonl", "w") as out:
    for record in data:
      out.write(json.dumps(record) + "\n")
  
  print(f"successfully grabbed {len(data)} records from pdl")
  print(f"{response['total']} total pdl records exist matching this query")
else:
  print("NOTE. The eager beaver was not so eager. See error and try again.")
  print("error:", response)
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';

import fs from 'fs';

const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });

const esQuery = {
  query: {
    bool: {
      must:[
        {"match": {"summary": "account based marketing"}},
        {"term": {"location.country" : "united states"}}
      ]
    }
  }
}

const params = {
  searchQuery: esQuery, 
  size: 100,
}

PDLJSClient.company.search.elastic(params).then((data) => {
    fs.writeFile("my_pdl_search.jsonl", Buffer.from(JSON.stringify(data.data)), (err) => {
        if (err) throw err;
    });
    console.log(`successfully grabbed ${data.data.length} records from pdl`);
    console.log(`${data["total"]} total pdl records exist matching this query`)
}).catch((error) => {
    console.log("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
    console.log(error);
});
require 'json'

# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'

Peopledatalabs.api_key = 'YOUR API KEY'

ES_QUERY = {
  "query": {
    "bool": {
      "must": [
        {"match": {"summary": "account based marketing"}},
        {"term": {"location.country": "united states"}}
      ]
    }
  }
}

response = Peopledatalabs::Search.company(searchType: 'elastic', query: ES_QUERY, size: 100, pretty: true)

if response['status'] == 200
    data = response['data']
    File.open("my_pdl_search.jsonl", "w") do |out|
        data.each { |record| out.write(JSON.dump(record) + "\n") }
    end
    puts "successfully grabbed #{data.length()} records from pdl"
    puts "#{response['total']} total pdl records exist matching this query"
else
    puts "NOTE. The carrier pigeons lost motivation in flight. See error and try again."
    puts "Error: #{response}"
end
package main

import(
    "fmt"
    "os"
    "encoding/json"
)

import (
    pdl "github.com/peopledatalabs/peopledatalabs-go"
    pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)

func main() {
    apiKey := "YOUR API KEY"
    // Set API KEY as env variable
    // apiKey := os.Getenv("API_KEY")

    client := pdl.New(apiKey)

    elasticSearchQuery := map[string]interface{} {
        "query": map[string]interface{} {
            "bool": map[string]interface{} {
                "must": []map[string]interface{} {
                    {"match": map[string]interface{}{"summary": "account based marketing"}},
                    {"term": map[string]interface{}{"location.country": "united states"}},
                },
            },
        },
    }

    params := pdlmodel.SearchParams {
        BaseParams: pdlmodel.BaseParams {
            Size: 100,
        },
        SearchBaseParams: pdlmodel.SearchBaseParams {
            Query: elasticSearchQuery,
        },
    }
    
    response, err := client.Company.Search(params)
    if err == nil {
        data := response.Data
        out, outErr := os.Create("my_pdl_search.jsonl")
        defer out.Close()
        if (outErr == nil) {
            for i := range data {
                record, jsonErr := json.Marshal(data[i])
                if (jsonErr == nil) {
                    out.WriteString(string(record) + "\n")
                }
            }
            out.Sync()
        }
        fmt.Printf("successfully grabbed %d records from pdl\n", len(data))
        fmt.Printf("%d total pdl records exist matching this query\n", response.Total)
    } else {
        fmt.Println("NOTE. The eager beaver was not so eager. See error and try again.")
        fmt.Println("Error:", err)
    } 
}
import requests, json
API_KEY = "YOUR API KEY"

PDL_URL = "https://api.peopledatalabs.com/v5/company/search"

H = {
  'Content-Type': "application/json",
  'X-api-key': API_KEY
}

ES_QUERY = {
  "query": {
    "bool": {
      "must": [
        {"match": {"summary": "account based marketing"}},
        {"term": {"location.country" : "united states"}}
      ]
    }
  }
}

P = {
  'query': json.dumps(ES_QUERY),
  'size': 100
}

response = requests.get(
  PDL_URL,
  headers=H,
  params=P
).json()

if response["status"] == 200:
  
  data = response['data']
  
  with open("my_pdl_search.jsonl", "w") as out:
    for record in data:
      out.write(json.dumps(record) + "\n")
  
  print(f"successfully grabbed {len(data)} records from pdl")
  print(f"{response['total']} total pdl records exist matching this query")
else:
  print("NOTE. The eager beaver was not so eager. See error and try again.")
  print("error:", response)

Investment Research

"I want to find 100 small biotech companies headquartered in the San Francisco area with under 50 employees ."

import json

# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY

# Create a client, specifying an API key
client = PDLPY(
    api_key="YOUR API KEY",
)

# https://pdl-prod-schema.s3-us-west-2.amazonaws.com/14.0/enums/industry.txt
# for enumerated possible values of industry

# https://pdl-prod-schema.s3-us-west-2.amazonaws.com/14.0/enums/job_company_size.txt
# for enumerated possible values of company sizes

ES_QUERY = {
  "query": {
    "bool": {
      "must": [
        {"terms": {"size": ["1-10", "11-50"]}},
        {"term": {"industry" : "biotechnology"}},
        {"term": {"location.locality": "san francisco"}}
      ]
    }
  }
}

P = {
    "query": ES_QUERY,
    "size": 100
}

response = client.company.search(**P).json()

if response["status"] == 200:
  
  data = response['data']
  
  with open("my_pdl_search.jsonl", "w") as out:
    for record in data:
      out.write(json.dumps(record) + "\n")
  
  print(f"successfully grabbed {len(data)} records from pdl")
  print(f"{response['total']} total pdl records exist matching this query")
else:
  print("NOTE. The eager beaver was not so eager. See error and try again.")
  print("error:", response)
import json

# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY

# Create a client, specifying an API key
client = PDLPY(
    api_key="YOUR API KEY",
)

# https://pdl-prod-schema.s3-us-west-2.amazonaws.com/14.0/enums/industry.txt
# for enumerated possible values of industry

# https://pdl-prod-schema.s3-us-west-2.amazonaws.com/14.0/enums/job_company_size.txt
# for enumerated possible values of company sizes

SQL_QUERY = \
f"""
  SELECT * FROM company
  WHERE size IN ('1-10', '11-50')
  AND industry = 'biotechnology'
  AND location.locality='san francisco';
"""

P = {
  'sql': SQL_QUERY,
  'size': 100
}

response = client.company.search(**P).json()

if response["status"] == 200:
  
  data = response['data']
  
  with open("my_pdl_search.jsonl", "w") as out:
    for record in data:
      out.write(json.dumps(record) + "\n")
  
  print(f"successfully grabbed {len(data)} records from pdl")
  print(f"{response['total']} total pdl records exist matching this query")
else:
  print("NOTE. The eager beaver was not so eager. See error and try again.")
  print("error:", response)
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';

import fs from 'fs';

const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });

const esQuery = {
  query: {
    bool: {
      must:[
        {"terms": {"size": ["1-10", "11-50"]}},
        {"term": {"industry" : "biotechnology"}},
        {"term": {"location.locality": "san francisco"}}
      ]
    }
  }
}

const params = {
  searchQuery: esQuery, 
  size: 100,
}

PDLJSClient.company.search.elastic(params).then((data) => {
    fs.writeFile("my_pdl_search.jsonl", Buffer.from(JSON.stringify(data.data)), (err) => {
        if (err) throw err;
    });
    console.log(`successfully grabbed ${data.data.length} records from pdl`);
    console.log(`${data["total"]} total pdl records exist matching this query`)
}).catch((error) => {
    console.log("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
    console.log(error);
});
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';

import fs from 'fs';

const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });

const sqlQuery = `SELECT * FROM company
                    WHERE size IN ('1-10', '11-50')
                    AND industry = 'biotechnology'
                    AND location.locality='san francisco';`;

var params = {
    searchQuery: sqlQuery, 
    size: 100
}

PDLJSClient.company.search.sql(params).then((data) => {
    fs.writeFile("my_pdl_search.jsonl", Buffer.from(JSON.stringify(data.data)), (err) => {
        if (err) throw err;
    });
    console.log(`successfully grabbed ${data.data.length} records from pdl`);
    console.log(`${data["total"]} total pdl records exist matching this query`)
}).catch((error) => {
    console.log("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
    console.log(error);
});
require 'json'

# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'

Peopledatalabs.api_key = 'YOUR API KEY'

# https://pdl-prod-schema.s3-us-west-2.amazonaws.com/14.0/enums/industry.txt
# for enumerated possible values of industry

# https://pdl-prod-schema.s3-us-west-2.amazonaws.com/14.0/enums/job_company_size.txt
# for enumerated possible values of company sizes

ES_QUERY = {
  "query": {
    "bool": {
      "must": [
        {"terms": {"size": ["1-10", "11-50"]}},
        {"term": {"industry": "biotechnology"}},
        {"term": {"location.locality": "san francisco"}}
      ]
    }
  }
}

response = Peopledatalabs::Search.company(searchType: 'elastic', query: ES_QUERY, size: 100, pretty: true)

if response['status'] == 200
    data = response['data']
    File.open("my_pdl_search.jsonl", "w") do |out|
        data.each { |record| out.write(JSON.dump(record) + "\n") }
    end
    puts "successfully grabbed #{data.length()} records from pdl"
    puts "#{response['total']} total pdl records exist matching this query"
else
    puts "NOTE. The carrier pigeons lost motivation in flight. See error and try again."
    puts "Error: #{response}"
end
require 'json'

# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'

Peopledatalabs.api_key = 'YOUR API KEY'

# https://pdl-prod-schema.s3-us-west-2.amazonaws.com/14.0/enums/industry.txt
# for enumerated possible values of industry

# https://pdl-prod-schema.s3-us-west-2.amazonaws.com/14.0/enums/job_company_size.txt
# for enumerated possible values of company sizes

SQL_QUERY = <<~SQL
  SELECT * FROM company
  WHERE size IN ('1-10', '11-50')
  AND industry = 'biotechnology'
  AND location.locality='san francisco';
SQL

response = Peopledatalabs::Search.company(searchType: 'sql', query: SQL_QUERY, size: 100, pretty: true)

if response['status'] == 200
    data = response['data']
    File.open("my_pdl_search.jsonl", "w") do |out|
        data.each { |record| out.write(JSON.dump(record) + "\n") }
    end
    puts "successfully grabbed #{data.length()} records from pdl"
    puts "#{response['total']} total pdl records exist matching this query"
else
    puts "NOTE. The carrier pigeons lost motivation in flight. See error and try again."
    puts "Error: #{response}"
end
package main

import(
    "fmt"
    "os"
    "encoding/json"
)

import (
    pdl "github.com/peopledatalabs/peopledatalabs-go"
    pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)

func main() {
    apiKey := "YOUR API KEY"
    // Set API KEY as env variable
    // apiKey := os.Getenv("API_KEY")

    client := pdl.New(apiKey)

    // https://pdl-prod-schema.s3-us-west-2.amazonaws.com/14.0/enums/industry.txt
    // for enumerated possible values of industry

    // https://pdl-prod-schema.s3-us-west-2.amazonaws.com/14.0/enums/job_company_size.txt
    // for enumerated possible values of company sizes

    elasticSearchQuery := map[string]interface{} {
        "query": map[string]interface{} {
            "bool": map[string]interface{} {
                "must": []map[string]interface{} {
                    {"terms": map[string]interface{}{"size": []string{"1-10", "11-50"}}},
                    {"term": map[string]interface{}{"industry": "biotechnology"}},
                    {"term": map[string]interface{}{"location.locality": "san francisco"}},
                },
            },
        },
    }

    params := pdlmodel.SearchParams {
        BaseParams: pdlmodel.BaseParams {
            Size: 100,
        },
        SearchBaseParams: pdlmodel.SearchBaseParams {
            Query: elasticSearchQuery,
        },
    }
    
    response, err := client.Company.Search(params)
    if err == nil {
        data := response.Data
        out, outErr := os.Create("my_pdl_search.jsonl")
        defer out.Close()
        if (outErr == nil) {
            for i := range data {
                record, jsonErr := json.Marshal(data[i])
                if (jsonErr == nil) {
                    out.WriteString(string(record) + "\n")
                }
            }
            out.Sync()
        }
        fmt.Printf("successfully grabbed %d records from pdl\n", len(data))
        fmt.Printf("%d total pdl records exist matching this query\n", response.Total)
    } else {
        fmt.Println("NOTE. The eager beaver was not so eager. See error and try again.")
        fmt.Println("Error:", err)
    } 
}
package main

import(
    "fmt"
    "os"
    "encoding/json"
)

import (
    pdl "github.com/peopledatalabs/peopledatalabs-go"
    pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)

func main() {
    apiKey := "YOUR API KEY"
    // Set API KEY as env variable
    // apiKey := os.Getenv("API_KEY")

    client := pdl.New(apiKey)

    // https://pdl-prod-schema.s3-us-west-2.amazonaws.com/14.0/enums/industry.txt
    // for enumerated possible values of industry

    // https://pdl-prod-schema.s3-us-west-2.amazonaws.com/14.0/enums/job_company_size.txt
    // for enumerated possible values of company sizes

    sqlQuery := "SELECT * FROM company" +
        " WHERE size IN ('1-10', '11-50')" +
        " AND industry = 'biotechnology'" +
        " AND location.locality='san francisco';"

    params := pdlmodel.SearchParams {
        BaseParams: pdlmodel.BaseParams {
            Size: 100,
            Pretty: true,
        },
        SearchBaseParams: pdlmodel.SearchBaseParams {
            SQL: sqlQuery,
        },
    }
    
    response, err := client.Company.Search(params)
    if err == nil {
        data := response.Data
        out, outErr := os.Create("my_pdl_search.jsonl")
        defer out.Close()
        if (outErr == nil) {
            for i := range data {
                record, jsonErr := json.Marshal(data[i])
                if (jsonErr == nil) {
                    out.WriteString(string(record) + "\n")
                }
            }
            out.Sync()
        }
        fmt.Printf("successfully grabbed %d records from pdl\n", len(data))
        fmt.Printf("%d total pdl records exist matching this query\n", response.Total)
    } else {
        fmt.Println("NOTE. The eager beaver was not so eager. See error and try again.")
        fmt.Println("Error:", err)
    } 
}
import requests, json
API_KEY = "YOUR API KEY"

PDL_URL = "https://api.peopledatalabs.com/v5/company/search"

H = {
  "Content-Type": "application/json",
  "X-api-key": API_KEY
}

# https://pdl-prod-schema.s3-us-west-2.amazonaws.com/14.0/enums/industry.txt
# for enumerated possible values of industry

# https://pdl-prod-schema.s3-us-west-2.amazonaws.com/14.0/enums/job_company_size.txt
# for enumerated possible values of company sizes

SQL_QUERY = \
f"""
  SELECT * FROM company
  WHERE size IN ('1-10', '11-50')
  AND industry = 'biotechnology'
  AND location.locality='san francisco';
"""

P = {
  'sql': SQL_QUERY,
  'size': 100
}

response = requests.get(
  PDL_URL,
  headers=H,
  params=P
).json()

if response["status"] == 200:
  
  data = response['data']
  
  with open("my_pdl_search.jsonl", "w") as out:
    for record in data:
      out.write(json.dumps(record) + "\n")

  print(f"successfully grabbed {len(response['data'])} records from pdl")
  print(f"{response['total']} total pdl records exist matching this query")
else:
  print("NOTE. The eager beaver was not so eager. See error and try again.")
  print("error:", response)
import requests, json
API_KEY = "YOUR API KEY"

PDL_URL = "https://api.peopledatalabs.com/v5/company/search"

H = {
  "Content-Type": "application/json",
  "X-api-key": API_KEY
}

# https://pdl-prod-schema.s3-us-west-2.amazonaws.com/14.0/enums/industry.txt
# for enumerated possible values of industry

# https://pdl-prod-schema.s3-us-west-2.amazonaws.com/14.0/enums/job_company_size.txt
# for enumerated possible values of company sizes

ES_QUERY = {
  "query": {
    "bool": {
      "must": [
        {"terms": {"size": ["1-10", "11-50"]}},
        {"term": {"industry" : "biotechnology"}},
        {"term": {"location.locality": "san francisco"}}
      ]
    }
  }
}

P = {
    "query": json.dumps(ES_QUERY),
    "size": 100
}

response = requests.get(
    PDL_URL,
    headers=H,
    params=P
).json()

if response["status"] == 200:
  
  data = response['data']
  
  with open("my_pdl_search.jsonl", "w") as out:
    for record in data:
      out.write(json.dumps(record) + "\n")

  print(f"successfully grabbed {len(response['data'])} records from pdl")
  print(f"{response['total']} total pdl records exist matching this query")
else:
  print("NOTE. The eager beaver was not so eager. See error and try again.")
  print("error:", response)

Bulk Retrieval

"I want to find all "automotive" companies in the Detroit area and save them to a CSV file."

🚧

High Credit Usage Code Below

The code example below pulls all the company profiles in a metro area and is meant primarily to demonstrate the use of the scroll_token parameter when retrieving large numbers of records. It is mostly illustrative: it can use up a lot of credits and has minimal error handling. The MAX_NUM_RECORDS_LIMIT parameter sets the maximum number of profiles (and therefore credits) that will be pulled, so set it accordingly when testing this example.
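All the examples in this section follow the same scroll pattern: request a page of results, append it, then re-send the request with the scroll_token returned by the previous response until no token comes back (or the record limit is reached). Condensed into a minimal sketch using the Python SDK client and ES_QUERY defined in the full example below:

params = {'query': ES_QUERY, 'size': 100}
all_records = []

while True:
    response = client.company.search(**params).json()
    if response['status'] != 200:
        break  # stop on any error (the full example prints the error details)
    all_records.extend(response['data'])
    if 'scroll_token' not in response:
        break  # no scroll_token means there are no more pages
    params['scroll_token'] = response['scroll_token']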

import json, time, csv

# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY

# Create a client, specifying an API key
client = PDLPY(
    api_key="YOUR API KEY",
)

# Limit the number of records to pull (to prevent accidentally using up 
# more credits than expected when testing out this code).
MAX_NUM_RECORDS_LIMIT = 150 # The maximum number of records to retrieve
USE_MAX_NUM_RECORDS_LIMIT = True # Set to False to pull all available records

ES_QUERY = {
  'query': {
    'bool': {
      'must': [
        {'term': {'industry': "automotive"}},
        {'term': {'location.metro': "detroit, michigan"}}
      ]
    }
  }
}

P = {
  'query': ES_QUERY,
  'size': 100,
  'pretty': True
}

# Pull all results in multiple batches
batch = 1
all_records = []
start_time = time.time()
found_all_records = False
continue_scrolling = True

while continue_scrolling and not found_all_records: 

  # Check if we reached the maximum number of records we wanted to pull
  if USE_MAX_NUM_RECORDS_LIMIT:
    num_records_to_request = MAX_NUM_RECORDS_LIMIT - len(all_records)
    P['size'] = max(0, min(100, num_records_to_request))
    if num_records_to_request == 0:
      print(f"Stopping - reached maximum number of records to pull "
            f"[MAX_NUM_RECORDS_LIMIT = {MAX_NUM_RECORDS_LIMIT}]")
      break

  # Send Response
  response = client.company.search(**P).json()

  # Check response status code:
  if response['status'] == 200:
    all_records.extend(response['data'])
    print(f"Retrieved {len(response['data'])} records in batch {batch} "
          f"- {response['total'] - len(all_records)} records remaining")
  else:
    print(f"Error retrieving some records:\n\t"
          f"[{response['status']} - {response['error']['type']}] "
          f"{response['error']['message']}")
  
  # Get scroll_token from response
  if 'scroll_token' in response:
    P['scroll_token'] = response['scroll_token']
  else:
    continue_scrolling = False
    print(f"Unable to continue scrolling")

  batch += 1
  found_all_records = (len(all_records) == response['total'])
  time.sleep(6) # avoid hitting rate limit thresholds
 
end_time = time.time()
runtime = end_time - start_time
        
print(f"Successfully recovered {len(all_records)} profiles in "
      f"{batch} batches [{round(runtime, 2)} seconds]")

# Save profiles to csv (utility function)
def save_profiles_to_csv(profiles, filename, fields=[], delim=','):
  # Define header fields
  if fields == [] and len(profiles) > 0:
      fields = profiles[0].keys()
  # Write csv file
  with open(filename, 'w') as csvfile:
    writer = csv.writer(csvfile, delimiter=delim)
    # Write Header:
    writer.writerow(fields)
    # Write Body:
    count = 0
    for profile in profiles:
      writer.writerow([ profile[field] for field in fields ])
      count += 1
  print(f"Wrote {count} lines to: '{filename}'")

# Use utility function to save profiles to csv    
csv_header_fields = ['name', 'website', "linkedin_url",
                     'size', 'tags']
csv_filename = "all_company_profiles.csv"
save_profiles_to_csv(all_records, csv_filename, csv_header_fields)
import json, time, csv

# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY

# Create a client, specifying an API key
client = PDLPY(
    api_key="YOUR API KEY",
)

# Limit the number of records to pull (to prevent accidentally using up 
# more credits than expected when testing out this code).
MAX_NUM_RECORDS_LIMIT = 150 # The maximum number of records to retrieve
USE_MAX_NUM_RECORDS_LIMIT = True # Set to False to pull all available records

SQL_QUERY = \
f"""
  SELECT * FROM company
  WHERE industry = 'automotive'
  AND location.metro='detroit, michigan';
"""

P = {
  'sql': SQL_QUERY,
  'size': 100,
  'pretty': True
}

# Pull all results in multiple batches
batch = 1
all_records = []
start_time = time.time()
found_all_records = False
continue_scrolling = True

while continue_scrolling and not found_all_records: 

  # Check if we reached the maximum number of records we wanted to pull
  if USE_MAX_NUM_RECORDS_LIMIT:
    num_records_to_request = MAX_NUM_RECORDS_LIMIT - len(all_records)
    P['size'] = max(0, min(100, num_records_to_request))
    if num_records_to_request == 0:
      print(f"Stopping - reached maximum number of records to pull "
            f"[MAX_NUM_RECORDS_LIMIT = {MAX_NUM_RECORDS_LIMIT}]")
      break

  # Send Response
  response = client.company.search(**P).json()

  # Check response status code:
  if response['status'] == 200:
    all_records.extend(response['data'])
    print(f"Retrieved {len(response['data'])} records in batch {batch} "
          f"- {response['total'] - len(all_records)} records remaining")
  else:
    print(f"Error retrieving some records:\n\t"
          f"[{response['status']} - {response['error']['type']}] "
          f"{response['error']['message']}")
  
  # Get scroll_token from response
  if 'scroll_token' in response:
    P['scroll_token'] = response['scroll_token']
  else:
    continue_scrolling = False
    print(f"Unable to continue scrolling")

  batch += 1
  found_all_records = (len(all_records) == response['total'])
  time.sleep(6) # avoid hitting rate limit thresholds
 
end_time = time.time()
runtime = end_time - start_time
        
print(f"Successfully recovered {len(all_records)} profiles in "
      f"{batch} batches [{round(runtime, 2)} seconds]")

# Save profiles to csv (utility function)
def save_profiles_to_csv(profiles, filename, fields=[], delim=','):
  # Define header fields
  if fields == [] and len(profiles) > 0:
      fields = profiles[0].keys()
  # Write csv file
  with open(filename, 'w') as csvfile:
    writer = csv.writer(csvfile, delimiter=delim)
    # Write Header:
    writer.writerow(fields)
    # Write Body:
    count = 0
    for profile in profiles:
      writer.writerow([ profile[field] for field in fields ])
      count += 1
  print(f"Wrote {count} lines to: '{filename}'")

# Use utility function to save profiles to csv    
csv_header_fields = ['name', 'website', "linkedin_url",
                     'size', 'tags']
csv_filename = "all_company_profiles.csv"
save_profiles_to_csv(all_records, csv_filename, csv_header_fields)
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';

// See https://www.npmjs.com/package/csv-writer
import * as csvwriter from 'csv-writer';

const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });

// Limit the number of records to pull (to prevent accidentally using up 
// more credits than expected when testing out this code).
const maxNumRecordsLimit = 150;     // The maximum number of records to retrieve
const useMaxNumRecordsLimit = true; // Set to false to pull all available records

const esQuery = {
  query: {
    bool: {
      must:[
        {'term': {'industry': "automotive"}},
        {'term': {'location.metro': "detroit, michigan"}}
      ]
    }
  }
}

var params = {
  searchQuery: esQuery, 
  size: 100,
  scroll_token: null,
  pretty: true
}

// Pull all results in multiple batches
var batch = 1;

var allRecords = [];
var startTime = Date.now();
var foundAllRecords = false;
var continueScrolling = true;
var numRetrieved = 0;
var paramQueue = [];
var scrollToken = null;
var numRecordsToRequest = 100;

while (numRecordsToRequest > 0) { 

    // Check if we reached the maximum number of records we wanted to pull
    if (useMaxNumRecordsLimit) {
        numRecordsToRequest = maxNumRecordsLimit - numRetrieved;
        params.size = Math.max(0, Math.min(100, numRecordsToRequest));
        numRetrieved += params.size;
        // Add batch to the parameter queue
        if (params.size > 0) {       
            paramQueue.push(JSON.parse(JSON.stringify(params)));
        }
    } else {
        break;
    }
}

// Run initial batch
runBatch();

function runBatch() {
    // Get the parameters for the batch
    let currParams = useMaxNumRecordsLimit ? paramQueue[batch-1] : params;
    // Set the scroll_token from the previous batch
    currParams.scroll_token = scrollToken;
    batch++;
                
    PDLJSClient.company.search.elastic(currParams).then((data) => {
        Array.prototype.push.apply(allRecords, data.data);
            
        // Get the scroll_token
        if (data['scroll_token']) {
            scrollToken = data['scroll_token'];
        } else {
            continueScrolling = false;
            console.log("Unable to continue scrolling");
        }
            
        foundAllRecords = (allRecords.length == data['total']);
            
        console.log("Retrieved " + data.data.length + " records in batch " + (batch-1) +
            " - " + (data['total'] - allRecords.length) + " records remaining");
            
        // Run next batch, if any
        if (!foundAllRecords && (batch <= paramQueue.length || !useMaxNumRecordsLimit)) {
            runBatch();
        } else {
            console.log("Stopping - reached maximum number of records to pull [maxNumRecordsLimit = " +
                maxNumRecordsLimit + "]");  
                
            let endTime = Date.now();
            let runTime = endTime - startTime;
            console.log ("Successfully recovered " + allRecords.length + " profiles in " +
                (batch-1) + " batches [" + Math.round(runTime/1000) + " seconds]");
                
            // Output profiles to CSV
            let csvHeaderFields = [
                {id: "name", title: "name"},
                {id: "website", title: "website"},
                {id: "linkedin_url", title: "linkedin_url"},
                {id: "size", title: "size"},
                {id: "tags", title: "tags"}
            ];
            let csvFilename = "all_company_profiles.csv";
            saveProfilesToCSV(allRecords, csvFilename, csvHeaderFields);           
        }
    }).catch((error) => {
        console.log(error);
    });

}

// Write out CSV file using csv-writer (https://www.npmjs.com/package/csv-writer) 
// $ npm i -s csv-writer
function saveProfilesToCSV(profiles, filename, fields) {

    const createCsvWriter = csvwriter.createObjectCsvWriter;
    const csvWriter = createCsvWriter({
        path: filename,
        header: fields
    });
    
    let data = [];
    for (let i = 0; i < profiles.length; i++) {
        let record = profiles[i];
        data[i] = {};
        for (let field in fields) {
            data[i][fields[field].id] = record[fields[field].id];    
        }
     }

    csvWriter
        .writeRecords(data)
        .then(()=> console.log('The CSV file was written successfully'));
}
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';

// See https://www.npmjs.com/package/csv-writer
import * as csvwriter from 'csv-writer';

const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });

// Limit the number of records to pull (to prevent accidentally using up 
// more credits than expected when testing out this code).
const maxNumRecordsLimit = 150;     // The maximum number of records to retrieve
const useMaxNumRecordsLimit = true; // Set to false to pull all available records

const sqlQuery = `SELECT * FROM company
                    WHERE industry = 'automotive'
                    AND location.metro='detroit, michigan';`;

var params = {
  searchQuery: sqlQuery, 
  size: 100,
  scroll_token: null,
  pretty: true
}

// Pull all results in multiple batches
var batch = 1;

var allRecords = [];
var startTime = Date.now();
var foundAllRecords = false;
var continueScrolling = true;
var numRetrieved = 0;
var paramQueue = [];
var scrollToken = null;
var numRecordsToRequest = 100;

while (numRecordsToRequest > 0) { 

    // Check if we reached the maximum number of records we wanted to pull
    if (useMaxNumRecordsLimit) {
        numRecordsToRequest = maxNumRecordsLimit - numRetrieved;
        params.size = Math.max(0, Math.min(100, numRecordsToRequest));
        numRetrieved += params.size;
        // Add batch to the parameter queue
        if (params.size > 0) {       
            paramQueue.push(JSON.parse(JSON.stringify(params)));
        }
    } else {
        break;
    }
}

// Run initial batch
runBatch();

function runBatch() {
    // Get the parameters for the batch
    let currParams = useMaxNumRecordsLimit ? paramQueue[batch-1] : params;
    // Set the scroll_token from the previous batch
    currParams.scroll_token = scrollToken;
    batch++;
                
    PDLJSClient.company.search.sql(currParams).then((data) => {
        Array.prototype.push.apply(allRecords, data.data);
            
        // Get the scroll_token
        if (data['scroll_token']) {
            scrollToken = data['scroll_token'];
        } else {
            continueScrolling = false;
            console.log("Unable to continue scrolling");
        }
            
        foundAllRecords = (allRecords.length == data['total']);
            
        console.log("Retrieved " + data.data.length + " records in batch " + (batch-1) +
            " - " + (data['total'] - allRecords.length) + " records remaining");
            
        // Run next batch, if any
        if (!foundAllRecords && (batch <= paramQueue.length || !useMaxNumRecordsLimit)) {
            runBatch();
        } else {
            console.log("Stopping - reached maximum number of records to pull [maxNumRecordsLimit = " +
                maxNumRecordsLimit + "]");  
                
            let endTime = Date.now();
            let runTime = endTime - startTime;
            console.log ("Successfully recovered " + allRecords.length + " profiles in " +
                (batch-1) + " batches [" + Math.round(runTime/1000) + " seconds]");
                
            // Output profiles to CSV
            let csvHeaderFields = [
                {id: "name", title: "name"},
                {id: "website", title: "website"},
                {id: "linkedin_url", title: "linkedin_url"},
                {id: "size", title: "size"},
                {id: "tags", title: "tags"}
            ];
            let csvFilename = "all_company_profiles.csv";
            saveProfilesToCSV(allRecords, csvFilename, csvHeaderFields);           
        }
    }).catch((error) => {
        console.log(error);
    });

}

// Write out CSV file using csv-writer (https://www.npmjs.com/package/csv-writer)
// $ npm i -s csv-writer
function saveProfilesToCSV(profiles, filename, fields) {

    const createCsvWriter = csvwriter.createObjectCsvWriter;
    const csvWriter = createCsvWriter({
        path: filename,
        header: fields
    });
    
    let data = [];
    for (let i = 0; i < profiles.length; i++) {
        let record = profiles[i];
        data[i] = {};
        for (const field of fields) {
            data[i][field.id] = record[field.id];
        }
    }

    csvWriter
        .writeRecords(data)
        .then(()=> console.log('The CSV file was written successfully'));
}
require 'json'
require 'csv'

# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'

Peopledatalabs.api_key = 'YOUR API KEY'

# Limit the number of records to pull (to prevent accidentally using up 
# more credits than expected when testing out this code).
MAX_NUM_RECORDS_LIMIT = 150 # The maximum number of records to retrieve
USE_MAX_NUM_RECORDS_LIMIT = true # Set to false to pull all available records

ES_QUERY = {
  'query': {
    'bool': {
      'must': [
        {'term': {'industry': "automotive"}},
        {'term': {'location.metro': "detroit, michigan"}}
      ]
    }
  }
}

# Pull all results in multiple batches
batch = 1
all_records = []
start_time = Time.now
found_all_records = false
continue_scrolling = true
scroll_token = {}

while continue_scrolling && !found_all_records do 

  # Check if we reached the maximum number of records we wanted to pull
  if USE_MAX_NUM_RECORDS_LIMIT
    num_records_to_request = MAX_NUM_RECORDS_LIMIT - all_records.length()
    size = [0, [100, num_records_to_request].min].max
    if num_records_to_request == 0
      puts "Stopping - reached maximum number of records to pull "
      puts "[MAX_NUM_RECORDS_LIMIT = #{MAX_NUM_RECORDS_LIMIT}]"
      break
    end
  end

  # Send Response
  response = Peopledatalabs::Search.company(searchType: 'elastic', query: ES_QUERY, size: size, scroll_token: scroll_token, pretty: true)

  # Check response status code:
  if response['status'] == 200
    all_records += response['data']
    puts "Retrieved #{response['data'].length()} records in batch #{batch} "
    puts  "- #{response['total'] - all_records.length()} records remaining"
  else
    puts "Error retrieving some records:\n\t"
    puts "[#{response['status']} - #{response['error']['type']}] "
    puts response['error']['message']
  end
  
  # Get scroll_token from response
  if response.key?('scroll_token')
    scroll_token = response['scroll_token']
  else
    continue_scrolling = false
    puts "Unable to continue scrolling"
  end

  batch += 1
  found_all_records = (all_records.length() == response['total'])
  sleep(6) # avoid hitting rate limit thresholds
end

end_time = Time.now
runtime = end_time - start_time
        
puts "Successfully recovered #{all_records.length()} profiles in "
puts "#{batch} batches [#{runtime.round(2)} seconds]"

# Save profiles to csv (utility function)
def save_profiles_to_csv(profiles, filename, fields=[], delim=',')
  # Define header fields
  if fields == [] && profiles.length() > 0
      fields = profiles[0].keys
  end
    
  count = 0
  # Write csv file
  CSV.open(filename, 'w') do |writer|
    # Write Header:
    writer << fields
    # Write Body:
    profiles.each do |profile|
      record = []
      fields.each do |field|
        record << profile[field]
      end
      writer << record
      count += 1
    end
  end
  puts "Wrote #{count} lines to: '#{filename}'"
end

# Use utility function to save profiles to csv    
csv_header_fields = ['name', 'website', 'linkedin_url',
                     'size', 'tags']
csv_filename = "all_company_profiles.csv"
save_profiles_to_csv(all_records, csv_filename, csv_header_fields)
require 'json'
require 'csv'

# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'

Peopledatalabs.api_key = 'YOUR API KEY'

# Limit the number of records to pull (to prevent accidentally using up 
# more credits than expected when testing out this code).
MAX_NUM_RECORDS_LIMIT = 150 # The maximum number of records to retrieve
USE_MAX_NUM_RECORDS_LIMIT = true # Set to false to pull all available records

SQL_QUERY = <<~SQL
  SELECT * FROM company
  WHERE industry = 'automotive'
  AND location.metro='detroit, michigan';
SQL

# Pull all results in multiple batches
batch = 1
all_records = []
start_time = Time.now
found_all_records = false
continue_scrolling = true
scroll_token = {}

while continue_scrolling && !found_all_records do 

  # Check if we reached the maximum number of records we wanted to pull
  if USE_MAX_NUM_RECORDS_LIMIT
    num_records_to_request = MAX_NUM_RECORDS_LIMIT - all_records.length()
    size = [0, [100, num_records_to_request].min].max
    if num_records_to_request == 0
      puts "Stopping - reached maximum number of records to pull "
      puts "[MAX_NUM_RECORDS_LIMIT = #{MAX_NUM_RECORDS_LIMIT}]"
      break
    end
  end

  # Send Response
  response = Peopledatalabs::Search.company(searchType: 'sql', query: SQL_QUERY, size: size, scroll_token: scroll_token, pretty: true)

  # Check response status code:
  if response['status'] == 200
    all_records += response['data']
    puts "Retrieved #{response['data'].length()} records in batch #{batch} "
    puts  "- #{response['total'] - all_records.length()} records remaining"
  else
    puts "Error retrieving some records:\n\t"
    puts "[#{response['status']} - #{response['error']['type']}] "
    puts response['error']['message']
  end
  
  # Get scroll_token from response
  if response.key?('scroll_token')
    scroll_token = response['scroll_token']
  else
    continue_scrolling = false
    puts "Unable to continue scrolling"
  end

  batch += 1
  found_all_records = (all_records.length() == response['total'])
  sleep(6) # avoid hitting rate limit thresholds
end

end_time = Time.now
runtime = end_time - start_time
        
puts "Successfully recovered #{all_records.length()} profiles in "
puts "#{batch} batches [#{runtime.round(2)} seconds]"

# Save profiles to csv (utility function)
def save_profiles_to_csv(profiles, filename, fields=[], delim=',')
  # Define header fields
  if fields == [] && profiles.length() > 0
      fields = profiles[0].keys
  end
    
  count = 0
  # Write csv file
  CSV.open(filename, 'w') do |writer|
    # Write Header:
    writer << fields
    # Write Body:
    profiles.each do |profile|
      record = []
      fields.each do |field|
        record << profile[field]
      end
      writer << record
      count += 1
    end
  end
  puts "Wrote #{count} lines to: '#{filename}'"
end

# Use utility function to save profiles to csv    
csv_header_fields = ['name', 'website', 'linkedin_url',
                     'size', 'tags']
csv_filename = "all_company_profiles.csv"
save_profiles_to_csv(all_records, csv_filename, csv_header_fields)
package main

import (
    "fmt"
    "time"
    "os"
    "math"
    "reflect"
    "encoding/json"
    "encoding/csv"
)

import (
    pdl "github.com/peopledatalabs/peopledatalabs-go"
    pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)

func main() {
    apiKey := "YOUR API KEY"
    // Set API KEY as env variable
    // apiKey := os.Getenv("API_KEY")

    client := pdl.New(apiKey)

    // Limit the number of records to pull (to prevent accidentally using up 
    // more credits than expected when testing out this code).
    const maxNumRecordsLimit = 150 // The maximum number of records to retrieve
    const useMaxNumRecordsLimit = true // Set to False to pull all available records

    elasticSearchQuery := map[string]interface{} {
        "query": map[string]interface{} {
            "bool": map[string]interface{} {
                "must": []map[string]interface{} {
                    {"term": map[string]interface{}{"industry": "automotive"}},
                    {"term": map[string]interface{}{"location.metro": "detroit, michigan"}},
                },
            },
        },
    }

    p := pdlmodel.SearchParams {
        BaseParams: pdlmodel.BaseParams {
            Size: 50,
            Pretty: true,
        },
        SearchBaseParams: pdlmodel.SearchBaseParams {
            Query: elasticSearchQuery,
        },
    }

    // Pull all results in multiple batches
    batch := 1
    var allRecords []pdlmodel.Company
    startTime := time.Now()
    foundAllRecords := false
    continueScrolling := true
    var numRecordsToRequest int
    
    for continueScrolling && !foundAllRecords {
        // Check if we reached the maximum number of records we wanted to pull
        if useMaxNumRecordsLimit {
            numRecordsToRequest = maxNumRecordsLimit - len(allRecords)
            p.BaseParams.Size = (int) (math.Max(0.0, math.Min(50.0, (float64) (numRecordsToRequest))))
            if numRecordsToRequest == 0 {
                fmt.Printf("Stopping - reached maximum number of records to pull " +
                           "[MAX_NUM_RECORDS_LIMIT = %d]\n", maxNumRecordsLimit)
                
                break
            }
        }
        
        // Send Response
        response, err := client.Company.Search(p)
        
        // Check response status code:
        if err == nil {
            fmt.Printf("Retrieved %d records in batch %d - %d records remaining\n", 
                       len(response.Data), batch, response.Total - len(allRecords))
        } else {
            fmt.Println("Error retrieving some records:\n\t",
                       err)
        }
        
        // Get scroll_token from response
        var data map[string]interface{}
        jsonResponse, jsonErr := json.Marshal(response)
        if jsonErr == nil {
            json.Unmarshal(jsonResponse, &data)
            if scrollToken, ok := data["scroll_token"]; ok {
                p.SearchBaseParams.ScrollToken = fmt.Sprintf("%v", scrollToken)
            } else {
                continueScrolling = false
                fmt.Println("Unable to continue scrolling")
            }
            allRecords = append(allRecords, response.Data...)
        }
        
        batch++
        foundAllRecords = (len(allRecords) == response.Total)
        time.Sleep(6 * time.Second) // avoid hitting rate limit thresholds
    }
    
    endTime := time.Now()
    runtime := endTime.Sub(startTime).Seconds()
        
    fmt.Printf("Successfully recovered %d profiles in %d batches [%d seconds]\n",
               len(allRecords), batch-1, (int) (math.Round((float64) (runtime))))
    
    // Use utility function to save profiles to csv    
    csvHeaderFields := []string{"name", "website", "linkedin_url",
                                "size", "tags"}
    csvFilename := "all_company_profiles.csv"
    saveProfilesToCsv(allRecords, csvFilename, csvHeaderFields, ",")
}

// Save profiles to csv (utility function)
func saveProfilesToCsv(profiles []pdlmodel.Company, filename string, fields []string, delim string) {
    // Define header fields
    if fields == nil && len(profiles) > 0 {
        e := reflect.ValueOf(&(profiles[0])).Elem()
        for i := 0; i < e.NumField(); i++ {
            fields = append(fields, e.Type().Field(i).Name)
        }
    }
    
    // Write csv file
    csvFile, err := os.Create(filename)
    if err == nil {
        csvwriter := csv.NewWriter(csvFile)
        defer csvwriter.Flush()
        // Write Header
        csvwriter.Write(fields)
        // Write Body:
        count := 0
        for i := range profiles {
            var data map[string]interface{}
            jsonResponse, jsonErr := json.Marshal(profiles[i])
            if jsonErr == nil {
                json.Unmarshal(jsonResponse, &data)
                var record []string
                for j := range fields {
                    record = append(record, fmt.Sprintf("%v", data[fields[j]]))
                }
                csvwriter.Write(record)
                count++
            }
        }
        fmt.Printf("Wrote %d lines to: %s\n", count, filename)
    }
}
package main

import (
    "fmt"
    "time"
    "os"
    "math"
    "reflect"
    "encoding/json"
    "encoding/csv"
)

import (
    pdl "github.com/peopledatalabs/peopledatalabs-go"
    pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)

func main() {
    apiKey := "YOUR API KEY"
    // Set API KEY as env variable
    // apiKey := os.Getenv("API_KEY")

    client := pdl.New(apiKey)

    // Limit the number of records to pull (to prevent accidentally using up 
    // more credits than expected when testing out this code).
    const maxNumRecordsLimit = 150 // The maximum number of records to retrieve
    const useMaxNumRecordsLimit = true // Set to False to pull all available records

    sqlQuery := "SELECT * FROM company" +
        " WHERE industry = 'automotive'" +
        " AND location.metro='detroit, michigan';"

    p := pdlmodel.SearchParams {
        BaseParams: pdlmodel.BaseParams {
            Size: 50,
            Pretty: true,
        },
        SearchBaseParams: pdlmodel.SearchBaseParams {
            SQL: sqlQuery,
        },
    }

    // Pull all results in multiple batches
    batch := 1
    var allRecords []pdlmodel.Company
    startTime := time.Now()
    foundAllRecords := false
    continueScrolling := true
    var numRecordsToRequest int
    
    for continueScrolling && !foundAllRecords {
        // Check if we reached the maximum number of records we wanted to pull
        if useMaxNumRecordsLimit {
            numRecordsToRequest = maxNumRecordsLimit - len(allRecords)
            p.BaseParams.Size = (int) (math.Max(0.0, math.Min(50.0, (float64) (numRecordsToRequest))))
            if numRecordsToRequest == 0 {
                fmt.Printf("Stopping - reached maximum number of records to pull " +
                           "[MAX_NUM_RECORDS_LIMIT = %d]\n", maxNumRecordsLimit)
                
                break
            }
        }
        
        // Send Response
        response, err := client.Company.Search(p)
        
        // Check response status code:
        if err == nil {
            fmt.Printf("Retrieved %d records in batch %d - %d records remaining\n", 
                       len(response.Data), batch, response.Total - len(allRecords))
        } else {
            fmt.Println("Error retrieving some records:\n\t",
                       err)
        }
        
        // Get scroll_token from response
        var data map[string]interface{}
        jsonResponse, jsonErr := json.Marshal(response)
        if jsonErr == nil {
            json.Unmarshal(jsonResponse, &data)
            if scrollToken, ok := data["scroll_token"]; ok {
                p.SearchBaseParams.ScrollToken = fmt.Sprintf("%v", scrollToken)
            } else {
                continueScrolling = false
                fmt.Println("Unable to continue scrolling")
            }
            allRecords = append(allRecords, response.Data...)
        }
        
        batch++
        foundAllRecords = (len(allRecords) == response.Total)
        time.Sleep(6 * time.Second) // avoid hitting rate limit thresholds
    }
    
    endTime := time.Now()
    runtime := endTime.Sub(startTime).Seconds()
        
    fmt.Printf("Successfully recovered %d profiles in %d batches [%d seconds]\n",
               len(allRecords), batch-1, (int) (math.Round((float64) (runtime))))
    
    // Use utility function to save profiles to csv    
    csvHeaderFields := []string{"name", "website", "linkedin_url",
                                "size", "tags"}
    csvFilename := "all_company_profiles.csv"
    saveProfilesToCsv(allRecords, csvFilename, csvHeaderFields, ",")
}

// Save profiles to csv (utility function)
func saveProfilesToCsv(profiles []pdlmodel.Company, filename string, fields []string, delim string) {
    // Define header fields
    if fields == nil && len(profiles) > 0 {
        e := reflect.ValueOf(&(profiles[0])).Elem()
        for i := 0; i < e.NumField(); i++ {
            fields = append(fields, e.Type().Field(i).Name)
        }
    }
    
    // Write csv file
    csvFile, err := os.Create(filename)
    if err == nil {
        csvwriter := csv.NewWriter(csvFile)
        defer csvwriter.Flush()
        // Write Header
        csvwriter.Write(fields)
        // Write Body:
        count := 0
        for i := range profiles {
            var data map[string]interface{}
            jsonResponse, jsonErr := json.Marshal(profiles[i])
            if jsonErr == nil {
                json.Unmarshal(jsonResponse, &data)
                var record []string
                for j := range fields {
                    record = append(record, fmt.Sprintf("%v", data[fields[j]]))
                }
                csvwriter.Write(record)
                count++
            }
        }
        fmt.Printf("Wrote %d lines to: %s\n", count, filename)
    }
}
import requests, json, time, csv

API_KEY = "YOUR API KEY" # Enter your API key

# Limit the number of records to pull (to prevent accidentally using up 
# more credits than expected when testing out this code).
MAX_NUM_RECORDS_LIMIT = 150 # The maximum number of records to retrieve
USE_MAX_NUM_RECORDS_LIMIT = True # Set to False to pull all available records

PDL_URL = "https://api.peopledatalabs.com/v5/company/search"

H = {
  'Content-Type': "application/json",
  'X-api-key': API_KEY
}

ES_QUERY = {
  'query': {
    'bool': {
      'must': [
        {'term': {'industry': "automotive"}},
        {'term': {'location.metro': "detroit, michigan"}}
      ]
    }
  }
}

P = {
  'query': json.dumps(ES_QUERY),
  'size': 100,
  'pretty': True
}

# Pull all results in multiple batches
batch = 1
all_records = []
start_time = time.time()
found_all_records = False
continue_scrolling = True

while continue_scrolling and not found_all_records: 

  # Check if we reached the maximum number of records we wanted to pull
  if USE_MAX_NUM_RECORDS_LIMIT:
    num_records_to_request = MAX_NUM_RECORDS_LIMIT - len(all_records)
    P['size'] = max(0, min(100, num_records_to_request))
    if num_records_to_request == 0:
      print(f"Stopping - reached maximum number of records to pull "
            f"[MAX_NUM_RECORDS_LIMIT = {MAX_NUM_RECORDS_LIMIT}]")
      break

  # Send Response
  response = requests.get(
    PDL_URL,
    headers=H,
    params=P
  ).json()

  # Check response status code:
  if response['status'] == 200:
    all_records.extend(response['data'])
    print(f"Retrieved {len(response['data'])} records in batch {batch} "
          f"- {response['total'] - len(all_records)} records remaining")
  else:
    print(f"Error retrieving some records:\n\t"
          f"[{response['status']} - {response['error']['type']}] "
          f"{response['error']['message']}")
  
  # Get scroll_token from response
  if 'scroll_token' in response:
    P['scroll_token'] = response['scroll_token']
  else:
    continue_scrolling = False
    print(f"Unable to continue scrolling")

  batch += 1
  found_all_records = (len(all_records) == response['total'])
  time.sleep(6) # avoid hitting rate limit thresholds
 
end_time = time.time()
runtime = end_time - start_time
        
print(f"Successfully recovered {len(all_records)} profiles in "
      f"{batch} batches [{round(runtime, 2)} seconds]")

# Save profiles to csv (utility function)
def save_profiles_to_csv(profiles, filename, fields=[], delim=','):
  # Define header fields
  if fields == [] and len(profiles) > 0:
      fields = profiles[0].keys()
  # Write csv file
  with open(filename, 'w') as csvfile:
    writer = csv.writer(csvfile, delimiter=delim)
    # Write Header:
    writer.writerow(fields)
    # Write Body:
    count = 0
    for profile in profiles:
      writer.writerow([ profile[field] for field in fields ])
      count += 1
  print(f"Wrote {count} lines to: '{filename}'")

# Use utility function to save profiles to csv    
csv_header_fields = ['name', 'website', "linkedin_url",
                     'size', 'tags']
csv_filename = "all_company_profiles.csv"
save_profiles_to_csv(all_records, csv_filename, csv_header_fields)
import requests, json, time, csv

API_KEY = "YOUR API KEY" # Enter your API key

# Limit the number of records to pull (to prevent accidentally using up 
# more credits than expected when testing out this code).
MAX_NUM_RECORDS_LIMIT = 150 # The maximum number of records to retrieve
USE_MAX_NUM_RECORDS_LIMIT = True # Set to False to pull all available records

PDL_URL = "https://api.peopledatalabs.com/v5/company/search"

H = {
  'Content-Type': "application/json",
  'X-api-key': API_KEY
}

SQL_QUERY = \
f"""
  SELECT * FROM company
  WHERE industry = 'automotive'
  AND location.metro='detroit, michigan';
"""

P = {
  'sql': SQL_QUERY,
  'size': 100,
  'pretty': True
}

# Pull all results in multiple batches
batch = 1
all_records = []
start_time = time.time()
found_all_records = False
continue_scrolling = True

while continue_scrolling and not found_all_records: 

  # Check if we reached the maximum number of records we wanted to pull
  if USE_MAX_NUM_RECORDS_LIMIT:
    num_records_to_request = MAX_NUM_RECORDS_LIMIT - len(all_records)
    P['size'] = max(0, min(100, num_records_to_request))
    if num_records_to_request == 0:
      print(f"Stopping - reached maximum number of records to pull "
            f"[MAX_NUM_RECORDS_LIMIT = {MAX_NUM_RECORDS_LIMIT}]")
      break

  # Send Response
  response = requests.get(
    PDL_URL,
    headers=H,
    params=P
  ).json()

  # Check response status code:
  if response['status'] == 200:
    all_records.extend(response['data'])
    print(f"Retrieved {len(response['data'])} records in batch {batch} "
          f"- {response['total'] - len(all_records)} records remaining")
  else:
    print(f"Error retrieving some records:\n\t"
          f"[{response['status']} - {response['error']['type']}] "
          f"{response['error']['message']}")
  
  # Get scroll_token from response
  if 'scroll_token' in response:
    P['scroll_token'] = response['scroll_token']
  else:
    continue_scrolling = False
    print(f"Unable to continue scrolling")

  batch += 1
  found_all_records = (len(all_records) == response['total'])
  time.sleep(6) # avoid hitting rate limit thresholds
 
end_time = time.time()
runtime = end_time - start_time
        
print(f"Successfully recovered {len(all_records)} profiles in "
      f"{batch} batches [{round(runtime, 2)} seconds]")

# Save profiles to csv (utility function)
def save_profiles_to_csv(profiles, filename, fields=[], delim=','):
  # Define header fields
  if fields == [] and len(profiles) > 0:
      fields = profiles[0].keys()
  # Write csv file
  with open(filename, 'w') as csvfile:
    writer = csv.writer(csvfile, delimiter=delim)
    # Write Header:
    writer.writerow(fields)
    # Write Body:
    count = 0
    for profile in profiles:
      writer.writerow([ profile[field] for field in fields ])
      count += 1
  print(f"Wrote {count} lines to: '{filename}'")

# Use utility function to save profiles to csv    
csv_header_fields = ['name', 'website', "linkedin_url",
                     'size', 'tags']
csv_filename = "all_company_profiles.csv"
save_profiles_to_csv(all_records, csv_filename, csv_header_fields)
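
All of the bulk retrieval examples above follow the same pagination pattern: request one page of results, read the scroll_token from the response, and pass it back on the next request until the response no longer contains a token. As a rough illustration of just that loop, here is a stripped-down Python sketch using the same endpoint, headers, and response fields as the examples above (the API key and query are placeholders, and note that without a record cap this pulls every match, which consumes credits accordingly):

import requests, time

API_KEY = "YOUR API KEY" # placeholder

PDL_URL = "https://api.peopledatalabs.com/v5/company/search"

H = {
  'Content-Type': "application/json",
  'X-api-key': API_KEY
}

P = {
  'sql': "SELECT * FROM company WHERE industry = 'automotive';", # placeholder query
  'size': 100
}

all_records = []

while True:
  response = requests.get(PDL_URL, headers=H, params=P).json()
  if response['status'] != 200:
    print("Error:", response)
    break
  all_records.extend(response['data'])
  # No scroll_token in the response means there are no more pages to pull
  if 'scroll_token' not in response:
    break
  P['scroll_token'] = response['scroll_token']
  time.sleep(6) # avoid hitting rate limit thresholds

print(f"retrieved {len(all_records)} records total")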

Affiliate Lookup

(search by affiliated companies)

"I want to find all companies that are affiliated with Amazon."

import json

# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY

# Create a client, specifying an API key
client = PDLPY(
    api_key="YOUR API KEY",
)

# https://pdl-prod-schema.s3-us-west-2.amazonaws.com/14.0/enums/industry.txt
# for enumerated possible values of industry

# https://pdl-prod-schema.s3-us-west-2.amazonaws.com/14.0/enums/job_company_size.txt
# for enumerated possible values of company sizes

ES_QUERY = {
  "query": {
    "bool": {
      "must": [
        {"term": {"affiliated_profiles": "amazon"}}
      ]
    }
  }
}

P = {
    "query": ES_QUERY,
    "size": 100
}

response = client.company.search(**P).json()

if response["status"] == 200:
  
  data = response['data']
  
  with open("my_pdl_search.jsonl", "w") as out:
    for record in data:
      out.write(json.dumps(record) + "\n")
  
  print(f"successfully grabbed {len(data)} records from pdl")
  print(f"{response['total']} total pdl records exist matching this query")
else:
  print("NOTE. The eager beaver was not so eager. See error and try again.")
  print("error:", response)
import json

# See https://github.com/peopledatalabs/peopledatalabs-python
from peopledatalabs import PDLPY

# Create a client, specifying an API key
client = PDLPY(
    api_key="YOUR API KEY",
)

# https://pdl-prod-schema.s3-us-west-2.amazonaws.com/14.0/enums/industry.txt
# for enumerated possible values of industry

# https://pdl-prod-schema.s3-us-west-2.amazonaws.com/14.0/enums/job_company_size.txt
# for enumerated possible values of company sizes

SQL_QUERY = \
f"""
  SELECT * FROM company
  WHERE affiliated_profiles = 'amazon';
"""

P = {
  'sql': SQL_QUERY,
  'size': 100
}

response = client.company.search(**P).json()

if response["status"] == 200:
  
  data = response['data']
  
  with open("my_pdl_search.jsonl", "w") as out:
    for record in data:
      out.write(json.dumps(record) + "\n")
  
  print(f"successfully grabbed {len(data)} records from pdl")
  print(f"{response['total']} total pdl records exist matching this query")
else:
  print("NOTE. The eager beaver was not so eager. See error and try again.")
  print("error:", response)
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';

import fs from 'fs';

const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });

const esQuery = {
  query: {
    bool: {
      must:[
        {"term": {"affiliated_profiles": "amazon"}}
      ]
    }
  }
}

const params = {
  searchQuery: esQuery, 
  size: 100,
}

PDLJSClient.company.search.elastic(params).then((data) => {
    fs.writeFile("my_pdl_search.jsonl", Buffer.from(JSON.stringify(data.data)), (err) => {
        if (err) throw err;
    });
    console.log("successfully grabbed " + data.data.length + " records from pdl");
    console.log(data["total"] + " total pdl records exist matching this query")
}).catch((error) => {
    console.log("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
    console.log(error);
});
// See https://github.com/peopledatalabs/peopledatalabs-js
import PDLJS from 'peopledatalabs';

import fs from 'fs';

const PDLJSClient = new PDLJS({ apiKey: "YOUR API KEY" });

const sqlQuery = `SELECT * FROM company
                    WHERE affiliated_profiles = 'amazon';`;

var params = {
    searchQuery: sqlQuery, 
    size: 100
}

PDLJSClient.company.search.sql(params).then((data) => {
    fs.writeFile("my_pdl_search.jsonl", Buffer.from(JSON.stringify(data.data)), (err) => {
        if (err) throw err;
    });
    console.log("successfully grabbed " + data.data.length + " records from pdl");
    console.log(data["total"] + " total pdl records exist matching this query")
}).catch((error) => {
    console.log("NOTE. The carrier pigeons lost motivation in flight. See error and try again.")
    console.log(error);
});
require 'json'

# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'

Peopledatalabs.api_key = 'YOUR API KEY'

# https://pdl-prod-schema.s3-us-west-2.amazonaws.com/14.0/enums/industry.txt
# for enumerated possible values of industry

# https://pdl-prod-schema.s3-us-west-2.amazonaws.com/14.0/enums/job_company_size.txt
# for enumerated possible values of company sizes

ES_QUERY = {
  "query": {
    "bool": {
      "must": [
        {"term": {"affiliated_profiles": "amazon"}}
      ]
    }
  }
}

response = Peopledatalabs::Search.company(searchType: 'elastic', query: ES_QUERY, size: 100, pretty: true)

if response['status'] == 200
    data = response['data']
    File.open("my_pdl_search.jsonl", "w") do |out|
        data.each { |record| out.write(JSON.dump(record) + "\n") }
    end
    puts "successfully grabbed #{data.length()} records from pdl"
    puts "#{response['total']} total pdl records exist matching this query"
else
    puts "NOTE. The carrier pigeons lost motivation in flight. See error and try again."
    puts "Error: #{response}"
end
require 'json'

# See https://github.com/peopledatalabs/peopledatalabs-ruby
require 'peopledatalabs'

Peopledatalabs.api_key = 'YOUR API KEY'

# https://pdl-prod-schema.s3-us-west-2.amazonaws.com/14.0/enums/industry.txt
# for enumerated possible values of industry

# https://pdl-prod-schema.s3-us-west-2.amazonaws.com/14.0/enums/job_company_size.txt
# for enumerated possible values of company sizes

SQL_QUERY = <<~SQL
  SELECT * FROM company
  WHERE affiliated_profiles = 'amazon';
SQL

response = Peopledatalabs::Search.company(searchType: 'sql', query: SQL_QUERY, size: 100, pretty: true)

if response['status'] == 200
    data = response['data']
    File.open("my_pdl_search.jsonl", "w") do |out|
        data.each { |record| out.write(JSON.dump(record) + "\n") }
    end
    puts "successfully grabbed #{data.length()} records from pdl"
    puts "#{response['total']} total pdl records exist matching this query"
else
    puts "NOTE. The carrier pigeons lost motivation in flight. See error and try again."
    puts "Error: #{response}"
end
package main

import(
    "fmt"
    "os"
    "encoding/json"
)

import (
    pdl "github.com/peopledatalabs/peopledatalabs-go"
    pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)

func main() {
    apiKey := "YOUR API KEY"
    // Set API KEY as env variable
    // apiKey := os.Getenv("API_KEY")

    client := pdl.New(apiKey)

    // https://pdl-prod-schema.s3-us-west-2.amazonaws.com/14.0/enums/industry.txt
    // for enumerated possible values of industry

    // https://pdl-prod-schema.s3-us-west-2.amazonaws.com/14.0/enums/job_company_size.txt
    // for enumerated possible values of company sizes

    elasticSearchQuery := map[string]interface{} {
        "query": map[string]interface{} {
            "bool": map[string]interface{} {
                "must": []map[string]interface{} {
                    {"term": map[string]interface{}{"affiliated_profiles": "amazon"}},
                },
            },
        },
    }

    params := pdlmodel.SearchParams {
        BaseParams: pdlmodel.BaseParams {
            Size: 100,
        },
        SearchBaseParams: pdlmodel.SearchBaseParams {
            Query: elasticSearchQuery,
        },
    }

    response, err := client.Company.Search(params)
    if err == nil {
        data := response.Data
        out, outErr := os.Create("my_pdl_search.jsonl")
        defer out.Close()
        if (outErr == nil) {
            for i := range data {
                record, jsonErr := json.Marshal(data[i])
                if (jsonErr == nil) {
                    out.WriteString(string(record) + "\n")
                }
            }
            out.Sync()
        }
        fmt.Printf("successfully grabbed %d records from pdl\n", len(data))
        fmt.Printf("%d total pdl records exist matching this query\n", response.Total)
    } else {
        fmt.Println("NOTE. The eager beaver was not so eager. See error and try again.")
        fmt.Println("Error:", err)
    } 
}
package main

import(
    "fmt"
    "os"
    "encoding/json"
)

import (
    pdl "github.com/peopledatalabs/peopledatalabs-go"
    pdlmodel "github.com/peopledatalabs/peopledatalabs-go/model"
)

func main() {
    apiKey := "YOUR API KEY"
    // Set API KEY as env variable
    // apiKey := os.Getenv("API_KEY")

    client := pdl.New(apiKey)

    // https://pdl-prod-schema.s3-us-west-2.amazonaws.com/14.0/enums/industry.txt
    // for enumerated possible values of industry

    // https://pdl-prod-schema.s3-us-west-2.amazonaws.com/14.0/enums/job_company_size.txt
    // for enumerated possible values of company sizes

    sqlQuery := "SELECT * FROM company" +
        " WHERE affiliated_profiles = 'amazon';"

    params := pdlmodel.SearchParams {
        BaseParams: pdlmodel.BaseParams {
            Size: 100,
            Pretty: true,
        },
        SearchBaseParams: pdlmodel.SearchBaseParams {
            SQL: sqlQuery,
        },
    }

    response, err := client.Company.Search(params)
    if err == nil {
        data := response.Data
        out, outErr := os.Create("my_pdl_search.jsonl")
        defer out.Close()
        if (outErr == nil) {
            for i := range data {
                record, jsonErr := json.Marshal(data[i])
                if (jsonErr == nil) {
                    out.WriteString(string(record) + "\n")
                }
            }
            out.Sync()
        }
        fmt.Printf("successfully grabbed %d records from pdl\n", len(data))
        fmt.Printf("%d total pdl records exist matching this query\n", response.Total)
    } else {
        fmt.Println("NOTE. The eager beaver was not so eager. See error and try again.")
        fmt.Println("Error:", err)
    } 
}
import requests, json
API_KEY = "YOUR API KEY" # Enter your API key

PDL_URL = "https://api.peopledatalabs.com/v5/company/search"

H = {
  "Content-Type": "application/json",
  "X-api-key": API_KEY
}

ES_QUERY = {
  "query": {
    "bool": {
      "must": [
        {"term": {"affiliated_profiles": "amazon"}}
      ]
    }
  }
}

P = {
    "query": json.dumps(ES_QUERY),
    "size": 100
}

response = requests.get(
    PDL_URL,
    headers=H,
    params=P
).json()

if response["status"] == 200:
  data = response['data']
  
  with open("my_pdl_search.jsonl", "w") as out:
    for record in data:
      out.write(json.dumps(record) + "\n")

  print(f"successfully grabbed {len(response['data'])} records from pdl")
  print(f"{response['total']} total pdl records exist matching this query")
else:
  print("NOTE. The eager beaver was not so eager. See error and try again.")
  print("error:", response)
import requests, json
API_KEY = "YOUR API KEY" # Enter your API key

PDL_URL = "https://api.peopledatalabs.com/v5/company/search"

H = {
  "Content-Type": "application/json",
  "X-api-key": API_KEY
}

SQL_QUERY = \
f"""
  SELECT * FROM company
  WHERE affiliated_profiles = 'amazon';
"""

P = {
  'sql': SQL_QUERY,
  'size': 100
}

response = requests.get(
  PDL_URL,
  headers=H,
  params=P
).json()

if response["status"] == 200:
  
  data = response['data']
  
  with open("my_pdl_search.jsonl", "w") as out:
    for record in data:
      out.write(json.dumps(record) + "\n")

  print(f"successfully grabbed {len(response['data'])} records from pdl")
  print(f"{response['total']} total pdl records exist matching this query")
else:
  print("NOTE. The eager beaver was not so eager. See error and try again.")
  print("error:", response)
