# scripts/patent_search.py

#!/usr/bin/env python3
"""
USPTO PatentSearch API Helper

Provides functions for searching and retrieving patent data using the USPTO
PatentSearch API (ElasticSearch-based system, replaced legacy PatentsView in May 2025).

Requires:
    - requests library: pip install requests
    - USPTO API key from https://account.uspto.gov/api-manager/

Environment variables:
    USPTO_API_KEY - Your USPTO API key
"""

import os
import sys
import json
import requests
from typing import Dict, List, Optional, Any
from datetime import datetime


class PatentSearchClient:
    """Client for USPTO PatentSearch API.

    Every search method ultimately POSTs a JSON body with the keys ``q``
    (query), ``f`` (fields), ``s`` (sort) and ``o`` (options) to
    ``BASE_URL/<endpoint>`` and returns the decoded JSON response.

    NOTE(review): the field names, operator nesting (``{field: {op: value}}``)
    and pagination keys (``page`` / ``per_page``) used below are kept exactly
    as originally written; confirm them against the current PatentSearch API
    documentation.
    """

    BASE_URL = "https://search.patentsview.org/api/v1"

    # Seconds to wait for the server before giving up; without a timeout
    # requests waits indefinitely on a stalled connection.
    DEFAULT_TIMEOUT = 30.0

    # Upper bound applied to ``per_page`` in search_patents().
    MAX_PER_PAGE = 1000

    # Fields requested by search_patents() when the caller passes none.
    _DEFAULT_SEARCH_FIELDS = (
        "patent_number", "patent_title", "patent_date",
        "patent_abstract", "assignee_organization",
        "inventor_name", "cpc_subclass_id",
    )

    # Fields requested by get_patent().
    _PATENT_DETAIL_FIELDS = (
        "patent_number", "patent_title", "patent_date", "patent_abstract",
        "patent_type", "inventor_name", "assignee_organization",
        "cpc_subclass_id", "cited_patent_number", "citedby_patent_number",
    )

    def __init__(self, api_key: Optional[str] = None,
                 timeout: Optional[float] = DEFAULT_TIMEOUT):
        """
        Initialize client with API key.

        Args:
            api_key: USPTO API key (if not provided, uses USPTO_API_KEY env var)
            timeout: Per-request timeout in seconds passed to requests
                (None disables the timeout)

        Raises:
            ValueError: If no API key is passed and USPTO_API_KEY is unset/empty
        """
        self.api_key = api_key or os.getenv("USPTO_API_KEY")
        if not self.api_key:
            raise ValueError("API key required. Set USPTO_API_KEY environment variable or pass to constructor.")

        self.timeout = timeout
        self.headers = {
            "X-Api-Key": self.api_key,
            "Content-Type": "application/json"
        }

    @staticmethod
    def _build_payload(query: Dict, fields: Optional[List[str]] = None,
                       sort: Optional[List[Dict]] = None,
                       options: Optional[Dict] = None) -> Dict[str, Any]:
        """Assemble the JSON request body, omitting empty optional sections."""
        payload: Dict[str, Any] = {"q": query}
        if fields:
            payload["f"] = fields
        if sort:
            payload["s"] = sort
        if options:
            payload["o"] = options
        return payload

    @classmethod
    def _pagination_options(cls, page: int, per_page: int) -> Dict[str, int]:
        """Clamp paging values: page >= 1 and 1 <= per_page <= MAX_PER_PAGE."""
        return {
            "page": max(page, 1),
            "per_page": max(1, min(per_page, cls.MAX_PER_PAGE)),
        }

    @staticmethod
    def _normalize_patent_number(patent_number: str) -> str:
        """Strip thousands separators and surrounding whitespace."""
        return patent_number.replace(",", "").strip()

    @staticmethod
    def _build_advanced_query(keywords: Optional[List[str]],
                              assignee: Optional[str] = None,
                              start_date: Optional[str] = None,
                              end_date: Optional[str] = None,
                              cpc_codes: Optional[List[str]] = None) -> Dict:
        """Combine the supplied criteria into a single query dictionary.

        Raises:
            ValueError: If every criterion is empty.
        """
        conditions: List[Dict] = []

        if keywords:
            conditions.append({"patent_abstract": {"_text_all": keywords}})

        if assignee:
            conditions.append({
                "assignee_organization": {"_text_any": assignee.split()}
            })

        # Accept an open-ended range: either bound may be given on its own.
        date_range: Dict[str, str] = {}
        if start_date:
            date_range["_gte"] = start_date
        if end_date:
            date_range["_lte"] = end_date
        if date_range:
            conditions.append({"patent_date": date_range})

        if cpc_codes:
            conditions.append({"cpc_subclass_id": cpc_codes})

        if not conditions:
            raise ValueError("At least one search criterion is required.")

        # A single condition is sent bare; several are wrapped in "_and".
        return conditions[0] if len(conditions) == 1 else {"_and": conditions}

    def _request(self, endpoint: str, query: Dict, fields: Optional[List[str]] = None,
                 sort: Optional[List[Dict]] = None, options: Optional[Dict] = None) -> Dict:
        """
        Make a request to the PatentSearch API.

        Args:
            endpoint: API endpoint (e.g., "patent", "inventor")
            query: Query dictionary
            fields: List of fields to return
            sort: Sort specification
            options: Pagination and other options

        Returns:
            API response as dictionary

        Raises:
            requests.HTTPError: If the server answers with an error status
                (raised by ``response.raise_for_status()``)
        """
        url = f"{self.BASE_URL}/{endpoint}"
        payload = self._build_payload(query, fields, sort, options)

        response = requests.post(url, headers=self.headers, json=payload,
                                 timeout=self.timeout)
        response.raise_for_status()

        return response.json()

    def search_patents(self, query: Dict, fields: Optional[List[str]] = None,
                       sort: Optional[List[Dict]] = None, page: int = 1,
                       per_page: int = 100) -> Dict:
        """
        Search for patents.

        Args:
            query: Query dictionary (see PatentSearch API docs for syntax)
            fields: Fields to return (defaults to essential fields)
            sort: Sort specification (defaults to newest patent_date first)
            page: Page number (values below 1 are treated as 1)
            per_page: Results per page (clamped to the range 1-1000)

        Returns:
            Search results as returned by the API

        Example:
            # Search by keyword
            results = client.search_patents({
                "patent_abstract": {"_text_all": ["machine", "learning"]}
            })

            # Search by date range
            results = client.search_patents({
                "patent_date": {"_gte": "2024-01-01", "_lte": "2024-12-31"}
            })
        """
        if fields is None:
            fields = list(self._DEFAULT_SEARCH_FIELDS)

        if sort is None:
            sort = [{"patent_date": "desc"}]

        options = self._pagination_options(page, per_page)

        return self._request("patent", query, fields, sort, options)

    def get_patent(self, patent_number: str) -> Optional[Dict]:
        """
        Get details for a specific patent by number.

        Args:
            patent_number: Patent number (with or without commas)

        Returns:
            Patent data dictionary or None if not found
        """
        query = {"patent_number": self._normalize_patent_number(patent_number)}

        result = self._request("patent", query, list(self._PATENT_DETAIL_FIELDS))

        # A missing, null or empty "patents" entry all mean "not found".
        patents = result.get("patents")
        return patents[0] if patents else None

    def search_by_inventor(self, inventor_name: str, **kwargs) -> Dict:
        """
        Search patents by inventor name.

        Args:
            inventor_name: Inventor name, matched with the _text_phrase operator
            **kwargs: Additional parameters forwarded to search_patents()

        Returns:
            Search results
        """
        query = {"inventor_name": {"_text_phrase": inventor_name}}
        return self.search_patents(query, **kwargs)

    def search_by_assignee(self, assignee_name: str, **kwargs) -> Dict:
        """
        Search patents by assignee/company name.

        Args:
            assignee_name: Assignee/company name; split on whitespace and
                matched with the _text_any operator
            **kwargs: Additional parameters forwarded to search_patents()

        Returns:
            Search results
        """
        query = {"assignee_organization": {"_text_any": assignee_name.split()}}
        return self.search_patents(query, **kwargs)

    def search_by_classification(self, cpc_code: str, **kwargs) -> Dict:
        """
        Search patents by CPC classification code.

        Args:
            cpc_code: CPC subclass code (e.g., "H04N", "G06F")
            **kwargs: Additional parameters forwarded to search_patents()

        Returns:
            Search results
        """
        query = {"cpc_subclass_id": cpc_code}
        return self.search_patents(query, **kwargs)

    def search_by_date_range(self, start_date: str, end_date: str, **kwargs) -> Dict:
        """
        Search patents by date range.

        Args:
            start_date: Start date (YYYY-MM-DD)
            end_date: End date (YYYY-MM-DD)
            **kwargs: Additional parameters forwarded to search_patents()

        Returns:
            Search results
        """
        query = {
            "patent_date": {
                "_gte": start_date,
                "_lte": end_date
            }
        }
        return self.search_patents(query, **kwargs)

    def advanced_search(self, keywords: List[str], assignee: Optional[str] = None,
                        start_date: Optional[str] = None, end_date: Optional[str] = None,
                        cpc_codes: Optional[List[str]] = None, **kwargs) -> Dict:
        """
        Perform advanced search with multiple criteria.

        Args:
            keywords: List of keywords that must all appear in the abstract
            assignee: Assignee/company name
            start_date: Start date (YYYY-MM-DD); may be given without end_date
            end_date: End date (YYYY-MM-DD); may be given without start_date
            cpc_codes: List of CPC classification codes
            **kwargs: Additional parameters forwarded to search_patents()

        Returns:
            Search results

        Raises:
            ValueError: If no search criterion is supplied
        """
        query = self._build_advanced_query(
            keywords, assignee, start_date, end_date, cpc_codes
        )
        return self.search_patents(query, **kwargs)


def main():
    """Command-line interface for patent search.

    Reads ``sys.argv``: the first argument is either a mode flag
    (``--inventor``, ``--assignee``, ``--keywords``) followed by one or more
    values, or a patent number. Results are printed to stdout as indented
    JSON. Exits with status 1 on missing arguments, a patent that is not
    found, or any error raised while creating the client or querying.
    """
    args = sys.argv[1:]
    if not args:
        print("Usage:")
        print("  python patent_search.py <patent_number>")
        print("  python patent_search.py --inventor <name>")
        print("  python patent_search.py --assignee <company>")
        print("  python patent_search.py --keywords <word1> <word2> ...")
        sys.exit(1)

    mode, values = args[0], args[1:]

    # A flag without a value would otherwise send an empty search to the API.
    if mode in ("--inventor", "--assignee", "--keywords") and not values:
        print(f"Error: {mode} requires at least one value", file=sys.stderr)
        sys.exit(1)

    try:
        # Constructed inside the try so a missing API key is reported like
        # any other error instead of surfacing as a traceback.
        client = PatentSearchClient()

        if mode == "--inventor":
            results = client.search_by_inventor(" ".join(values))
        elif mode == "--assignee":
            results = client.search_by_assignee(" ".join(values))
        elif mode == "--keywords":
            query = {"patent_abstract": {"_text_all": values}}
            results = client.search_patents(query)
        else:
            # Anything that is not a known flag is treated as a patent number.
            patent = client.get_patent(mode)
            if patent:
                results = {"patents": [patent], "count": 1, "total_hits": 1}
            else:
                print(f"Patent {mode} not found")
                # SystemExit is not an Exception subclass, so it passes
                # through the handler below untouched.
                sys.exit(1)

        # Print results
        print(json.dumps(results, indent=2))

    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
# ← Back to uspto-database