To help models understand PDF content, we put both the extracted text and an image of each page into the model's context. The model can then draw on the text and the images together when generating a response. This is useful when, for example, a diagram contains key information that isn't in the text.
```python
import os

import requests
from openai import OpenAI

# Upload the PDF to OpenAI so the model can reference it by file ID
openai_client = OpenAI()
pdf_url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"  # A dummy PDF file; use any PDF you want
response = requests.get(pdf_url)
file_data = response.content
file = openai_client.files.create(
    file=("dummy.pdf", file_data),  # (filename, bytes) so the upload carries a proper name
    purpose="user_data",
)

# Create a client that routes requests through the Keywords AI proxy
client = OpenAI(
    base_url="https://api.keywordsai.co/api",
    api_key=os.getenv("KEYWORDSAI_API_KEY_TEST"),
)

model = "gpt-4.1"
file_content = [
    {"type": "text", "text": "What's this file about?"},
    {
        "type": "file",
        "file": {
            "file_id": file.id,
        },
    },
]

response = client.chat.completions.create(
    model=model,
    messages=[
        {
            "role": "user",
            "content": file_content,
        }
    ],
)
assert response is not None
```
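If you'd rather skip the upload step, the Chat Completions file part also accepts the PDF inline as a base64 data URL (`filename` plus `file_data`) instead of a `file_id`. Below is a minimal sketch of that variant, assuming the Keywords AI proxy forwards the `file_data` field to OpenAI unchanged; the filename `dummy.pdf` is just illustrative.

```python
import base64
import os

import requests
from openai import OpenAI

pdf_url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
pdf_bytes = requests.get(pdf_url).content

client = OpenAI(
    base_url="https://api.keywordsai.co/api",
    api_key=os.getenv("KEYWORDSAI_API_KEY_TEST"),
)

response = client.chat.completions.create(
    model="gpt-4.1",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What's this file about?"},
                {
                    "type": "file",
                    "file": {
                        # Inline the PDF as a base64 data URL instead of uploading it first
                        "filename": "dummy.pdf",
                        "file_data": "data:application/pdf;base64,"
                        + base64.b64encode(pdf_bytes).decode("utf-8"),
                    },
                },
            ],
        }
    ],
)
print(response.choices[0].message.content)
```

Inlining base64 keeps everything in a single request, at the cost of re-sending the PDF bytes on every call; the uploaded-file approach above is the better fit when you query the same document repeatedly.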