See the accompanying post for an explanation of what this script does: Automated Popular English Fiction Genre Classification System
import openai
import os
import csv
import time
# Set up API key.
# Prefer the OPENAI_API_KEY environment variable so the secret does not have to
# be stored in this file; the literal placeholder is kept as a fallback for
# anyone who pastes their key directly into the script.
openai.api_key = os.environ.get('OPENAI_API_KEY', 'your_chatgpt_api_key')

# Directory containing text files and output CSV path
input_folder = r"C:\your_directory_address"
output_csv = r"C:\where_you_want_the_csv\gptapi_genre_analysis_results.csv"

# Token and rate limits
# NOTE: despite the name, analyze_text applies this cap to whitespace-separated
# words (content.split()), not to model tokens.
MAX_TOKENS_PER_FILE = 2000
BASE_RATE_LIMIT_DELAY = 4.0  # Base delay, in seconds (passed to time.sleep), for rate limiting
MAX_RETRIES = 5  # Max retry attempts per file for rate limits
# Initialize CSV and write headers.
# Mode 'w' truncates any existing file at this path, so every run starts from
# an empty results file containing only the header row.
with open(output_csv, mode='w', newline='', encoding='utf-8') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow([
        'File Name',
        'Science Fiction Rating',
        'SF Level (None/Mild/Moderate/High)',
        'Plot Description',
        'OCR Quality',
        'Genre 1',
        'Genre 2',
        'Genre 3',
    ])
# Helpers and entry point for analyzing text using ChatCompletion, with retry
# logic for server errors and rate limits.
def _clean_field(raw_value):
    """Return *raw_value* without surrounding whitespace or Markdown asterisks."""
    return raw_value.strip().strip('*').strip()


def _parse_analysis_response(result_text):
    """Pull the labelled fields out of the model's reply.

    Each line of *result_text* is checked for the labels below, in this order;
    the first label found on a line claims that line, and a later line with the
    same label overwrites an earlier one.

    Returns:
        A tuple ``(sci_fi_rating, sf_level, plot_description, ocr_quality,
        genres)``. The first four are strings, or None when the label never
        appeared. ``genres`` is always a list of exactly three entries, each a
        string or None.
    """
    labels = (
        ("Science Fiction Rating:", 'sci_fi_rating'),
        ("SF Level:", 'sf_level'),
        ("Plot Description:", 'plot_description'),
        ("OCR Quality:", 'ocr_quality'),
        ("Genres:", 'genres'),
    )
    found = {key: None for _, key in labels}

    for line in result_text.splitlines():
        for label, key in labels:
            if label in line:
                # Take what follows the label itself rather than the first
                # colon on the line, so a stray earlier colon cannot shift
                # the value.
                found[key] = _clean_field(line.split(label, 1)[1])
                break

    # Genres arrive as one comma-separated value; spread them over three
    # columns, using None for any column the model did not fill.
    genre_text = found['genres'] or ''
    genres = [_clean_field(part) or None for part in genre_text.split(',')]
    genres = (genres + [None, None, None])[:3]

    return (
        found['sci_fi_rating'],
        found['sf_level'],
        found['plot_description'],
        found['ocr_quality'],
        genres,
    )


def analyze_text(file_name, content):
    """Ask the chat model to rate and classify one story and parse its reply.

    Args:
        file_name: Name or path of the source file; only its basename is used,
            for log messages and the first column of the result.
        content: Full text of the story. Only the first MAX_TOKENS_PER_FILE
            whitespace-separated words are sent to the model.

    Returns:
        A list ``[file name, science fiction rating, SF level, plot
        description, OCR quality, genre 1, genre 2, genre 3]`` when the reply
        contains the four non-genre fields, otherwise None. None is also
        returned when rate-limit retries are exhausted, when an API error is
        not retried, or on any other exception (each case prints a message).
    """
    base_name = os.path.basename(file_name)

    # Split once and reuse the word list for both the length check and the cut.
    words = content.split()
    if len(words) > MAX_TOKENS_PER_FILE:
        content = " ".join(words[:MAX_TOKENS_PER_FILE])

    # Define messages for Chat API with clear labels
    messages = [
        {"role": "system", "content": "You are an expert in early 20th-century science fiction and popular fiction literature."},
        {"role": "user", "content": (
            f"Analyze the following text for science fiction elements and suggest genres based on 19th and early 20th-century fiction categories:\n\n{content}\n\n"
            "Output the following information, with no extra commentary:\n"
            "1. Science Fiction Rating: Rate from 0 to 10, based only on scientifically plausible or speculative elements grounded in science.\n"
            "2. SF Level: Briefly state the level as 'none,' 'mild,' 'moderate,' or 'high,' indicating the degree of science fiction elements.\n"
            "3. Plot Description: Describe the general plot in one to two sentences (30 words or fewer), focusing on genre-relevant elements only.\n"
            "4. OCR Quality: Rate from A (clearly readable) to E (unreadable).\n"
            "5. Genres: Based on the plot description and themes, identify up to three genres from the following list:\n"
            " - Gothic, Sensation, Social Problems, Didactic, Adventure, Utopian, Dystopian, Colonial, Imperialist, Domestic, Spiritualist, War, New Woman, Allegorical, Historical, Horror, Supernatural, Crime, Detective, Invention, Planetary, Lost Race, Romance, Western, Travel, Prediction\n"
            "If the story combines elements of multiple genres, provide them in separate fields, such as 'historical war' or 'prediction romance'."
        )}
    ]

    delay = BASE_RATE_LIMIT_DELAY
    retries = 0
    server_error_retries = 3  # Limit server error retries to 3 attempts

    while retries < MAX_RETRIES:
        try:
            # Make API request.
            # NOTE(review): max_tokens=100 looks tight for five labelled
            # fields; a truncated reply would most likely lose the trailing
            # Genres line, which the completeness check below does not
            # require. Confirm before raising it.
            response = openai.ChatCompletion.create(
                model="gpt-4o",
                messages=messages,
                max_tokens=100,
                temperature=0.2
            )

            # Parse API response
            result_text = response.choices[0].message['content'].strip()
            (sci_fi_rating, sf_level, plot_description,
             ocr_quality, genres) = _parse_analysis_response(result_text)

            # Check that all fields are filled (genres are optional)
            if sci_fi_rating and sf_level and plot_description and ocr_quality:
                return [base_name, sci_fi_rating, sf_level, plot_description, ocr_quality, *genres]

            print(f"Incomplete response for file {base_name}.")
            return None

        except openai.error.RateLimitError:
            print(f"Rate limit reached for file {base_name}. Retrying in {delay} seconds...")
            time.sleep(delay)
            delay *= 2  # Exponential backoff for rate limits
            retries += 1

        except openai.error.APIError as e:
            # Handle server errors with a separate retry budget; these retries
            # do not count against MAX_RETRIES.
            # NOTE(review): detection relies on the substring "server_error"
            # in the exception text; any other APIError is not retried.
            if "server_error" in str(e) and server_error_retries > 0:
                print(f"Server error encountered for file {base_name}. Retrying in 60 seconds...")
                time.sleep(60)
                server_error_retries -= 1
            else:
                print(f"Server error for file {base_name} after retries. Moving to next file.")
                return None

        except Exception as e:
            # Best-effort batch job: report the problem and let the caller
            # move on to the next file.
            print(f"Error processing file {base_name}: {e}")
            return None

    print(f"Max retries reached for file {base_name}. Skipping.")
    return None
# Process each file in the folder
for file_name in os.listdir(input_folder):
    file_path = os.path.join(input_folder, file_name)

    # Only regular .txt files are analyzed. The extension is compared
    # case-insensitively so that names ending in ".TXT" are not silently skipped.
    if not (os.path.isfile(file_path) and file_name.lower().endswith('.txt')):
        continue

    # Read the story. Only failures from this step are reported as read errors.
    try:
        with open(file_path, 'r', encoding='utf-8') as text_file:
            content = text_file.read()
    except (OSError, UnicodeError) as e:
        print(f"Error reading file {os.path.basename(file_path)}: {e}")
        continue

    if not content.strip():
        print(f"File {os.path.basename(file_path)} is empty. Skipping.")
        continue

    # Analyze text and write result to CSV if valid. The CSV is reopened in
    # append mode for every row, so each result is on disk as soon as its file
    # has been processed.
    try:
        result = analyze_text(file_name, content)
        if result:
            with open(output_csv, mode='a', newline='', encoding='utf-8') as csv_file:
                csv.writer(csv_file).writerow(result)
    except Exception as e:
        # Keep the batch going: one bad file or a failed CSV write should not
        # abort the remaining files.
        print(f"Error processing file {os.path.basename(file_path)}: {e}")

    # Delay to respect base rate limit before next file
    time.sleep(BASE_RATE_LIMIT_DELAY)

print("Batch processing complete.")
Example of output:
File Name | Science Fiction Rating | SF Level (None/Mild/Moderate/High) | Plot Description | OCR Quality | Genre 1 | Genre 2 | Genre 3 |
---|---|---|---|---|---|---|---|
1901 A Dead Finger.txt | 2 | mild | A man encounters a mysterious, disembodied finger with a life of its own, causing him distress and confusion. | C | Horror | Supernatural | Sensation |
1901 Something to His Advantage.txt | 0 | none | A mysterious man confronts another about a murder, leading to revelations about identity and past actions. | B | Crime | Detective | Sensation |
1901 The Prodigal of Glencourt A Romance of Maoriland.txt | 0 | none | A man named Sydney Black, recovering from illness, navigates a criminal underworld in New Zealand with Maori allies and outlaws. | B | Colonial | Adventure | Crime |
1901 The Cankerworm.txt | 0 | none | A young woman, Linda, faces personal turmoil and societal pressures as she seeks her missing husband, with the help of a devoted friend. | C | Domestic | Social Problems | Romance |
1901 Old House in Cripplegate.txt | 0 | none | A young man faces familial pressure to abandon his love interest due to his uncle’s unresolved past grievances. | C | Domestic | Historical | Romance |
1901 A Lesson in Love A Complete Story.txt | 0 | none | A man contemplates marriage as a career necessity while navigating social expectations and romantic interests. | C | Domestic | Romance | Social Problems |
1901 An Old Mans Darling.txt | 0 | none | A young girl encounters a fortune-teller who predicts a future filled with love, hate, and sorrow, leading to introspection. | C | Romance | Supernatural | Gothic |
1901 Salomy Janes Kiss.txt | 0 | none | A vigilante group captures two horse thieves, but one escapes after a surprising kiss from a local girl. | C | Western | Adventure | Crime |
1901 The Hearts Mistake.txt | 0 | none | Sylvia becomes a companion to a widow, Mrs. Seymour, as they travel and navigate social expectations and personal ambitions. | B | Domestic | Social Problems | Travel |
1901 Lady Margots Leap.txt | 0 | none | A young woman, Lilian, under the watchful eye of her aunt, secretly meets a charming man with a mysterious past. | B | Gothic | Domestic | Romance |
1901 The Landlord of the Big Flume Hotel.txt | 0 | none | A divorced couple reunites at a hotel, discussing their past and potential future relationships. | C | Domestic | Western | |
1901 Nikolas Farewell.txt | 2 | mild | A group of friends in Venice encounter the mysterious Nikola, whose presence evokes fear and curiosity due to his past experiments and enigmatic nature. | B | Adventure | Supernatural | Gothic |
You are most welcome to use this code in your research; if you do, please leave a comment. If you use it in a paper, please cite it. An example citation in MLA style:
Hogan, Neil. “Automated Popular English Fiction Genre Classification System for 19th and Early 20th Century Australian Newspaper Fiction Stories.” Retrieving “Science in Fiction” from Early 20th Century Australian Newspapers, Neil Hogan, 2 Nov. 2024, neilhogan.com/automated-popular-english-fiction-genre-classification-system-for-19th-and-early-20th-century-australian-newspaper-fiction-stories/