mirror of
https://github.com/dontriskit/awesome-ai-system-prompts
synced 2026-04-09 10:31:46 +02:00
BlackboxAI
This commit is contained in:
1628
Blackbox.ai/Blackbox-Agent.md
Normal file
1628
Blackbox.ai/Blackbox-Agent.md
Normal file
File diff suppressed because it is too large
Load Diff
144
Blackbox.ai/Blackbox-Complete.ts
Normal file
144
Blackbox.ai/Blackbox-Complete.ts
Normal file
@@ -0,0 +1,144 @@
|
||||
/**
|
||||
* Blackbox Extension Prompt Template (VS Code) - Condensed
|
||||
*/
|
||||
|
||||
// --- Common Context Interface ---
|
||||
/**
 * Editor state shared by the request builders in this file.
 * Only `fullCode`, `languageId` and `prefix` are required.
 */
interface VscodeEditorContext {
  /** Text currently selected in the editor, if any. */
  selection?: string;
  /** Complete text of the file being edited. */
  fullCode: string;
  /** Language identifier of the file. */
  languageId: string;
  /** Code before cursor. */
  prefix: string;
  /** Code after cursor. */
  suffix?: string;
  /** Code surrounding the cursor, split into the part above and the part below. */
  neighboringCode?: { above: string; below: string };
  /** Diff text forwarded to commit-message generation. */
  gitDiff?: string;
  /** Path/content pairs for several files (used for README generation). */
  multipleFileContents?: Array<{ filePath: string; content: string }>;
  /** Earlier chat turns; each entry may carry a user and/or a blackbox message. */
  chatHistory?: Array<{ user?: string; blackbox?: string }>;
}
|
||||
|
||||
// ==================================
|
||||
// 1. Inline Code Editing/Generation (Ctrl+I)
|
||||
// ==================================
|
||||
|
||||
/** System prompt for inline edits: one intro sentence followed by five bullet guidelines. */
const INLINE_EDIT_SYSTEM_PROMPT = [
  "You are a coding assistant specializing in code completion and editing. Your task is to modify the selected code based on the prompt, considering the entire code file for context. Follow these guidelines:",
  "- Generate the modified code that should replace the selected portion.",
  "- Return ONLY the modified code snippet, without any markdown formatting, natural language explanations, or triple backticks.",
  "- Ensure the modified code integrates seamlessly with the rest of the file.",
  "- Maintain consistent style, indentation, and naming conventions with the existing code.",
  "- Strictly answer with code only",
].join("\n");
|
||||
|
||||
/**
 * Builds the user message for an inline edit request.
 *
 * @param prompt  The user's instruction; appended on the last line after "Prompt: ".
 * @param context Editor context; `selection` (empty string when falsy) and `fullCode` are embedded.
 * @returns The selection section, the whole-file section, the closing instruction and the prompt,
 *          joined by newlines.
 */
function createInlineEditUserPrompt(prompt: string, context: VscodeEditorContext): string {
  const selectedText = context.selection ? context.selection : "";

  const sections = [
    "## Selected Code",
    "[START SELECTION]",
    selectedText,
    "[END SELECTION]",
    "",
    "## Entire Code File",
    "[START ENTIRE FILE]",
    context.fullCode,
    "[END FILE]",
    "",
    "Generate the modified code that should replace the selected portion. If there is no selection, generate code that should be inserted at the cursor position. Strictly answer with code only:",
    "Prompt: " + prompt,
  ];

  return sections.join("\n");
}
|
||||
|
||||
/*
|
||||
Conceptual API Call Structure:
|
||||
[
|
||||
{ role: "system", content: INLINE_EDIT_SYSTEM_PROMPT },
|
||||
{ role: "user", content: createInlineEditUserPrompt(userInstruction, context) }
|
||||
]
|
||||
*/
|
||||
|
||||
// ============================
|
||||
// 2. Code Completion (Typing Pause)
|
||||
// ============================
|
||||
// Note: Actual prompt structure is internal to the Blackbox API.
|
||||
|
||||
/**
 * Assembles the request payload for code completion.
 *
 * @param context             Editor context; supplies `languageId`, `prefix` (sent as `prompt`)
 *                            and the optional neighbouring code above/below.
 * @param userId              Identifier of the requesting user.
 * @param premiumStatus       Forwarded unchanged.
 * @param autocompleteVersion Forwarded unchanged ('quality' or 'speed').
 * @returns A plain object; `contextAbove`/`contextBelow` are undefined when no neighbouring code is given.
 */
function createCodeCompletionInput(context: VscodeEditorContext, userId: string, premiumStatus: boolean, autocompleteVersion: 'quality' | 'speed'): any {
  const { languageId, prefix, neighboringCode } = context;
  const contextAbove = neighboringCode?.above;
  const contextBelow = neighboringCode?.below;

  return {
    userId,
    languageId,
    prompt: prefix,
    contextAbove,
    contextBelow,
    source: "visual studio",
    premiumStatus,
    autocompleteVersion,
  };
}
|
||||
|
||||
// ============================
|
||||
// 3. Code Search (// ? Query)
|
||||
// ============================
|
||||
// Note: Actual prompt structure is internal to the Blackbox API.
|
||||
|
||||
/**
 * Assembles the request payload for a code search.
 *
 * @param query  Search text; sent as `textInput`.
 * @param userId Identifier of the requesting user.
 * @returns A plain object with `userId`, `textInput` and a fixed `source`.
 */
function createCodeSearchInput(query: string, userId: string): any {
  const payload = { userId, textInput: query, source: "visual studio" };
  return payload;
}
|
||||
|
||||
// ============================
|
||||
// 4. Blackbox AI Chat (Side Panel / Commands)
|
||||
// ============================
|
||||
// Note: Uses a webview; prompts are handled by the webview's backend.
|
||||
// Context is passed from the extension to the webview.
|
||||
|
||||
/** One chat history entry; either side of the exchange may be absent. */
interface ChatMessage {
  /** Text written by the user. */
  user?: string;
  /** Text produced by Blackbox. */
  blackbox?: string;
}
|
||||
/** Structure passed *to* the webview or used by its backend. */
interface ChatPromptInput {
  /** The message the user just sent. */
  userMessage: string;
  /** Optional editor context accompanying the message. */
  context?: VscodeEditorContext;
  /** Previous turns of the conversation. */
  chatHistory: Array<ChatMessage>;
  /** Command that triggered the chat, e.g. 'explain_code', 'comment_code'. */
  commandTrigger?: string;
  /** Optional workspace identifier. */
  workspaceId?: string;
}
|
||||
|
||||
// --- Example User Prompts Sent to Chat ---
|
||||
/** Wraps `code` in a fenced block tagged with `languageId` and asks for an explanation. */
const explainCodePrompt = (code: string, languageId: string) => {
  const fence = "```";
  return fence + languageId + "\n" + code + "\n" + fence + "\n\nExplain this code";
};

/** Wraps `code` in a fenced block tagged with `languageId` and asks for a rewrite. */
const improveCodePrompt = (code: string, languageId: string) => {
  const fence = "```";
  return fence + languageId + "\n" + code + "\n" + fence + "\n\nRewrite this code better";
};

/** Wraps the code above the cursor in a fenced block and asks for a single continuation. */
const suggestCodePrompt = (codeAbove: string, languageId: string) => {
  const fence = "```";
  return fence + languageId + "\n" + codeAbove + "\n" + fence + "\n\ngive 1 suggestion to continue this code. give code only.";
};

// Code provided as context
const commentCodeInstruction = "give me this code with proper commenting. comments should clear consice. stay focused, this is very important for my career.";
|
||||
|
||||
// ==================================
|
||||
// 5. Commit Message Generation (SCM Integration)
|
||||
// ==================================
|
||||
|
||||
/**
 * Assembles the request payload for commit message generation.
 *
 * @param context Editor context; its optional `gitDiff` is sent as `diff`.
 * @param userId  Identifier of the requesting user.
 * @returns A plain object with `userId`, `diff` (possibly undefined) and a fixed `source`.
 */
function createCommitMessageInput(context: VscodeEditorContext, userId: string): any {
  const { gitDiff } = context;
  return {
    userId,
    diff: gitDiff,
    source: "visual studio", // or 'source control'
  };
}
|
||||
|
||||
// ============================
|
||||
// 6. README Generation (Command)
|
||||
// ============================
|
||||
|
||||
/**
 * Assembles the request payload for README generation.
 *
 * Each file becomes "File: <path>", a blank line, then its content; files are separated by a
 * "---" line surrounded by blank lines.
 *
 * @param context Editor context; `multipleFileContents` supplies the files.
 * @param userId  Identifier of the requesting user.
 * @returns A plain object with `userId` and `allFiles` (undefined when no file list is present).
 */
function createReadmeInput(context: VscodeEditorContext, userId: string): any {
  const files = context.multipleFileContents;
  const allFiles =
    files == null
      ? undefined
      : files
          .map(({ filePath, content }) => `File: ${filePath}\n\n${content}`)
          .join('\n\n---\n\n');

  return { userId, allFiles };
}
|
||||
|
||||
// ============================
|
||||
// 7. Code Review / Editor Chat (Older Command)
|
||||
// ============================
|
||||
|
||||
/**
 * Assembles the payload for the older code review / editor chat command.
 *
 * The file text is split on "\n" and every line is prefixed with its 1-based number
 * ("<n>: <line>"), each followed by a newline.
 *
 * @param context Editor context; `fullCode` and `languageId` are read.
 * @returns A plain object with `language` and the numbered `code`.
 */
function createEditorChatInput(context: VscodeEditorContext): any {
  const numbered = context.fullCode
    .split("\n")
    .map((text, idx) => `${idx + 1}: ${text}\n`)
    .join("");

  return { language: context.languageId, code: numbered };
}
|
||||
1
Blackbox.ai/README.md
Normal file
1
Blackbox.ai/README.md
Normal file
@@ -0,0 +1 @@
|
||||
extracted from ~/.vscode/extensions/blackboxapp.blackboxagent-3.1.36/dist using extraction.py
|
||||
76
Blackbox.ai/extraction-scripts/v0.py
Normal file
76
Blackbox.ai/extraction-scripts/v0.py
Normal file
@@ -0,0 +1,76 @@
|
||||
import re
|
||||
import os
|
||||
|
||||
def extract_prompt_templates(filepath):
    """
    Extracts potential prompt templates (primarily multi-line template literals)
    from a JavaScript/TypeScript file.

    Args:
        filepath (str): The path to the .js or .ts file.

    Returns:
        list: A list of potential prompt template strings (stripped of
            surrounding whitespace). Empty when the file is missing or
            cannot be read; an error is printed in that case.
    """
    if not os.path.exists(filepath):
        print(f"Error: File not found at {filepath}")
        return []

    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()
    except Exception as e:
        print(f"Error reading file {filepath}: {e}")
        return []

    # Regex to find template literals (strings enclosed in backticks).
    # A backslash always consumes the character after it, so both an escaped
    # backtick (\`) and an escaped backslash (\\) right before the closing
    # backtick are handled. The previous pattern let a bare backslash match as
    # an ordinary character, so a literal ending in `\\` swallowed its own
    # closing backtick and ran into the next literal.
    # re.DOTALL lets the escaped character be a newline (line continuation).
    # Backticks nested inside ${...} expressions are still not understood.
    prompt_template_regex = r'`((?:\\.|[^`\\])*)`'

    # Alternative focusing on structure (less likely if minified)
    # assignment_regex = r'(?:const|let|var)\s+([\w\$]+)\s*=\s*(`(?:\\`|[^`])*`);'

    matches = re.findall(prompt_template_regex, content, re.DOTALL)
    print(f"Found {len(matches)} potential template literals.")

    found_templates = []
    for match_content in matches:
        # The regex group captures the content *inside* the backticks
        template = match_content.strip()

        # Basic filtering: keep templates that are multi-line, contain both
        # '<' and '>' (XML-like tags), or are longer than 100 characters, as
        # these are more likely to be actual prompts.
        is_multiline = '\n' in template
        has_angle_brackets = '<' in template and '>' in template
        if is_multiline or has_angle_brackets or len(template) > 100:
            found_templates.append(template)

    return found_templates
|
||||
|
||||
# --- Main Execution ---
|
||||
if __name__ == "__main__":
    # IMPORTANT: Replace this with the actual path to your extension.js file
    # (a full path such as "/path/to/your/project/extension.js" also works).
    file_to_analyze = "extension.js"

    print(f"Analyzing file: {file_to_analyze}")

    templates = extract_prompt_templates(file_to_analyze)

    if not templates:
        print("\nNo likely prompt templates (long/multi-line/tagged template literals) found.")
    else:
        print(f"\n--- Extracted {len(templates)} Potential Prompt Templates ---")
        # Print every template under a 1-based heading, followed by a divider.
        for number, body in enumerate(templates, start=1):
            print(f"\n--- Template {number} ---")
            print(body)
            print("--------------------")
|
||||
77
Blackbox.ai/extraction-scripts/v1a.py
Normal file
77
Blackbox.ai/extraction-scripts/v1a.py
Normal file
@@ -0,0 +1,77 @@
|
||||
import re
|
||||
import os
|
||||
|
||||
def extract_prompt_templates(filepath, output_filepath="extracted_prompts.txt"):
    """
    Extracts potential prompt templates (primarily multi-line template literals)
    from a JavaScript/TypeScript file and saves them to an output file.

    Args:
        filepath (str): The path to the .js or .ts file.
        output_filepath (str): The path where the extracted templates will be saved.

    Returns:
        int: The number of potential templates saved to the file, or -1 on error
            (missing input, unreadable input, or a failure while extracting/writing).
    """
    if not os.path.exists(filepath):
        print(f"Error: Input file not found at {filepath}")
        return -1

    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()
    except Exception as e:
        print(f"Error reading input file {filepath}: {e}")
        return -1

    # Regex to find template literals (strings enclosed in backticks).
    # A backslash always consumes the following character, so an escaped
    # backtick (\`) stays inside the literal while an escaped backslash (\\)
    # right before the closing backtick no longer hides that backtick.
    # re.DOTALL lets the escaped character be a newline.
    prompt_template_regex = r'`((?:\\.|[^`\\])*)`'

    templates_saved_count = 0

    try:
        matches = re.findall(prompt_template_regex, content, re.DOTALL)
        print(f"Found {len(matches)} potential template literals in the source.")

        with open(output_filepath, 'w', encoding='utf-8') as outfile:
            outfile.write(f"--- Extracted Potential Prompt Templates from: {filepath} ---\n\n")

            for match_content in matches:
                template = match_content.strip()

                # Basic filtering (multi-line, contains tags, or reasonably long)
                if '\n' in template or ('<' in template and '>' in template) or len(template) > 100:
                    outfile.write(f"--- Template {templates_saved_count + 1} ---\n")
                    outfile.write(template)
                    outfile.write("\n\n--------------------\n\n")
                    templates_saved_count += 1

        print(f"Successfully saved {templates_saved_count} potential templates to: {output_filepath}")
        return templates_saved_count

    except Exception as e:
        print(f"An error occurred during extraction or writing to file: {e}")
        return -1
|
||||
|
||||
# --- Main Execution ---
|
||||
if __name__ == "__main__":
    # IMPORTANT: Replace this with the actual path to your extension.js file
    # (a full path such as "/path/to/your/project/extension.js" also works).
    file_to_analyze = "extension.js"

    # Define the output file name
    output_file = "extracted_prompts.txt"

    print(f"Analyzing file: {file_to_analyze}")

    count = extract_prompt_templates(file_to_analyze, output_file)

    # Negative count signals an error, zero means nothing passed the filter.
    if count < 0:
        print("Extraction failed due to an error.")
    elif count == 0:
        print(f"\nNo likely prompt templates (long/multi-line/tagged template literals) found or saved to '{output_file}'.")
    else:
        print(f"Extraction complete. Check the file '{output_file}' for results.")
|
||||
136
Blackbox.ai/extraction-scripts/v1b.py
Normal file
136
Blackbox.ai/extraction-scripts/v1b.py
Normal file
@@ -0,0 +1,136 @@
|
||||
import re
|
||||
import os
|
||||
|
||||
def extract_prompt_templates(filepath, output_filepath="extracted_prompts.txt", min_length=200):
    """
    Extracts potential prompt templates from a JS/TS file, attempting to filter
    out non-prompt template literals (like HTML/CSS/JS code snippets).

    A literal is kept when it is at least `min_length` characters long, shows no
    noise keyword and does not look like HTML/CSS, and either contains a prompt
    keyword / XML-like tag or is longer than 1000 characters.

    Args:
        filepath (str): Path to the .js or .ts file.
        output_filepath (str): Path to save the extracted templates.
        min_length (int): Minimum character length for a template to be considered.

    Returns:
        int: Number of potential templates saved, or -1 on error.
    """
    if not os.path.exists(filepath):
        print(f"Error: Input file not found at {filepath}")
        return -1

    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()
    except Exception as e:
        print(f"Error reading input file {filepath}: {e}")
        return -1

    # Regex for template literals. A backslash always consumes the following
    # character, so an escaped backtick (\`) stays inside the literal while an
    # escaped backslash (\\) right before the closing backtick no longer hides
    # that backtick (which previously merged the literal with the next one).
    template_literal_regex = r'`((?:\\.|[^`\\])*)`'

    # Keywords strongly suggesting a prompt template
    prompt_keywords = [
        'You are BLACKBOXAI', 'TOOL USE', 'RULES', 'Parameters:', 'Usage:',
        'SYSTEM INFORMATION', 'OBJECTIVE', 'CAPABILITIES', 'MCP SERVERS',
        'current working directory', 'execute_command', 'read_file',
        'create_file', 'edit_file', 'replace_in_file', 'browser_action',
        'ask_followup_question', 'attempt_completion', 'search_code',
        'search_files', 'list_files', 'tool_name', 'parameter1_name',
        'brainstorm_plan'
        # Add more specific keywords if needed
    ]
    # Convert to lowercase for case-insensitive matching
    prompt_keywords_lower = {kw.lower() for kw in prompt_keywords}

    # Keywords/patterns strongly suggesting it's *not* a prompt (HTML/CSS/JS boilerplate)
    noise_keywords = [
        '<!DOCTYPE html>', '<html lang=', '<head>', '<body>', '<script', '<style',
        'function(', '=> {', 'class extends', 'export class', 'import {', 'require(',
        'window.addEventListener', 'document.querySelector', '.CodeMirror',
        'acquireVsCodeApi', 'const vscode =', 'module.exports', 'props', 'state',
        'React.', 'Vue.', 'angular.', 'getElementById', 'createElement',
        'padding:', 'margin:', 'color:', 'background-color:', 'font-size:',
        'display: flex', 'position: absolute', 'z-index:', 'border-radius:',
        'webpack', 'eslint', 'JSON.stringify', 'JSON.parse', 'console.log',
        '# sourceMappingURL='  # Common in minified JS
        # Add more specific noise patterns if needed
    ]
    noise_keywords_lower = {kw.lower() for kw in noise_keywords}

    # Regex to find XML-like tool tags, e.g., <tool_name>
    tool_tag_regex = re.compile(r'<\w+(_\w+)*>')

    templates_saved_count = 0
    total_literals_found = 0

    try:
        matches = re.findall(template_literal_regex, content, re.DOTALL)
        total_literals_found = len(matches)
        print(f"Found {total_literals_found} total template literals in the source.")

        with open(output_filepath, 'w', encoding='utf-8') as outfile:
            outfile.write(f"--- Extracted Potential Prompt Templates from: {filepath} ---\n")
            outfile.write(f"--- (Filtered from {total_literals_found} total template literals found) ---\n\n")

            for i, match_content in enumerate(matches):
                template = match_content.strip()
                template_lower = template.lower()
                is_potential_prompt = False

                # --- Filtering Logic ---
                if len(template) < min_length:
                    continue  # Too short

                # Check for strong positive indicators
                has_prompt_keyword = any(kw in template_lower for kw in prompt_keywords_lower)
                has_tool_tag = bool(tool_tag_regex.search(template))

                # Check for strong negative indicators
                has_noise_keyword = any(kw in template_lower for kw in noise_keywords_lower)

                # More specific noise check (e.g., looks like pure HTML)
                is_likely_html_css = template_lower.startswith(('<!doctype', '<html', '<style', 'body {', 'div {', '.', '#')) and not has_prompt_keyword

                # --- Decision ---
                # Keep if it has prompt keywords or tool tags, AND is not clearly noise
                if (has_prompt_keyword or has_tool_tag) and not has_noise_keyword and not is_likely_html_css:
                    is_potential_prompt = True
                # Keep if it's very long and doesn't have strong noise indicators (might catch prompts without keywords)
                elif len(template) > 1000 and not has_noise_keyword and not is_likely_html_css:
                    is_potential_prompt = True

                if is_potential_prompt:
                    templates_saved_count += 1
                    # "Original Index" is the 1-based position among all literals found.
                    outfile.write(f"--- Template {templates_saved_count} (Original Index: {i+1}) ---\n")
                    outfile.write(template)
                    outfile.write("\n\n--------------------\n\n")
                # --- End Filtering Logic ---

        print(f"Successfully saved {templates_saved_count} potential templates to: {output_filepath}")
        return templates_saved_count

    except Exception as e:
        print(f"An error occurred during extraction or writing to file: {e}")
        return -1
|
||||
|
||||
# --- Main Execution ---
|
||||
if __name__ == "__main__":
    # IMPORTANT: Replace this with the actual path to your extension.js file
    # (a full path such as "/path/to/your/project/extension.js" also works).
    file_to_analyze = "extension.js"

    output_file = "extracted_prompts_filtered.txt"  # Changed output name

    print(f"Analyzing file: {file_to_analyze}")

    count = extract_prompt_templates(file_to_analyze, output_file)

    # Negative count signals an error, zero means nothing passed the filter.
    if count < 0:
        print("Extraction failed due to an error.")
    elif count == 0:
        print(f"\nNo likely prompt templates matching the criteria found or saved to '{output_file}'.")
        print("Consider adjusting filtering keywords or min_length if prompts are missed.")
    else:
        print(f"Extraction complete. Check the file '{output_file}' for results.")
|
||||
154
Blackbox.ai/extraction-scripts/v2.py
Normal file
154
Blackbox.ai/extraction-scripts/v2.py
Normal file
@@ -0,0 +1,154 @@
|
||||
import re
|
||||
import os
|
||||
|
||||
def is_likely_code_or_markup(text, text_lower):
    """
    Heuristically checks if a string is more likely code, HTML, or CSS
    than a natural language prompt.

    Args:
        text (str): The candidate template text.
        text_lower (str): The same text, lower-cased by the caller.

    Returns:
        bool: True when the text looks like code/markup, False otherwise.
    """
    # 1. Check for common code keywords/patterns (increase sensitivity)
    code_keywords = [
        'function(', '=> {', 'class ', 'constructor(', ' Symbol(', '.prototype',
        'addEventListener', 'querySelector', 'getElementById', 'createElement',
        'Object.assign', 'Object.defineProperty', 'Promise.resolve', 'Promise.reject',
        'async (', 'await ', 'require(', 'import {', 'export default', 'module.exports',
        'console.log', 'console.error', 'try {', '} catch (', ' for (', ' while (',
        'arguments.length', 'this.', '.call(null', '.bind(this', '.map(', '.filter(', '.reduce(',
        '.forEach(', '.test(', '.exec(', '.match(', '.replace(', '.split(', '.join(',
        'JSON.stringify', 'JSON.parse', 'new Error(', 'throw new ', '# sourceMappingURL=',
        'static {'  # Added from example
    ]
    # The keywords are compared against lower-cased text, so they must be
    # lower-cased too; otherwise mixed-case entries such as 'JSON.stringify'
    # can never match.
    if any(kw.lower() in text_lower for kw in code_keywords):
        # Check ratio if a keyword is found, maybe it's just mentioned in a prompt
        code_symbols = len(re.findall(r'[{}()\[\];=.,+\-*/&|!<>?:%]', text))
        words = len(re.findall(r'\b\w+\b', text))
        if words == 0 or code_symbols / (code_symbols + words) > 0.25:  # High ratio of symbols
            return True
        # Low ratio might still be a prompt mentioning a keyword

    # 2. Check for common HTML/CSS patterns
    html_css_keywords = [
        '<!DOCTYPE html>', '<html', '<head>', '<body', '<script', '<style',
        'padding:', 'margin:', 'color:', 'background-color:', 'font-size:',
        'display: flex', 'position: absolute', 'z-index:', 'border-radius:',
        '.CodeMirror', 'w-button', 'w-form'  # From examples
    ]
    # Same case normalisation as above ('<!DOCTYPE html>', '.CodeMirror').
    if any(kw.lower() in text_lower for kw in html_css_keywords):
        return True  # Pretty likely not a prompt

    # Check for high density of HTML tags
    html_tags = len(re.findall(r'<[/!]?\s*\w+', text))
    if html_tags > 5 and html_tags / len(text.split()) > 0.1:  # More than 1 tag per 10 words
        return True

    # Check for high density of CSS rules
    css_rules = len(re.findall(r'[{};:]', text))
    if css_rules > 10 and css_rules / len(text) > 0.05:  # High density of CSS characters
        # Check if it *also* lacks prompt keywords to be more sure
        prompt_keywords_check = ['tool use', 'rules', 'parameters', 'usage', 'objective', '<tool_name>']
        if not any(pk in text_lower for pk in prompt_keywords_check):
            return True

    # Check for the HTML entity list pattern (like Template 4)
    html_entities = len(re.findall(r'&[#a-zA-Z0-9]+;', text))
    if html_entities > 20 and html_entities / len(text) > 0.02:  # High density of entities
        return True

    return False
|
||||
|
||||
def extract_prompt_templates(filepath, output_filepath="extracted_prompts_filtered_v2.txt", min_length=150):
    """
    Extracts potential prompt templates, attempting to strongly filter out non-prompts.

    A literal is kept when it is at least `min_length` characters long, is not
    classified as code/markup by `is_likely_code_or_markup`, and contains at
    least one strong prompt keyword or three or more of the weaker ones.

    Args:
        filepath (str): Path to the .js or .ts file.
        output_filepath (str): Path to save the extracted templates.
        min_length (int): Minimum character length for a template to be considered.

    Returns:
        int: Number of potential templates saved, or -1 on error.
    """
    if not os.path.exists(filepath):
        print(f"Error: Input file not found at {filepath}")
        return -1

    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()
    except Exception as e:
        print(f"Error reading input file {filepath}: {e}")
        return -1

    # Regex for template literals. A backslash always consumes the following
    # character, so an escaped backtick (\`) stays inside the literal while an
    # escaped backslash (\\) right before the closing backtick no longer hides
    # that backtick (which previously merged the literal with the next one).
    template_literal_regex = r'`((?:\\.|[^`\\])*)`'

    # Increased specificity for prompt keywords/structures
    strong_prompt_keywords = [
        'You are BLACKBOXAI', '====\nTOOL USE\n====', '====\nRULES\n====',
        '====\nSYSTEM INFORMATION\n====', '====\nOBJECTIVE\n====',
        '<execute_command>', '<read_file>', '<create_file>', '<edit_file>',
        '<replace_in_file>', '<ask_followup_question>', '<attempt_completion>',
        'brainstorm_plan'  # Added from example
    ]
    other_prompt_keywords = [
        'Parameters:', 'Usage:', 'Description:', 'current working directory',
        'search_code', 'search_files', 'list_files', 'browser_action',
        'tool_name', 'parameter1_name', 'MCP SERVERS', 'CAPABILITIES'
    ]
    # Lower-cased copies for case-insensitive matching against template_lower.
    strong_prompt_keywords_lower = {kw.lower() for kw in strong_prompt_keywords}
    other_prompt_keywords_lower = {kw.lower() for kw in other_prompt_keywords}

    templates_saved_count = 0
    total_literals_found = 0

    try:
        matches = re.findall(template_literal_regex, content, re.DOTALL)
        total_literals_found = len(matches)
        print(f"Found {total_literals_found} total template literals in the source.")

        with open(output_filepath, 'w', encoding='utf-8') as outfile:
            outfile.write(f"--- Extracted Potential Prompt Templates from: {filepath} ---\n")
            outfile.write(f"--- (Filtered from {total_literals_found} total template literals found) ---\n\n")

            for i, match_content in enumerate(matches):
                template = match_content.strip()
                template_lower = template.lower()
                is_potential_prompt = False

                # --- Filtering Logic ---
                if len(template) < min_length:
                    continue

                # Check for strong positive indicators
                has_strong_prompt_keyword = any(kw in template_lower for kw in strong_prompt_keywords_lower)
                has_other_prompt_keywords_count = sum(1 for kw in other_prompt_keywords_lower if kw in template_lower)

                # Check for strong negative indicators (more aggressively)
                if is_likely_code_or_markup(template, template_lower):
                    continue  # Skip if it looks like code/markup

                # --- Decision ---
                # Require at least one strong keyword OR multiple (e.g., 3+) other keywords
                if has_strong_prompt_keyword or has_other_prompt_keywords_count >= 3:
                    is_potential_prompt = True

                if is_potential_prompt:
                    templates_saved_count += 1
                    # "Original Index" is the 1-based position among all literals found.
                    outfile.write(f"--- Template {templates_saved_count} (Original Index: {i+1}) ---\n")
                    outfile.write(template)
                    outfile.write("\n\n--------------------\n\n")
                # --- End Filtering Logic ---

        print(f"Successfully saved {templates_saved_count} potential templates to: {output_filepath}")
        return templates_saved_count

    except Exception as e:
        print(f"An error occurred during extraction or writing to file: {e}")
        return -1
|
||||
|
||||
# --- Main Execution ---
|
||||
if __name__ == "__main__":
    file_to_analyze = "extension.js"
    output_file = "extracted_prompts_filtered_v2.txt"  # New output name

    print(f"Analyzing file: {file_to_analyze}")
    count = extract_prompt_templates(file_to_analyze, output_file)

    # Negative count signals an error, zero means nothing passed the filter.
    if count < 0:
        print("Extraction failed due to an error.")
    elif count == 0:
        print(f"\nNo likely prompt templates matching the more stringent criteria found or saved to '{output_file}'.")
        print("Consider adjusting filtering keywords or min_length if prompts are missed.")
    else:
        print(f"Extraction complete. Check the file '{output_file}' for results.")
|
||||
206
Blackbox.ai/extraction-scripts/v4.py
Normal file
206
Blackbox.ai/extraction-scripts/v4.py
Normal file
@@ -0,0 +1,206 @@
|
||||
import re
|
||||
import os
|
||||
|
||||
def is_likely_code_or_markup(text, text_lower):
    """
    Heuristically checks if a string is more likely code, HTML, or CSS
    than a natural language prompt. Adjusted to be less sensitive to syntax
    if strong prompt indicators are present elsewhere.

    Args:
        text (str): The candidate template text.
        text_lower (str): The same text, lower-cased by the caller.

    Returns:
        bool: True when the text looks like code/markup/data, False otherwise.
    """
    # Reduced list of very common code keywords that might appear in prompts
    # but less likely to dominate unless it *is* code.
    code_keywords = [
        'function(', ' class ', ' constructor(', ' Symbol(', '.prototype',  # Structure
        'addEventListener', 'querySelector', 'getElementById', 'createElement',  # DOM specific
        'Object.assign', 'Object.defineProperty', 'Promise.resolve', 'Promise.reject',  # Object/Promise
        'module.exports', 'export default', 'import {',  # Module system
        'console.log', 'console.error',  # Logging (less reliable on its own)
        ' try {', '} catch (', ' for (', ' while (',  # Control flow
        '.map(', '.filter(', '.reduce(', '.forEach(',  # Array methods often in code blocks
        'JSON.stringify', 'JSON.parse', 'new Error(', 'throw new ',
        '# sourceMappingURL='  # Definite noise
    ]
    # Keywords strongly suggesting JS/TS but could appear in prompts describing code
    ambiguous_code_keywords = ['async (', 'await ', 'this.', '=> {']

    # Keywords are matched against lower-cased text, so lower-case them too;
    # otherwise mixed-case entries such as 'JSON.stringify' never count.
    code_keyword_count = sum(1 for kw in code_keywords if kw.lower() in text_lower)
    ambiguous_code_keyword_count = sum(1 for kw in ambiguous_code_keywords if kw.lower() in text_lower)

    # Check for common HTML/CSS patterns
    html_css_keywords = [
        '<!DOCTYPE html>', '<html', '<head>', '<body', '</script>', '</style>',  # Closing tags added
        'padding:', 'margin:', 'color:', 'background-color:', 'font-size:',
        'display: flex', 'position: absolute', 'z-index:', 'border-radius:',
        '.CodeMirror', 'w-button', 'w-form', '::placeholder', ':-ms-input-placeholder'  # Added from examples
    ]
    html_css_keyword_count = sum(1 for kw in html_css_keywords if kw.lower() in text_lower)

    # Symbol/Tag Ratios
    code_symbols = len(re.findall(r'[{}()\[\];=.,+\-*/&|!<>?:%]', text))
    words = len(re.findall(r'\b\w+\b', text))
    word_count = words if words > 0 else 1  # never zero, so safe as a divisor
    symbol_ratio = code_symbols / (code_symbols + word_count)

    html_tags = len(re.findall(r'<[/!]?\s*\w+', text))
    html_tag_ratio = html_tags / word_count

    text_length = len(text)
    css_rules = len(re.findall(r'[{};:]', text))
    css_char_ratio = css_rules / text_length if text_length > 0 else 0

    html_entities = len(re.findall(r'&[#a-zA-Z0-9]+;', text))
    entity_ratio = html_entities / text_length if text_length > 0 else 0

    # --- Decision Logic for Noise ---
    # Very high symbol ratio, few code words -> likely data/minified (like Template 607-609)
    if symbol_ratio > 0.45 and code_keyword_count < 1 and ambiguous_code_keyword_count < 1:
        return True
    # Multiple specific code keywords + high symbol ratio suggests actual code block
    if code_keyword_count >= 2 and symbol_ratio > 0.25:
        return True
    # Or several ambiguous ones + high symbols
    if ambiguous_code_keyword_count >= 2 and symbol_ratio > 0.30:
        return True
    # Web/CSS keywords are strong indicators of noise
    if html_css_keyword_count >= 2 or html_tag_ratio > 0.1:
        return True
    if css_char_ratio > 0.07:
        return True
    # High density of HTML entities (like Template 4)
    if entity_ratio > 0.05 and html_entities > 15:
        return True

    return False  # Otherwise, might be a prompt
|
||||
|
||||
def extract_prompt_templates(filepath, output_filepath="extracted_prompts_filtered_v4.txt", min_length=150):
|
||||
"""
|
||||
Extracts potential prompt templates, attempting to strongly filter out non-prompts.
|
||||
Version 4: Fine-tuned noise detection and keyword priority.
|
||||
"""
|
||||
if not os.path.exists(filepath):
|
||||
print(f"Error: Input file not found at {filepath}")
|
||||
return -1
|
||||
|
||||
try:
|
||||
with open(filepath, 'r', encoding='utf-8') as f:
|
||||
content = f.read()
|
||||
except Exception as e:
|
||||
print(f"Error reading input file {filepath}: {e}")
|
||||
return -1
|
||||
|
||||
template_literal_regex = r'`((?:\\`|[^`])*)`'
|
||||
|
||||
# Keywords indicating a high probability of being a prompt
|
||||
very_strong_prompt_keywords = [
|
||||
'you are blackboxai', # Case insensitive check below
|
||||
'you are a helpful assistant',
|
||||
]
|
||||
# Structure markers are also very strong indicators
|
||||
structure_markers = [
|
||||
'====\nTOOL USE\n====', '====\nRULES\n====',
|
||||
'====\nSYSTEM INFORMATION\n====', '====\nOBJECTIVE\n====',
|
||||
'====\nCAPABILITIES\n====', '====\nMCP SERVERS\n====',
|
||||
'--- START OF EXAMPLE ---', '--- END OF EXAMPLE ---'
|
||||
]
|
||||
# Specific tool tags
|
||||
tool_tags = [
|
||||
'<execute_command>', '<read_file>', '<create_file>', '<edit_file>',
|
||||
'<replace_in_file>', '<ask_followup_question>', '<attempt_completion>',
|
||||
'<brainstorm_plan>', '<search_code>', '<search_files>', '<list_files>',
|
||||
'<browser_action>', '<use_mcp_tool>', '<access_mcp_resource>', '<tool_name>'
|
||||
]
|
||||
other_prompt_keywords = [
|
||||
'parameters:', 'usage:', 'description:', 'current working directory',
|
||||
'tool use formatting', 'tool use guidelines', '# tools', 'mcp servers are not always necessary'
|
||||
]
|
||||
|
||||
very_strong_lower = {kw.lower() for kw in very_strong_prompt_keywords}
|
||||
structure_lower = {kw.lower() for kw in structure_markers}
|
||||
tool_tags_lower = {kw.lower() for kw in tool_tags}
|
||||
other_lower = {kw.lower() for kw in other_prompt_keywords}
|
||||
|
||||
templates_saved_count = 0
|
||||
total_literals_found = 0
|
||||
|
||||
try:
|
||||
matches = re.findall(template_literal_regex, content, re.DOTALL)
|
||||
total_literals_found = len(matches)
|
||||
print(f"Found {total_literals_found} total template literals in the source.")
|
||||
|
||||
with open(output_filepath, 'w', encoding='utf-8') as outfile:
|
||||
outfile.write(f"--- Extracted Potential Prompt Templates from: {filepath} ---\n")
|
||||
outfile.write(f"--- (Filtered from {total_literals_found} total template literals found, v4 logic) ---\n\n")
|
||||
|
||||
for i, match_content in enumerate(matches):
|
||||
template = match_content.strip()
|
||||
template_lower = template.lower()
|
||||
is_potential_prompt = False
|
||||
|
||||
# --- Filtering Logic ---
|
||||
if len(template) < min_length:
|
||||
continue
|
||||
|
||||
# 1. Check for VERY strong starting keywords first
|
||||
# Use slicing for performance if templates are huge
|
||||
prefix_lower = template_lower[:100] # Check first 100 chars
|
||||
starts_with_very_strong = any(prefix_lower.startswith(kw) for kw in very_strong_lower)
|
||||
|
||||
# 2. If not starting strongly, check if it looks like noise
|
||||
likely_noise = False
|
||||
if not starts_with_very_strong:
|
||||
likely_noise = is_likely_code_or_markup(template, template_lower)
|
||||
|
||||
if likely_noise:
|
||||
continue
|
||||
|
||||
# 3. Check for other strong prompt indicators (structure, tools)
|
||||
has_structure_marker = any(kw in template_lower for kw in structure_lower)
|
||||
has_tool_tag = any(kw in template_lower for kw in tool_tags_lower)
|
||||
has_other_prompt_keywords_count = sum(1 for kw in other_lower if kw in template_lower)
|
||||
|
||||
# --- Decision ---
|
||||
# Keep if:
|
||||
# - It starts with a very strong keyword
|
||||
# - OR it has structure markers OR multiple tool tags (strong indicators)
|
||||
# - OR it has at least one tool tag AND multiple other keywords
|
||||
# - OR it has many (4+) other keywords (might be a prompt without tags)
|
||||
if starts_with_very_strong:
|
||||
is_potential_prompt = True
|
||||
elif not likely_noise: # Only proceed if not flagged as noise
|
||||
tool_tag_count = sum(1 for tag in tool_tags_lower if tag in template_lower)
|
||||
if has_structure_marker or tool_tag_count >= 2:
|
||||
is_potential_prompt = True
|
||||
elif tool_tag_count >= 1 and has_other_prompt_keywords_count >= 2:
|
||||
is_potential_prompt = True
|
||||
elif has_other_prompt_keywords_count >= 4:
|
||||
is_potential_prompt = True
|
||||
|
||||
|
||||
if is_potential_prompt:
|
||||
templates_saved_count += 1
|
||||
outfile.write(f"--- Template {templates_saved_count} (Original Index: {i+1}) ---\n")
|
||||
outfile.write(template)
|
||||
outfile.write("\n\n--------------------\n\n")
|
||||
# --- End Filtering Logic ---
|
||||
|
||||
print(f"Successfully saved {templates_saved_count} potential templates to: {output_filepath}")
|
||||
return templates_saved_count
|
||||
|
||||
except Exception as e:
|
||||
print(f"An error occurred during extraction or writing to file: {e}")
|
||||
return -1
|
||||
|
||||
# --- Main Execution ---
|
||||
if __name__ == "__main__":
    # Script entry point: run the extractor against a fixed input file and
    # report the outcome on stdout.
    file_to_analyze = "extension.js"
    output_file = "extracted_prompts_filtered_v4.txt"  # v4-specific output name

    print(f"Analyzing file: {file_to_analyze}")
    count = extract_prompt_templates(file_to_analyze, output_file)

    # The extractor returns the number of templates it wrote, or -1 when the
    # input could not be read or the extraction/writing step raised.
    if count < 0:
        print("Extraction failed due to an error.")
    elif count == 0:
        # Ran cleanly, but nothing passed the filters.
        print(f"\nNo likely prompt templates matching the v4 criteria found or saved to '{output_file}'.")
        print("Consider adjusting filtering keywords or min_length if prompts are missed.")
    else:
        print(f"Extraction complete. Check the file '{output_file}' for results.")
|
||||
Reference in New Issue
Block a user