# Import necessary libraries
from flask import Flask, request, jsonify
from docx import Document
import fitz  # PyMuPDF
import spacy
import traceback
import os
import tempfile
import re
import pyap
import requests

# import usaddress

# Initialize Flask app
app = Flask(__name__)

# Load the spaCy English model once at import time; every request handler
# below reuses this shared `nlp` instance (model loading is slow, so it
# must not happen per-request)
nlp = spacy.load("en_core_web_sm")

# Function to read text from a DOCX file
def read_text_from_docx(docx_path):
    """Return the full text of a DOCX file, one paragraph per line.

    ``docx_path`` may be a filesystem path or a file-like object
    (python-docx accepts both).
    """
    doc = Document(docx_path)
    # ''.join at C speed instead of quadratic += concatenation; the
    # trailing '\n' per paragraph matches the original output exactly
    return ''.join(paragraph.text + '\n' for paragraph in doc.paragraphs)

# Function to read text from a PDF file
def read_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of a PDF file.

    Each page's extracted text is followed by a newline, matching the
    original page-by-page append behavior.
    """
    with fitz.open(pdf_path) as pdf_doc:
        # iterate the document directly (PyMuPDF documents yield pages)
        # and join once instead of building the string with quadratic +=
        return ''.join(page.get_text() + '\n' for page in pdf_doc)

# Function to read text from a TXT file
def read_text_from_txt(txt_path):
    """Read a plain-text file and return its contents as a string.

    Uses an explicit UTF-8 encoding so the result does not depend on the
    platform's locale default (the original broke on Windows for
    non-ASCII resumes); errors='replace' keeps a single undecodable byte
    from aborting the whole upload.
    """
    with open(txt_path, 'r', encoding='utf-8', errors='replace') as file:
        return file.read()

# Function to extract entities (persons, locations, dates, emails, phones, and education) from text
def extract_entities(text):
    """Run spaCy NER plus the regex/pattern helpers and bundle the results.

    Returns a dict of lists keyed by category; NER-based lists keep the
    order in which spaCy emitted the entities.
    """
    doc = nlp(text)

    # Collect the NER-backed categories in a single pass over doc.ents.
    persons, locations, dates = [], [], []
    for ent in doc.ents:
        if ent.label_ == 'PERSON':
            persons.append(ent.text)
        elif ent.label_ == 'LOC':
            locations.append(ent.text)
        elif ent.label_ == 'DATE':
            dates.append(ent.text)

    return {
        'persons': persons,
        'locations': locations,
        'dates': dates,
        'emails': extract_emails(text),
        'phones': extract_phone_numbers(text),
        'education': extract_education_nlp(text),
        'expertise': extract_medical_expertise(text),
        # 'address': extract_addresses(text)  # disabled: yields multiple
        # candidate addresses per resume instead of a single one
    }

def validate_address_fun(address):
    """Validate a candidate address via the Bing Maps Locations API.

    Returns the formatted address string on a successful geocode, or
    None when the service cannot resolve it or the request fails.
    """
    # Prefer an environment variable; the embedded key is kept only as a
    # backward-compatible fallback.  NOTE(review): a live API key should
    # never live in source control — rotate this key and remove it.
    api_key = os.environ.get(
        'BING_MAPS_API_KEY',
        'AmREBa2I8sJ_e4p_G4UA00Y-pFpncDf8jG9J0f75aMZszZyXTfnYCdxWWyTuZSC1',
    )
    base_url = 'https://dev.virtualearth.net/REST/v1/Locations'  # TLS, not plain http
    params = {
        'q': address,
        'key': api_key,
    }
    try:
        # The timeout keeps one slow lookup from hanging the whole upload
        # request (the original call could block indefinitely).
        response = requests.get(base_url, params=params, timeout=10)
        data = response.json()
    except (requests.RequestException, ValueError):
        # Network failure or a non-JSON body: treat as "not validated"
        # instead of propagating a 500 to the client.
        return None

    # Check if the address was successfully validated
    if 'resourceSets' in data and data['resourceSets']:
        resources = data['resourceSets'][0]['resources']
        if resources:
            return resources[0]['address']['formattedAddress']

    return None

def extract_addresses(text):
    """Return every line of *text* that Bing Maps validates as an address.

    A validated address is kept only when its formatted form also occurs
    verbatim (case-insensitively) in the original text, which guards
    against the geocoder "correcting" a line into something that was
    never in the resume.
    """
    # Lowercase the full text once; reused for every containment check.
    lower_text = text.lower()

    validated_addresses = []
    for line in text.split('\n'):
        # Skip blank/whitespace lines instead of burning an API call on them.
        if not line.strip():
            continue

        # Validate the candidate line using the Bing Maps API.
        validated_address = validate_address_fun(line)
        if validated_address is None:
            continue

        # Removed the debug print() the original had here — library code
        # should not write to stdout.
        if validated_address.lower() in lower_text:
            validated_addresses.append(validated_address)

    return validated_addresses


def extract_medical_expertise(text):
    """Return the unique expertise terms found as spaCy entities in *text*.

    Matches spaCy entity spans exactly against a fixed pattern list (the
    project started with medical roles — see the git history — and now
    targets software roles).  Each term is reported once, in order of
    first appearance.
    """
    # frozenset gives O(1) membership tests instead of scanning a list
    # for every entity spaCy produces.
    expertise_patterns = frozenset([
        'Computer Engineer', 'Software Developer', 'Web Developer',
        'Systems Analyst', 'Data Scientist', 'Machine Learning Engineer',
        'Full Stack Developer', 'DevOps Engineer', 'Network Engineer',
        'Database Administrator', 'Cybersecurity Analyst', 'C# Developer',
        'Java Developer', 'Python Developer', 'HTML/CSS Developer',
        'JavaScript Developer', 'React.js Developer', 'Angular Developer',
        'Node.js Developer', 'SQL Developer', 'Cloud Engineer',
        'Big Data Engineer', 'Mobile App Developer', 'Frontend Developer',
        'Backend Developer', 'C', 'C++', 'Flutter', 'OOPJ', 'OOP', 'API',
        'HTML', 'CSS', 'Dart',
        # Add more patterns as needed
    ])

    doc = nlp(text)
    included_terms = set()  # dedupe while preserving first-seen order
    expertise = []

    for ent in doc.ents:
        if ent.text in expertise_patterns and ent.text not in included_terms:
            expertise.append(ent.text)
            included_terms.add(ent.text)
    return expertise


def extract_emails(text):
    """Return all email addresses found in *text*, in order of appearance.

    Fix: the original TLD character class was '[A-Z|a-z]', whose '|' is a
    literal pipe inside a character class — it wrongly accepted '|' as
    part of the top-level domain.  '[A-Za-z]' matches letters only.
    """
    email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
    return re.findall(email_pattern, text)
def extract_phone_numbers(text):
    """Return phone numbers found in *text*: a 10-digit core with an
    optional country code, parenthesized area code, and extension."""
    pattern = re.compile(r'''
        (?:\+\d{1,4}\s?)?                # Optional country code (e.g., +91, +1)
        (?:\(\d{1,4}\)\s?)?               # Optional area code in parentheses
        \d{10}                            # Main phone number exactly 10 digits
        (?:[.\s-]\d{1,4})?                # Optional extension or additional digits separated by ., space, or -
        \b                                # Word boundary to ensure the number is not part of a larger word
    ''', re.VERBOSE)
    return pattern.findall(text)

# Function to extract education details from text
def extract_education_nlp(text):
    """Extract education entries from resume text.

    Emits one dict per DATE entity spaCy finds, with keys: duration,
    institution, degree, specialization, school, result.  The non-date
    fields are filled heuristically from the same sentence; degrees come
    from regex matches against a fixed pattern list, paired with dates
    in document order.
    """
    doc = nlp(text)
    education_details = []

    # Define patterns for medical education
    # NOTE(review): these are used as raw regexes below — unescaped '.'
    # matches ANY character, and '(CA & IT)' forms a capture group, so
    # re.findall returns only 'CA & IT' for those patterns.  Escape the
    # metacharacters if literal matches are intended.
    medical_education_patterns = [
        'B.SC.(CA & IT)',
        'M.SC.(CA & IT)',
        'B.DC', 'M.SC', 'B.SC', 'BCOM', 'MCOM', 'BCA', 'MCA',
        'HSC', 'SSC', "BACHELOR'S", "MASTER'S", "HIGHER EDUCATION",'CHNA 1','Computer Network Engineer',
        'H.S.C','S.S.C ',
    ]
    # medical_education_patterns =[
    # "High School Diploma", "HSD",
    # "Certified Nursing Assistant", "CNA",
    # "Licensed Practical Nurse", "LPN",
    # "Associate's Degree in Nursing", "ADN",
    # "Bachelor of Science in Nursing", "BSN",
    # "Master of Science in Nursing", "MSN",
    # "Doctor of Nursing Practice", "DNP",
    # "Doctor of Medicine", "MD",
    # "Doctor of Osteopathic Medicine", "DO",
    # "Doctor of Pharmacy", "PharmD",
    # "Doctor of Dental Medicine", "DMD",
    # "Doctor of Veterinary Medicine", "DVM",
    # "Doctor of Physical Therapy", "DPT",
    # "Bachelor of Science in Medical Technology", "BSMT",
    # "Bachelor of Science in Radiologic Technology", "BSRT",
    # "Bachelor of Science in Respiratory Therapy", "BSRT",
    # "Doctor of Optometry", "OD",
    # "High School Diploma", "HSD",
    # "Associate's Degree in Computer Science", "AS CS",
    # "Associate's Degree in Software Development", "AS SD",
    # "Bachelor's Degree in Computer Science", "BSc CS",
    # "Bachelor's Degree in Software Engineering", "BSc SE",
    # "Bachelor's Degree in Computer Engineering", "BSc CE",
    # "Bachelor of Computer Applications", "BCA",
    # "Bachelor of Science in Information Technology", "BSCIT",
    # "Master's Degree in Computer Science", "MSc CS",
    # "Master's Degree in Software Engineering", "MSc SE",
    # "Master's Degree in Computer Engineering", "MSc CE",
    # "Master of Computer Applications", "MCA",
    # "Master of Science in Information Technology", "MSCIT",
    # "Ph.D. in Computer Science", "Ph.D. CS",
    # "Ph.D. in Software Engineering", "Ph.D. SE",
    # "Certification in Full Stack Development", "FSD",
    # "Certification in Mobile App Development", "MAD",
    # "Certification in DevOps", "DevOps"
    # ]


    education_details1 = []

    # Extract matches for education patterns (case-insensitive, raw text)
    for pattern in medical_education_patterns:
        matches = re.findall(pattern, text, re.IGNORECASE)
        if matches:
            education_details1.extend(matches)

    # Reverse the education details
    # education_details1.reverse()

    # Heuristic: if the 4-digit years among the DATE entities appear in
    # ascending order, the resume presumably lists education oldest-first,
    # so flip the matched degrees to pair them with DATEs in document
    # order.  NOTE(review): an empty `years` list is trivially sorted, so
    # the reverse also fires when no standalone years are found — confirm
    # that is intended.
    years = [int(ent.text) for ent in doc.ents if ent.label_ == 'DATE' and ent.text.isdigit()]
    if years == sorted(years, reverse=False):
        education_details1.reverse()
        
    # Index into education_details1 for round-robin degree assignment
    i = 0

    # Iterate over entities in the document; each DATE starts a new entry
    for ent in doc.ents:

        if ent.label_ == 'DATE':
            duration = ent.text
            education_info = {
                'duration': duration,
                'institution': '',
                'degree': '',
                'specialization': '',
                'school': '',
                'result': ''
            }

            # Iterate over tokens in the same sentence to fill in the
            # institution (ORG, falling back to GPE) and school (NORP)
            for token in ent.sent:
                if token.ent_type_ == 'ORG':
                    education_info['institution'] = token.text
                elif token.ent_type_ == 'GPE' and not education_info['institution']:
                    education_info['institution'] = token.text
                elif token.ent_type_ == 'NORP':
                    education_info['school'] = token.text

            # Extract degree information using spaCy's linguistic features
            # NOTE(review): this condition is a tautology (X == 'NOUN' or
            # X != 'NOUN' is always True), so a degree is assigned for
            # every DATE entity whenever any pattern matched above.
            if ent.root.head.pos_ == 'NOUN' or ent.root.head.pos_ != 'NOUN':
                if education_details1:
                    # modulo wraps around when there are more DATEs than
                    # matched degrees
                    education_info['degree'] = education_details1[i % len(education_details1)]
                    i += 1

            # Extract result or percentage from the DATE's dependents
            for child in ent.root.children:
                if child.ent_type_ == 'CARDINAL' and '%' in child.text:
                    education_info['result'] = child.text

            # Extract specialization
            # NOTE(review): `token` here is the variable leaked from the
            # sentence loop above, so only the LAST token of the sentence
            # is ever checked — this was probably meant to live inside
            # that loop.
            if token.ent_type_ == 'PRODUCT':
                education_info['specialization'] = token.text

            education_details.append(education_info)

    return education_details

# Function to read document based on its type (docx, pdf, txt)
def read_document(file, file_type):
    """Dispatch to the reader matching *file_type* ('docx', 'pdf', 'txt').

    *file* is a file-like upload object (read directly for docx, spilled
    to disk for pdf) or a path for txt.  Returns the extracted text, or
    the string "Unsupported file type" for any other type (kept for
    backward compatibility with existing callers).
    """
    if file_type == 'docx':
        return read_text_from_docx(file)
    elif file_type == 'pdf':
        # PyMuPDF needs a real path, so save the upload to a temp file.
        # try/finally guarantees cleanup even if extraction raises — the
        # original leaked the temp file on error.
        temp_fd, temp_file_path = tempfile.mkstemp(suffix='.pdf')
        try:
            with os.fdopen(temp_fd, 'wb') as temp:
                temp.write(file.read())
            return read_text_from_pdf(temp_file_path)
        finally:
            os.remove(temp_file_path)
    elif file_type == 'txt':
        return read_text_from_txt(file)
    else:
        return "Unsupported file type"

# Endpoint for file upload
@app.route('/upload', methods=['POST'])
def upload_file():
    """Accept a resume upload (docx/pdf/txt) and return extracted entities as JSON.

    Responses: 200 with {'entities': ...} on success, 400 for a missing,
    empty, or unsupported file, 500 with the error message on failure.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file provided'}), 400

    uploaded_file = request.files['file']

    if uploaded_file.filename == '':
        return jsonify({'error': 'No file selected'}), 400

    # Guard filenames with no extension: the original rsplit('.', 1)[1]
    # raised IndexError (an unhandled 500) on e.g. "resume".
    if '.' not in uploaded_file.filename:
        return jsonify({'error': 'Unsupported file type'}), 400

    file_type = uploaded_file.filename.rsplit('.', 1)[1].lower()

    if file_type not in ['docx', 'pdf', 'txt']:
        return jsonify({'error': 'Unsupported file type'}), 400

    try:
        text = read_document(uploaded_file, file_type)
        entities = extract_entities(text)
        return jsonify({'entities': entities}), 200

    except Exception as e:
        # Top-level boundary: log the full traceback, return a JSON error.
        print("Error:", str(e))
        traceback.print_exc()  # Print the full traceback
        return jsonify({'error': str(e)}), 500

# Run the Flask app (development server only)
if __name__ == '__main__':
    # NOTE(review): debug=True enables the Werkzeug debugger and reloader —
    # never enable it on a production deployment.
    app.run(debug=True)
















# from flask import Flask, request, jsonify
# from pyresparser import ResumeParser
# import traceback
# import os
# import tempfile
# import spacy

# app = Flask(__name__)

# # Load spaCy English model
# nlp = spacy.load("en_core_web_sm")

# # Set spaCy model for pyresparser
# os.environ["SPACY_MODEL"] = "en_core_web_sm"

# def read_document(file, file_type):
#     if file_type == 'docx':
#         text = ResumeParser(file).get_extracted_data()
#     elif file_type == 'pdf':
#         text = ResumeParser(file).get_extracted_data()
#     elif file_type == 'txt':
#         text = ResumeParser(file).get_extracted_data()
#     else:
#         return "Unsupported file type"

#     return text

# @app.route('/upload', methods=['POST'])
# def upload_file():
#     if 'file' not in request.files:
#         return jsonify({'error': 'No file provided'}), 400

#     uploaded_file = request.files['file']

#     if uploaded_file.filename == '':
#         return jsonify({'error': 'No file selected'}), 400

#     file_type = uploaded_file.filename.rsplit('.', 1)[1].lower()

#     try:
#         text = read_document(uploaded_file, file_type)
#         # Further processing using spaCy or other methods
#         entities = extract_entities(text)
#         return jsonify({'entities': entities}), 200

#     except Exception as e:
#         print("Error:", str(e))
#         traceback.print_exc()  # Print the full traceback
#         return jsonify({'error': str(e)}), 500

# if __name__ == '__main__':
#     app.run(debug=True)
























# from flask import Flask, request, jsonify
# from pyresparser import extract_text

# app = Flask(__name__)

# @app.route('/upload_resume', methods=['POST'])
# def upload_resume():
#     # Check if the post request has the file part
#     if 'file' not in request.files:
#         return jsonify({'error': 'No file part'})

#     resume_file = request.files['file']

#     # Check if a file is selected
#     if resume_file.filename == '':
#         return jsonify({'error': 'No selected file'})

#     # Extract data from the resume using pyresparser
#     try:
#         parsed_data = parse_resume(resume_file)
#         return jsonify(parsed_data)
#     except Exception as e:
#         return jsonify({'error': str(e)}), 500

# def parse_resume(resume_file):
#     # Extract information from the resume using pyresparser
#     data = extract_text(resume_file)

#     # Extract relevant information
#     extracted_data = {
#         'name': data.get('name', ''),
#         'email': data.get('email', ''),
#         'phone': data.get('mobile_number', ''),
#         'skills': data.get('skills', []),
#         'education': data.get('degree', ''),
#         'experience': data.get('experience', []),
#     }

#     return extracted_data

# if __name__ == '__main__':
#     app.run(debug=True)
