Practical use of pathlib - Printable Version +- Python Forum (https://python-forum.io) +-- Forum: General (https://python-forum.io/forum-1.html) +--- Forum: Code sharing (https://python-forum.io/forum-5.html) +--- Thread: Practical use of pathlib (/thread-5699.html) |
# Practical use of pathlib - Larz60+ - Oct-17-2017
#
# The app below uses pathlib objects to read the header record from each of
# the national FIPS code files and print it.  Eventually it will process each
# file into an easy-access format, but as a proof of concept it only prints
# the first line of each.

# ---------------------------------------------------------------------------
# module ReadFipsCodeData.py:
# ---------------------------------------------------------------------------
from pathlib import Path, PurePath
import json


class ReadFipsCodeData:
    """Print the header record of every FIPS code file listed in code_descr.json.

    The constructor loads two JSON dictionaries from ``./data``:
    ``code_descr.json`` (per-file delimiter, file name and field descriptions)
    and ``fips_class.json`` (FIPS class code descriptions).
    """

    def __init__(self):
        self.fips_dirs = []
        self.homepath = Path('.')
        self.datapath = self.homepath / 'data'
        self.code_descr_filepath = self.datapath / 'code_descr.json'
        self.fips_class_filepath = self.datapath / 'fips_class.json'
        self.code_datapath = (
            self.homepath / '..' / 'data' / 'Codes' / 'ANSI-FIPS-Codes' / 'AllFiles'
        )
        with self.code_descr_filepath.open('r', encoding='utf-8') as jn:
            self.code_categories = json.load(jn)
        with self.fips_class_filepath.open('r', encoding='utf-8') as jn:
            self.fips_class = json.load(jn)

    def build_national_code_db(self):
        """Print the file name and the split header line of each code file.

        Only the first non-blank line of each file is used (it should be the
        heading in each case).  It can be matched against the corresponding
        ``self.code_categories`` entry to get a description of the file and
        of each field.
        """
        for value in self.code_categories.values():
            f = value['filename']
            filepath = self.code_datapath / f
            delimiter = value['delim']
            with filepath.open('r', encoding='ISO-8859-1') as fin:
                # Iterate lazily: only the header line is needed for now.
                for line in fin:
                    line = line.strip()
                    if not line:
                        continue
                    # The description file marks whitespace-separated files
                    # with the word 'spaces'; passing that word to split()
                    # would split on the literal text instead.
                    if delimiter in ('', 'spaces'):
                        header = line.split()
                    else:
                        header = line.split(delimiter)
                    print(f'\nfilename: {f}')
                    print(f'Header: {header}')
                    break


def main():
    cj = ReadFipsCodeData()
    cj.build_national_code_db()


if __name__ == '__main__':
    main()

# This produces the following output:
# (the output listing is not included in this printable version of the thread)
#
# The two dictionaries that are used to get file information are created with
# the following application.

# ---------------------------------------------------------------------------
# CreateFIPScodeDescJson.py:
# ---------------------------------------------------------------------------
from pathlib import Path, PurePath
import json


class CreateFIPScodeDescJson:
    """Build the FIPS description dictionaries and save them as JSON.

    Instantiating the class writes ``data/fips_class.json`` and
    ``data/code_descr.json`` below the current directory.
    """

    def __init__(self):
        self.homepath = Path('.')
        self.datapath = self.homepath / 'data'
        self.code_descr_filepath = self.datapath / 'code_descr.json'
        self.fips_class_filepath = self.datapath / 'fips_class.json'

        self.fips_class = {
            'H1': 'identifies an active county or statistically equivalent entity that does not qualify'
                  ' under subclass C7 or H6.',
            'H4': 'identifies a legally defined inactive or nonfunctioning county or statistically '
                  'equivalent entity that does not qualify under subclass H6.',
            'H5': 'identifies census areas in Alaska, a statistical county equivalent entity.',
            'H6': 'identifies a county or statistically equivalent entity that is areally coextensive '
                  'or governmentally consolidated with an incorporated place, part of an incorporated '
                  'place, or a consolidated city.',
            'C7': 'identifies an incorporated place that is an independent city; that is, it also '
                  'serves as a county equivalent because it is not part of any county, and a minor '
                  'civil division (MCD) equivalent because it is not part of any MCD.'
        }

        self.code_categories = {
            'national_aia.txt': {
                'delim': ',',
                'filename': 'national_aia.txt',
                'fields': {
                    'AIANNHCE': {
                        'length': '4',
                        'type': 'String',
                        'descr': 'Current American Indian/Alaska Native/Native Hawaiian area census code'
                    },
                    'AIANNHNAME': {
                        'length': '',
                        'type': 'String',
                        'descr': 'American Indian Area name and legal/statistical area description'
                    }
                },
            },
            'national_cd113.txt': {
                'delim': 'spaces',
                'filename': 'national_cd113.txt',
                'fields': {
                    'STATE': {
                        'length': '2',
                        'type': 'String',
                        'descr': 'State Postal Code'
                    },
                    'STATEFP': {
                        'length': '2',
                        'type': 'String',
                        'descr': 'State FIPS CODE'
                    },
                    'CD113FP': {
                        'length': '4',
                        'type': 'String',
                        'descr': '113th Congressional District FIPS Code'
                    },
                    'NAMELSAD': {
                        'length': '41',
                        'type': 'String',
                        'descr': 'Current name and the translated legal/statistical area description'
                                 ' for the congressional district'
                    }
                },
            },
            'national_county.txt': {
                'delim': ',',
                'filename': 'national_county.txt',
                'fields': {
                    'STATE': {
                        'length': '2',
                        'type': 'String',
                        'descr': 'State Postal Code'
                    },
                    'STATEFP': {
                        'length': '2',
                        'type': 'String',
                        'descr': 'State FIPS Code'
                    },
                    'COUNTYFP': {
                        'length': '3',
                        'type': 'String',
                        'descr': 'County FIPS Code'
                    },
                    'COUNTYNAME': {
                        'length': '100',
                        'type': 'String',
                        'descr': 'County Name and Legal/Statistical Area Description'
                    },
                    'CLASSFP': {
                        'length': '2',
                        'type': 'String',
                        'descr': 'FIPS Class Code'
                    }
                },
            },
            'national_cousub.txt': {
                'delim': ',',
                'filename': 'national_cousub.txt',
                'fields': {
                    'STATE': {
                        'length': '2',
                        'type': 'String',
                        'descr': 'State Postal Code'
                    },
                    'STATEFP': {
                        'length': '2',
                        'type': 'String',
                        'descr': 'Current state FIPS code'
                    },
                    'COUNTYFP': {
                        'length': '3',
                        'type': 'String',
                        'descr': 'Current county FIPS code'
                    },
                    'NAME': {
                        'length': '100',
                        'type': 'String',
                        'descr': 'Current county subdivision name'
                    },
                    'COUSUBFP': {
                        'length': '5',
                        'type': 'String',
                        'descr': 'Current county subdivision FIPS code'
                    },
                    'NAMELSAD': {
                        'length': '100',
                        'type': 'String',
                        'descr': 'Current name and the translated legal/statistical area description code '
                                 'for county subdivision'
                    },
                    'FUNCSTAT': {
                        'length': '1',
                        'type': 'String',
                        'descr': 'Current functional status'
                    },
                }
            },
            'national_places.txt': {
                'delim': '|',
                'filename': 'national_places.txt',
                'fields': {
                    'STATE': {
                        'length': '2',
                        'type': 'String',
                        'descr': 'State Postal Code'
                    },
                    'STATEFP': {
                        'length': '2',
                        'type': 'String',
                        'descr': 'Current state FIPS code'
                    },
                    'PLACEFP': {
                        'length': '5',
                        'type': 'String',
                        'descr': 'Current place FIPS code'
                    },
                    'PLACENAME': {
                        'length': '100',
                        'type': 'String',
                        'descr': 'Current place name'
                    },
                    'TYPE': {
                        'length': '100',
                        'type': 'String',
                        'descr': 'Place Name Type'
                    },
                    'FUNCSTAT': {
                        'length': '1',
                        'type': 'String',
                        'descr': 'Current functional status'
                    },
                    'COUNTY': {
                        'length': '100',
                        'type': 'String',
                        'descr': 'Current County Name'
                    }
                }
            },
            'national_schdist.txt': {
                'delim': ',',
                'filename': 'national_schdist.txt',
                'fields': {
                    'STATE': {
                        'length': '2',
                        'type': 'String',
                        'descr': 'State Postal Code'
                    },
                    'STATEFP': {
                        'length': '2',
                        'type': 'String',
                        'descr': 'Current state FIPS code'
                    },
                    'LEA': {
                        'length': '5',
                        'type': 'String',
                        'descr': 'Federal Local Education Agency ID'
                    },
                    'SDNAME': {
                        'length': '100',
                        'type': 'String',
                        'descr': 'Current unified school district name'
                    },
                    'TYPE': {
                        'length': '100',
                        'type': 'String',
                        'descr': 'Current school district type'
                    }
                },
            },
            'national_vtd.txt': {
                'delim': '|',
                'filename': 'national_vtd.txt',
                'fields': {
                    'STATE': {
                        'length': '2',
                        'type': 'String',
                        'descr': 'State Postal Code'
                    },
                    'STATEFP': {
                        'length': '2',
                        'type': 'String',
                        'descr': 'Current state FIPS code'
                    },
                    'COUNTYFP': {
                        'length': '3',
                        'type': 'String',
                        'descr': 'County FIPS Code'
                    },
                    'COUNTYNAME': {
                        'length': '100',
                        'type': 'String',
                        'descr': 'County Name and Legal/Statistical Area Description'
                    },
                    'VTDST': {
                        'length': '',
                        'type': '',
                        'descr': 'Voter District'
                    },
                    'VTDNAME': {
                        'length': '',
                        'type': '',
                        'descr': 'Voter District Name'
                    }
                },
            }
        }

        # Opening a file for writing fails when its directory is missing,
        # so make sure ./data exists first.
        self.datapath.mkdir(parents=True, exist_ok=True)

        # save code class dict
        with self.fips_class_filepath.open('w', encoding='utf-8') as jout:
            json.dump(self.fips_class, jout)

        # Save code description dict
        with self.code_descr_filepath.open('w', encoding='utf-8') as jout:
            json.dump(self.code_categories, jout)


if __name__ == '__main__':
    CreateFIPScodeDescJson()

# This type of file definition allows for file content verification.

# ---------------------------------------------------------------------------
# Example: new version of ReadFipsCodeData.py with verification:
# ---------------------------------------------------------------------------
from pathlib import Path, PurePath
import json


class ReadFipsCodeData:
    """Read FIPS code files and verify each header against its description.

    The constructor loads ``code_descr.json`` and ``fips_class.json`` from
    ``./data``; ``build_national_code_db`` then checks the header line of
    every described file.
    """

    def __init__(self):
        self.fips_dirs = []
        self.homepath = Path('.')
        self.datapath = self.homepath / 'data'
        self.code_descr_filepath = self.datapath / 'code_descr.json'
        self.fips_class_filepath = self.datapath / 'fips_class.json'
        self.code_datapath = (
            self.homepath / '..' / 'data' / 'Codes' / 'ANSI-FIPS-Codes' / 'AllFiles'
        )
        with self.code_descr_filepath.open('r', encoding='utf-8') as jn:
            self.code_categories = json.load(jn)
        with self.fips_class_filepath.open('r', encoding='utf-8') as jn:
            self.fips_class = json.load(jn)

    def verify_file_content(self, header, filename, file_format):
        """Print the description of every header field and confirm it is known.

        Args:
            header: list of field names taken from the file's first line.
            filename: name printed as the heading for this file.
            file_format: description entry holding a ``'fields'`` dictionary.

        Returns:
            True when every field name in *header* is described.

        Raises:
            FileNotFoundError: when a header field (or the ``'fields'`` table
                itself) is missing from *file_format*.  The exception type is
                the one ``build_national_code_db`` handles.
        """
        print(f'\n{filename}')
        print(f'Header: {header}')
        try:
            for fieldname in header:
                field = file_format['fields'][fieldname]
                print(f'\n fieldname: {fieldname}')
                print(f" length: {field['length']}, type: {field['type']}")
                print(f" {field['descr']}")
        except KeyError as err:
            # Only a failed lookup means "not the expected file"; any other
            # error is a real bug and must not be disguised.
            raise FileNotFoundError(
                f'{filename}: header does not match description (missing {err})'
            ) from err
        return True

    def build_national_code_db(self):
        """Verify the header line of every file named in ``self.code_categories``.

        Only the first non-blank line of each file is examined; the remaining
        data rows are not processed yet.  A missing file or a header that
        fails verification prints a hint about the data directory.
        """
        try:
            for filename, value in self.code_categories.items():
                f = value['filename']
                filepath = self.code_datapath / f
                delimiter = value['delim']
                with filepath.open('r', encoding='ISO-8859-1') as fin:
                    # Iterate lazily: only the header line is needed for now.
                    for line in fin:
                        line = line.strip()
                        if not line:
                            continue
                        # 'spaces' is the marker stored in code_descr.json for
                        # whitespace-separated files; '' is accepted as well.
                        if delimiter in ('', 'spaces'):
                            header = line.split()
                        else:
                            header = line.split(delimiter)
                        if self.verify_file_content(header, filename, value):
                            break
        except FileNotFoundError:
            print(f'The directory {self.code_datapath.resolve()} does not contain FIPS data files.')
            print(f'Please supply proper directory name')


def main():
    cj = ReadFipsCodeData()
    cj.build_national_code_db()


if __name__ == '__main__':
    main()

# which produces:
# (the output listing is not included in this printable version of the thread)
|