Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
ae8eca7
FOLIOSYNC-7 create FolioToHyacinth::MarcDownloader; paginate source-r…
wtomaszewska Oct 1, 2025
d7451c1
FOLIOSYNC-3 save SRS MARC if 965hyacinth value is found; add method f…
wtomaszewska Oct 1, 2025
79e280d
FOLIOSYNC-7 modify source records retrieval to return MARC object; us…
wtomaszewska Oct 2, 2025
8d7b6c5
FOLIOSYNC-7 use yml config to determine downloads dir, use folio-api …
wtomaszewska Oct 2, 2025
50e14e5
FOLIOSYNC-7 add tests for MarcDownloader class, catch errors
wtomaszewska Oct 8, 2025
bcb65da
FOLIOSYNC-7 use github reference when using folio api gem
wtomaszewska Oct 8, 2025
47bbc45
FOLIOSYNC-8 set up Hyacinth API client with Finders module
wtomaszewska Oct 9, 2025
67cc551
FOLIOSYNC-7 throw an exception if 001 field is missing and skip savin…
wtomaszewska Oct 10, 2025
3ea5ff2
FOLIOSYNC-8 working create + update methods; use Retriable and Farada…
wtomaszewska Oct 14, 2025
7e00c42
FOLIOSYNC-7 update MarcDownloader + tests to match the new find_sourc…
wtomaszewska Oct 16, 2025
c73c6e3
FOLIOSYNC-7 use the main branch when using Folio API Client
wtomaszewska Oct 16, 2025
f58033e
Merge branch 'feature/FOLIOSYNC-7-prepare-files-for-hyacinth-sync' of…
wtomaszewska Oct 16, 2025
3dca47b
FOLIOSYNC-8 move gem-specific files under hyacinth_api directory; use…
wtomaszewska Oct 16, 2025
549a136
FOLIOSYNC-8 add tests for FolioSync::Hyacinth::Client; rubocop fixes,…
wtomaszewska Oct 16, 2025
3524e79
FOLIOSYNC-8 add hyacinth.yml template, add symlinks
wtomaszewska Oct 16, 2025
1fce0d3
Merge pull request #39 from cul/feature/FOLIOSYNC-8-port-hyacinth-cli…
wtomaszewska Oct 17, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@
/config/folio.yml
/config/folio_sync.yml
/config/folio_requests.yml
/config/folio_to_hyacinth.yml
/config/hyacinth.yml

# Ignore Mac system files
.DS_Store
Expand Down
25 changes: 19 additions & 6 deletions .rubocop_todo.yml
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# This configuration was generated by
# `rubocop --auto-gen-config`
# on 2025-09-19 00:18:41 UTC using RuboCop version 1.78.0.
# on 2025-10-16 16:06:50 UTC using RuboCop version 1.78.0.
# The point is for the user to remove these configuration records
# one by one as the offenses are removed from the code base.
# Note that changes in the inspected code, or installation of new
# versions of RuboCop, may require this file to be generated again.

# Offense count: 17
# Offense count: 19
# Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes.
Metrics/AbcSize:
Max: 34
Expand All @@ -16,12 +16,12 @@ Metrics/AbcSize:
Metrics/ClassLength:
Max: 144

# Offense count: 1
# Offense count: 2
# Configuration parameters: AllowedMethods, AllowedPatterns.
Metrics/CyclomaticComplexity:
Max: 8

# Offense count: 18
# Offense count: 21
# Configuration parameters: CountComments, Max, CountAsOne, AllowedMethods, AllowedPatterns.
Metrics/MethodLength:
Exclude:
Expand All @@ -35,7 +35,19 @@ Metrics/MethodLength:
- 'lib/folio_sync/archives_space_to_folio/job_result_processor.rb'
- 'lib/folio_sync/archives_space_to_folio/marc_record_enhancer.rb'
- 'lib/folio_sync/archives_space_to_folio/record_processor.rb'
- 'lib/folio_sync/folio_to_hyacinth/marc_downloader.rb'
- 'lib/folio_sync/rake/error_logger.rb'
- 'lib/hyacinth_api/digital_objects.rb'

# Offense count: 1
# This cop supports unsafe autocorrection (--autocorrect-all).
# Configuration parameters: Whitelist, AllowedMethods, AllowedReceivers.
# Whitelist: find_by_sql, find_by_token_for
# AllowedMethods: find_by_sql, find_by_token_for
# AllowedReceivers: Gem::Specification, page
Rails/DynamicFindBy:
Exclude:
- 'lib/tasks/hyacinth_sync.rake'

# Offense count: 2
# Configuration parameters: Include.
Expand All @@ -53,15 +65,16 @@ Rails/FindEach:
Exclude:
- 'lib/tasks/test_create_or_update.rake'

# Offense count: 2
# Offense count: 4
# This cop supports unsafe autocorrection (--autocorrect-all).
# Configuration parameters: Include.
# Include: **/app/**/*.rb, **/config/**/*.rb, db/**/*.rb, **/lib/**/*.rb
Rails/Output:
Exclude:
- 'lib/folio_sync/rake/env_validator.rb'
- 'lib/hyacinth_api/digital_objects.rb'

# Offense count: 15
# Offense count: 24
# This cop supports safe autocorrection (--autocorrect).
# Configuration parameters: AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, IgnoreCopDirectives, AllowedPatterns, SplitStrings.
# URISchemes: http, https
Expand Down
5 changes: 4 additions & 1 deletion Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,10 @@ gem 'actionmailer', '~> 8.0.2'

gem 'marc'

gem 'folio_api_client', '~> 0.4.0'
# gem 'folio_api_client', '~> 0.4.0'
gem "folio_api_client", github: "cul/folio_api_client", branch: "main"

gem 'faraday', '~> 2.14'

gem 'retriable', '~> 3.1'

Expand Down
19 changes: 13 additions & 6 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
GIT
remote: https://github.com/cul/folio_api_client.git
revision: 31b4fa338695801922fd63659d1a07ced3f7ab54
branch: main
specs:
folio_api_client (0.4.3)
faraday (~> 2.13)
marc (~> 1.3)
zeitwerk (~> 2.7)

GEM
remote: https://rubygems.org/
specs:
Expand Down Expand Up @@ -134,16 +144,12 @@ GEM
factory_bot_rails (6.5.0)
factory_bot (~> 6.5)
railties (>= 6.1.0)
faraday (2.13.2)
faraday (2.14.0)
faraday-net_http (>= 2.0, < 3.5)
json
logger
faraday-net_http (3.4.1)
net-http (>= 0.5.0)
folio_api_client (0.4.0)
faraday (~> 2.13)
marc (~> 1.3)
zeitwerk (~> 2.7)
globalid (1.2.1)
activesupport (>= 6.1)
httparty (0.23.1)
Expand Down Expand Up @@ -426,7 +432,8 @@ DEPENDENCIES
capistrano-rails (~> 1.4)
debug
factory_bot_rails
folio_api_client (~> 0.4.0)
faraday (~> 2.14)
folio_api_client!
kamal
marc
mysql2
Expand Down
5 changes: 5 additions & 0 deletions config/application.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,16 @@

module FolioSync
class Application < Rails::Application
# API client configurations
config.archivesspace = config_for(:archivesspace)
config.folio = config_for(:folio)
config.hyacinth = config_for(:hyacinth)

# Script configurations
config.folio_sync = config_for(:folio_sync)
config.folio_requests = config_for(:folio_requests)
config.folio_holdings = config_for(:folio_holdings)
config.folio_to_hyacinth = config_for(:folio_to_hyacinth)

# Initialize configuration defaults for originally generated Rails version.
config.load_defaults 8.0
Expand Down
2 changes: 2 additions & 0 deletions config/deploy.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,10 @@
'config/database.yml',
'config/folio.yml',
'config/archivesspace.yml',
'config/hyacinth.yml',
'config/folio_sync.yml',
'config/folio_requests.yml',
'config/folio_to_hyacinth.yml',
'config/master.key'

# Default value for linked_dirs is []
Expand Down
10 changes: 10 additions & 0 deletions config/initializers/folio_sync.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,19 @@
begin
logger = Logger.new($stdout)
folio_config = Rails.configuration.folio_sync
folio_to_hyacinth_config = Rails.configuration.folio_to_hyacinth
aspace_to_folio_config = folio_config[:aspace_to_folio]

if folio_config.blank?
logger.error('FOLIO sync configuration is missing or empty')
throw 'Please make sure the folio_sync.yml file is present and properly configured.'
end

if folio_to_hyacinth_config.blank?
logger.error('FOLIO to Hyacinth configuration is missing or empty')
throw 'Please make sure the folio_to_hyacinth.yml file is present and properly configured.'
end

aspace_instances = aspace_to_folio_config[:aspace_instances] || {}

if aspace_instances.empty?
Expand Down Expand Up @@ -38,6 +44,10 @@
throw 'Please make sure the folio_sync.yml file contains a prepared_marc_directory'
end

# Prepare downloads directory for FOLIO to Hyacinth sync
FileUtils.mkdir_p(folio_to_hyacinth_config[:download_directory])

# Prepare subdirectories for ArchivesSpace to FOLIO sync
aspace_instances.each_key do |instance_name|
downloads_instance_dir = File.join(downloaded_files_dir, instance_name.to_s)
prepared_instance_dir = File.join(prepared_files_dir, instance_name.to_s)
Expand Down
5 changes: 5 additions & 0 deletions config/templates/folio_to_hyacinth.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Per-environment settings for the FOLIO to Hyacinth sync.
# download_directory: where downloaded MARC records are written as <001>.mrc files.
# The folio_sync initializer creates this directory at boot if it does not exist.
development:
download_directory: <%= Rails.root.join('tmp/working_data/development/folio_to_hyacinth/downloaded_files') %>

test:
download_directory: <%= Rails.root.join('tmp/working_data/test/folio_to_hyacinth/downloaded_files') %>
9 changes: 9 additions & 0 deletions config/templates/hyacinth.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Per-environment connection settings for the Hyacinth API client.
# url is used as the base URL of the HTTP connection; email and password are
# combined into the HTTP Basic Authorization header sent with every request.
# The values below are placeholders; the real config/hyacinth.yml is gitignored.
development:
url: https://example-dev.library.edu/api
email: example-email
password: example-password

test:
url: https://example-test.library.edu/api
email: example-email
password: example-password
1 change: 1 addition & 0 deletions lib/folio_sync/exceptions.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ class ArchivesSpaceRequestError < FolioSyncException; end
class FolioRequestError < FolioSyncException; end

class InvalidDatabaseState < FolioSyncException; end
class Missing001Field < FolioSyncException; end

class JobExecutionStartTimeoutError < FolioSyncException; end
class JobExecutionInactivityTimeoutError < FolioSyncException; end
Expand Down
79 changes: 79 additions & 0 deletions lib/folio_sync/folio_to_hyacinth/marc_downloader.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# frozen_string_literal: true

class FolioSync::FolioToHyacinth::MarcDownloader
attr_reader :downloading_errors

# Sets up a downloader with an empty error list and the shared FOLIO client.
# Failures recorded during a bulk download are exposed through `downloading_errors`.
def initialize
  @downloading_errors = []
  @folio_client = FolioSync::Folio::Client.instance
  # NOTE(review): @folio_reader is not referenced by any method visible in this class — confirm it is still needed.
  @folio_reader = FolioSync::Folio::Reader.new
end

# Downloads every SRS MARC bibliographic record that carries a 965 field whose
# subfield $a equals '965hyacinth', optionally limited to records modified
# within the last `last_x_hours` hours.
#
# Passing `nil` (the default) for `last_x_hours` downloads ALL '965hyacinth'
# records regardless of modification time.
#
# A failure to save one record does not stop the run: the message is appended
# to `downloading_errors`, logged, and iteration continues with the next record.
#
# @param [Numeric, nil] last_x_hours size of the look-back window in hours
def download_965hyacinth_marc_records(last_x_hours = nil)
  cutoff = last_x_hours ? (Time.now.utc - (3600 * last_x_hours)).utc.iso8601 : nil
  scope_description = cutoff ? " modified since: #{cutoff}" : ' (all records)'
  Rails.logger.info("Downloading MARC with 965hyacinth#{scope_description}")

  @folio_client.find_source_marc_records(modified_since: cutoff, with_965_value: '965hyacinth') do |parsed_record|
    # The client already filtered on the "965hyacinth" identifier; re-check here
    # that the identifier really lives in 965$a before writing anything.
    next unless has_965hyacinth_field?(parsed_record)

    begin
      save_marc_record_to_file(parsed_record)
    rescue StandardError => e
      failure = "Failed to save MARC record #{extract_id(parsed_record) || 'unknown'}: #{e.message}"
      @downloading_errors << failure
      Rails.logger.error(failure)
    end
  end
end

# Tells whether a MARC record (in Hash form) has at least one 965 field with a
# subfield $a equal to '965hyacinth'.
#
# A nil record, or a record whose 'fields' entry is missing, is treated as
# "no such field" instead of raising, so callers can pass the result of a
# failed lookup straight in.
#
# @param [Hash, nil] marc_record A MARC record represented as a Hash
# @return [Boolean]
def has_965hyacinth_field?(marc_record)
  fields = marc_record && marc_record['fields']

  Array(fields).any? do |field|
    field_965 = field['965']
    next false unless field_965

    Array(field_965['subfields']).any? { |subfield| subfield['a'] == '965hyacinth' }
  end
end

# Writes a MARC record (in Hash form) to `<download_directory>/<001 value>.mrc`
# as binary MARC.
#
# The 001 value supplies the file name. A record without a usable 001 cannot be
# named, so it is treated as an exceptional case and rejected instead of being
# written under a placeholder name. The raised error is a FolioSyncException
# subclass, so bulk callers that rescue per-record errors can log it and move on.
#
# @param [Hash] marc_record A MARC record represented as a Hash
# @return [Integer] number of bytes written
# @raise [FolioSync::Exceptions::Missing001Field] if the record has no 001 value
def save_marc_record_to_file(marc_record)
  config = Rails.configuration.folio_to_hyacinth
  filename = extract_id(marc_record)

  if filename.nil? || filename.to_s.strip.empty?
    raise FolioSync::Exceptions::Missing001Field, 'MARC record is missing required 001 field'
  end

  file_path = File.join(config[:download_directory], "#{filename}.mrc")
  formatted_marc = MARC::Record.new_from_hash(marc_record)

  Rails.logger.info("Saving MARC record with 001=#{filename} to #{file_path}")
  # File.binwrite stringifies a non-String argument with #to_s, and
  # MARC::Record#to_s is the human-readable dump. Serialize explicitly so the
  # .mrc file contains transmission-format (ISO 2709) MARC.
  File.binwrite(file_path, formatted_marc.to_marc)
end

# Downloads a single SRS MARC record to the download directory.
#
# Raises if no source record (or no parsed MARC content) is returned for the
# given `folio_hrid`, and also if the record does NOT have at least one 965
# field with a subfield $a value of '965hyacinth'.
#
# @param [String] folio_hrid HRID of the FOLIO instance record to download
# @raise [RuntimeError] when the record is missing or is not a '965hyacinth' record
def download_single_965hyacinth_marc_record(folio_hrid)
  source_record = @folio_client.find_source_record(instance_record_hrid: folio_hrid)
  marc_record = source_record&.dig('parsedRecord', 'content')

  # Without this guard a missing record surfaces as a NoMethodError on nil,
  # which hides the actual problem from whoever reads the log.
  raise "Source record with HRID #{folio_hrid} was not found or has no parsed MARC content." if marc_record.nil?

  unless has_965hyacinth_field?(marc_record)
    raise "Source record with HRID #{folio_hrid} doesn't have a 965 field with subfield $a value of '965hyacinth'."
  end

  save_marc_record_to_file(marc_record)
end

# Returns the value of the record's first 001 field, or nil when the record has
# no 'fields' entry or none of its fields carries a 001 value.
#
# @param [Hash] marc_record A MARC record represented as a Hash
# @return [Object, nil] the 001 value as stored in the record
def extract_id(marc_record)
  fields = marc_record['fields']
  return nil if fields.nil?

  fields.each do |field|
    control_number = field['001']
    return control_number if control_number
  end

  nil
end
Comment on lines 75 to 78
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be a problem if the record is missing an 001 (and hopefully that will never happen!), so I think it would be good to have this method return nil if no 001 is present instead of returning string 'unknown'. I left a couple of other comments above about the side effects of returning nil.

The overall restructuring I'm proposing will prevent the (hopefully rare/impossible) case where we would write out an "unknown.mrc" file to disk. And I think the code change would also make it clearer that a record with a missing 001 value is an exceptional case that would cause issues generally.

end
15 changes: 15 additions & 0 deletions lib/folio_sync/hyacinth/client.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# frozen_string_literal: true

# Application-level Hyacinth client, built from the url, email and password
# found in Rails.configuration.hyacinth.
class FolioSync::Hyacinth::Client < HyacinthApi::Client
  # HyacinthApi will be extracted to a gem in the future

  # Returns the memoized client, building it from the Rails configuration on first use.
  def self.instance
    return @instance if @instance

    settings = Rails.configuration.hyacinth
    @instance = new(
      HyacinthApi::Configuration.new(
        url: settings['url'],
        email: settings['email'],
        password: settings['password']
      )
    )
  end
end
66 changes: 66 additions & 0 deletions lib/hyacinth_api/client.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# frozen_string_literal: true

# JSON-over-HTTP client for the Hyacinth API. Every request carries an HTTP
# Basic Authorization header built from the configured email and password.
# Mixes in HyacinthApi::Finders and HyacinthApi::DigitalObjects (defined elsewhere).
class HyacinthApi::Client
  include HyacinthApi::Finders
  include HyacinthApi::DigitalObjects

  attr_reader :config

  # @param config [HyacinthApi::Configuration] object responding to url, email, password and timeout
  def initialize(config)
    @config = config
    @auth_token = Base64.strict_encode64("#{@config.email}:#{@config.password}")
  end

  # Returns the memoized client for this class, creating it on first use.
  #
  # `initialize` requires a configuration, so the first call has to supply one;
  # later calls may omit it and receive the existing instance. Subclasses that
  # know where their configuration lives can override this method.
  #
  # @param config [HyacinthApi::Configuration, nil] configuration used only when no instance exists yet
  # @raise [ArgumentError] if no instance exists yet and no configuration is given
  def self.instance(config = nil)
    return @instance if @instance

    if config.nil?
      raise ArgumentError, 'a HyacinthApi::Configuration is required to build the first HyacinthApi::Client instance'
    end

    @instance = new(config)
  end

  # Core HTTP methods. Each one returns the parsed JSON body (see #handle_response).

  # @param path [String] request path
  # @param params [Hash] query parameters
  def get(path, params = {})
    response = connection.get(path, params)
    handle_response(response)
  end

  # @param path [String] request path
  # @param data [Hash] payload, sent as a JSON body
  def post(path, data = {})
    response = connection.post(path, data.to_json)
    handle_response(response)
  end

  # @param path [String] request path
  # @param data [Hash] payload, sent as a JSON body
  def put(path, data = {})
    response = connection.put(path, data.to_json)
    handle_response(response)
  end

  # @param path [String] request path
  def delete(path)
    response = connection.delete(path)
    handle_response(response)
  end

  # Memoized Faraday connection pointed at the configured URL.
  def connection
    @connection ||= Faraday.new(
      url: @config.url,
      headers: headers,
      request: { timeout: @config.timeout }
    ) do |faraday|
      # Register middleware first and the adapter last, per Faraday convention.
      faraday.use Faraday::Response::RaiseError
      faraday.adapter Faraday.default_adapter
    end
  end

  # Default headers sent with every request.
  def headers
    {
      'Accept' => 'application/json, text/plain',
      'Content-Type' => 'application/json',
      'Authorization' => "Basic #{@auth_token}"
    }
  end

  # Parses a response body as JSON.
  #
  # @return [Hash, Array] the parsed body, or an empty Hash when the body is blank
  # @raise [RuntimeError] when the body is not valid JSON
  def handle_response(response)
    return {} if response.body.blank?

    JSON.parse(response.body)
  rescue JSON::ParserError => e
    Rails.logger.error("Invalid JSON response: #{response.body}")
    raise "Invalid JSON response: #{e.message}"
  end
end
16 changes: 16 additions & 0 deletions lib/hyacinth_api/configuration.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# frozen_string_literal: true

module HyacinthApi
  # Read-only bundle of the settings a Hyacinth client needs.
  class Configuration
    # Timeout applied when the caller does not pass one.
    # NOTE(review): presumably seconds, since it is handed to the HTTP connection as its request timeout — confirm.
    DEFAULT_TIMEOUT = 60

    # Base URL of the Hyacinth API.
    attr_reader :url
    # Credentials used to authenticate against the API.
    attr_reader :email, :password
    # Request timeout for HTTP calls.
    attr_reader :timeout

    # @param url [String]
    # @param email [String]
    # @param password [String]
    # @param timeout [Numeric] defaults to DEFAULT_TIMEOUT
    def initialize(url:, email:, password:, timeout: DEFAULT_TIMEOUT)
      @timeout = timeout
      @password = password
      @email = email
      @url = url
    end
  end
end
Loading