Merge pull request #13109 from mkllnk/product-type-lookup

DFC import: Find broader taxon if we don't have a specific one
This commit is contained in:
Maikel
2025-02-04 15:56:13 +11:00
committed by GitHub
5 changed files with 100 additions and 102 deletions

View File

@@ -1,47 +0,0 @@
# frozen_string_literal: true
require 'singleton'
# Process-wide index of DFC product types, keyed by their semantic id.
#
# The index is built once, when the singleton instance is first created, by
# walking the connector's product type taxonomy from its top concepts down.
class DfcProductTypeFactory
  include Singleton

  # Convenience shortcut for `instance.for(dfc_id)`.
  def self.for(dfc_id)
    instance.for(dfc_id)
  end

  def initialize
    @product_types = {}
    populate_product_types
  end

  # Returns the product type registered under the given semantic id,
  # or nil when no such id was recorded.
  def for(dfc_id)
    @product_types[dfc_id]
  end

  private

  # Root object of the product type taxonomy as exposed by the connector.
  def taxonomy
    DfcLoader.connector.PRODUCT_TYPES
  end

  # Registers every top concept together with everything below it.
  def populate_product_types
    taxonomy.topConcepts.each do |top_concept|
      record_type(taxonomy, top_concept.to_s)
    end
  end

  # Stores the type reachable as `product_type_object.<product_type>` and
  # then descends into its narrower types.
  def record_type(product_type_object, product_type)
    concept = product_type_object.public_send(product_type.to_s)
    @product_types[concept.semanticId] = concept

    # Narrower types are exposed as singleton methods on the concept itself.
    # A leaf has none, which ends the recursion.
    concept.methods(false).sort.each do |narrower|
      record_type(concept, narrower)
    end
  end
end

View File

@@ -0,0 +1,31 @@
# frozen_string_literal: true
# Resolves DFC product types to taxons stored in the database.
class ProductTypeImporter < DfcBuilder
  # Find the taxon that most closely matches the given product type.
  #
  # The type itself is tried first, then its broader concepts in the order
  # returned by `list_broaders`. If none of them is linked to a taxon (or no
  # type was given) we fall back to whatever `Spree::Taxon.first` returns.
  def self.taxon(product_type)
    candidates = [product_type, *list_broaders(product_type)].compact

    # Optimistic querying: one lookup per candidate, evaluated lazily, so we
    # stop hitting the database at the first match.
    # Fetching all broader taxons in a single query would still require
    # sorting them locally and use more memory (a pessimistic query).
    # Consider caching the result instead.
    match = candidates.lazy.filter_map { |candidate|
      Spree::Taxon.find_by(dfc_id: candidate.semanticId)
    }.first

    match || Spree::Taxon.first
  end

  # List the broader concepts of a type: its direct broaders first, followed
  # by the broaders of each of those, recursively. Returns an empty array
  # when no type is given.
  def self.list_broaders(type)
    return [] if type.nil?

    parents = type.broaders.map { |id|
      DataFoodConsortium::Connector::SKOSParser.concepts[id]
    }
    ancestors = parents.flat_map { |parent| list_broaders(parent) }

    parents + ancestors
  end
end

View File

@@ -107,15 +107,11 @@ class SuppliedProductBuilder < DfcBuilder
# Looks up the DFC product type for a variant, using the dfc_id of the
# variant's primary taxon as the key. The key is nil when the variant has no
# primary taxon (safe navigation).
# NOTE(review): this is a diff hunk rendered without +/- markers, so the
# removed and the added lookup line both appear below. Presumably the
# DfcProductTypeFactory call is the removed one - confirm against the diff.
def self.product_type(variant)
taxon_dfc_id = variant.primary_taxon&.dfc_id
DfcProductTypeFactory.for(taxon_dfc_id)
DataFoodConsortium::Connector::SKOSParser.concepts[taxon_dfc_id]
end
# Picks the taxon for a supplied product based on its product type.
# NOTE(review): this is a diff hunk rendered without +/- markers, so removed
# and added lines are interleaved. Presumably the direct `find_by` lookup with
# its comment is the removed version and the ProductTypeImporter call is the
# replacement - confirm against the diff.
def self.taxon(supplied_product)
dfc_id = supplied_product.productType&.semanticId
# Every product needs a primary taxon to be valid. So if we don't have
# one or can't find it we just take a random one.
Spree::Taxon.find_by(dfc_id:) || Spree::Taxon.first
ProductTypeImporter.taxon(supplied_product.productType)
end
# Make the two class-level helpers above private to this class.
private_class_method :product_type, :taxon

View File

@@ -1,49 +0,0 @@
# frozen_string_literal: true
require_relative "../spec_helper"
# Checks that the factory resolves semantic ids at every depth of the product
# type taxonomy and yields nil for ids it does not know.
RSpec.describe DfcProductTypeFactory do
  describe ".for" do
    subject(:found) { described_class.for(dfc_id) }

    let(:product_types) { DfcLoader.connector.PRODUCT_TYPES }
    let(:dfc_id) do
      "https://github.com/datafoodconsortium/taxonomies/releases/latest/download/productTypes.rdf#drink"
    end

    it "assigns a top level product type" do
      expected = product_types.DRINK

      expect(found.semanticId).to eq(expected.semanticId)
    end

    context "with second level product type" do
      let(:dfc_id) do
        "https://github.com/datafoodconsortium/taxonomies/releases/latest/download/productTypes.rdf#soft-drink"
      end

      it "assigns a second level product type" do
        expected = product_types.DRINK.SOFT_DRINK

        expect(found.semanticId).to eq(expected.semanticId)
      end
    end

    context "with leaf level product type" do
      let(:dfc_id) do
        "https://github.com/datafoodconsortium/taxonomies/releases/latest/download/productTypes.rdf#lemonade"
      end

      it "assigns a leaf level product type" do
        expected = product_types.DRINK.SOFT_DRINK.LEMONADE

        expect(found.semanticId).to eq(expected.semanticId)
      end
    end

    context "with non existing product type" do
      let(:dfc_id) { "other" }

      it "returns nil" do
        expect(found).to be_nil
      end
    end
  end
end

View File

@@ -0,0 +1,67 @@
# frozen_string_literal: true
require_relative "../spec_helper"
# Covers taxon resolution (exact match, broader fallback, last-resort
# fallback, lazy querying) and the listing of broader concepts.
RSpec.describe ProductTypeImporter do
  # Three levels of the same branch of the product type taxonomy.
  let(:drink) { DfcLoader.connector.PRODUCT_TYPES.DRINK }
  let(:soft_drink) { drink.SOFT_DRINK }
  let(:lemonade) { soft_drink.LEMONADE }

  describe ".taxon" do
    it "finds a linked taxon" do
      create(:taxon, dfc_id: soft_drink.semanticId)
      lemonade_taxon = create(:taxon, dfc_id: lemonade.semanticId)

      result = described_class.taxon(lemonade)

      expect(result).to eq(lemonade_taxon)
    end

    it "falls back to a broader taxon" do
      drink_taxon = create(:taxon, dfc_id: drink.semanticId)

      result = described_class.taxon(lemonade)

      expect(result).to eq(drink_taxon)
    end

    it "returns random taxon when none can be found" do
      only_taxon = create(:taxon)

      result = described_class.taxon(lemonade)

      expect(result).to eq(only_taxon)
    end

    it "queries the database only until it found a taxon" do
      soft_drink_taxon = create(:taxon, dfc_id: soft_drink.semanticId)

      expect do
        expect(described_class.taxon(lemonade)).to eq(soft_drink_taxon)
      end.to query_database [
        "Spree::Taxon Load", # query for lemonade, not found
        "Spree::Taxon Load", # query for soft drink, found
        # no query for drink
      ]
    end
  end

  describe ".list_broaders" do
    it "returns an empty array if no type is given" do
      expect(described_class.list_broaders(nil)).to eq([])
    end

    it "can return an empty list for top concepts" do
      expect(described_class.list_broaders(drink)).to eq([])
    end

    it "lists the broader concepts of a type" do
      expect(described_class.list_broaders(soft_drink)).to eq([drink])
    end

    it "lists all the broader concepts to the top concepts" do
      expect(described_class.list_broaders(lemonade)).to eq([soft_drink, drink])
    end
  end
end