I'm attempting to optimize extraction of values obtained from a REST API which returns json values in an array.
Here's an minimal, complete, and verifiable example that reflects exactly what I'm doing.
```
USE tempdb;
DROP TABLE IF EXISTS dbo.json_test;
CREATE TABLE dbo.json_test
(
json_test_id int NOT NULL
IDENTITY(1,1)
, some_uniqueidentifier uniqueidentifier NULL
, some_varchar varchar(100) NULL
, the_json nvarchar(max) NULL
);
INSERT INTO dbo.json_test (some_uniqueidentifier, some_varchar, the_json)
SELECT
some_uniqueidentifier = NEWID()
, some_varchar = CONVERT(varchar(100), CRYPT_GEN_RANDOM(64), 1)
, the_json = (
SELECT st.*
FROM sys.tables st
CROSS JOIN sys.tables st2
WHERE st.object_id = t.object_id FOR JSON AUTO
)
FROM sys.tables t;
;WITH src AS
(
SELECT jt.some_uniqueidentifier
, jt.some_varchar
, top_array.[key]
, top_array.[value]
FROM dbo.json_test jt
CROSS APPLY OPENJSON(jt.the_json, N'lax $') top_array
),
src2 AS
(
SELECT src.some_uniqueidentifier
, src.some_varchar
, src.[key]
, src.[value]
, inner_key = inner_array.[key]
, inner_value = inner_array.[value]
FROM src
CROSS APPLY OPENJSON(src.[value], N'lax $') inner_array
)
SELECT src2.some_uniqueidentifier
, src2.some_varchar
, src2.[key]
, [name] = MAX(CASE WHEN src2.[inner_key] = 'name' THEN src2.[inner_value] ELSE NULL END)
, [object_id] = MAX(CASE WHEN src2.[inner_key] = 'object_id' THEN src2.[inner_value] ELSE NULL END)
, [principal_id] = MAX(CASE WHEN src2.[inner_key] = 'principal_id' THEN src2.[inner_value] ELSE NULL END)
, [schema_id] = MAX(CASE WHEN src2.[inner_key] = 'schema_id' THEN src2.[inner_value] ELSE NULL END)
, [parent_object_id] = MAX(CASE WHEN src2.[inner_key] = 'parent_object_id' THEN src2.[inner_value] ELSE NULL END)
, [type] = MAX(CASE WHEN src2.[inner_key] = 'type' THEN src2.[inner_value] ELSE NULL END)
, [type_desc] = MAX(CASE WHEN src2.[inner_key] = 'type_desc' THEN src2.[inner_value] ELSE NULL END)
, [create_date] = MAX(CASE WHEN src2.[inner_key] = 'create_date' THEN src2.[inner_value] ELSE NULL END)
, [modify_date] = MAX(CASE WHEN src2.[inner_key] = 'modify_date' THEN src2.[inner_value] ELSE NULL END)
, [is_ms_shipped] = MAX(CASE WHEN src2.[inner_key] = 'is_ms_shipped' THEN src2.[inner_value] ELSE NULL END)
, [is_published] = MAX(CASE WHEN src2.[inner_key] = 'is_published' THEN src2.[inner_value] ELSE NULL END)
, [is_schema_published] = MAX(CASE WHEN src2.[inner_key] = 'is_schema_published' THEN src2.[inner_value] ELSE NULL END)
, [lob_data_space_id] = MAX(CASE WHEN src2.[inner_key] = 'lob_data_space_id' THEN src2.[inner_value] ELSE NULL END)
, [filestream_data_space_id] = MAX(CASE WHEN src2.[inner_key] = 'filestream_data_space_id' THEN src2.[inner_value] ELSE NULL END)
, [max_column_id_used] = MAX(CASE WHEN src2.[inner_key] = 'max_column_id_used' THEN src2.[inner_value] ELSE NULL END)
, [lock_on_bulk_load] = MAX(CASE WHEN src2.[inner_key] = 'lock_on_bulk_load' THEN src2.[inner_value] ELSE NULL END)
, [uses_ansi_nulls] = MAX(CASE WHEN src2.[inner_key] = 'uses_ansi_nulls' THEN src2.[inner_value] ELSE NULL END)
, [is_replicated] = MAX(CASE WHEN src2.[inner_key] = 'is_replicated' THEN src2.[inner_value] ELSE NULL END)
, [has_replication_filter] = MAX(CASE WHEN src2.[inner_key] = 'has_replication_filter' THEN src2.[inner_value] ELSE NULL END)
, [is_merge_published] = MAX(CASE WHEN src2.[inner_key] = 'is_merge_published' THEN src2.[inner_value] ELSE NULL END)
, [is_sync_tran_subscribed] = MAX(CASE WHEN src2.[inner_key] = 'is_sync_tran_subscribed' THEN src2.[inner_value] ELSE NULL END)
, [has_unchecked_assembly_data] = MAX(CASE WHEN src2.[inner_key] = 'has_unchecked_assembly_data' THEN src2.[inner_value] ELSE NULL END)
, [text_in_row_limit] = MAX(CASE WHEN src2.[inner_key] = 'text_in_row_limit' THEN src2.[inner_value] ELSE NULL END)
, [large_value_types_out_of_row] = MAX(CASE WHEN src2.[inner_key] = 'large_value_types_out_of_row' THEN src2.[inner_value] ELSE NULL END)
, [is_tracked_by_cdc] = MAX(CASE WHEN src2.[inner_key] = 'is_tracked_by_cdc' THEN src2.[inner_value] ELSE NULL END)
, [lock_escalation] = MAX(CASE WHEN src2.[inner_key] = 'lock_escalation' THEN src2.[inner_value] ELSE NULL END)
, [lock_escalation_desc] = MAX(CASE WHEN src2.[inner_key] = 'lock_escalation_desc' THEN src2.[inner_value] ELSE NULL END)
, [is_filetable] = MAX(CASE WHEN src2.[inner_key] = 'is_filetable' THEN src2.[inner_value] ELSE NULL END)
, [is_memory_optimized] = MAX(CASE WHEN src2.[inner_key] = 'is_memory_optimized' THEN src2.[inner_value] ELSE NULL END)
, [durability] = MAX(CASE WHEN src2.[inner_key] = 'durability' THEN src2.[inner_value] ELSE NULL END)
, [durability_desc] = MAX(CASE WHEN src2.[inner_key] = 'durability_desc' THEN src2.[inner_value] ELSE NULL END)
, [temporal_type] = MAX(CASE WHEN src2.[inner_key] = 'temporal_type' THEN src2.[inner_value] ELSE NULL END)
, [temporal_type_desc] = MAX(CASE WHEN src2.[inner_key] = 'temporal_type_desc' THEN src2.[inner_value] ELSE NULL END)
, [history_table_id] = MAX(CASE WHEN src2.[inner_key] = 'history_table_id' THEN src2.[inner_value] ELSE NULL END)
, [is_remote_data_archive_enabled] = MAX(CASE WHEN src2.[inner_key] = 'is_remote_data_archive_enabled' THEN src2.[inner_value] ELSE NULL END)
, [is_external] = MAX(CASE WHEN src2.[inner_key] = 'is_external' THEN src2.[inner_value] ELSE NULL END)
, [history_retention_period] = MAX(CASE WHEN src2.[inner_key] = 'history_retention_period' THEN src2.[inner_value] ELSE NULL END)
, [history_retention_period_unit] = MAX(CASE WHEN src2.[inner_key] = 'history_retention_period_unit' THEN src2.[inner_value] ELSE NULL END)
, [history_retention_period_unit_desc] = MAX(CASE WHEN src2.[inner_key] = 'history_retention_period_unit_desc' THEN src2.[inner_value] ELSE NULL END)
, [is_node] = MAX(CASE WHEN src2.[inner_key] = 'is_node' THEN src2.[inner_value] ELSE NULL END)
, [is_edge] = MAX(CASE WHEN src2.[inner_key] = 'is_edge' THEN src2.[inner_value] ELSE NULL END)
FROM src2
GROUP BY src2.some_uniqueidentifier
, src2.some_varchar
, src2.[key]
ORDER BY src2.some_uniqueidentifier
, src2.some_varchar
, src2.[key];
```
The [query plan](https://www.brentozar.com/pastetheplan/?id=Skcotv1AO) uses a couple of nested loops joins regardless of how many rows are contained in the input table. Presumably that's an artifact of using the `CROSS APPLY` operator. For your fun, I've already set up a [DB Fiddle](https://dbfiddle.uk/?rdbms=sqlserver_2019&sample=adventureworks&fiddle=a548cf359ff55fa77a16677cc05050d6).
Is there a more efficient way of getting the data out of json format into a "real" set of columns?
I created the code above using my on-prem SQL Server 2019 instance, however the target will be Azure SQL Database, so all the latest-and-greatest options are available.